various: markdown improvements

Signed-off-by: NotAShelf <raf@notashelf.dev>
Change-Id: I81fda8247814da19eed1e76dbe97bd5b6a6a6964
This commit is contained in:
raf 2026-02-05 15:39:05 +03:00
commit 80a8b5c7ca
Signed by: NotAShelf
GPG key ID: 29D95B64378DB4BF
23 changed files with 3458 additions and 30 deletions

View file

@ -1,7 +1,14 @@
use dioxus::prelude::*;
/// Event handler for wikilink clicks. Called with the target note name.
pub type WikilinkClickHandler = EventHandler<String>;
#[component]
pub fn MarkdownViewer(content_url: String, media_type: String) -> Element {
pub fn MarkdownViewer(
content_url: String,
media_type: String,
#[props(default)] on_wikilink_click: Option<WikilinkClickHandler>,
) -> Element {
let mut rendered_html = use_signal(String::new);
let mut frontmatter_html = use_signal(|| Option::<String>::None);
let mut loading = use_signal(|| true);
@ -133,6 +140,9 @@ fn pod_to_display(pod: &gray_matter::Pod) -> String {
fn render_markdown(text: &str) -> String {
use pulldown_cmark::{Options, Parser, html};
// First, convert wikilinks to standard markdown links
let text_with_links = convert_wikilinks(text);
let mut options = Options::empty();
options.insert(Options::ENABLE_TABLES);
options.insert(Options::ENABLE_STRIKETHROUGH);
@ -140,12 +150,47 @@ fn render_markdown(text: &str) -> String {
options.insert(Options::ENABLE_FOOTNOTES);
options.insert(Options::ENABLE_HEADING_ATTRIBUTES);
let parser = Parser::new_ext(text, options);
let parser = Parser::new_ext(&text_with_links, options);
let mut html_output = String::new();
html::push_html(&mut html_output, parser);
// Strip script tags for safety
strip_script_tags(&html_output)
// Sanitize HTML using ammonia with a safe allowlist
sanitize_html(&html_output)
}
/// Convert wikilink syntax in `text` into inline HTML.
///
/// Two forms are rewritten, in this order:
///
/// * Embeds, `![[target]]` or `![[target|alt]]`, become a
///   `<span class="wikilink-embed" data-target="…">[Embed: alt]</span>` placeholder.
/// * Links, `[[target]]` or `[[target|display]]`, become
///   `<a href="#wikilink" class="wikilink" data-wikilink-target="…">display</a>`.
///
/// Targets and labels are trimmed and HTML-escaped. Only data attributes are
/// emitted (no inline JavaScript); click handling is expected to be attached
/// by the frontend.
///
/// A match whose target is empty after trimming is left untouched, and an
/// empty label falls back to the target, so no invisible elements are produced.
///
/// NOTE(review): this operates on the raw text, so wikilink syntax that appears
/// inside Markdown code spans or fenced code blocks is rewritten as well.
fn convert_wikilinks(text: &str) -> String {
    use regex::{Captures, Regex};
    use std::sync::OnceLock;

    // Compile each pattern once per process instead of on every call.
    static EMBED_RE: OnceLock<Regex> = OnceLock::new();
    static WIKILINK_RE: OnceLock<Regex> = OnceLock::new();

    // Embeds are handled first so that the `[[…]]` tail of `![[…]]` is not
    // picked up by the plain wikilink pattern below.
    let embed_re = EMBED_RE.get_or_init(|| {
        Regex::new(r"!\[\[([^\]|]+)(?:\|([^\]]+))?\]\]")
            .expect("embed wikilink pattern is a valid regex")
    });
    let text = embed_re.replace_all(text, |caps: &Captures<'_>| {
        let target = caps[1].trim();
        if target.is_empty() {
            // Nothing to point at: keep the original text verbatim.
            return caps[0].to_string();
        }
        let alt = caps
            .get(2)
            .map(|m| m.as_str().trim())
            .filter(|alt| !alt.is_empty())
            .unwrap_or(target);
        format!(
            "<span class=\"wikilink-embed\" data-target=\"{}\" title=\"Embed: {}\">[Embed: {}]</span>",
            escape_html_attr(target),
            escape_html_attr(target),
            escape_html(alt)
        )
    });

    // Plain wikilinks: [[target]] or [[target|display]].
    let wikilink_re = WIKILINK_RE.get_or_init(|| {
        Regex::new(r"\[\[([^\]|]+)(?:\|([^\]]+))?\]\]")
            .expect("wikilink pattern is a valid regex")
    });
    let text = wikilink_re.replace_all(&text, |caps: &Captures<'_>| {
        let target = caps[1].trim();
        if target.is_empty() {
            return caps[0].to_string();
        }
        let display = caps
            .get(2)
            .map(|m| m.as_str().trim())
            .filter(|display| !display.is_empty())
            .unwrap_or(target);
        format!(
            "<a href=\"#wikilink\" class=\"wikilink\" data-wikilink-target=\"{}\">{}</a>",
            escape_html_attr(target),
            escape_html(display)
        )
    });

    // `into_owned` reuses the buffer when a replacement already allocated one.
    text.into_owned()
}
fn render_plaintext(text: &str) -> String {
@ -153,6 +198,7 @@ fn render_plaintext(text: &str) -> String {
format!("<pre><code>{escaped}</code></pre>")
}
/// Escape text for display in HTML content.
fn escape_html(text: &str) -> String {
text.replace('&', "&amp;")
.replace('<', "&lt;")
@ -160,21 +206,59 @@ fn escape_html(text: &str) -> String {
.replace('"', "&quot;")
}
fn strip_script_tags(html: &str) -> String {
// Simple removal of <script> tags
let mut result = html.to_string();
while let Some(start) = result.to_lowercase().find("<script") {
if let Some(end) = result.to_lowercase()[start..].find("</script>") {
result = format!(
"{}{}",
&result[..start],
&result[start + end + "</script>".len()..]
);
} else {
// Malformed script tag - remove to end
result = result[..start].to_string();
break;
}
}
result
/// Escape `text` for embedding inside a quoted HTML attribute value.
///
/// The characters `&`, `<`, `>`, `"` and `'` are rewritten as character
/// references; every other character is copied through unchanged.
fn escape_html_attr(text: &str) -> String {
    let mut escaped = String::with_capacity(text.len());
    for ch in text.chars() {
        match ch {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            '"' => escaped.push_str("&quot;"),
            '\'' => escaped.push_str("&#x27;"),
            other => escaped.push(other),
        }
    }
    escaped
}
/// Sanitize rendered HTML with ammonia using an explicit allowlist.
///
/// Only the tags and attributes listed below survive. Everything else is
/// removed, including event handler attributes such as `onclick` and
/// `onerror`, simply because they are not on the allowlist.
///
/// Returns the cleaned HTML as a `String`.
fn sanitize_html(html: &str) -> String {
    use ammonia::Builder;
    use std::collections::HashSet;

    // Elements that Markdown rendering (plus the wikilink placeholders) needs.
    let allowed_tags: HashSet<&str> = [
        "a", "abbr", "acronym", "b", "blockquote", "br", "code", "dd", "del",
        "details", "div", "dl", "dt", "em", "h1", "h2", "h3", "h4", "h5", "h6",
        "hr", "i", "img", "ins", "kbd", "li", "mark", "ol", "p", "pre", "q",
        "s", "samp", "small", "span", "strong", "sub", "summary", "sup",
        "table", "tbody", "td", "tfoot", "th", "thead", "tr", "u", "ul", "var",
        // Task list support
        "input",
    ]
    .into_iter()
    .collect();

    // Attributes accepted on any allowed element.
    let allowed_attrs: HashSet<&str> = [
        "href", "src", "alt", "title", "class", "id", "name",
        "width", "height", "align", "valign",
        "colspan", "rowspan", "scope",
        // Data attributes for wikilinks (safe - no code execution)
        "data-target", "data-wikilink-target",
    ]
    .into_iter()
    .collect();

    let mut builder = Builder::default();
    builder
        .tags(allowed_tags)
        .generic_attributes(allowed_attrs)
        // Task list checkboxes: `checked`/`disabled` are accepted on <input>
        // only, and `type` is kept only when it is `checkbox`, so raw HTML
        // cannot smuggle in text, password or image inputs.
        .add_tag_attributes("input", &["checked", "disabled"])
        .add_tag_attribute_values("input", "type", &["checkbox"])
        // Absolute URLs are limited to these schemes. Relative and
        // fragment-only URLs (such as the `#wikilink` href) are governed by
        // ammonia's relative-URL policy, which is left at its default here.
        .url_schemes(["http", "https", "mailto"].into_iter().collect())
        .link_rel(Some("noopener noreferrer"))
        // Drop HTML comments from the output.
        .strip_comments(true)
        .clean(html)
        .to_string()
}