pinakes-core: exclude markdown images from link extraction

Signed-off-by: NotAShelf <raf@notashelf.dev>
Change-Id: I6977f90d5ef845eeef099c1be4eb587b6a6a6964
This commit is contained in:
raf 2026-02-09 13:17:02 +03:00
commit 9afe4a4f6a
Signed by: NotAShelf
GPG key ID: 29D95B64378DB4BF

View file

@ -123,6 +123,14 @@ fn extract_markdown_links(source_media_id: MediaId, content: &str) -> Vec<Markdo
for (line_num, line) in content.lines().enumerate() {
for cap in re.captures_iter(line) {
let full_match = cap.get(0).unwrap();
let match_start = full_match.start();
// Skip markdown images: ![alt](image.png)
// Check if the character immediately before '[' is '!'
if match_start > 0 && line.as_bytes().get(match_start - 1) == Some(&b'!') {
continue;
}
let text = cap.get(1).unwrap().as_str().trim();
let path = cap.get(2).unwrap().as_str().trim();
@ -455,4 +463,88 @@ aliases: Single Alias
assert_eq!(wikilinks[0].target_path, "note");
assert_eq!(embeds[0].target_path, "image.png");
}
#[test]
fn test_exclude_markdown_images() {
    // Fixture: four ordinary `[text](target)` links interleaved with four
    // `![alt](src)` images, including lines where both forms sit side by side.
    let content = r#"
# My Note
Here's a regular link: [documentation](docs/guide.md)
Here's an image: ![Screenshot](images/screenshot.png)
Another link: [config](config.toml)
Multiple images:
![Logo](logo.png) and ![Banner](banner.jpg)
Mixed: [link](file.md) then ![image](pic.png) then [another](other.md)
"#;
    let links = extract_links(test_media_id(), content);

    // Exactly the four links may come back; all four images must be dropped.
    assert_eq!(
        links.len(),
        4,
        "Should extract 4 links, not images. Got: {:#?}",
        links
    );

    // Each returned entry has to be typed as a plain markdown link.
    links.iter().for_each(|link| {
        assert_eq!(
            link.link_type,
            LinkType::MarkdownLink,
            "Link '{}' should be MarkdownLink type",
            link.target_path
        );
    });

    // Membership probe over the extracted target paths.
    let has_target = |wanted: &str| links.iter().any(|l| l.target_path.as_str() == wanted);

    // Link targets that must be present, paired with their failure message.
    let expected_present = [
        ("docs/guide.md", "Should contain docs/guide.md"),
        ("config.toml", "Should contain config.toml"),
        ("file.md", "Should contain file.md"),
        ("other.md", "Should contain other.md"),
    ];
    for &(target, message) in expected_present.iter() {
        assert!(has_target(target), "{}", message);
    }

    // Image sources that must be absent, paired with their failure message.
    let expected_absent = [
        (
            "images/screenshot.png",
            "Should NOT contain screenshot.png (it's an image)",
        ),
        ("logo.png", "Should NOT contain logo.png (it's an image)"),
        ("banner.jpg", "Should NOT contain banner.jpg (it's an image)"),
        ("pic.png", "Should NOT contain pic.png (it's an image)"),
    ];
    for &(target, message) in expected_absent.iter() {
        assert!(!has_target(target), "{}", message);
    }
}
#[test]
fn test_edge_case_image_at_line_start() {
    // The image is the very first thing on its line; the only real link
    // sits on the following line.
    let links = extract_links(
        test_media_id(),
        "![Image at start](start.png)\n[Link](file.md)",
    );

    assert_eq!(
        links.len(),
        1,
        "Should only extract the link, not the image"
    );

    // The single survivor must be the plain link, correctly typed.
    let only = &links[0];
    assert_eq!(only.target_path, "file.md");
    assert_eq!(only.link_type, LinkType::MarkdownLink);
}
}