pinakes-core: exclude markdown images from link extraction
Signed-off-by: NotAShelf <raf@notashelf.dev> Change-Id: I6977f90d5ef845eeef099c1be4eb587b6a6a6964
This commit is contained in:
parent
3e51e4dcc1
commit
9afe4a4f6a
1 changed files with 92 additions and 0 deletions
|
|
@ -123,6 +123,14 @@ fn extract_markdown_links(source_media_id: MediaId, content: &str) -> Vec<Markdo
|
|||
for (line_num, line) in content.lines().enumerate() {
|
||||
for cap in re.captures_iter(line) {
|
||||
let full_match = cap.get(0).unwrap();
|
||||
let match_start = full_match.start();
|
||||
|
||||
// Skip markdown images: ![alt](image.png)
|
||||
// Check if the character immediately before '[' is '!'
|
||||
if match_start > 0 && line.as_bytes().get(match_start - 1) == Some(&b'!') {
|
||||
continue;
|
||||
}
|
||||
|
||||
let text = cap.get(1).unwrap().as_str().trim();
|
||||
let path = cap.get(2).unwrap().as_str().trim();
|
||||
|
||||
|
|
@ -455,4 +463,88 @@ aliases: Single Alias
|
|||
assert_eq!(wikilinks[0].target_path, "note");
|
||||
assert_eq!(embeds[0].target_path, "image.png");
|
||||
}
|
||||
|
||||
/// Markdown images (`![alt](path)`) must not be reported as links.
///
/// The fixture interleaves four regular links with four images, covering an
/// image on its own line, two images on one line, and an image sandwiched
/// between two links on the same line.
#[test]
fn test_exclude_markdown_images() {
    // Test that markdown images ![alt](path) are NOT extracted as links
    let content = r#"
# My Note

Here's a regular link: [documentation](docs/guide.md)

Here's an image: ![Screenshot](images/screenshot.png)

Another link: [config](config.toml)

Multiple images:
![Logo](logo.png) and ![Banner](banner.jpg)

Mixed: [link](file.md) then ![pic](pic.png) then [another](other.md)
"#;
    let links = extract_links(test_media_id(), content);

    // Should only extract the 4 markdown links, not the 4 images
    assert_eq!(
        links.len(),
        4,
        "Should extract 4 links, not images. Got: {:#?}",
        links
    );

    // Verify all extracted items are MarkdownLink type (not images)
    for link in &links {
        assert_eq!(
            link.link_type,
            LinkType::MarkdownLink,
            "Link '{}' should be MarkdownLink type",
            link.target_path
        );
    }

    // Verify correct targets were extracted (links, not images)
    let targets: Vec<&str> = links.iter().map(|l| l.target_path.as_str()).collect();
    assert!(
        targets.contains(&"docs/guide.md"),
        "Should contain docs/guide.md"
    );
    assert!(
        targets.contains(&"config.toml"),
        "Should contain config.toml"
    );
    assert!(targets.contains(&"file.md"), "Should contain file.md");
    assert!(targets.contains(&"other.md"), "Should contain other.md");

    // Verify images were NOT extracted
    assert!(
        !targets.contains(&"images/screenshot.png"),
        "Should NOT contain screenshot.png (it's an image)"
    );
    assert!(
        !targets.contains(&"logo.png"),
        "Should NOT contain logo.png (it's an image)"
    );
    assert!(
        !targets.contains(&"banner.jpg"),
        "Should NOT contain banner.jpg (it's an image)"
    );
    assert!(
        !targets.contains(&"pic.png"),
        "Should NOT contain pic.png (it's an image)"
    );
}
|
||||
|
||||
/// An image that begins at column 0 must still be skipped.
///
/// Here the `!` sits at byte offset 0 and the `[` at offset 1, so this is
/// the smallest offset at which the "preceding byte is `!`" check can fire.
/// The following line holds a plain link that must still be extracted.
#[test]
fn test_edge_case_image_at_line_start() {
    // Test edge case: image at the very start of a line
    let content = "![Image](image.png)\n[Link](file.md)";
    let links = extract_links(test_media_id(), content);

    assert_eq!(
        links.len(),
        1,
        "Should only extract the link, not the image"
    );
    assert_eq!(links[0].target_path, "file.md");
    assert_eq!(links[0].link_type, LinkType::MarkdownLink);
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue