pinakes-core: update remaining modules and tests

Signed-off-by: NotAShelf <raf@notashelf.dev>
Change-Id: I9e0ff5ea33a5cf697473423e88f167ce6a6a6964
This commit is contained in:
raf 2026-03-08 00:42:29 +03:00
commit 3d9f8933d2
Signed by: NotAShelf
GPG key ID: 29D95B64378DB4BF
44 changed files with 1207 additions and 578 deletions

View file

@ -15,18 +15,17 @@ use uuid::Uuid;
use crate::model::{LinkType, MarkdownLink, MediaId};
// Compile regexes once at startup to avoid recompilation on every call
static WIKILINK_RE: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(r"\[\[([^\]|]+)(?:\|([^\]]+))?\]\]").expect("valid wikilink regex")
});
// Compile each regex once, lazily on first use, to avoid recompilation on
// every call. Stored as Option so that a failed compilation yields `None`
// (the extractors then return no links) rather than panicking.
static WIKILINK_RE: LazyLock<Option<Regex>> =
LazyLock::new(|| Regex::new(r"\[\[([^\]|]+)(?:\|([^\]]+))?\]\]").ok());
static EMBED_RE: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(r"!\[\[([^\]|]+)(?:\|([^\]]+))?\]\]").expect("valid embed regex")
});
static EMBED_RE: LazyLock<Option<Regex>> =
LazyLock::new(|| Regex::new(r"!\[\[([^\]|]+)(?:\|([^\]]+))?\]\]").ok());
static MARKDOWN_LINK_RE: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(r"\[([^\]]+)\]\(([^)]+)\)").expect("valid markdown link regex")
});
static MARKDOWN_LINK_RE: LazyLock<Option<Regex>> =
LazyLock::new(|| Regex::new(r"\[([^\]]+)\]\(([^)]+)\)").ok());
/// Configuration for context extraction around links
const CONTEXT_CHARS_BEFORE: usize = 50;
@ -38,6 +37,7 @@ const CONTEXT_CHARS_AFTER: usize = 50;
/// - Wikilinks: `[[target]]` and `[[target|display text]]`
/// - Embeds: `![[target]]`
/// - Markdown links: `[text](path)` (internal paths only, no http/https)
#[must_use]
pub fn extract_links(
source_media_id: MediaId,
content: &str,
@ -63,10 +63,13 @@ fn extract_wikilinks(
source_media_id: MediaId,
content: &str,
) -> Vec<MarkdownLink> {
let Some(re) = WIKILINK_RE.as_ref() else {
return Vec::new();
};
let mut links = Vec::new();
for (line_num, line) in content.lines().enumerate() {
for cap in WIKILINK_RE.captures_iter(line) {
for cap in re.captures_iter(line) {
let Some(full_match) = cap.get(0) else {
continue;
};
@ -100,7 +103,11 @@ fn extract_wikilinks(
target_media_id: None, // Will be resolved later
link_type: LinkType::Wikilink,
link_text: display_text.or_else(|| Some(target.to_string())),
line_number: Some(line_num as i32 + 1), // 1-indexed
line_number: Some(
i32::try_from(line_num)
.unwrap_or(i32::MAX)
.saturating_add(1),
), // 1-indexed
context: Some(context),
created_at: chrono::Utc::now(),
});
@ -116,10 +123,13 @@ fn extract_embeds(
source_media_id: MediaId,
content: &str,
) -> Vec<MarkdownLink> {
let Some(re) = EMBED_RE.as_ref() else {
return Vec::new();
};
let mut links = Vec::new();
for (line_num, line) in content.lines().enumerate() {
for cap in EMBED_RE.captures_iter(line) {
for cap in re.captures_iter(line) {
let Some(full_match) = cap.get(0) else {
continue;
};
@ -143,7 +153,11 @@ fn extract_embeds(
target_media_id: None,
link_type: LinkType::Embed,
link_text: display_text.or_else(|| Some(target.to_string())),
line_number: Some(line_num as i32 + 1),
line_number: Some(
i32::try_from(line_num)
.unwrap_or(i32::MAX)
.saturating_add(1),
),
context: Some(context),
created_at: chrono::Utc::now(),
});
@ -159,10 +173,13 @@ fn extract_markdown_links(
source_media_id: MediaId,
content: &str,
) -> Vec<MarkdownLink> {
let Some(re) = MARKDOWN_LINK_RE.as_ref() else {
return Vec::new();
};
let mut links = Vec::new();
for (line_num, line) in content.lines().enumerate() {
for cap in MARKDOWN_LINK_RE.captures_iter(line) {
for cap in re.captures_iter(line) {
let Some(full_match) = cap.get(0) else {
continue;
};
@ -215,7 +232,11 @@ fn extract_markdown_links(
target_media_id: None,
link_type: LinkType::MarkdownLink,
link_text: Some(text.to_string()),
line_number: Some(line_num as i32 + 1),
line_number: Some(
i32::try_from(line_num)
.unwrap_or(i32::MAX)
.saturating_add(1),
),
context: Some(context),
created_at: chrono::Utc::now(),
});
@ -278,6 +299,7 @@ pub enum ResolutionStrategy {
/// Resolve a link target to possible file paths.
///
/// Returns a list of candidate paths to check, in order of preference.
#[must_use]
pub fn resolve_link_candidates(
target: &str,
source_path: &Path,
@ -307,7 +329,7 @@ pub fn resolve_link_candidates(
candidates.push(relative.clone());
// Also try with .md extension
if !target.ends_with(".md") {
if !target.to_ascii_lowercase().ends_with(".md") {
candidates.push(relative.with_extension("md"));
let mut with_md = relative.clone();
with_md.set_file_name(format!(
@ -319,10 +341,10 @@ pub fn resolve_link_candidates(
}
// 3. Filename with .md extension in root dirs
let target_with_md = if target.ends_with(".md") {
let target_with_md = if target.to_ascii_lowercase().ends_with(".md") {
target.to_string()
} else {
format!("{}.md", target)
format!("{target}.md")
};
for root in root_dirs {
@ -340,6 +362,7 @@ pub fn resolve_link_candidates(
///
/// Obsidian uses the `aliases` field in frontmatter to define alternative names
/// for a note that can be used in wikilinks.
#[must_use]
pub fn extract_aliases(content: &str) -> Vec<String> {
let Ok(parsed) =
gray_matter::Matter::<gray_matter::engine::YAML>::new().parse(content)
@ -441,7 +464,7 @@ mod tests {
#[test]
fn test_multiple_links() {
let content = r#"
let content = r"
# My Note
This links to [[Note A]] and also [[Note B|Note B Title]].
@ -449,7 +472,7 @@ This links to [[Note A]] and also [[Note B|Note B Title]].
We also have a markdown link to [config](./config.md).
And an embedded image: ![[diagram.png]]
"#;
";
let links = extract_links(test_media_id(), content);
assert_eq!(links.len(), 4);
@ -488,7 +511,7 @@ And an embedded image: ![[diagram.png]]
#[test]
fn test_extract_aliases() {
let content = r#"---
let content = r"---
title: My Note
aliases:
- Alternative Name
@ -496,20 +519,20 @@ aliases:
---
# Content here
"#;
";
let aliases = extract_aliases(content);
assert_eq!(aliases, vec!["Alternative Name", "Another Alias"]);
}
#[test]
fn test_extract_single_alias() {
let content = r#"---
let content = r"---
title: My Note
aliases: Single Alias
---
# Content
"#;
";
let aliases = extract_aliases(content);
assert_eq!(aliases, vec!["Single Alias"]);
}
@ -538,7 +561,7 @@ aliases: Single Alias
#[test]
fn test_exclude_markdown_images() {
// Test that markdown images ![alt](image.png) are NOT extracted as links
let content = r#"
let content = r"
# My Note
Here's a regular link: [documentation](docs/guide.md)
@ -551,15 +574,14 @@ Multiple images:
![Logo](logo.png) and ![Banner](banner.jpg)
Mixed: [link](file.md) then ![image](pic.png) then [another](other.md)
"#;
";
let links = extract_links(test_media_id(), content);
// Should only extract the 4 markdown links, not the 4 images
assert_eq!(
links.len(),
4,
"Should extract 4 links, not images. Got: {:#?}",
links
"Should extract 4 links, not images. Got: {links:#?}"
);
// Verify all extracted items are MarkdownLink type (not images)