pinakes-core: update remaining modules and tests
Signed-off-by: NotAShelf <raf@notashelf.dev>
Change-Id: I9e0ff5ea33a5cf697473423e88f167ce6a6a6964
This commit is contained in:
parent
c8425a4c34
commit
3d9f8933d2
44 changed files with 1207 additions and 578 deletions
|
|
@ -15,18 +15,17 @@ use uuid::Uuid;
|
|||
|
||||
use crate::model::{LinkType, MarkdownLink, MediaId};
|
||||
|
||||
// Compile regexes once at startup to avoid recompilation on every call
|
||||
static WIKILINK_RE: LazyLock<Regex> = LazyLock::new(|| {
|
||||
Regex::new(r"\[\[([^\]|]+)(?:\|([^\]]+))?\]\]").expect("valid wikilink regex")
|
||||
});
|
||||
// Compile regexes once at startup to avoid recompilation on every call.
|
||||
// Stored as Option so that initialization failure is handled gracefully
|
||||
// rather than panicking.
|
||||
static WIKILINK_RE: LazyLock<Option<Regex>> =
|
||||
LazyLock::new(|| Regex::new(r"\[\[([^\]|]+)(?:\|([^\]]+))?\]\]").ok());
|
||||
|
||||
static EMBED_RE: LazyLock<Regex> = LazyLock::new(|| {
|
||||
Regex::new(r"!\[\[([^\]|]+)(?:\|([^\]]+))?\]\]").expect("valid embed regex")
|
||||
});
|
||||
static EMBED_RE: LazyLock<Option<Regex>> =
|
||||
LazyLock::new(|| Regex::new(r"!\[\[([^\]|]+)(?:\|([^\]]+))?\]\]").ok());
|
||||
|
||||
static MARKDOWN_LINK_RE: LazyLock<Regex> = LazyLock::new(|| {
|
||||
Regex::new(r"\[([^\]]+)\]\(([^)]+)\)").expect("valid markdown link regex")
|
||||
});
|
||||
static MARKDOWN_LINK_RE: LazyLock<Option<Regex>> =
|
||||
LazyLock::new(|| Regex::new(r"\[([^\]]+)\]\(([^)]+)\)").ok());
|
||||
|
||||
/// Configuration for context extraction around links
|
||||
const CONTEXT_CHARS_BEFORE: usize = 50;
|
||||
|
|
@ -38,6 +37,7 @@ const CONTEXT_CHARS_AFTER: usize = 50;
|
|||
/// - Wikilinks: `[[target]]` and `[[target|display text]]`
|
||||
/// - Embeds: `![[target]]`
|
||||
/// - Markdown links: `[text](path)` (internal paths only, no http/https)
|
||||
#[must_use]
|
||||
pub fn extract_links(
|
||||
source_media_id: MediaId,
|
||||
content: &str,
|
||||
|
|
@ -63,10 +63,13 @@ fn extract_wikilinks(
|
|||
source_media_id: MediaId,
|
||||
content: &str,
|
||||
) -> Vec<MarkdownLink> {
|
||||
let Some(re) = WIKILINK_RE.as_ref() else {
|
||||
return Vec::new();
|
||||
};
|
||||
let mut links = Vec::new();
|
||||
|
||||
for (line_num, line) in content.lines().enumerate() {
|
||||
for cap in WIKILINK_RE.captures_iter(line) {
|
||||
for cap in re.captures_iter(line) {
|
||||
let Some(full_match) = cap.get(0) else {
|
||||
continue;
|
||||
};
|
||||
|
|
@ -100,7 +103,11 @@ fn extract_wikilinks(
|
|||
target_media_id: None, // Will be resolved later
|
||||
link_type: LinkType::Wikilink,
|
||||
link_text: display_text.or_else(|| Some(target.to_string())),
|
||||
line_number: Some(line_num as i32 + 1), // 1-indexed
|
||||
line_number: Some(
|
||||
i32::try_from(line_num)
|
||||
.unwrap_or(i32::MAX)
|
||||
.saturating_add(1),
|
||||
), // 1-indexed
|
||||
context: Some(context),
|
||||
created_at: chrono::Utc::now(),
|
||||
});
|
||||
|
|
@ -116,10 +123,13 @@ fn extract_embeds(
|
|||
source_media_id: MediaId,
|
||||
content: &str,
|
||||
) -> Vec<MarkdownLink> {
|
||||
let Some(re) = EMBED_RE.as_ref() else {
|
||||
return Vec::new();
|
||||
};
|
||||
let mut links = Vec::new();
|
||||
|
||||
for (line_num, line) in content.lines().enumerate() {
|
||||
for cap in EMBED_RE.captures_iter(line) {
|
||||
for cap in re.captures_iter(line) {
|
||||
let Some(full_match) = cap.get(0) else {
|
||||
continue;
|
||||
};
|
||||
|
|
@ -143,7 +153,11 @@ fn extract_embeds(
|
|||
target_media_id: None,
|
||||
link_type: LinkType::Embed,
|
||||
link_text: display_text.or_else(|| Some(target.to_string())),
|
||||
line_number: Some(line_num as i32 + 1),
|
||||
line_number: Some(
|
||||
i32::try_from(line_num)
|
||||
.unwrap_or(i32::MAX)
|
||||
.saturating_add(1),
|
||||
),
|
||||
context: Some(context),
|
||||
created_at: chrono::Utc::now(),
|
||||
});
|
||||
|
|
@ -159,10 +173,13 @@ fn extract_markdown_links(
|
|||
source_media_id: MediaId,
|
||||
content: &str,
|
||||
) -> Vec<MarkdownLink> {
|
||||
let Some(re) = MARKDOWN_LINK_RE.as_ref() else {
|
||||
return Vec::new();
|
||||
};
|
||||
let mut links = Vec::new();
|
||||
|
||||
for (line_num, line) in content.lines().enumerate() {
|
||||
for cap in MARKDOWN_LINK_RE.captures_iter(line) {
|
||||
for cap in re.captures_iter(line) {
|
||||
let Some(full_match) = cap.get(0) else {
|
||||
continue;
|
||||
};
|
||||
|
|
@ -215,7 +232,11 @@ fn extract_markdown_links(
|
|||
target_media_id: None,
|
||||
link_type: LinkType::MarkdownLink,
|
||||
link_text: Some(text.to_string()),
|
||||
line_number: Some(line_num as i32 + 1),
|
||||
line_number: Some(
|
||||
i32::try_from(line_num)
|
||||
.unwrap_or(i32::MAX)
|
||||
.saturating_add(1),
|
||||
),
|
||||
context: Some(context),
|
||||
created_at: chrono::Utc::now(),
|
||||
});
|
||||
|
|
@ -278,6 +299,7 @@ pub enum ResolutionStrategy {
|
|||
/// Resolve a link target to possible file paths.
|
||||
///
|
||||
/// Returns a list of candidate paths to check, in order of preference.
|
||||
#[must_use]
|
||||
pub fn resolve_link_candidates(
|
||||
target: &str,
|
||||
source_path: &Path,
|
||||
|
|
@ -307,7 +329,7 @@ pub fn resolve_link_candidates(
|
|||
candidates.push(relative.clone());
|
||||
|
||||
// Also try with .md extension
|
||||
if !target.ends_with(".md") {
|
||||
if !target.to_ascii_lowercase().ends_with(".md") {
|
||||
candidates.push(relative.with_extension("md"));
|
||||
let mut with_md = relative.clone();
|
||||
with_md.set_file_name(format!(
|
||||
|
|
@ -319,10 +341,10 @@ pub fn resolve_link_candidates(
|
|||
}
|
||||
|
||||
// 3. Filename with .md extension in root dirs
|
||||
let target_with_md = if target.ends_with(".md") {
|
||||
let target_with_md = if target.to_ascii_lowercase().ends_with(".md") {
|
||||
target.to_string()
|
||||
} else {
|
||||
format!("{}.md", target)
|
||||
format!("{target}.md")
|
||||
};
|
||||
|
||||
for root in root_dirs {
|
||||
|
|
@ -340,6 +362,7 @@ pub fn resolve_link_candidates(
|
|||
///
|
||||
/// Obsidian uses the `aliases` field in frontmatter to define alternative names
|
||||
/// for a note that can be used in wikilinks.
|
||||
#[must_use]
|
||||
pub fn extract_aliases(content: &str) -> Vec<String> {
|
||||
let Ok(parsed) =
|
||||
gray_matter::Matter::<gray_matter::engine::YAML>::new().parse(content)
|
||||
|
|
@ -441,7 +464,7 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn test_multiple_links() {
|
||||
let content = r#"
|
||||
let content = r"
|
||||
# My Note
|
||||
|
||||
This links to [[Note A]] and also [[Note B|Note B Title]].
|
||||
|
|
@ -449,7 +472,7 @@ This links to [[Note A]] and also [[Note B|Note B Title]].
|
|||
We also have a markdown link to [config](./config.md).
|
||||
|
||||
And an embedded image: ![[diagram.png]]
|
||||
"#;
|
||||
";
|
||||
let links = extract_links(test_media_id(), content);
|
||||
|
||||
assert_eq!(links.len(), 4);
|
||||
|
|
@ -488,7 +511,7 @@ And an embedded image: ![[diagram.png]]
|
|||
|
||||
#[test]
|
||||
fn test_extract_aliases() {
|
||||
let content = r#"---
|
||||
let content = r"---
|
||||
title: My Note
|
||||
aliases:
|
||||
- Alternative Name
|
||||
|
|
@ -496,20 +519,20 @@ aliases:
|
|||
---
|
||||
|
||||
# Content here
|
||||
"#;
|
||||
";
|
||||
let aliases = extract_aliases(content);
|
||||
assert_eq!(aliases, vec!["Alternative Name", "Another Alias"]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_extract_single_alias() {
|
||||
let content = r#"---
|
||||
let content = r"---
|
||||
title: My Note
|
||||
aliases: Single Alias
|
||||
---
|
||||
|
||||
# Content
|
||||
"#;
|
||||
";
|
||||
let aliases = extract_aliases(content);
|
||||
assert_eq!(aliases, vec!["Single Alias"]);
|
||||
}
|
||||
|
|
@ -538,7 +561,7 @@ aliases: Single Alias
|
|||
#[test]
|
||||
fn test_exclude_markdown_images() {
|
||||
// Test that markdown images (`![alt](path)`) are NOT extracted as links
|
||||
let content = r#"
|
||||
let content = r"
|
||||
# My Note
|
||||
|
||||
Here's a regular link: [documentation](docs/guide.md)
|
||||
|
|
@ -551,15 +574,14 @@ Multiple images:
|
|||
 and 
|
||||
|
||||
Mixed: [link](file.md) then  then [another](other.md)
|
||||
"#;
|
||||
";
|
||||
let links = extract_links(test_media_id(), content);
|
||||
|
||||
// Should only extract the 4 markdown links, not the 4 images
|
||||
assert_eq!(
|
||||
links.len(),
|
||||
4,
|
||||
"Should extract 4 links, not images. Got: {:#?}",
|
||||
links
|
||||
"Should extract 4 links, not images. Got: {links:#?}"
|
||||
);
|
||||
|
||||
// Verify all extracted items are MarkdownLink type (not images)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue