various: markdown improvements
Signed-off-by: NotAShelf <raf@notashelf.dev> Change-Id: I81fda8247814da19eed1e76dbe97bd5b6a6a6964
This commit is contained in:
parent
875bdf5ebc
commit
80a8b5c7ca
23 changed files with 3458 additions and 30 deletions
|
|
@ -6,7 +6,8 @@ use tracing::info;
|
|||
use crate::audit;
|
||||
use crate::error::{PinakesError, Result};
|
||||
use crate::hash::compute_file_hash;
|
||||
use crate::media_type::MediaType;
|
||||
use crate::links;
|
||||
use crate::media_type::{BuiltinMediaType, MediaType};
|
||||
use crate::metadata;
|
||||
use crate::model::*;
|
||||
use crate::storage::DynStorageBackend;
|
||||
|
|
@ -168,6 +169,9 @@ pub async fn import_file_with_options(
|
|||
None
|
||||
};
|
||||
|
||||
// Check if this is a markdown file for link extraction
|
||||
let is_markdown = media_type == MediaType::Builtin(BuiltinMediaType::Markdown);
|
||||
|
||||
let item = MediaItem {
|
||||
id: media_id,
|
||||
path: path.clone(),
|
||||
|
|
@ -206,10 +210,25 @@ pub async fn import_file_with_options(
|
|||
|
||||
// New items are not deleted
|
||||
deleted_at: None,
|
||||
|
||||
// Links will be extracted separately
|
||||
links_extracted_at: None,
|
||||
};
|
||||
|
||||
storage.insert_media(&item).await?;
|
||||
|
||||
// Extract and store markdown links for markdown files
|
||||
if is_markdown {
|
||||
if let Err(e) = extract_and_store_links(storage, media_id, &path).await {
|
||||
tracing::warn!(
|
||||
media_id = %media_id,
|
||||
path = %path.display(),
|
||||
error = %e,
|
||||
"failed to extract markdown links"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// Store extracted extra metadata as custom fields
|
||||
for (key, value) in &extracted.extra {
|
||||
let field = CustomField {
|
||||
|
|
@ -372,3 +391,44 @@ pub async fn import_directory_with_options(
|
|||
|
||||
Ok(results)
|
||||
}
|
||||
|
||||
/// Extract markdown links from a file and store them in the database.
|
||||
async fn extract_and_store_links(
|
||||
storage: &DynStorageBackend,
|
||||
media_id: MediaId,
|
||||
path: &Path,
|
||||
) -> Result<()> {
|
||||
// Read file content
|
||||
let content = tokio::fs::read_to_string(path).await.map_err(|e| {
|
||||
PinakesError::Io(std::io::Error::new(
|
||||
std::io::ErrorKind::Other,
|
||||
format!("failed to read markdown file for link extraction: {e}"),
|
||||
))
|
||||
})?;
|
||||
|
||||
// Extract links
|
||||
let extracted_links = links::extract_links(media_id, &content);
|
||||
|
||||
if extracted_links.is_empty() {
|
||||
// No links found, just mark as extracted
|
||||
storage.mark_links_extracted(media_id).await?;
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
// Clear any existing links for this media (in case of re-import)
|
||||
storage.clear_links_for_media(media_id).await?;
|
||||
|
||||
// Save extracted links
|
||||
storage.save_markdown_links(media_id, &extracted_links).await?;
|
||||
|
||||
// Mark links as extracted
|
||||
storage.mark_links_extracted(media_id).await?;
|
||||
|
||||
tracing::debug!(
|
||||
media_id = %media_id,
|
||||
link_count = extracted_links.len(),
|
||||
"extracted markdown links"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
|
|
|||
|
|
@ -12,6 +12,7 @@ pub mod hash;
|
|||
pub mod import;
|
||||
pub mod integrity;
|
||||
pub mod jobs;
|
||||
pub mod links;
|
||||
pub mod managed_storage;
|
||||
pub mod media_type;
|
||||
pub mod metadata;
|
||||
|
|
|
|||
456
crates/pinakes-core/src/links.rs
Normal file
456
crates/pinakes-core/src/links.rs
Normal file
|
|
@ -0,0 +1,456 @@
|
|||
//! Markdown link extraction and management for Obsidian-style bidirectional links.
|
||||
//!
|
||||
//! This module provides:
|
||||
//! - Wikilink extraction (`[[target]]` and `[[target|display]]`)
|
||||
//! - Embed extraction (`![[target]]`)
|
||||
//! - Markdown link extraction (`[text](path)` for internal links)
|
||||
//! - Link resolution strategies
|
||||
//! - Context extraction for backlink previews
|
||||
|
||||
use std::path::Path;
|
||||
|
||||
use regex::Regex;
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::error::Result;
|
||||
use crate::model::{LinkType, MarkdownLink, MediaId};
|
||||
|
||||
/// Configuration for context extraction around links
|
||||
const CONTEXT_CHARS_BEFORE: usize = 50;
|
||||
const CONTEXT_CHARS_AFTER: usize = 50;
|
||||
|
||||
/// Extract all markdown links from file content.
|
||||
///
|
||||
/// This extracts:
|
||||
/// - Wikilinks: `[[target]]` and `[[target|display text]]`
|
||||
/// - Embeds: `![[target]]`
|
||||
/// - Markdown links: `[text](path)` (internal paths only, no http/https)
|
||||
pub fn extract_links(source_media_id: MediaId, content: &str) -> Vec<MarkdownLink> {
|
||||
let mut links = Vec::new();
|
||||
|
||||
// Extract wikilinks: [[target]] or [[target|display]]
|
||||
links.extend(extract_wikilinks(source_media_id, content));
|
||||
|
||||
// Extract embeds: ![[target]]
|
||||
links.extend(extract_embeds(source_media_id, content));
|
||||
|
||||
// Extract markdown links: [text](path)
|
||||
links.extend(extract_markdown_links(source_media_id, content));
|
||||
|
||||
links
|
||||
}
|
||||
|
||||
/// Extract wikilinks from content.
|
||||
/// Matches: `[[target]]` or `[[target|display text]]` but NOT `![[...]]` (embeds)
|
||||
fn extract_wikilinks(source_media_id: MediaId, content: &str) -> Vec<MarkdownLink> {
|
||||
// Match [[...]] - we'll manually filter out embeds that are preceded by !
|
||||
let re = Regex::new(r"\[\[([^\]|]+)(?:\|([^\]]+))?\]\]").unwrap();
|
||||
let mut links = Vec::new();
|
||||
|
||||
for (line_num, line) in content.lines().enumerate() {
|
||||
for cap in re.captures_iter(line) {
|
||||
let full_match = cap.get(0).unwrap();
|
||||
let match_start = full_match.start();
|
||||
|
||||
// Check if preceded by ! (which would make it an embed, not a wikilink)
|
||||
if match_start > 0 {
|
||||
let bytes = line.as_bytes();
|
||||
if bytes.get(match_start - 1) == Some(&b'!') {
|
||||
continue; // Skip embeds
|
||||
}
|
||||
}
|
||||
|
||||
let target = cap.get(1).unwrap().as_str().trim();
|
||||
let display_text = cap.get(2).map(|m| m.as_str().trim().to_string());
|
||||
|
||||
let context = extract_context(content, line_num, full_match.start(), full_match.end());
|
||||
|
||||
links.push(MarkdownLink {
|
||||
id: Uuid::now_v7(),
|
||||
source_media_id,
|
||||
target_path: target.to_string(),
|
||||
target_media_id: None, // Will be resolved later
|
||||
link_type: LinkType::Wikilink,
|
||||
link_text: display_text.or_else(|| Some(target.to_string())),
|
||||
line_number: Some(line_num as i32 + 1), // 1-indexed
|
||||
context: Some(context),
|
||||
created_at: chrono::Utc::now(),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
links
|
||||
}
|
||||
|
||||
/// Extract embeds from content.
|
||||
/// Matches: `![[target]]`
|
||||
fn extract_embeds(source_media_id: MediaId, content: &str) -> Vec<MarkdownLink> {
|
||||
let re = Regex::new(r"!\[\[([^\]|]+)(?:\|([^\]]+))?\]\]").unwrap();
|
||||
let mut links = Vec::new();
|
||||
|
||||
for (line_num, line) in content.lines().enumerate() {
|
||||
for cap in re.captures_iter(line) {
|
||||
let full_match = cap.get(0).unwrap();
|
||||
let target = cap.get(1).unwrap().as_str().trim();
|
||||
let display_text = cap.get(2).map(|m| m.as_str().trim().to_string());
|
||||
|
||||
let context = extract_context(content, line_num, full_match.start(), full_match.end());
|
||||
|
||||
links.push(MarkdownLink {
|
||||
id: Uuid::now_v7(),
|
||||
source_media_id,
|
||||
target_path: target.to_string(),
|
||||
target_media_id: None,
|
||||
link_type: LinkType::Embed,
|
||||
link_text: display_text.or_else(|| Some(target.to_string())),
|
||||
line_number: Some(line_num as i32 + 1),
|
||||
context: Some(context),
|
||||
created_at: chrono::Utc::now(),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
links
|
||||
}
|
||||
|
||||
/// Extract markdown links from content.
|
||||
/// Matches: `[text](path)` but only for internal paths (no http/https)
|
||||
fn extract_markdown_links(source_media_id: MediaId, content: &str) -> Vec<MarkdownLink> {
|
||||
// Match [text](path) where path doesn't start with http:// or https://
|
||||
let re = Regex::new(r"\[([^\]]+)\]\(([^)]+)\)").unwrap();
|
||||
let mut links = Vec::new();
|
||||
|
||||
for (line_num, line) in content.lines().enumerate() {
|
||||
for cap in re.captures_iter(line) {
|
||||
let full_match = cap.get(0).unwrap();
|
||||
let text = cap.get(1).unwrap().as_str().trim();
|
||||
let path = cap.get(2).unwrap().as_str().trim();
|
||||
|
||||
// Skip external links
|
||||
if path.starts_with("http://")
|
||||
|| path.starts_with("https://")
|
||||
|| path.starts_with("mailto:")
|
||||
|| path.starts_with("ftp://")
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
// Skip anchor-only links
|
||||
if path.starts_with('#') {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Remove any anchor from the path for resolution
|
||||
let target_path = path.split('#').next().unwrap_or(path);
|
||||
|
||||
let context = extract_context(content, line_num, full_match.start(), full_match.end());
|
||||
|
||||
links.push(MarkdownLink {
|
||||
id: Uuid::now_v7(),
|
||||
source_media_id,
|
||||
target_path: target_path.to_string(),
|
||||
target_media_id: None,
|
||||
link_type: LinkType::MarkdownLink,
|
||||
link_text: Some(text.to_string()),
|
||||
line_number: Some(line_num as i32 + 1),
|
||||
context: Some(context),
|
||||
created_at: chrono::Utc::now(),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
links
|
||||
}
|
||||
|
||||
/// Extract surrounding context for a link.
|
||||
fn extract_context(content: &str, line_num: usize, _start: usize, _end: usize) -> String {
|
||||
let lines: Vec<&str> = content.lines().collect();
|
||||
if line_num >= lines.len() {
|
||||
return String::new();
|
||||
}
|
||||
|
||||
let line = lines[line_num];
|
||||
let line_len = line.len();
|
||||
|
||||
// Get surrounding lines for context if the current line is short
|
||||
if line_len < 30 && line_num > 0 {
|
||||
// Include previous line
|
||||
let prev = lines.get(line_num.saturating_sub(1)).unwrap_or(&"");
|
||||
let next = lines.get(line_num + 1).unwrap_or(&"");
|
||||
return format!("{} {} {}", prev.trim(), line.trim(), next.trim())
|
||||
.chars()
|
||||
.take(CONTEXT_CHARS_BEFORE + CONTEXT_CHARS_AFTER + 20)
|
||||
.collect();
|
||||
}
|
||||
|
||||
// Truncate long lines
|
||||
if line_len > CONTEXT_CHARS_BEFORE + CONTEXT_CHARS_AFTER {
|
||||
line.chars()
|
||||
.take(CONTEXT_CHARS_BEFORE + CONTEXT_CHARS_AFTER)
|
||||
.collect()
|
||||
} else {
|
||||
line.to_string()
|
||||
}
|
||||
}
|
||||
|
||||
/// Link resolution strategies for finding target media items.
///
/// Mirrors the candidate-generation order used by [`resolve_link_candidates`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ResolutionStrategy {
    /// Direct path match.
    DirectPath,
    /// Relative to the source file's directory.
    RelativeToSource,
    /// Filename with a `.md` extension added.
    FilenameWithMd,
    /// Filename-only search (Obsidian-style).
    FilenameOnly,
}
|
||||
|
||||
/// Resolve a link target to possible file paths.
///
/// Returns candidate paths to check, in order of preference, with duplicates
/// removed:
/// 1. the target interpreted as a path (absolute, or relative to each root),
/// 2. the target relative to the source file's directory (plus `.md` variants),
/// 3. the filename with `.md` ensured, under each root directory.
pub fn resolve_link_candidates(
    target: &str,
    source_path: &Path,
    root_dirs: &[std::path::PathBuf],
) -> Vec<std::path::PathBuf> {
    let target = target.trim();
    let mut out: Vec<std::path::PathBuf> = Vec::new();

    // 1. Path-like targets (contain a separator): absolute as-is, otherwise
    //    relative to every root directory.
    if target.contains('/') || target.contains('\\') {
        let as_path = std::path::PathBuf::from(target);
        if as_path.is_absolute() {
            out.push(as_path);
        } else {
            out.extend(root_dirs.iter().map(|root| root.join(&as_path)));
        }
    }

    // 2. Relative to the source file's directory, plus `.md` variants.
    if let Some(dir) = source_path.parent() {
        let sibling = dir.join(target);
        out.push(sibling.clone());

        if !target.ends_with(".md") {
            // `with_extension` replaces any existing extension; the
            // `set_file_name` variant appends `.md` to the full name instead.
            out.push(sibling.with_extension("md"));
            let stem = sibling.file_name().unwrap_or_default().to_string_lossy();
            let mut appended = sibling.clone();
            appended.set_file_name(format!("{}.md", stem));
            out.push(appended);
        }
    }

    // 3. Filename (with `.md` ensured) under each root directory.
    let filename_md = if target.ends_with(".md") {
        target.to_string()
    } else {
        format!("{}.md", target)
    };
    out.extend(root_dirs.iter().map(|root| root.join(&filename_md)));

    // 4. De-duplicate while preserving preference order.
    let mut seen = std::collections::HashSet::new();
    out.retain(|p| seen.insert(p.clone()));

    out
}
|
||||
|
||||
/// Extract frontmatter aliases from markdown content.
|
||||
///
|
||||
/// Obsidian uses the `aliases` field in frontmatter to define alternative names
|
||||
/// for a note that can be used in wikilinks.
|
||||
pub fn extract_aliases(content: &str) -> Result<Vec<String>> {
|
||||
let parsed = gray_matter::Matter::<gray_matter::engine::YAML>::new().parse(content);
|
||||
|
||||
if let Some(data) = parsed.ok().and_then(|p| p.data) {
|
||||
if let gray_matter::Pod::Hash(map) = data {
|
||||
if let Some(aliases) = map.get("aliases") {
|
||||
match aliases {
|
||||
gray_matter::Pod::Array(arr) => {
|
||||
return Ok(arr
|
||||
.iter()
|
||||
.filter_map(|a| {
|
||||
if let gray_matter::Pod::String(s) = a {
|
||||
Some(s.clone())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.collect());
|
||||
}
|
||||
gray_matter::Pod::String(s) => {
|
||||
// Single alias as string
|
||||
return Ok(vec![s.clone()]);
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(Vec::new())
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// A fixed, nil-UUID media id used as the link source in every test.
    fn test_media_id() -> MediaId {
        MediaId(Uuid::nil())
    }

    #[test]
    fn test_extract_simple_wikilink() {
        let found = extract_links(test_media_id(), "This is a [[simple link]] in text.");

        assert_eq!(found.len(), 1);
        assert_eq!(found[0].target_path, "simple link");
        assert_eq!(found[0].link_type, LinkType::Wikilink);
        assert_eq!(found[0].link_text, Some("simple link".to_string()));
    }

    #[test]
    fn test_extract_wikilink_with_display() {
        let found = extract_links(
            test_media_id(),
            "Check out [[target note|this article]] for more.",
        );

        assert_eq!(found.len(), 1);
        assert_eq!(found[0].target_path, "target note");
        assert_eq!(found[0].link_text, Some("this article".to_string()));
    }

    #[test]
    fn test_extract_embed() {
        let found = extract_links(test_media_id(), "Here is an image: ![[image.png]]");

        assert_eq!(found.len(), 1);
        assert_eq!(found[0].target_path, "image.png");
        assert_eq!(found[0].link_type, LinkType::Embed);
    }

    #[test]
    fn test_extract_markdown_link() {
        let found = extract_links(
            test_media_id(),
            "Read [the documentation](docs/README.md) for details.",
        );

        assert_eq!(found.len(), 1);
        assert_eq!(found[0].target_path, "docs/README.md");
        assert_eq!(found[0].link_type, LinkType::MarkdownLink);
        assert_eq!(found[0].link_text, Some("the documentation".to_string()));
    }

    #[test]
    fn test_skip_external_links() {
        let src = "Visit [our site](https://example.com) or [email us](mailto:test@test.com).";
        assert!(extract_links(test_media_id(), src).is_empty());
    }

    #[test]
    fn test_multiple_links() {
        let src = r#"
# My Note

This links to [[Note A]] and also [[Note B|Note B Title]].

We also have a markdown link to [config](./config.md).

And an embedded image: ![[diagram.png]]
"#;
        let found = extract_links(test_media_id(), src);

        assert_eq!(found.len(), 4);

        let kinds: Vec<_> = found.iter().map(|l| l.link_type).collect();
        assert!(kinds.contains(&LinkType::Wikilink));
        assert!(kinds.contains(&LinkType::Embed));
        assert!(kinds.contains(&LinkType::MarkdownLink));
    }

    #[test]
    fn test_line_numbers() {
        let found = extract_links(test_media_id(), "Line 1\n[[link on line 2]]\nLine 3");

        assert_eq!(found.len(), 1);
        assert_eq!(found[0].line_number, Some(2));
    }

    #[test]
    fn test_resolve_candidates() {
        let source_path = std::path::Path::new("/notes/projects/readme.md");
        let roots = vec![std::path::PathBuf::from("/notes")];

        let candidates = resolve_link_candidates("My Note", source_path, &roots);

        // Relative path plus `.md` variations should be present.
        assert!(!candidates.is_empty());
        assert!(candidates
            .iter()
            .any(|p| p.to_string_lossy().contains("My Note.md")));
    }

    #[test]
    fn test_extract_aliases() {
        let src = r#"---
title: My Note
aliases:
  - Alternative Name
  - Another Alias
---

# Content here
"#;
        assert_eq!(
            extract_aliases(src).unwrap(),
            vec!["Alternative Name", "Another Alias"]
        );
    }

    #[test]
    fn test_extract_single_alias() {
        let src = r#"---
title: My Note
aliases: Single Alias
---

# Content
"#;
        assert_eq!(extract_aliases(src).unwrap(), vec!["Single Alias"]);
    }

    #[test]
    fn test_wikilink_not_matching_embed() {
        let found = extract_links(
            test_media_id(),
            "A wikilink [[note]] and an embed ![[image.png]]",
        );

        assert_eq!(found.len(), 2);

        let wikis: Vec<_> = found
            .iter()
            .filter(|l| l.link_type == LinkType::Wikilink)
            .collect();
        let embeds: Vec<_> = found
            .iter()
            .filter(|l| l.link_type == LinkType::Embed)
            .collect();

        assert_eq!(wikis.len(), 1);
        assert_eq!(embeds.len(), 1);
        assert_eq!(wikis[0].target_path, "note");
        assert_eq!(embeds[0].target_path, "image.png");
    }
}
|
||||
|
|
@ -154,6 +154,9 @@ pub struct MediaItem {
|
|||
|
||||
/// Soft delete timestamp. If set, the item is in the trash.
|
||||
pub deleted_at: Option<DateTime<Utc>>,
|
||||
|
||||
/// When markdown links were last extracted from this file.
|
||||
pub links_extracted_at: Option<DateTime<Utc>>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
|
|
@ -486,3 +489,100 @@ impl fmt::Display for ReadingStatus {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ===== Markdown Links (Obsidian-style) =====
|
||||
|
||||
/// Type of markdown link
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum LinkType {
|
||||
/// Wikilink: [[target]] or [[target|display]]
|
||||
Wikilink,
|
||||
/// Markdown link: [text](path)
|
||||
MarkdownLink,
|
||||
/// Embed: ![[target]]
|
||||
Embed,
|
||||
}
|
||||
|
||||
impl fmt::Display for LinkType {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
match self {
|
||||
Self::Wikilink => write!(f, "wikilink"),
|
||||
Self::MarkdownLink => write!(f, "markdown_link"),
|
||||
Self::Embed => write!(f, "embed"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl std::str::FromStr for LinkType {
|
||||
type Err = String;
|
||||
|
||||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||
match s.to_lowercase().as_str() {
|
||||
"wikilink" => Ok(Self::Wikilink),
|
||||
"markdown_link" => Ok(Self::MarkdownLink),
|
||||
"embed" => Ok(Self::Embed),
|
||||
_ => Err(format!("unknown link type: {}", s)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A markdown link extracted from a file
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct MarkdownLink {
|
||||
pub id: Uuid,
|
||||
pub source_media_id: MediaId,
|
||||
/// Raw link target as written in the source (wikilink name or path)
|
||||
pub target_path: String,
|
||||
/// Resolved target media_id (None if unresolved)
|
||||
pub target_media_id: Option<MediaId>,
|
||||
pub link_type: LinkType,
|
||||
/// Display text for the link
|
||||
pub link_text: Option<String>,
|
||||
/// Line number in source file (1-indexed)
|
||||
pub line_number: Option<i32>,
|
||||
/// Surrounding text for backlink preview
|
||||
pub context: Option<String>,
|
||||
pub created_at: DateTime<Utc>,
|
||||
}
|
||||
|
||||
/// Information about a backlink (incoming link)
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct BacklinkInfo {
|
||||
pub link_id: Uuid,
|
||||
pub source_id: MediaId,
|
||||
pub source_title: Option<String>,
|
||||
pub source_path: String,
|
||||
pub link_text: Option<String>,
|
||||
pub line_number: Option<i32>,
|
||||
pub context: Option<String>,
|
||||
pub link_type: LinkType,
|
||||
}
|
||||
|
||||
/// Graph data for visualization
|
||||
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
|
||||
pub struct GraphData {
|
||||
pub nodes: Vec<GraphNode>,
|
||||
pub edges: Vec<GraphEdge>,
|
||||
}
|
||||
|
||||
/// A node in the graph visualization
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct GraphNode {
|
||||
pub id: String,
|
||||
pub label: String,
|
||||
pub title: Option<String>,
|
||||
pub media_type: String,
|
||||
/// Number of outgoing links from this node
|
||||
pub link_count: u32,
|
||||
/// Number of incoming links to this node
|
||||
pub backlink_count: u32,
|
||||
}
|
||||
|
||||
/// An edge (link) in the graph visualization
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct GraphEdge {
|
||||
pub source: String,
|
||||
pub target: String,
|
||||
pub link_type: LinkType,
|
||||
}
|
||||
|
|
|
|||
|
|
@ -789,6 +789,45 @@ pub trait StorageBackend: Send + Sync + 'static {
|
|||
|
||||
/// Count items in trash.
|
||||
async fn count_trash(&self) -> Result<u64>;
|
||||
|
||||
// ===== Markdown Links (Obsidian-style) =====
|
||||
|
||||
/// Save extracted markdown links for a media item.
|
||||
/// This replaces any existing links for the source media.
|
||||
async fn save_markdown_links(
|
||||
&self,
|
||||
media_id: MediaId,
|
||||
links: &[crate::model::MarkdownLink],
|
||||
) -> Result<()>;
|
||||
|
||||
/// Get outgoing links from a media item.
|
||||
async fn get_outgoing_links(&self, media_id: MediaId) -> Result<Vec<crate::model::MarkdownLink>>;
|
||||
|
||||
/// Get backlinks (incoming links) to a media item.
|
||||
async fn get_backlinks(&self, media_id: MediaId) -> Result<Vec<crate::model::BacklinkInfo>>;
|
||||
|
||||
/// Clear all links for a media item.
|
||||
async fn clear_links_for_media(&self, media_id: MediaId) -> Result<()>;
|
||||
|
||||
/// Get graph data for visualization.
|
||||
///
|
||||
/// If `center_id` is provided, returns nodes within `depth` hops of that node.
|
||||
/// If `center_id` is None, returns the entire graph (limited by internal max).
|
||||
async fn get_graph_data(
|
||||
&self,
|
||||
center_id: Option<MediaId>,
|
||||
depth: u32,
|
||||
) -> Result<crate::model::GraphData>;
|
||||
|
||||
/// Resolve unresolved links by matching target_path against media item paths.
|
||||
/// Returns the number of links that were resolved.
|
||||
async fn resolve_links(&self) -> Result<u64>;
|
||||
|
||||
/// Update the links_extracted_at timestamp for a media item.
|
||||
async fn mark_links_extracted(&self, media_id: MediaId) -> Result<()>;
|
||||
|
||||
/// Get count of unresolved links (links where target_media_id is NULL).
|
||||
async fn count_unresolved_links(&self) -> Result<u64>;
|
||||
}
|
||||
|
||||
/// Comprehensive library statistics.
|
||||
|
|
|
|||
|
|
@ -200,6 +200,9 @@ fn row_to_media_item(row: &Row) -> Result<MediaItem> {
|
|||
|
||||
// Trash support
|
||||
deleted_at: row.try_get("deleted_at").ok().flatten(),
|
||||
|
||||
// Markdown links extraction timestamp
|
||||
links_extracted_at: row.try_get("links_extracted_at").ok().flatten(),
|
||||
})
|
||||
}
|
||||
|
||||
|
|
@ -6036,6 +6039,425 @@ impl StorageBackend for PostgresBackend {
|
|||
let count: i64 = row.get(0);
|
||||
Ok(count as u64)
|
||||
}
|
||||
|
||||
// ===== Markdown Links (Obsidian-style) =====
|
||||
|
||||
async fn save_markdown_links(
|
||||
&self,
|
||||
media_id: MediaId,
|
||||
links: &[crate::model::MarkdownLink],
|
||||
) -> Result<()> {
|
||||
let client = self
|
||||
.pool
|
||||
.get()
|
||||
.await
|
||||
.map_err(|e| PinakesError::Database(format!("pool error: {e}")))?;
|
||||
|
||||
let media_id_str = media_id.0.to_string();
|
||||
|
||||
// Delete existing links for this source
|
||||
client
|
||||
.execute(
|
||||
"DELETE FROM markdown_links WHERE source_media_id = $1",
|
||||
&[&media_id_str],
|
||||
)
|
||||
.await
|
||||
.map_err(|e| PinakesError::Database(e.to_string()))?;
|
||||
|
||||
// Insert new links
|
||||
for link in links {
|
||||
let target_media_id = link.target_media_id.map(|id| id.0.to_string());
|
||||
client
|
||||
.execute(
|
||||
"INSERT INTO markdown_links (
|
||||
id, source_media_id, target_path, target_media_id,
|
||||
link_type, link_text, line_number, context, created_at
|
||||
) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9)",
|
||||
&[
|
||||
&link.id.to_string(),
|
||||
&media_id_str,
|
||||
&link.target_path,
|
||||
&target_media_id,
|
||||
&link.link_type.to_string(),
|
||||
&link.link_text,
|
||||
&link.line_number,
|
||||
&link.context,
|
||||
&link.created_at,
|
||||
],
|
||||
)
|
||||
.await
|
||||
.map_err(|e| PinakesError::Database(e.to_string()))?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn get_outgoing_links(&self, media_id: MediaId) -> Result<Vec<crate::model::MarkdownLink>> {
|
||||
let client = self
|
||||
.pool
|
||||
.get()
|
||||
.await
|
||||
.map_err(|e| PinakesError::Database(format!("pool error: {e}")))?;
|
||||
|
||||
let media_id_str = media_id.0.to_string();
|
||||
|
||||
let rows = client
|
||||
.query(
|
||||
"SELECT id, source_media_id, target_path, target_media_id,
|
||||
link_type, link_text, line_number, context, created_at
|
||||
FROM markdown_links
|
||||
WHERE source_media_id = $1
|
||||
ORDER BY line_number",
|
||||
&[&media_id_str],
|
||||
)
|
||||
.await
|
||||
.map_err(|e| PinakesError::Database(e.to_string()))?;
|
||||
|
||||
let mut links = Vec::new();
|
||||
for row in rows {
|
||||
links.push(row_to_markdown_link(&row)?);
|
||||
}
|
||||
|
||||
Ok(links)
|
||||
}
|
||||
|
||||
async fn get_backlinks(&self, media_id: MediaId) -> Result<Vec<crate::model::BacklinkInfo>> {
|
||||
let client = self
|
||||
.pool
|
||||
.get()
|
||||
.await
|
||||
.map_err(|e| PinakesError::Database(format!("pool error: {e}")))?;
|
||||
|
||||
let media_id_str = media_id.0.to_string();
|
||||
|
||||
let rows = client
|
||||
.query(
|
||||
"SELECT l.id, l.source_media_id, m.title, m.path,
|
||||
l.link_text, l.line_number, l.context, l.link_type
|
||||
FROM markdown_links l
|
||||
JOIN media_items m ON l.source_media_id = m.id
|
||||
WHERE l.target_media_id = $1
|
||||
ORDER BY m.title, l.line_number",
|
||||
&[&media_id_str],
|
||||
)
|
||||
.await
|
||||
.map_err(|e| PinakesError::Database(e.to_string()))?;
|
||||
|
||||
let mut backlinks = Vec::new();
|
||||
for row in rows {
|
||||
let link_id_str: String = row.get(0);
|
||||
let source_id_str: String = row.get(1);
|
||||
let source_title: Option<String> = row.get(2);
|
||||
let source_path: String = row.get(3);
|
||||
let link_text: Option<String> = row.get(4);
|
||||
let line_number: Option<i32> = row.get(5);
|
||||
let context: Option<String> = row.get(6);
|
||||
let link_type_str: String = row.get(7);
|
||||
|
||||
backlinks.push(crate::model::BacklinkInfo {
|
||||
link_id: Uuid::parse_str(&link_id_str)
|
||||
.map_err(|e| PinakesError::Database(e.to_string()))?,
|
||||
source_id: MediaId(
|
||||
Uuid::parse_str(&source_id_str)
|
||||
.map_err(|e| PinakesError::Database(e.to_string()))?,
|
||||
),
|
||||
source_title,
|
||||
source_path,
|
||||
link_text,
|
||||
line_number,
|
||||
context,
|
||||
link_type: link_type_str
|
||||
.parse()
|
||||
.unwrap_or(crate::model::LinkType::Wikilink),
|
||||
});
|
||||
}
|
||||
|
||||
Ok(backlinks)
|
||||
}
|
||||
|
||||
async fn clear_links_for_media(&self, media_id: MediaId) -> Result<()> {
|
||||
let client = self
|
||||
.pool
|
||||
.get()
|
||||
.await
|
||||
.map_err(|e| PinakesError::Database(format!("pool error: {e}")))?;
|
||||
|
||||
let media_id_str = media_id.0.to_string();
|
||||
|
||||
client
|
||||
.execute(
|
||||
"DELETE FROM markdown_links WHERE source_media_id = $1",
|
||||
&[&media_id_str],
|
||||
)
|
||||
.await
|
||||
.map_err(|e| PinakesError::Database(e.to_string()))?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn get_graph_data(
|
||||
&self,
|
||||
center_id: Option<MediaId>,
|
||||
depth: u32,
|
||||
) -> Result<crate::model::GraphData> {
|
||||
let client = self
|
||||
.pool
|
||||
.get()
|
||||
.await
|
||||
.map_err(|e| PinakesError::Database(format!("pool error: {e}")))?;
|
||||
|
||||
let depth = depth.min(5); // Limit depth
|
||||
let mut nodes = Vec::new();
|
||||
let mut edges = Vec::new();
|
||||
let mut node_ids: std::collections::HashSet<String> = std::collections::HashSet::new();
|
||||
|
||||
if let Some(center) = center_id {
|
||||
// BFS to find connected nodes within depth
|
||||
let mut frontier = vec![center.0.to_string()];
|
||||
let mut visited = std::collections::HashSet::new();
|
||||
visited.insert(center.0.to_string());
|
||||
|
||||
for _ in 0..depth {
|
||||
if frontier.is_empty() {
|
||||
break;
|
||||
}
|
||||
let mut next_frontier = Vec::new();
|
||||
|
||||
for node_id in &frontier {
|
||||
// Get outgoing links
|
||||
let rows = client
|
||||
.query(
|
||||
"SELECT target_media_id FROM markdown_links
|
||||
WHERE source_media_id = $1 AND target_media_id IS NOT NULL",
|
||||
&[node_id],
|
||||
)
|
||||
.await
|
||||
.map_err(|e| PinakesError::Database(e.to_string()))?;
|
||||
|
||||
for row in rows {
|
||||
let id: String = row.get(0);
|
||||
if !visited.contains(&id) {
|
||||
visited.insert(id.clone());
|
||||
next_frontier.push(id);
|
||||
}
|
||||
}
|
||||
|
||||
// Get incoming links
|
||||
let rows = client
|
||||
.query(
|
||||
"SELECT source_media_id FROM markdown_links
|
||||
WHERE target_media_id = $1",
|
||||
&[node_id],
|
||||
)
|
||||
.await
|
||||
.map_err(|e| PinakesError::Database(e.to_string()))?;
|
||||
|
||||
for row in rows {
|
||||
let id: String = row.get(0);
|
||||
if !visited.contains(&id) {
|
||||
visited.insert(id.clone());
|
||||
next_frontier.push(id);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
frontier = next_frontier;
|
||||
}
|
||||
|
||||
node_ids = visited;
|
||||
} else {
|
||||
// Get all markdown files with links (limit to 500)
|
||||
let rows = client
|
||||
.query(
|
||||
"SELECT DISTINCT id FROM media_items
|
||||
WHERE media_type = 'markdown' AND deleted_at IS NULL
|
||||
LIMIT 500",
|
||||
&[],
|
||||
)
|
||||
.await
|
||||
.map_err(|e| PinakesError::Database(e.to_string()))?;
|
||||
|
||||
for row in rows {
|
||||
let id: String = row.get(0);
|
||||
node_ids.insert(id);
|
||||
}
|
||||
}
|
||||
|
||||
// Build nodes with metadata
|
||||
for node_id in &node_ids {
|
||||
let row = client
|
||||
.query_opt(
|
||||
"SELECT id, COALESCE(title, file_name) as label, title, media_type
|
||||
FROM media_items WHERE id = $1",
|
||||
&[node_id],
|
||||
)
|
||||
.await
|
||||
.map_err(|e| PinakesError::Database(e.to_string()))?;
|
||||
|
||||
if let Some(row) = row {
|
||||
let id: String = row.get(0);
|
||||
let label: String = row.get(1);
|
||||
let title: Option<String> = row.get(2);
|
||||
let media_type: String = row.get(3);
|
||||
|
||||
// Count outgoing links
|
||||
let link_count_row = client
|
||||
.query_one(
|
||||
"SELECT COUNT(*) FROM markdown_links WHERE source_media_id = $1",
|
||||
&[&id],
|
||||
)
|
||||
.await
|
||||
.map_err(|e| PinakesError::Database(e.to_string()))?;
|
||||
let link_count: i64 = link_count_row.get(0);
|
||||
|
||||
// Count incoming links
|
||||
let backlink_count_row = client
|
||||
.query_one(
|
||||
"SELECT COUNT(*) FROM markdown_links WHERE target_media_id = $1",
|
||||
&[&id],
|
||||
)
|
||||
.await
|
||||
.map_err(|e| PinakesError::Database(e.to_string()))?;
|
||||
let backlink_count: i64 = backlink_count_row.get(0);
|
||||
|
||||
nodes.push(crate::model::GraphNode {
|
||||
id: id.clone(),
|
||||
label,
|
||||
title,
|
||||
media_type,
|
||||
link_count: link_count as u32,
|
||||
backlink_count: backlink_count as u32,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// Build edges
|
||||
for node_id in &node_ids {
|
||||
let rows = client
|
||||
.query(
|
||||
"SELECT source_media_id, target_media_id, link_type
|
||||
FROM markdown_links
|
||||
WHERE source_media_id = $1 AND target_media_id IS NOT NULL",
|
||||
&[node_id],
|
||||
)
|
||||
.await
|
||||
.map_err(|e| PinakesError::Database(e.to_string()))?;
|
||||
|
||||
for row in rows {
|
||||
let source: String = row.get(0);
|
||||
let target: String = row.get(1);
|
||||
let link_type_str: String = row.get(2);
|
||||
|
||||
if node_ids.contains(&target) {
|
||||
edges.push(crate::model::GraphEdge {
|
||||
source,
|
||||
target,
|
||||
link_type: link_type_str
|
||||
.parse()
|
||||
.unwrap_or(crate::model::LinkType::Wikilink),
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(crate::model::GraphData { nodes, edges })
|
||||
}
|
||||
|
||||
    /// Resolve `markdown_links` rows whose `target_media_id` is still NULL by
    /// matching their `target_path` against existing `media_items`.
    ///
    /// Two passes run in order:
    /// 1. exact `path` match;
    /// 2. Obsidian-style `file_name` match, tried with and without a `.md`
    ///    extension.
    ///
    /// Returns the total number of link rows updated across both passes.
    ///
    /// # Errors
    /// Returns [`PinakesError::Database`] on pool checkout or query failure.
    async fn resolve_links(&self) -> Result<u64> {
        let client = self
            .pool
            .get()
            .await
            .map_err(|e| PinakesError::Database(format!("pool error: {e}")))?;

        // Strategy 1: Exact path match
        // The EXISTS guard keeps unmatched rows untouched, so the correlated
        // subquery can never overwrite target_media_id with NULL.
        let result1 = client
            .execute(
                "UPDATE markdown_links
                 SET target_media_id = (
                     SELECT id FROM media_items
                     WHERE path = markdown_links.target_path
                     AND deleted_at IS NULL
                     LIMIT 1
                 )
                 WHERE target_media_id IS NULL
                 AND EXISTS (
                     SELECT 1 FROM media_items
                     WHERE path = markdown_links.target_path
                     AND deleted_at IS NULL
                 )",
                &[],
            )
            .await
            .map_err(|e| PinakesError::Database(e.to_string()))?;

        // Strategy 2: Filename match
        // NOTE(review): REPLACE(file_name, '.md', '') strips '.md' anywhere in
        // the name, not only as a suffix (e.g. "a.md.bak" -> "a.bak"). This
        // mirrors the SQLite backend; any fix should change both together.
        let result2 = client
            .execute(
                "UPDATE markdown_links
                 SET target_media_id = (
                     SELECT id FROM media_items
                     WHERE (file_name = markdown_links.target_path
                     OR file_name = markdown_links.target_path || '.md'
                     OR REPLACE(file_name, '.md', '') = markdown_links.target_path)
                     AND deleted_at IS NULL
                     LIMIT 1
                 )
                 WHERE target_media_id IS NULL
                 AND EXISTS (
                     SELECT 1 FROM media_items
                     WHERE (file_name = markdown_links.target_path
                     OR file_name = markdown_links.target_path || '.md'
                     OR REPLACE(file_name, '.md', '') = markdown_links.target_path)
                     AND deleted_at IS NULL
                 )",
                &[],
            )
            .await
            .map_err(|e| PinakesError::Database(e.to_string()))?;

        Ok(result1 + result2)
    }
|
||||
|
||||
async fn mark_links_extracted(&self, media_id: MediaId) -> Result<()> {
|
||||
let client = self
|
||||
.pool
|
||||
.get()
|
||||
.await
|
||||
.map_err(|e| PinakesError::Database(format!("pool error: {e}")))?;
|
||||
|
||||
let media_id_str = media_id.0.to_string();
|
||||
let now = chrono::Utc::now();
|
||||
|
||||
client
|
||||
.execute(
|
||||
"UPDATE media_items SET links_extracted_at = $1 WHERE id = $2",
|
||||
&[&now, &media_id_str],
|
||||
)
|
||||
.await
|
||||
.map_err(|e| PinakesError::Database(e.to_string()))?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn count_unresolved_links(&self) -> Result<u64> {
|
||||
let client = self
|
||||
.pool
|
||||
.get()
|
||||
.await
|
||||
.map_err(|e| PinakesError::Database(format!("pool error: {e}")))?;
|
||||
|
||||
let row = client
|
||||
.query_one(
|
||||
"SELECT COUNT(*) FROM markdown_links WHERE target_media_id IS NULL",
|
||||
&[],
|
||||
)
|
||||
.await
|
||||
.map_err(|e| PinakesError::Database(e.to_string()))?;
|
||||
|
||||
let count: i64 = row.get(0);
|
||||
Ok(count as u64)
|
||||
}
|
||||
}
|
||||
|
||||
impl PostgresBackend {
|
||||
|
|
@ -6329,6 +6751,37 @@ fn find_first_fts_param(query: &SearchQuery) -> i32 {
|
|||
find_inner(query, &mut offset).unwrap_or(1)
|
||||
}
|
||||
|
||||
// Helper function to parse a markdown link row
|
||||
// Helper function to parse a markdown link row.
//
// Column-order contract: callers' SELECT lists must produce exactly
// `id, source_media_id, target_path, target_media_id, link_type, link_text,
// line_number, context, created_at` in that order.
//
// Lenient fallbacks (deliberate, keeps one bad row from failing a whole query):
// - an unparseable target_media_id UUID is treated as unresolved (`None`);
// - an unrecognised link_type string falls back to `LinkType::Wikilink`.
// Malformed id / source_media_id UUIDs DO fail, surfaced as a Database error.
fn row_to_markdown_link(row: &Row) -> Result<crate::model::MarkdownLink> {
    let id_str: String = row.get(0);
    let source_id_str: String = row.get(1);
    let target_path: String = row.get(2);
    let target_id: Option<String> = row.get(3);
    let link_type_str: String = row.get(4);
    let link_text: Option<String> = row.get(5);
    let line_number: Option<i32> = row.get(6);
    let context: Option<String> = row.get(7);
    let created_at: chrono::DateTime<Utc> = row.get(8);

    Ok(crate::model::MarkdownLink {
        id: Uuid::parse_str(&id_str).map_err(|e| PinakesError::Database(e.to_string()))?,
        source_media_id: MediaId(
            Uuid::parse_str(&source_id_str).map_err(|e| PinakesError::Database(e.to_string()))?,
        ),
        target_path,
        // Silent fallback: invalid UUID text -> unresolved link.
        target_media_id: target_id
            .and_then(|s| Uuid::parse_str(&s).ok())
            .map(MediaId),
        // Silent fallback: unknown type string -> Wikilink.
        link_type: link_type_str
            .parse()
            .unwrap_or(crate::model::LinkType::Wikilink),
        link_text,
        line_number,
        context,
        created_at,
    })
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
|
|
|||
|
|
@ -160,6 +160,14 @@ fn row_to_media_item(row: &Row) -> rusqlite::Result<MediaItem> {
|
|||
.flatten()
|
||||
.and_then(|s| DateTime::parse_from_rfc3339(&s).ok())
|
||||
.map(|dt| dt.with_timezone(&Utc)),
|
||||
|
||||
// Markdown links extraction timestamp
|
||||
links_extracted_at: row
|
||||
.get::<_, Option<String>>("links_extracted_at")
|
||||
.ok()
|
||||
.flatten()
|
||||
.and_then(|s| DateTime::parse_from_rfc3339(&s).ok())
|
||||
.map(|dt| dt.with_timezone(&Utc)),
|
||||
})
|
||||
}
|
||||
|
||||
|
|
@ -6379,6 +6387,428 @@ impl StorageBackend for SqliteBackend {
|
|||
|
||||
Ok(count)
|
||||
}
|
||||
|
||||
// ===== Markdown Links (Obsidian-style) =====
|
||||
|
||||
async fn save_markdown_links(
|
||||
&self,
|
||||
media_id: MediaId,
|
||||
links: &[crate::model::MarkdownLink],
|
||||
) -> Result<()> {
|
||||
let conn = self.conn.clone();
|
||||
let media_id_str = media_id.0.to_string();
|
||||
let links: Vec<_> = links.to_vec();
|
||||
|
||||
tokio::task::spawn_blocking(move || {
|
||||
let conn = conn.lock().unwrap();
|
||||
|
||||
// Delete existing links for this source
|
||||
conn.execute(
|
||||
"DELETE FROM markdown_links WHERE source_media_id = ?1",
|
||||
[&media_id_str],
|
||||
)?;
|
||||
|
||||
// Insert new links
|
||||
let mut stmt = conn.prepare(
|
||||
"INSERT INTO markdown_links (
|
||||
id, source_media_id, target_path, target_media_id,
|
||||
link_type, link_text, line_number, context, created_at
|
||||
) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9)"
|
||||
)?;
|
||||
|
||||
for link in &links {
|
||||
stmt.execute(params![
|
||||
link.id.to_string(),
|
||||
media_id_str,
|
||||
link.target_path,
|
||||
link.target_media_id.map(|id| id.0.to_string()),
|
||||
link.link_type.to_string(),
|
||||
link.link_text,
|
||||
link.line_number,
|
||||
link.context,
|
||||
link.created_at.to_rfc3339(),
|
||||
])?;
|
||||
}
|
||||
|
||||
Ok::<_, rusqlite::Error>(())
|
||||
})
|
||||
.await
|
||||
.map_err(|e| PinakesError::Database(e.to_string()))??;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn get_outgoing_links(&self, media_id: MediaId) -> Result<Vec<crate::model::MarkdownLink>> {
|
||||
let conn = self.conn.clone();
|
||||
let media_id_str = media_id.0.to_string();
|
||||
|
||||
let links = tokio::task::spawn_blocking(move || {
|
||||
let conn = conn.lock().unwrap();
|
||||
let mut stmt = conn.prepare(
|
||||
"SELECT id, source_media_id, target_path, target_media_id,
|
||||
link_type, link_text, line_number, context, created_at
|
||||
FROM markdown_links
|
||||
WHERE source_media_id = ?1
|
||||
ORDER BY line_number"
|
||||
)?;
|
||||
|
||||
let rows = stmt.query_map([&media_id_str], |row| {
|
||||
row_to_markdown_link(row)
|
||||
})?;
|
||||
|
||||
let mut links = Vec::new();
|
||||
for row in rows {
|
||||
links.push(row?);
|
||||
}
|
||||
Ok::<_, rusqlite::Error>(links)
|
||||
})
|
||||
.await
|
||||
.map_err(|e| PinakesError::Database(e.to_string()))??;
|
||||
|
||||
Ok(links)
|
||||
}
|
||||
|
||||
    /// Fetch all links pointing *at* `media_id`, joined with each linking
    /// document's title and path, ordered by source title then line number.
    ///
    /// # Errors
    /// Returns [`PinakesError::Database`] if the blocking task or the query
    /// fails (including malformed UUIDs in stored rows).
    async fn get_backlinks(&self, media_id: MediaId) -> Result<Vec<crate::model::BacklinkInfo>> {
        let conn = self.conn.clone();
        let media_id_str = media_id.0.to_string();

        let backlinks = tokio::task::spawn_blocking(move || {
            let conn = conn.lock().unwrap();
            // Inner join: links whose source media row no longer exists are
            // dropped from the result rather than surfaced as dangling.
            let mut stmt = conn.prepare(
                "SELECT l.id, l.source_media_id, m.title, m.path,
                        l.link_text, l.line_number, l.context, l.link_type
                 FROM markdown_links l
                 JOIN media_items m ON l.source_media_id = m.id
                 WHERE l.target_media_id = ?1
                 ORDER BY m.title, l.line_number"
            )?;

            let rows = stmt.query_map([&media_id_str], |row| {
                // Column order matches the SELECT list above.
                let link_id_str: String = row.get(0)?;
                let source_id_str: String = row.get(1)?;
                let source_title: Option<String> = row.get(2)?;
                let source_path: String = row.get(3)?;
                let link_text: Option<String> = row.get(4)?;
                let line_number: Option<i32> = row.get(5)?;
                let context: Option<String> = row.get(6)?;
                let link_type_str: String = row.get(7)?;

                Ok(crate::model::BacklinkInfo {
                    link_id: parse_uuid(&link_id_str)?,
                    source_id: MediaId(parse_uuid(&source_id_str)?),
                    source_title,
                    source_path,
                    link_text,
                    line_number,
                    context,
                    // Unknown link_type strings fall back to Wikilink.
                    link_type: link_type_str.parse().unwrap_or(crate::model::LinkType::Wikilink),
                })
            })?;

            let mut backlinks = Vec::new();
            for row in rows {
                backlinks.push(row?);
            }
            Ok::<_, rusqlite::Error>(backlinks)
        })
        .await
        .map_err(|e| PinakesError::Database(e.to_string()))??;

        Ok(backlinks)
    }
|
||||
|
||||
async fn clear_links_for_media(&self, media_id: MediaId) -> Result<()> {
|
||||
let conn = self.conn.clone();
|
||||
let media_id_str = media_id.0.to_string();
|
||||
|
||||
tokio::task::spawn_blocking(move || {
|
||||
let conn = conn.lock().unwrap();
|
||||
conn.execute(
|
||||
"DELETE FROM markdown_links WHERE source_media_id = ?1",
|
||||
[&media_id_str],
|
||||
)?;
|
||||
Ok::<_, rusqlite::Error>(())
|
||||
})
|
||||
.await
|
||||
.map_err(|e| PinakesError::Database(e.to_string()))??;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn get_graph_data(
|
||||
&self,
|
||||
center_id: Option<MediaId>,
|
||||
depth: u32,
|
||||
) -> Result<crate::model::GraphData> {
|
||||
let conn = self.conn.clone();
|
||||
let center_id_str = center_id.map(|id| id.0.to_string());
|
||||
let depth = depth.min(5); // Limit depth to prevent huge queries
|
||||
|
||||
let graph_data = tokio::task::spawn_blocking(move || {
|
||||
let conn = conn.lock().unwrap();
|
||||
let mut nodes = Vec::new();
|
||||
let mut edges = Vec::new();
|
||||
let mut node_ids = std::collections::HashSet::new();
|
||||
|
||||
// Get nodes - either all markdown files or those connected to center
|
||||
if let Some(center_id) = center_id_str {
|
||||
// BFS to find connected nodes within depth
|
||||
let mut frontier = vec![center_id.clone()];
|
||||
let mut visited = std::collections::HashSet::new();
|
||||
visited.insert(center_id.clone());
|
||||
|
||||
for _ in 0..depth {
|
||||
let mut next_frontier = Vec::new();
|
||||
|
||||
for node_id in &frontier {
|
||||
// Get outgoing links
|
||||
let mut stmt = conn.prepare(
|
||||
"SELECT target_media_id FROM markdown_links
|
||||
WHERE source_media_id = ?1 AND target_media_id IS NOT NULL"
|
||||
)?;
|
||||
let rows = stmt.query_map([node_id], |row| {
|
||||
let id: String = row.get(0)?;
|
||||
Ok(id)
|
||||
})?;
|
||||
for row in rows {
|
||||
let id = row?;
|
||||
if !visited.contains(&id) {
|
||||
visited.insert(id.clone());
|
||||
next_frontier.push(id);
|
||||
}
|
||||
}
|
||||
|
||||
// Get incoming links
|
||||
let mut stmt = conn.prepare(
|
||||
"SELECT source_media_id FROM markdown_links
|
||||
WHERE target_media_id = ?1"
|
||||
)?;
|
||||
let rows = stmt.query_map([node_id], |row| {
|
||||
let id: String = row.get(0)?;
|
||||
Ok(id)
|
||||
})?;
|
||||
for row in rows {
|
||||
let id = row?;
|
||||
if !visited.contains(&id) {
|
||||
visited.insert(id.clone());
|
||||
next_frontier.push(id);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
frontier = next_frontier;
|
||||
}
|
||||
|
||||
node_ids = visited;
|
||||
} else {
|
||||
// Get all markdown files with links (limit to 500 for performance)
|
||||
let mut stmt = conn.prepare(
|
||||
"SELECT DISTINCT id FROM media_items
|
||||
WHERE media_type = 'markdown' AND deleted_at IS NULL
|
||||
LIMIT 500"
|
||||
)?;
|
||||
let rows = stmt.query_map([], |row| {
|
||||
let id: String = row.get(0)?;
|
||||
Ok(id)
|
||||
})?;
|
||||
for row in rows {
|
||||
node_ids.insert(row?);
|
||||
}
|
||||
}
|
||||
|
||||
// Build nodes with metadata
|
||||
for node_id in &node_ids {
|
||||
let mut stmt = conn.prepare(
|
||||
"SELECT id, COALESCE(title, file_name) as label, title, media_type
|
||||
FROM media_items WHERE id = ?1"
|
||||
)?;
|
||||
if let Ok((id, label, title, media_type)) = stmt.query_row([node_id], |row| {
|
||||
Ok((
|
||||
row.get::<_, String>(0)?,
|
||||
row.get::<_, String>(1)?,
|
||||
row.get::<_, Option<String>>(2)?,
|
||||
row.get::<_, String>(3)?,
|
||||
))
|
||||
}) {
|
||||
// Count outgoing links
|
||||
let link_count: i64 = conn.query_row(
|
||||
"SELECT COUNT(*) FROM markdown_links WHERE source_media_id = ?1",
|
||||
[&id],
|
||||
|row| row.get(0),
|
||||
)?;
|
||||
|
||||
// Count incoming links
|
||||
let backlink_count: i64 = conn.query_row(
|
||||
"SELECT COUNT(*) FROM markdown_links WHERE target_media_id = ?1",
|
||||
[&id],
|
||||
|row| row.get(0),
|
||||
)?;
|
||||
|
||||
nodes.push(crate::model::GraphNode {
|
||||
id: id.clone(),
|
||||
label,
|
||||
title,
|
||||
media_type,
|
||||
link_count: link_count as u32,
|
||||
backlink_count: backlink_count as u32,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// Build edges
|
||||
for node_id in &node_ids {
|
||||
let mut stmt = conn.prepare(
|
||||
"SELECT source_media_id, target_media_id, link_type
|
||||
FROM markdown_links
|
||||
WHERE source_media_id = ?1 AND target_media_id IS NOT NULL"
|
||||
)?;
|
||||
let rows = stmt.query_map([node_id], |row| {
|
||||
let source: String = row.get(0)?;
|
||||
let target: String = row.get(1)?;
|
||||
let link_type_str: String = row.get(2)?;
|
||||
Ok((source, target, link_type_str))
|
||||
})?;
|
||||
for row in rows {
|
||||
let (source, target, link_type_str) = row?;
|
||||
if node_ids.contains(&target) {
|
||||
edges.push(crate::model::GraphEdge {
|
||||
source,
|
||||
target,
|
||||
link_type: link_type_str.parse().unwrap_or(crate::model::LinkType::Wikilink),
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok::<_, rusqlite::Error>(crate::model::GraphData { nodes, edges })
|
||||
})
|
||||
.await
|
||||
.map_err(|e| PinakesError::Database(e.to_string()))??;
|
||||
|
||||
Ok(graph_data)
|
||||
}
|
||||
|
||||
    /// Resolve `markdown_links` rows whose `target_media_id` is still NULL by
    /// matching their `target_path` against existing `media_items`.
    ///
    /// Two passes run in order: exact `path` match, then Obsidian-style
    /// `file_name` match (with or without a `.md` extension). Returns the
    /// total number of link rows updated.
    ///
    /// # Errors
    /// Returns [`PinakesError::Database`] if the blocking task or either
    /// UPDATE fails.
    async fn resolve_links(&self) -> Result<u64> {
        let conn = self.conn.clone();

        let count = tokio::task::spawn_blocking(move || {
            let conn = conn.lock().unwrap();

            // Find unresolved links and try to resolve them
            // Strategy 1: Exact path match
            // The EXISTS guard keeps unmatched rows untouched, so the
            // correlated subquery can never write NULL back.
            let updated1 = conn.execute(
                "UPDATE markdown_links
                 SET target_media_id = (
                     SELECT id FROM media_items
                     WHERE path = markdown_links.target_path
                     AND deleted_at IS NULL
                     LIMIT 1
                 )
                 WHERE target_media_id IS NULL
                 AND EXISTS (
                     SELECT 1 FROM media_items
                     WHERE path = markdown_links.target_path
                     AND deleted_at IS NULL
                 )",
                [],
            )?;

            // Strategy 2: Filename match (Obsidian-style)
            // Match target_path to file_name (with or without .md extension)
            // NOTE(review): REPLACE(file_name, '.md', '') strips '.md'
            // anywhere in the name, not only as a suffix; mirrors the
            // Postgres backend, so any fix should change both.
            let updated2 = conn.execute(
                "UPDATE markdown_links
                 SET target_media_id = (
                     SELECT id FROM media_items
                     WHERE (file_name = markdown_links.target_path
                     OR file_name = markdown_links.target_path || '.md'
                     OR REPLACE(file_name, '.md', '') = markdown_links.target_path)
                     AND deleted_at IS NULL
                     LIMIT 1
                 )
                 WHERE target_media_id IS NULL
                 AND EXISTS (
                     SELECT 1 FROM media_items
                     WHERE (file_name = markdown_links.target_path
                     OR file_name = markdown_links.target_path || '.md'
                     OR REPLACE(file_name, '.md', '') = markdown_links.target_path)
                     AND deleted_at IS NULL
                 )",
                [],
            )?;

            Ok::<_, rusqlite::Error>((updated1 + updated2) as u64)
        })
        .await
        .map_err(|e| PinakesError::Database(e.to_string()))??;

        Ok(count)
    }
|
||||
|
||||
async fn mark_links_extracted(&self, media_id: MediaId) -> Result<()> {
|
||||
let conn = self.conn.clone();
|
||||
let media_id_str = media_id.0.to_string();
|
||||
let now = chrono::Utc::now().to_rfc3339();
|
||||
|
||||
tokio::task::spawn_blocking(move || {
|
||||
let conn = conn.lock().unwrap();
|
||||
conn.execute(
|
||||
"UPDATE media_items SET links_extracted_at = ?1 WHERE id = ?2",
|
||||
params![now, media_id_str],
|
||||
)?;
|
||||
Ok::<_, rusqlite::Error>(())
|
||||
})
|
||||
.await
|
||||
.map_err(|e| PinakesError::Database(e.to_string()))??;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn count_unresolved_links(&self) -> Result<u64> {
|
||||
let conn = self.conn.clone();
|
||||
|
||||
let count = tokio::task::spawn_blocking(move || {
|
||||
let conn = conn.lock().unwrap();
|
||||
let count: i64 = conn.query_row(
|
||||
"SELECT COUNT(*) FROM markdown_links WHERE target_media_id IS NULL",
|
||||
[],
|
||||
|row| row.get(0),
|
||||
)?;
|
||||
Ok::<_, rusqlite::Error>(count as u64)
|
||||
})
|
||||
.await
|
||||
.map_err(|e| PinakesError::Database(e.to_string()))??;
|
||||
|
||||
Ok(count)
|
||||
}
|
||||
}
|
||||
|
||||
// Helper function to parse a markdown link row
|
||||
// Helper function to parse a markdown link row.
//
// Column-order contract: callers' SELECT lists must produce exactly
// `id, source_media_id, target_path, target_media_id, link_type, link_text,
// line_number, context, created_at` in that order.
//
// Lenient fallbacks (deliberate): an unparseable target_media_id UUID is
// treated as unresolved (`None`); an unknown link_type string falls back to
// `LinkType::Wikilink`. Malformed id / source_media_id UUIDs DO fail via
// `parse_uuid`, surfacing as a rusqlite error.
fn row_to_markdown_link(row: &Row) -> rusqlite::Result<crate::model::MarkdownLink> {
    let id_str: String = row.get(0)?;
    let source_id_str: String = row.get(1)?;
    let target_path: String = row.get(2)?;
    let target_id: Option<String> = row.get(3)?;
    let link_type_str: String = row.get(4)?;
    let link_text: Option<String> = row.get(5)?;
    let line_number: Option<i32> = row.get(6)?;
    let context: Option<String> = row.get(7)?;
    // Stored as RFC 3339 text; decoded by parse_datetime below.
    let created_at_str: String = row.get(8)?;

    Ok(crate::model::MarkdownLink {
        id: parse_uuid(&id_str)?,
        source_media_id: MediaId(parse_uuid(&source_id_str)?),
        target_path,
        // Silent fallback: invalid UUID text -> unresolved link.
        target_media_id: target_id
            .and_then(|s| Uuid::parse_str(&s).ok())
            .map(MediaId),
        // Silent fallback: unknown type string -> Wikilink.
        link_type: link_type_str
            .parse()
            .unwrap_or(crate::model::LinkType::Wikilink),
        link_text,
        line_number,
        context,
        created_at: parse_datetime(&created_at_str),
    })
}
|
||||
|
||||
// Helper function to parse a share row
|
||||
|
|
|
|||
|
|
@ -98,6 +98,7 @@ pub async fn process_upload<R: AsyncRead + Unpin>(
|
|||
created_at: now,
|
||||
updated_at: now,
|
||||
deleted_at: None,
|
||||
links_extracted_at: None,
|
||||
};
|
||||
|
||||
// Store the media item
|
||||
|
|
|
|||
|
|
@ -57,6 +57,7 @@ pub fn make_test_media(hash: &str) -> MediaItem {
|
|||
created_at: now,
|
||||
updated_at: now,
|
||||
deleted_at: None,
|
||||
links_extracted_at: None,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -93,5 +94,6 @@ pub fn create_test_media_item(path: PathBuf, hash: &str) -> MediaItem {
|
|||
created_at: now,
|
||||
updated_at: now,
|
||||
deleted_at: None,
|
||||
links_extracted_at: None,
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -47,6 +47,7 @@ async fn test_media_crud() {
|
|||
created_at: now,
|
||||
updated_at: now,
|
||||
deleted_at: None,
|
||||
links_extracted_at: None,
|
||||
};
|
||||
|
||||
// Insert
|
||||
|
|
@ -138,6 +139,7 @@ async fn test_tags() {
|
|||
created_at: now,
|
||||
updated_at: now,
|
||||
deleted_at: None,
|
||||
links_extracted_at: None,
|
||||
};
|
||||
storage.insert_media(&item).await.unwrap();
|
||||
storage.tag_media(id, parent.id).await.unwrap();
|
||||
|
|
@ -203,6 +205,7 @@ async fn test_collections() {
|
|||
created_at: now,
|
||||
updated_at: now,
|
||||
deleted_at: None,
|
||||
links_extracted_at: None,
|
||||
};
|
||||
storage.insert_media(&item).await.unwrap();
|
||||
|
||||
|
|
@ -263,6 +266,7 @@ async fn test_custom_fields() {
|
|||
created_at: now,
|
||||
updated_at: now,
|
||||
deleted_at: None,
|
||||
links_extracted_at: None,
|
||||
};
|
||||
storage.insert_media(&item).await.unwrap();
|
||||
|
||||
|
|
@ -342,6 +346,7 @@ async fn test_search() {
|
|||
created_at: now,
|
||||
updated_at: now,
|
||||
deleted_at: None,
|
||||
links_extracted_at: None,
|
||||
};
|
||||
storage.insert_media(&item).await.unwrap();
|
||||
}
|
||||
|
|
@ -486,6 +491,7 @@ async fn test_library_statistics_with_data() {
|
|||
created_at: now,
|
||||
updated_at: now,
|
||||
deleted_at: None,
|
||||
links_extracted_at: None,
|
||||
};
|
||||
storage.insert_media(&item).await.unwrap();
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue