various: markdown improvements

Signed-off-by: NotAShelf <raf@notashelf.dev>
Change-Id: I81fda8247814da19eed1e76dbe97bd5b6a6a6964
This commit is contained in:
raf 2026-02-05 15:39:05 +03:00
commit 80a8b5c7ca
Signed by: NotAShelf
GPG key ID: 29D95B64378DB4BF
23 changed files with 3458 additions and 30 deletions

View file

@ -6,7 +6,8 @@ use tracing::info;
use crate::audit;
use crate::error::{PinakesError, Result};
use crate::hash::compute_file_hash;
use crate::media_type::MediaType;
use crate::links;
use crate::media_type::{BuiltinMediaType, MediaType};
use crate::metadata;
use crate::model::*;
use crate::storage::DynStorageBackend;
@ -168,6 +169,9 @@ pub async fn import_file_with_options(
None
};
// Check if this is a markdown file for link extraction
let is_markdown = media_type == MediaType::Builtin(BuiltinMediaType::Markdown);
let item = MediaItem {
id: media_id,
path: path.clone(),
@ -206,10 +210,25 @@ pub async fn import_file_with_options(
// New items are not deleted
deleted_at: None,
// Links will be extracted separately
links_extracted_at: None,
};
storage.insert_media(&item).await?;
// Extract and store markdown links for markdown files
if is_markdown {
if let Err(e) = extract_and_store_links(storage, media_id, &path).await {
tracing::warn!(
media_id = %media_id,
path = %path.display(),
error = %e,
"failed to extract markdown links"
);
}
}
// Store extracted extra metadata as custom fields
for (key, value) in &extracted.extra {
let field = CustomField {
@ -372,3 +391,44 @@ pub async fn import_directory_with_options(
Ok(results)
}
/// Extract markdown links from a file and store them in the database.
///
/// Reads `path` as UTF-8 text, extracts wikilinks/embeds/internal markdown
/// links via [`links::extract_links`], replaces any previously stored links
/// for `media_id`, and stamps `links_extracted_at`.
///
/// # Errors
/// Returns an error if the file cannot be read as text or any storage call
/// fails.
async fn extract_and_store_links(
    storage: &DynStorageBackend,
    media_id: MediaId,
    path: &Path,
) -> Result<()> {
    // Read file content; markdown files are expected to be valid UTF-8.
    let content = tokio::fs::read_to_string(path).await.map_err(|e| {
        PinakesError::Io(std::io::Error::new(
            std::io::ErrorKind::Other,
            format!("failed to read markdown file for link extraction: {e}"),
        ))
    })?;

    let extracted_links = links::extract_links(media_id, &content);

    // Always clear previously stored links first, so a re-imported file
    // whose links were all removed does not keep stale entries. (Previously
    // the clear only ran when at least one link was found.)
    storage.clear_links_for_media(media_id).await?;

    if !extracted_links.is_empty() {
        // save_markdown_links also replaces existing rows per its contract,
        // so the clear above matters mainly for the zero-link case.
        storage
            .save_markdown_links(media_id, &extracted_links)
            .await?;
    }

    // Record that extraction ran, even when nothing was found.
    storage.mark_links_extracted(media_id).await?;

    tracing::debug!(
        media_id = %media_id,
        link_count = extracted_links.len(),
        "extracted markdown links"
    );
    Ok(())
}

View file

@ -12,6 +12,7 @@ pub mod hash;
pub mod import;
pub mod integrity;
pub mod jobs;
pub mod links;
pub mod managed_storage;
pub mod media_type;
pub mod metadata;

View file

@ -0,0 +1,456 @@
//! Markdown link extraction and management for Obsidian-style bidirectional links.
//!
//! This module provides:
//! - Wikilink extraction (`[[target]]` and `[[target|display]]`)
//! - Embed extraction (`![[target]]`)
//! - Markdown link extraction (`[text](path)` for internal links)
//! - Link resolution strategies
//! - Context extraction for backlink previews
use std::path::Path;
use regex::Regex;
use uuid::Uuid;
use crate::error::Result;
use crate::model::{LinkType, MarkdownLink, MediaId};
/// Configuration for context extraction around links.
///
/// A backlink preview is roughly `CONTEXT_CHARS_BEFORE + CONTEXT_CHARS_AFTER`
/// characters of the line(s) surrounding the link.
const CONTEXT_CHARS_BEFORE: usize = 50;
const CONTEXT_CHARS_AFTER: usize = 50;
/// Extract all markdown links from file content.
///
/// Collects, in order:
/// - wikilinks (`[[target]]`, `[[target|display text]]`),
/// - embeds (`![[target]]`),
/// - internal markdown links (`[text](path)`; external schemes and
///   anchor-only targets are skipped by the extractor).
pub fn extract_links(source_media_id: MediaId, content: &str) -> Vec<MarkdownLink> {
    let mut collected = extract_wikilinks(source_media_id, content);
    collected.extend(extract_embeds(source_media_id, content));
    collected.extend(extract_markdown_links(source_media_id, content));
    collected
}
/// Extract wikilinks from content.
///
/// Matches `[[target]]` or `[[target|display text]]`, skipping matches
/// preceded by `!` — those are embeds and handled by `extract_embeds`.
fn extract_wikilinks(source_media_id: MediaId, content: &str) -> Vec<MarkdownLink> {
    // The pattern matches the bracketed part of embeds too; embeds are
    // filtered out below by looking for a leading `!`.
    let pattern = Regex::new(r"\[\[([^\]|]+)(?:\|([^\]]+))?\]\]").unwrap();
    let mut results = Vec::new();
    for (line_idx, line) in content.lines().enumerate() {
        for caps in pattern.captures_iter(line) {
            let whole = caps.get(0).unwrap();
            // A `!` immediately before `[[` marks an embed, not a wikilink.
            let preceded_by_bang = whole
                .start()
                .checked_sub(1)
                .and_then(|i| line.as_bytes().get(i))
                == Some(&b'!');
            if preceded_by_bang {
                continue;
            }
            let target = caps.get(1).unwrap().as_str().trim();
            let display = caps.get(2).map(|m| m.as_str().trim().to_string());
            results.push(MarkdownLink {
                id: Uuid::now_v7(),
                source_media_id,
                target_path: target.to_string(),
                // Resolution against actual media items happens later.
                target_media_id: None,
                link_type: LinkType::Wikilink,
                // Without explicit display text, fall back to the target.
                link_text: display.or_else(|| Some(target.to_string())),
                line_number: Some(line_idx as i32 + 1), // 1-indexed
                context: Some(extract_context(content, line_idx, whole.start(), whole.end())),
                created_at: chrono::Utc::now(),
            });
        }
    }
    results
}
/// Extract embeds from content.
///
/// Matches `![[target]]` (and `![[target|display]]`); the leading `!` is
/// what distinguishes an embed from a plain wikilink.
fn extract_embeds(source_media_id: MediaId, content: &str) -> Vec<MarkdownLink> {
    let pattern = Regex::new(r"!\[\[([^\]|]+)(?:\|([^\]]+))?\]\]").unwrap();
    content
        .lines()
        .enumerate()
        .flat_map(|(line_idx, line)| {
            pattern.captures_iter(line).map(move |caps| {
                let whole = caps.get(0).unwrap();
                let target = caps.get(1).unwrap().as_str().trim();
                let display = caps.get(2).map(|m| m.as_str().trim().to_string());
                MarkdownLink {
                    id: Uuid::now_v7(),
                    source_media_id,
                    target_path: target.to_string(),
                    // Resolution against actual media items happens later.
                    target_media_id: None,
                    link_type: LinkType::Embed,
                    link_text: display.or_else(|| Some(target.to_string())),
                    line_number: Some(line_idx as i32 + 1), // 1-indexed
                    context: Some(extract_context(content, line_idx, whole.start(), whole.end())),
                    created_at: chrono::Utc::now(),
                }
            })
        })
        .collect()
}
/// Extract classic markdown links (`[text](path)`) from content.
///
/// Only internal targets are kept: external schemes (http/https/mailto/ftp)
/// and anchor-only targets (`#…`) are skipped, and a trailing `#anchor` is
/// stripped from internal targets so they can resolve to a file.
fn extract_markdown_links(source_media_id: MediaId, content: &str) -> Vec<MarkdownLink> {
    const EXTERNAL_SCHEMES: [&str; 4] = ["http://", "https://", "mailto:", "ftp://"];
    let pattern = Regex::new(r"\[([^\]]+)\]\(([^)]+)\)").unwrap();
    let mut results = Vec::new();
    for (line_idx, line) in content.lines().enumerate() {
        for caps in pattern.captures_iter(line) {
            let whole = caps.get(0).unwrap();
            let text = caps.get(1).unwrap().as_str().trim();
            let path = caps.get(2).unwrap().as_str().trim();
            // Only internal links participate in the link graph.
            if EXTERNAL_SCHEMES.iter().any(|scheme| path.starts_with(scheme)) {
                continue;
            }
            // `#section` alone points into the current document.
            if path.starts_with('#') {
                continue;
            }
            // Drop an `#anchor` suffix so the target resolves to a file.
            let target_path = path.split('#').next().unwrap_or(path);
            results.push(MarkdownLink {
                id: Uuid::now_v7(),
                source_media_id,
                target_path: target_path.to_string(),
                // Resolution against actual media items happens later.
                target_media_id: None,
                link_type: LinkType::MarkdownLink,
                link_text: Some(text.to_string()),
                line_number: Some(line_idx as i32 + 1), // 1-indexed
                context: Some(extract_context(content, line_idx, whole.start(), whole.end())),
                created_at: chrono::Utc::now(),
            });
        }
    }
    results
}
/// Extract surrounding context for a link, used for backlink previews.
///
/// `line_num` is the 0-indexed line containing the link. Short lines are
/// padded with their neighbouring lines; long lines are truncated to the
/// configured context window.
///
/// NOTE(review): `_start`/`_end` (the link's byte offsets within the line)
/// are currently unused — the context is not centered on the link itself.
fn extract_context(content: &str, line_num: usize, _start: usize, _end: usize) -> String {
    let lines: Vec<&str> = content.lines().collect();
    if line_num >= lines.len() {
        return String::new();
    }
    let line = lines[line_num];
    // Classify by character count, not byte length: the truncation below
    // counts chars, and byte length would misclassify non-ASCII lines.
    let line_chars = line.chars().count();

    // For short lines, include the neighbouring lines so the preview has
    // something to show.
    if line_chars < 30 && line_num > 0 {
        let prev = lines.get(line_num - 1).unwrap_or(&"");
        let next = lines.get(line_num + 1).unwrap_or(&"");
        return format!("{} {} {}", prev.trim(), line.trim(), next.trim())
            .chars()
            .take(CONTEXT_CHARS_BEFORE + CONTEXT_CHARS_AFTER + 20)
            .collect();
    }

    // Truncate long lines to the configured context window.
    if line_chars > CONTEXT_CHARS_BEFORE + CONTEXT_CHARS_AFTER {
        line.chars()
            .take(CONTEXT_CHARS_BEFORE + CONTEXT_CHARS_AFTER)
            .collect()
    } else {
        line.to_string()
    }
}
/// Link resolution strategies for finding target media items.
///
/// NOTE(review): this enum is not referenced by `resolve_link_candidates`,
/// which currently applies all strategies unconditionally — presumably kept
/// for future per-strategy configuration; confirm before removing.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ResolutionStrategy {
    /// Direct path match
    DirectPath,
    /// Relative to source directory
    RelativeToSource,
    /// Filename with .md extension added
    FilenameWithMd,
    /// Filename-only search (Obsidian-style)
    FilenameOnly,
}
/// Resolve a link target to possible file paths.
///
/// Returns candidate paths in order of preference, deduplicated:
/// 1. the target as a path (absolute, or joined to each root),
/// 2. the target relative to the source file's directory (plus `.md`
///    variants when the target lacks the extension),
/// 3. `<target>.md` under each root directory.
pub fn resolve_link_candidates(
    target: &str,
    source_path: &Path,
    root_dirs: &[std::path::PathBuf],
) -> Vec<std::path::PathBuf> {
    let target = target.trim();
    let mut out: Vec<std::path::PathBuf> = Vec::new();

    // 1. Targets containing a separator are treated as paths.
    if target.contains('/') || target.contains('\\') {
        let as_path = std::path::PathBuf::from(target);
        if as_path.is_absolute() {
            out.push(as_path);
        } else {
            out.extend(root_dirs.iter().map(|root| root.join(&as_path)));
        }
    }

    // 2. Resolve relative to the linking file's directory.
    if let Some(dir) = source_path.parent() {
        let rel = dir.join(target);
        out.push(rel.clone());
        if !target.ends_with(".md") {
            // `with_extension` swaps an existing extension for `.md`…
            out.push(rel.with_extension("md"));
            // …while this variant appends `.md` to the full file name.
            let stem = rel.file_name().unwrap_or_default().to_string_lossy();
            let mut appended = rel.clone();
            appended.set_file_name(format!("{stem}.md"));
            out.push(appended);
        }
    }

    // 3. `<target>.md` under each root directory.
    let with_md = if target.ends_with(".md") {
        target.to_string()
    } else {
        format!("{target}.md")
    };
    out.extend(root_dirs.iter().map(|root| root.join(&with_md)));

    // Deduplicate, keeping the first occurrence of each candidate.
    let mut seen = std::collections::HashSet::new();
    out.retain(|candidate| seen.insert(candidate.clone()));
    out
}
/// Extract frontmatter aliases from markdown content.
///
/// Obsidian's `aliases` frontmatter key may be a YAML list or a single
/// bare string; both forms are accepted. Anything else yields no aliases.
pub fn extract_aliases(content: &str) -> Result<Vec<String>> {
    let parsed = gray_matter::Matter::<gray_matter::engine::YAML>::new().parse(content);
    let frontmatter = parsed.ok().and_then(|p| p.data);
    let aliases_value = match frontmatter {
        Some(gray_matter::Pod::Hash(map)) => map.get("aliases").cloned(),
        _ => None,
    };
    let aliases = match aliases_value {
        Some(gray_matter::Pod::Array(entries)) => entries
            .iter()
            .filter_map(|entry| match entry {
                gray_matter::Pod::String(s) => Some(s.clone()),
                // Non-string entries are silently ignored, as before.
                _ => None,
            })
            .collect(),
        // A bare string is treated as a single alias.
        Some(gray_matter::Pod::String(s)) => vec![s],
        _ => Vec::new(),
    };
    Ok(aliases)
}
#[cfg(test)]
mod tests {
    use super::*;

    // Fixed nil id for the link source; extraction does not depend on it.
    fn test_media_id() -> MediaId {
        MediaId(Uuid::nil())
    }

    #[test]
    fn test_extract_simple_wikilink() {
        let content = "This is a [[simple link]] in text.";
        let links = extract_links(test_media_id(), content);
        assert_eq!(links.len(), 1);
        assert_eq!(links[0].target_path, "simple link");
        assert_eq!(links[0].link_type, LinkType::Wikilink);
        // With no display text, link_text falls back to the target.
        assert_eq!(links[0].link_text, Some("simple link".to_string()));
    }

    #[test]
    fn test_extract_wikilink_with_display() {
        let content = "Check out [[target note|this article]] for more.";
        let links = extract_links(test_media_id(), content);
        assert_eq!(links.len(), 1);
        assert_eq!(links[0].target_path, "target note");
        assert_eq!(links[0].link_text, Some("this article".to_string()));
    }

    #[test]
    fn test_extract_embed() {
        let content = "Here is an image: ![[image.png]]";
        let links = extract_links(test_media_id(), content);
        assert_eq!(links.len(), 1);
        assert_eq!(links[0].target_path, "image.png");
        assert_eq!(links[0].link_type, LinkType::Embed);
    }

    #[test]
    fn test_extract_markdown_link() {
        let content = "Read [the documentation](docs/README.md) for details.";
        let links = extract_links(test_media_id(), content);
        assert_eq!(links.len(), 1);
        assert_eq!(links[0].target_path, "docs/README.md");
        assert_eq!(links[0].link_type, LinkType::MarkdownLink);
        assert_eq!(links[0].link_text, Some("the documentation".to_string()));
    }

    #[test]
    fn test_skip_external_links() {
        // http(s) and mailto targets must not produce internal links.
        let content = "Visit [our site](https://example.com) or [email us](mailto:test@test.com).";
        let links = extract_links(test_media_id(), content);
        assert!(links.is_empty());
    }

    #[test]
    fn test_multiple_links() {
        let content = r#"
# My Note
This links to [[Note A]] and also [[Note B|Note B Title]].
We also have a markdown link to [config](./config.md).
And an embedded image: ![[diagram.png]]
"#;
        let links = extract_links(test_media_id(), content);
        // Two wikilinks + one markdown link + one embed.
        assert_eq!(links.len(), 4);
        let types: Vec<_> = links.iter().map(|l| l.link_type).collect();
        assert!(types.contains(&LinkType::Wikilink));
        assert!(types.contains(&LinkType::Embed));
        assert!(types.contains(&LinkType::MarkdownLink));
    }

    #[test]
    fn test_line_numbers() {
        let content = "Line 1\n[[link on line 2]]\nLine 3";
        let links = extract_links(test_media_id(), content);
        assert_eq!(links.len(), 1);
        // Line numbers are reported 1-indexed.
        assert_eq!(links[0].line_number, Some(2));
    }

    #[test]
    fn test_resolve_candidates() {
        let source_path = std::path::Path::new("/notes/projects/readme.md");
        let root_dirs = vec![std::path::PathBuf::from("/notes")];
        let candidates = resolve_link_candidates("My Note", source_path, &root_dirs);
        // Should include relative path and .md variations
        assert!(!candidates.is_empty());
        assert!(candidates
            .iter()
            .any(|p| p.to_string_lossy().contains("My Note.md")));
    }

    #[test]
    fn test_extract_aliases() {
        // YAML list form of the `aliases` frontmatter key.
        let content = r#"---
title: My Note
aliases:
- Alternative Name
- Another Alias
---
# Content here
"#;
        let aliases = extract_aliases(content).unwrap();
        assert_eq!(aliases, vec!["Alternative Name", "Another Alias"]);
    }

    #[test]
    fn test_extract_single_alias() {
        // `aliases` may also be a bare string rather than a list.
        let content = r#"---
title: My Note
aliases: Single Alias
---
# Content
"#;
        let aliases = extract_aliases(content).unwrap();
        assert_eq!(aliases, vec!["Single Alias"]);
    }

    #[test]
    fn test_wikilink_not_matching_embed() {
        // The `!` prefix must route `![[...]]` to Embed, not Wikilink.
        let content = "A wikilink [[note]] and an embed ![[image.png]]";
        let links = extract_links(test_media_id(), content);
        assert_eq!(links.len(), 2);
        let wikilinks: Vec<_> = links
            .iter()
            .filter(|l| l.link_type == LinkType::Wikilink)
            .collect();
        let embeds: Vec<_> = links
            .iter()
            .filter(|l| l.link_type == LinkType::Embed)
            .collect();
        assert_eq!(wikilinks.len(), 1);
        assert_eq!(embeds.len(), 1);
        assert_eq!(wikilinks[0].target_path, "note");
        assert_eq!(embeds[0].target_path, "image.png");
    }
}

View file

@ -154,6 +154,9 @@ pub struct MediaItem {
/// Soft delete timestamp. If set, the item is in the trash.
pub deleted_at: Option<DateTime<Utc>>,
/// When markdown links were last extracted from this file.
pub links_extracted_at: Option<DateTime<Utc>>,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
@ -486,3 +489,100 @@ impl fmt::Display for ReadingStatus {
}
}
}
// ===== Markdown Links (Obsidian-style) =====

/// Type of markdown link.
///
/// Serialized (via serde and `Display`/`FromStr`) as snake_case strings:
/// `wikilink`, `markdown_link`, `embed` — these same strings are what the
/// storage backends persist in the `link_type` column.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum LinkType {
    /// Wikilink: [[target]] or [[target|display]]
    Wikilink,
    /// Markdown link: [text](path)
    MarkdownLink,
    /// Embed: ![[target]]
    Embed,
}
impl fmt::Display for LinkType {
    /// Render the snake_case form used for storage; inverse of `FromStr`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let name = match self {
            Self::Wikilink => "wikilink",
            Self::MarkdownLink => "markdown_link",
            Self::Embed => "embed",
        };
        f.write_str(name)
    }
}
impl std::str::FromStr for LinkType {
    type Err = String;

    /// Parse the snake_case form produced by `Display`; input is
    /// case-insensitive.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let normalized = s.to_lowercase();
        if normalized == "wikilink" {
            Ok(Self::Wikilink)
        } else if normalized == "markdown_link" {
            Ok(Self::MarkdownLink)
        } else if normalized == "embed" {
            Ok(Self::Embed)
        } else {
            Err(format!("unknown link type: {}", s))
        }
    }
}
/// A markdown link extracted from a file
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MarkdownLink {
    /// Unique id of this link row.
    pub id: Uuid,
    /// Media item the link was found in.
    pub source_media_id: MediaId,
    /// Raw link target as written in the source (wikilink name or path)
    pub target_path: String,
    /// Resolved target media_id (None if unresolved)
    pub target_media_id: Option<MediaId>,
    /// Kind of syntax used: wikilink, markdown link, or embed.
    pub link_type: LinkType,
    /// Display text for the link
    pub link_text: Option<String>,
    /// Line number in source file (1-indexed)
    pub line_number: Option<i32>,
    /// Surrounding text for backlink preview
    pub context: Option<String>,
    /// When this link row was created.
    pub created_at: DateTime<Utc>,
}
/// Information about a backlink (incoming link)
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BacklinkInfo {
    /// Id of the underlying `MarkdownLink` row.
    pub link_id: Uuid,
    /// Media item the link originates from.
    pub source_id: MediaId,
    /// Title of the linking item, if it has one.
    pub source_title: Option<String>,
    /// Filesystem path of the linking item.
    pub source_path: String,
    /// Display text of the link, if any.
    pub link_text: Option<String>,
    /// Line the link appears on in the source, if recorded.
    pub line_number: Option<i32>,
    /// Surrounding text for preview rendering.
    pub context: Option<String>,
    /// Kind of link syntax used at the source.
    pub link_type: LinkType,
}
/// Graph data for visualization: the node set plus the resolved links
/// between nodes in that set.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct GraphData {
    pub nodes: Vec<GraphNode>,
    pub edges: Vec<GraphEdge>,
}
/// A node in the graph visualization
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GraphNode {
    /// Media item id, as a string.
    pub id: String,
    /// Display label (title when present, otherwise file name).
    pub label: String,
    /// Item title, if set.
    pub title: Option<String>,
    /// Media type identifier for styling/filtering.
    pub media_type: String,
    /// Number of outgoing links from this node
    pub link_count: u32,
    /// Number of incoming links to this node
    pub backlink_count: u32,
}
/// An edge (link) in the graph visualization
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GraphEdge {
    /// Source node (media item id) of the link.
    pub source: String,
    /// Target node (media item id) of the link.
    pub target: String,
    /// Kind of link syntax that produced this edge.
    pub link_type: LinkType,
}

View file

@ -789,6 +789,45 @@ pub trait StorageBackend: Send + Sync + 'static {
/// Count items in trash.
async fn count_trash(&self) -> Result<u64>;
// ===== Markdown Links (Obsidian-style) =====

/// Save extracted markdown links for a media item.
/// This replaces any existing links for the source media.
async fn save_markdown_links(
    &self,
    media_id: MediaId,
    links: &[crate::model::MarkdownLink],
) -> Result<()>;

/// Get outgoing links from a media item, ordered by line number.
async fn get_outgoing_links(&self, media_id: MediaId) -> Result<Vec<crate::model::MarkdownLink>>;

/// Get backlinks (incoming links) to a media item, with source
/// title/path included for preview rendering.
async fn get_backlinks(&self, media_id: MediaId) -> Result<Vec<crate::model::BacklinkInfo>>;

/// Clear all links for a media item.
async fn clear_links_for_media(&self, media_id: MediaId) -> Result<()>;

/// Get graph data for visualization.
///
/// If `center_id` is provided, returns nodes within `depth` hops of that node.
/// If `center_id` is None, returns the entire graph (limited by internal max).
async fn get_graph_data(
    &self,
    center_id: Option<MediaId>,
    depth: u32,
) -> Result<crate::model::GraphData>;

/// Resolve unresolved links by matching target_path against media item paths.
/// Returns the number of links that were resolved.
async fn resolve_links(&self) -> Result<u64>;

/// Update the links_extracted_at timestamp for a media item.
async fn mark_links_extracted(&self, media_id: MediaId) -> Result<()>;

/// Get count of unresolved links (links where target_media_id is NULL).
async fn count_unresolved_links(&self) -> Result<u64>;
}
/// Comprehensive library statistics.

View file

@ -200,6 +200,9 @@ fn row_to_media_item(row: &Row) -> Result<MediaItem> {
// Trash support
deleted_at: row.try_get("deleted_at").ok().flatten(),
// Markdown links extraction timestamp
links_extracted_at: row.try_get("links_extracted_at").ok().flatten(),
})
}
@ -6036,6 +6039,425 @@ impl StorageBackend for PostgresBackend {
let count: i64 = row.get(0);
Ok(count as u64)
}
// ===== Markdown Links (Obsidian-style) =====

/// Replace all stored links for `media_id` with `links`.
///
/// The delete + inserts run inside one transaction so a failure partway
/// through cannot leave the link set half-written (previously each insert
/// was autocommitted after the delete).
async fn save_markdown_links(
    &self,
    media_id: MediaId,
    links: &[crate::model::MarkdownLink],
) -> Result<()> {
    let mut client = self
        .pool
        .get()
        .await
        .map_err(|e| PinakesError::Database(format!("pool error: {e}")))?;
    let media_id_str = media_id.0.to_string();

    let tx = client
        .transaction()
        .await
        .map_err(|e| PinakesError::Database(e.to_string()))?;

    // Delete existing links for this source
    tx.execute(
        "DELETE FROM markdown_links WHERE source_media_id = $1",
        &[&media_id_str],
    )
    .await
    .map_err(|e| PinakesError::Database(e.to_string()))?;

    // Insert new links
    for link in links {
        let target_media_id = link.target_media_id.map(|id| id.0.to_string());
        tx.execute(
            "INSERT INTO markdown_links (
                id, source_media_id, target_path, target_media_id,
                link_type, link_text, line_number, context, created_at
            ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9)",
            &[
                &link.id.to_string(),
                &media_id_str,
                &link.target_path,
                &target_media_id,
                &link.link_type.to_string(),
                &link.link_text,
                &link.line_number,
                &link.context,
                &link.created_at,
            ],
        )
        .await
        .map_err(|e| PinakesError::Database(e.to_string()))?;
    }

    tx.commit()
        .await
        .map_err(|e| PinakesError::Database(e.to_string()))?;
    Ok(())
}
async fn get_outgoing_links(&self, media_id: MediaId) -> Result<Vec<crate::model::MarkdownLink>> {
let client = self
.pool
.get()
.await
.map_err(|e| PinakesError::Database(format!("pool error: {e}")))?;
let media_id_str = media_id.0.to_string();
let rows = client
.query(
"SELECT id, source_media_id, target_path, target_media_id,
link_type, link_text, line_number, context, created_at
FROM markdown_links
WHERE source_media_id = $1
ORDER BY line_number",
&[&media_id_str],
)
.await
.map_err(|e| PinakesError::Database(e.to_string()))?;
let mut links = Vec::new();
for row in rows {
links.push(row_to_markdown_link(&row)?);
}
Ok(links)
}
/// Get backlinks (incoming links) to a media item. Joins through
/// `media_items` so each backlink carries its source's title and path.
async fn get_backlinks(&self, media_id: MediaId) -> Result<Vec<crate::model::BacklinkInfo>> {
    let client = self
        .pool
        .get()
        .await
        .map_err(|e| PinakesError::Database(format!("pool error: {e}")))?;
    let media_id_str = media_id.0.to_string();
    let rows = client
        .query(
            "SELECT l.id, l.source_media_id, m.title, m.path,
                    l.link_text, l.line_number, l.context, l.link_type
             FROM markdown_links l
             JOIN media_items m ON l.source_media_id = m.id
             WHERE l.target_media_id = $1
             ORDER BY m.title, l.line_number",
            &[&media_id_str],
        )
        .await
        .map_err(|e| PinakesError::Database(e.to_string()))?;
    let mut backlinks = Vec::new();
    for row in rows {
        // Columns are read positionally, matching the SELECT order above.
        let link_id_str: String = row.get(0);
        let source_id_str: String = row.get(1);
        let source_title: Option<String> = row.get(2);
        let source_path: String = row.get(3);
        let link_text: Option<String> = row.get(4);
        let line_number: Option<i32> = row.get(5);
        let context: Option<String> = row.get(6);
        let link_type_str: String = row.get(7);
        backlinks.push(crate::model::BacklinkInfo {
            link_id: Uuid::parse_str(&link_id_str)
                .map_err(|e| PinakesError::Database(e.to_string()))?,
            source_id: MediaId(
                Uuid::parse_str(&source_id_str)
                    .map_err(|e| PinakesError::Database(e.to_string()))?,
            ),
            source_title,
            source_path,
            link_text,
            line_number,
            context,
            // Unknown stored link types degrade to Wikilink rather than
            // failing the whole query.
            link_type: link_type_str
                .parse()
                .unwrap_or(crate::model::LinkType::Wikilink),
        });
    }
    Ok(backlinks)
}
/// Remove every stored link whose source is `media_id`.
async fn clear_links_for_media(&self, media_id: MediaId) -> Result<()> {
    let client = self
        .pool
        .get()
        .await
        .map_err(|e| PinakesError::Database(format!("pool error: {e}")))?;
    let source_id = media_id.0.to_string();
    let delete_result = client
        .execute(
            "DELETE FROM markdown_links WHERE source_media_id = $1",
            &[&source_id],
        )
        .await;
    // The affected-row count is not needed; only surface failures.
    delete_result.map_err(|e| PinakesError::Database(e.to_string()))?;
    Ok(())
}
/// Get graph data for visualization.
///
/// With `center_id`: BFS over resolved links (both directions) up to
/// `depth` hops, capped at 5. Without it: all non-deleted markdown items,
/// capped at 500. Edges are only emitted between nodes in the final set.
///
/// NOTE(review): node metadata and link counts are fetched with per-node
/// queries (N+1 pattern), and the BFS issues two queries per node per
/// level — fine for small graphs, worth batching if libraries grow.
async fn get_graph_data(
    &self,
    center_id: Option<MediaId>,
    depth: u32,
) -> Result<crate::model::GraphData> {
    let client = self
        .pool
        .get()
        .await
        .map_err(|e| PinakesError::Database(format!("pool error: {e}")))?;
    let depth = depth.min(5); // Limit depth
    let mut nodes = Vec::new();
    let mut edges = Vec::new();
    let mut node_ids: std::collections::HashSet<String> = std::collections::HashSet::new();
    if let Some(center) = center_id {
        // BFS to find connected nodes within depth
        let mut frontier = vec![center.0.to_string()];
        let mut visited = std::collections::HashSet::new();
        visited.insert(center.0.to_string());
        for _ in 0..depth {
            if frontier.is_empty() {
                break;
            }
            let mut next_frontier = Vec::new();
            for node_id in &frontier {
                // Get outgoing links (only resolved targets participate)
                let rows = client
                    .query(
                        "SELECT target_media_id FROM markdown_links
                         WHERE source_media_id = $1 AND target_media_id IS NOT NULL",
                        &[node_id],
                    )
                    .await
                    .map_err(|e| PinakesError::Database(e.to_string()))?;
                for row in rows {
                    let id: String = row.get(0);
                    if !visited.contains(&id) {
                        visited.insert(id.clone());
                        next_frontier.push(id);
                    }
                }
                // Get incoming links
                let rows = client
                    .query(
                        "SELECT source_media_id FROM markdown_links
                         WHERE target_media_id = $1",
                        &[node_id],
                    )
                    .await
                    .map_err(|e| PinakesError::Database(e.to_string()))?;
                for row in rows {
                    let id: String = row.get(0);
                    if !visited.contains(&id) {
                        visited.insert(id.clone());
                        next_frontier.push(id);
                    }
                }
            }
            frontier = next_frontier;
        }
        node_ids = visited;
    } else {
        // Get all markdown files with links (limit to 500)
        let rows = client
            .query(
                "SELECT DISTINCT id FROM media_items
                 WHERE media_type = 'markdown' AND deleted_at IS NULL
                 LIMIT 500",
                &[],
            )
            .await
            .map_err(|e| PinakesError::Database(e.to_string()))?;
        for row in rows {
            let id: String = row.get(0);
            node_ids.insert(id);
        }
    }
    // Build nodes with metadata; ids with no media_items row are skipped.
    for node_id in &node_ids {
        let row = client
            .query_opt(
                "SELECT id, COALESCE(title, file_name) as label, title, media_type
                 FROM media_items WHERE id = $1",
                &[node_id],
            )
            .await
            .map_err(|e| PinakesError::Database(e.to_string()))?;
        if let Some(row) = row {
            let id: String = row.get(0);
            let label: String = row.get(1);
            let title: Option<String> = row.get(2);
            let media_type: String = row.get(3);
            // Count outgoing links
            let link_count_row = client
                .query_one(
                    "SELECT COUNT(*) FROM markdown_links WHERE source_media_id = $1",
                    &[&id],
                )
                .await
                .map_err(|e| PinakesError::Database(e.to_string()))?;
            let link_count: i64 = link_count_row.get(0);
            // Count incoming links
            let backlink_count_row = client
                .query_one(
                    "SELECT COUNT(*) FROM markdown_links WHERE target_media_id = $1",
                    &[&id],
                )
                .await
                .map_err(|e| PinakesError::Database(e.to_string()))?;
            let backlink_count: i64 = backlink_count_row.get(0);
            nodes.push(crate::model::GraphNode {
                id: id.clone(),
                label,
                title,
                media_type,
                link_count: link_count as u32,
                backlink_count: backlink_count as u32,
            });
        }
    }
    // Build edges between nodes that are both in the node set.
    for node_id in &node_ids {
        let rows = client
            .query(
                "SELECT source_media_id, target_media_id, link_type
                 FROM markdown_links
                 WHERE source_media_id = $1 AND target_media_id IS NOT NULL",
                &[node_id],
            )
            .await
            .map_err(|e| PinakesError::Database(e.to_string()))?;
        for row in rows {
            let source: String = row.get(0);
            let target: String = row.get(1);
            let link_type_str: String = row.get(2);
            if node_ids.contains(&target) {
                edges.push(crate::model::GraphEdge {
                    source,
                    target,
                    // Unknown stored types degrade to Wikilink.
                    link_type: link_type_str
                        .parse()
                        .unwrap_or(crate::model::LinkType::Wikilink),
                });
            }
        }
    }
    Ok(crate::model::GraphData { nodes, edges })
}
/// Resolve unresolved links by matching target_path against media item
/// paths. Returns the number of links that were resolved.
///
/// Two passes: an exact `path` match first, then filename-based matching
/// (with and without a `.md` suffix). The second pass only touches rows
/// still unresolved after the first, so the two counts do not overlap.
///
/// NOTE(review): the correlated subqueries use `LIMIT 1` with no ORDER BY —
/// when several items share a filename, which one wins is unspecified.
async fn resolve_links(&self) -> Result<u64> {
    let client = self
        .pool
        .get()
        .await
        .map_err(|e| PinakesError::Database(format!("pool error: {e}")))?;
    // Strategy 1: Exact path match
    let result1 = client
        .execute(
            "UPDATE markdown_links
             SET target_media_id = (
                 SELECT id FROM media_items
                 WHERE path = markdown_links.target_path
                 AND deleted_at IS NULL
                 LIMIT 1
             )
             WHERE target_media_id IS NULL
             AND EXISTS (
                 SELECT 1 FROM media_items
                 WHERE path = markdown_links.target_path
                 AND deleted_at IS NULL
             )",
            &[],
        )
        .await
        .map_err(|e| PinakesError::Database(e.to_string()))?;
    // Strategy 2: Filename match
    let result2 = client
        .execute(
            "UPDATE markdown_links
             SET target_media_id = (
                 SELECT id FROM media_items
                 WHERE (file_name = markdown_links.target_path
                        OR file_name = markdown_links.target_path || '.md'
                        OR REPLACE(file_name, '.md', '') = markdown_links.target_path)
                 AND deleted_at IS NULL
                 LIMIT 1
             )
             WHERE target_media_id IS NULL
             AND EXISTS (
                 SELECT 1 FROM media_items
                 WHERE (file_name = markdown_links.target_path
                        OR file_name = markdown_links.target_path || '.md'
                        OR REPLACE(file_name, '.md', '') = markdown_links.target_path)
                 AND deleted_at IS NULL
             )",
            &[],
        )
        .await
        .map_err(|e| PinakesError::Database(e.to_string()))?;
    Ok(result1 + result2)
}
/// Stamp `links_extracted_at` with the current time for `media_id`.
async fn mark_links_extracted(&self, media_id: MediaId) -> Result<()> {
    let client = self
        .pool
        .get()
        .await
        .map_err(|e| PinakesError::Database(format!("pool error: {e}")))?;
    let id_str = media_id.0.to_string();
    let extracted_at = chrono::Utc::now();
    client
        .execute(
            "UPDATE media_items SET links_extracted_at = $1 WHERE id = $2",
            &[&extracted_at, &id_str],
        )
        .await
        .map_err(|e| PinakesError::Database(e.to_string()))?;
    Ok(())
}
/// Count links whose `target_media_id` has not been resolved yet.
async fn count_unresolved_links(&self) -> Result<u64> {
    let client = self
        .pool
        .get()
        .await
        .map_err(|e| PinakesError::Database(format!("pool error: {e}")))?;
    let row = client
        .query_one(
            "SELECT COUNT(*) FROM markdown_links WHERE target_media_id IS NULL",
            &[],
        )
        .await
        .map_err(|e| PinakesError::Database(e.to_string()))?;
    // COUNT(*) comes back as a Postgres bigint (i64).
    let unresolved: i64 = row.get(0);
    Ok(unresolved as u64)
}
}
impl PostgresBackend {
@ -6329,6 +6751,37 @@ fn find_first_fts_param(query: &SearchQuery) -> i32 {
find_inner(query, &mut offset).unwrap_or(1)
}
// Helper function to parse a markdown link row
/// Convert a `markdown_links` row (column order: id, source_media_id,
/// target_path, target_media_id, link_type, link_text, line_number,
/// context, created_at) into a `MarkdownLink`.
fn row_to_markdown_link(row: &Row) -> Result<crate::model::MarkdownLink> {
    let id_str: String = row.get(0);
    let source_id_str: String = row.get(1);
    let target_path: String = row.get(2);
    let target_id: Option<String> = row.get(3);
    let link_type_str: String = row.get(4);
    let link_text: Option<String> = row.get(5);
    let line_number: Option<i32> = row.get(6);
    let context: Option<String> = row.get(7);
    let created_at: chrono::DateTime<Utc> = row.get(8);

    // Surface a malformed stored target UUID as a database error, matching
    // how id/source parse failures are handled (previously a bad target was
    // silently dropped, making the link look unresolved).
    let target_media_id = match target_id {
        Some(s) => Some(MediaId(
            Uuid::parse_str(&s).map_err(|e| PinakesError::Database(e.to_string()))?,
        )),
        None => None,
    };

    Ok(crate::model::MarkdownLink {
        id: Uuid::parse_str(&id_str).map_err(|e| PinakesError::Database(e.to_string()))?,
        source_media_id: MediaId(
            Uuid::parse_str(&source_id_str).map_err(|e| PinakesError::Database(e.to_string()))?,
        ),
        target_path,
        target_media_id,
        // Unknown stored link types degrade to Wikilink rather than
        // failing the row.
        link_type: link_type_str
            .parse()
            .unwrap_or(crate::model::LinkType::Wikilink),
        link_text,
        line_number,
        context,
        created_at,
    })
}
#[cfg(test)]
mod tests {
use super::*;

View file

@ -160,6 +160,14 @@ fn row_to_media_item(row: &Row) -> rusqlite::Result<MediaItem> {
.flatten()
.and_then(|s| DateTime::parse_from_rfc3339(&s).ok())
.map(|dt| dt.with_timezone(&Utc)),
// Markdown links extraction timestamp
links_extracted_at: row
.get::<_, Option<String>>("links_extracted_at")
.ok()
.flatten()
.and_then(|s| DateTime::parse_from_rfc3339(&s).ok())
.map(|dt| dt.with_timezone(&Utc)),
})
}
@ -6379,6 +6387,428 @@ impl StorageBackend for SqliteBackend {
Ok(count)
}
// ===== Markdown Links (Obsidian-style) =====

/// Replace all stored links for `media_id` with `links`.
///
/// Delete + inserts run inside a single transaction so a mid-loop failure
/// cannot leave a partially written link set (previously each statement
/// was autocommitted after the delete).
async fn save_markdown_links(
    &self,
    media_id: MediaId,
    links: &[crate::model::MarkdownLink],
) -> Result<()> {
    let conn = self.conn.clone();
    let media_id_str = media_id.0.to_string();
    // Owned copy so the data can move into the blocking task.
    let links: Vec<_> = links.to_vec();
    tokio::task::spawn_blocking(move || {
        let mut conn = conn.lock().unwrap();
        let tx = conn.transaction()?;
        // Delete existing links for this source
        tx.execute(
            "DELETE FROM markdown_links WHERE source_media_id = ?1",
            [&media_id_str],
        )?;
        {
            // Prepared once, reused for every link row.
            let mut stmt = tx.prepare(
                "INSERT INTO markdown_links (
                    id, source_media_id, target_path, target_media_id,
                    link_type, link_text, line_number, context, created_at
                ) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9)",
            )?;
            for link in &links {
                stmt.execute(params![
                    link.id.to_string(),
                    media_id_str,
                    link.target_path,
                    link.target_media_id.map(|id| id.0.to_string()),
                    link.link_type.to_string(),
                    link.link_text,
                    link.line_number,
                    link.context,
                    link.created_at.to_rfc3339(),
                ])?;
            }
        }
        tx.commit()?;
        Ok::<_, rusqlite::Error>(())
    })
    .await
    .map_err(|e| PinakesError::Database(e.to_string()))??;
    Ok(())
}
/// Fetch all outgoing links recorded for `media_id`, ordered by line number.
async fn get_outgoing_links(&self, media_id: MediaId) -> Result<Vec<crate::model::MarkdownLink>> {
    let conn = self.conn.clone();
    let media_id_str = media_id.0.to_string();
    let links = tokio::task::spawn_blocking(move || {
        let conn = conn.lock().unwrap();
        let mut stmt = conn.prepare(
            "SELECT id, source_media_id, target_path, target_media_id,
                    link_type, link_text, line_number, context, created_at
             FROM markdown_links
             WHERE source_media_id = ?1
             ORDER BY line_number",
        )?;
        // Collect short-circuits on the first row that fails to convert.
        stmt.query_map([&media_id_str], |row| row_to_markdown_link(row))?
            .collect::<rusqlite::Result<Vec<_>>>()
    })
    .await
    .map_err(|e| PinakesError::Database(e.to_string()))??;
    Ok(links)
}
/// List every link pointing at `media_id`, joined with metadata about the
/// source item, ordered by source title then line number.
async fn get_backlinks(&self, media_id: MediaId) -> Result<Vec<crate::model::BacklinkInfo>> {
    let conn = self.conn.clone();
    let target_id = media_id.0.to_string();
    let result = tokio::task::spawn_blocking(move || {
        let conn = conn.lock().unwrap();
        let mut stmt = conn.prepare(
            "SELECT l.id, l.source_media_id, m.title, m.path,
                l.link_text, l.line_number, l.context, l.link_type
            FROM markdown_links l
            JOIN media_items m ON l.source_media_id = m.id
            WHERE l.target_media_id = ?1
            ORDER BY m.title, l.line_number",
        )?;
        stmt.query_map([&target_id], |row| {
            let raw_link_id: String = row.get(0)?;
            let raw_source_id: String = row.get(1)?;
            let raw_link_type: String = row.get(7)?;
            Ok(crate::model::BacklinkInfo {
                link_id: parse_uuid(&raw_link_id)?,
                source_id: MediaId(parse_uuid(&raw_source_id)?),
                source_title: row.get(2)?,
                source_path: row.get(3)?,
                link_text: row.get(4)?,
                line_number: row.get(5)?,
                context: row.get(6)?,
                // Unknown stored type strings degrade to the wikilink kind.
                link_type: raw_link_type
                    .parse()
                    .unwrap_or(crate::model::LinkType::Wikilink),
            })
        })?
        .collect::<rusqlite::Result<Vec<_>>>()
    })
    .await
    .map_err(|e| PinakesError::Database(e.to_string()))??;
    Ok(result)
}
/// Remove all stored links originating from the given media item.
async fn clear_links_for_media(&self, media_id: MediaId) -> Result<()> {
    let conn = self.conn.clone();
    let source_id = media_id.0.to_string();
    tokio::task::spawn_blocking(move || {
        conn.lock()
            .unwrap()
            .execute(
                "DELETE FROM markdown_links WHERE source_media_id = ?1",
                [&source_id],
            )
            // Discard the affected-row count; callers only need success/failure.
            .map(|_| ())
    })
    .await
    .map_err(|e| PinakesError::Database(e.to_string()))??;
    Ok(())
}
/// Build the link graph: either the whole markdown corpus (capped at 500
/// items) or, when `center_id` is given, the neighbourhood reachable from it
/// via a breadth-first search of at most `depth` hops (clamped to 5).
///
/// All SQL statements are prepared once and reused across loop iterations
/// instead of being re-prepared per visited node, which the original did.
async fn get_graph_data(
    &self,
    center_id: Option<MediaId>,
    depth: u32,
) -> Result<crate::model::GraphData> {
    let conn = self.conn.clone();
    let center_id_str = center_id.map(|id| id.0.to_string());
    let depth = depth.min(5); // Limit depth to prevent huge queries
    let graph_data = tokio::task::spawn_blocking(move || {
        let conn = conn.lock().unwrap();
        let mut nodes = Vec::new();
        let mut edges = Vec::new();
        let mut node_ids = std::collections::HashSet::new();
        // Get nodes - either all markdown files or those connected to center
        if let Some(center_id) = center_id_str {
            // Prepare both neighbour queries once, not once per node per level.
            let mut out_stmt = conn.prepare(
                "SELECT target_media_id FROM markdown_links
                WHERE source_media_id = ?1 AND target_media_id IS NOT NULL",
            )?;
            let mut in_stmt = conn.prepare(
                "SELECT source_media_id FROM markdown_links
                WHERE target_media_id = ?1",
            )?;
            // BFS to find connected nodes within depth
            let mut frontier = vec![center_id.clone()];
            let mut visited = std::collections::HashSet::new();
            visited.insert(center_id);
            for _ in 0..depth {
                let mut next_frontier = Vec::new();
                for node_id in &frontier {
                    // Follow links in both directions from this node.
                    for stmt in [&mut out_stmt, &mut in_stmt] {
                        let rows =
                            stmt.query_map([node_id], |row| row.get::<_, String>(0))?;
                        for row in rows {
                            let id = row?;
                            // `insert` returns true only on first sight.
                            if visited.insert(id.clone()) {
                                next_frontier.push(id);
                            }
                        }
                    }
                }
                frontier = next_frontier;
            }
            node_ids = visited;
        } else {
            // Get all markdown files with links (limit to 500 for performance)
            let mut stmt = conn.prepare(
                "SELECT DISTINCT id FROM media_items
                WHERE media_type = 'markdown' AND deleted_at IS NULL
                LIMIT 500",
            )?;
            let rows = stmt.query_map([], |row| row.get::<_, String>(0))?;
            for row in rows {
                node_ids.insert(row?);
            }
        }
        // Build nodes with metadata; prepare the three lookups once.
        let mut meta_stmt = conn.prepare(
            "SELECT id, COALESCE(title, file_name) as label, title, media_type
            FROM media_items WHERE id = ?1",
        )?;
        let mut out_count_stmt = conn.prepare(
            "SELECT COUNT(*) FROM markdown_links WHERE source_media_id = ?1",
        )?;
        let mut in_count_stmt = conn.prepare(
            "SELECT COUNT(*) FROM markdown_links WHERE target_media_id = ?1",
        )?;
        for node_id in &node_ids {
            // IDs that no longer resolve to a media item are silently skipped,
            // matching the original `if let Ok` behavior.
            if let Ok((id, label, title, media_type)) =
                meta_stmt.query_row([node_id], |row| {
                    Ok((
                        row.get::<_, String>(0)?,
                        row.get::<_, String>(1)?,
                        row.get::<_, Option<String>>(2)?,
                        row.get::<_, String>(3)?,
                    ))
                })
            {
                // Count outgoing links
                let link_count: i64 =
                    out_count_stmt.query_row([&id], |row| row.get(0))?;
                // Count incoming links
                let backlink_count: i64 =
                    in_count_stmt.query_row([&id], |row| row.get(0))?;
                nodes.push(crate::model::GraphNode {
                    id: id.clone(),
                    label,
                    title,
                    media_type,
                    link_count: link_count as u32,
                    backlink_count: backlink_count as u32,
                });
            }
        }
        // Build edges, keeping only those whose target is inside the node set.
        let mut edge_stmt = conn.prepare(
            "SELECT source_media_id, target_media_id, link_type
            FROM markdown_links
            WHERE source_media_id = ?1 AND target_media_id IS NOT NULL",
        )?;
        for node_id in &node_ids {
            let rows = edge_stmt.query_map([node_id], |row| {
                Ok((
                    row.get::<_, String>(0)?,
                    row.get::<_, String>(1)?,
                    row.get::<_, String>(2)?,
                ))
            })?;
            for row in rows {
                let (source, target, link_type_str) = row?;
                if node_ids.contains(&target) {
                    edges.push(crate::model::GraphEdge {
                        source,
                        target,
                        link_type: link_type_str
                            .parse()
                            .unwrap_or(crate::model::LinkType::Wikilink),
                    });
                }
            }
        }
        Ok::<_, rusqlite::Error>(crate::model::GraphData { nodes, edges })
    })
    .await
    .map_err(|e| PinakesError::Database(e.to_string()))??;
    Ok(graph_data)
}
/// Attempt to fill in `target_media_id` for links that are still unresolved,
/// using two matching strategies, and return the total number of rows updated.
///
/// Strategy 1 resolves by exact `path` match; strategy 2 resolves
/// Obsidian-style by bare filename (with or without the `.md` extension).
/// Each UPDATE is individually atomic in SQLite, but the pair does not run
/// inside an explicit transaction.
async fn resolve_links(&self) -> Result<u64> {
    let conn = self.conn.clone();
    let count = tokio::task::spawn_blocking(move || {
        let conn = conn.lock().unwrap();
        // Find unresolved links and try to resolve them
        // Strategy 1: Exact path match
        let updated1 = conn.execute(
            "UPDATE markdown_links
            SET target_media_id = (
            SELECT id FROM media_items
            WHERE path = markdown_links.target_path
            AND deleted_at IS NULL
            LIMIT 1
            )
            WHERE target_media_id IS NULL
            AND EXISTS (
            SELECT 1 FROM media_items
            WHERE path = markdown_links.target_path
            AND deleted_at IS NULL
            )",
            [],
        )?;
        // Strategy 2: Filename match (Obsidian-style)
        // Match target_path to file_name (with or without .md extension)
        // NOTE(review): REPLACE strips every '.md' occurrence in file_name,
        // not just a trailing extension (e.g. 'a.md.md' -> 'a') — confirm
        // that is acceptable for such names.
        let updated2 = conn.execute(
            "UPDATE markdown_links
            SET target_media_id = (
            SELECT id FROM media_items
            WHERE (file_name = markdown_links.target_path
            OR file_name = markdown_links.target_path || '.md'
            OR REPLACE(file_name, '.md', '') = markdown_links.target_path)
            AND deleted_at IS NULL
            LIMIT 1
            )
            WHERE target_media_id IS NULL
            AND EXISTS (
            SELECT 1 FROM media_items
            WHERE (file_name = markdown_links.target_path
            OR file_name = markdown_links.target_path || '.md'
            OR REPLACE(file_name, '.md', '') = markdown_links.target_path)
            AND deleted_at IS NULL
            )",
            [],
        )?;
        // `execute` returns the affected-row count for each statement.
        Ok::<_, rusqlite::Error>((updated1 + updated2) as u64)
    })
    .await
    .map_err(|e| PinakesError::Database(e.to_string()))??;
    Ok(count)
}
/// Stamp `links_extracted_at` with the current UTC time for a media item.
async fn mark_links_extracted(&self, media_id: MediaId) -> Result<()> {
    let conn = self.conn.clone();
    let id_str = media_id.0.to_string();
    let timestamp = chrono::Utc::now().to_rfc3339();
    tokio::task::spawn_blocking(move || {
        let guard = conn.lock().unwrap();
        guard
            .execute(
                "UPDATE media_items SET links_extracted_at = ?1 WHERE id = ?2",
                params![timestamp, id_str],
            )
            // The affected-row count is not interesting to callers.
            .map(|_| ())
    })
    .await
    .map_err(|e| PinakesError::Database(e.to_string()))??;
    Ok(())
}
/// Count links whose target has not yet been resolved to a media item.
async fn count_unresolved_links(&self) -> Result<u64> {
    let conn = self.conn.clone();
    let total = tokio::task::spawn_blocking(move || {
        let guard = conn.lock().unwrap();
        guard.query_row(
            "SELECT COUNT(*) FROM markdown_links WHERE target_media_id IS NULL",
            [],
            |row| row.get::<_, i64>(0),
        )
    })
    .await
    .map_err(|e| PinakesError::Database(e.to_string()))??;
    // COUNT(*) is never negative, so the cast is lossless.
    Ok(total as u64)
}
}
/// Map one `markdown_links` row — SELECTed in the column order
/// (id, source_media_id, target_path, target_media_id, link_type,
/// link_text, line_number, context, created_at) — into a `MarkdownLink`.
fn row_to_markdown_link(row: &Row) -> rusqlite::Result<crate::model::MarkdownLink> {
    let raw_id: String = row.get(0)?;
    let raw_source: String = row.get(1)?;
    let raw_target: Option<String> = row.get(3)?;
    let raw_type: String = row.get(4)?;
    let raw_created: String = row.get(8)?;
    Ok(crate::model::MarkdownLink {
        id: parse_uuid(&raw_id)?,
        source_media_id: MediaId(parse_uuid(&raw_source)?),
        target_path: row.get(2)?,
        // A malformed stored target UUID degrades to None ("unresolved")
        // instead of failing the whole row.
        target_media_id: raw_target
            .as_deref()
            .and_then(|s| Uuid::parse_str(s).ok())
            .map(MediaId),
        // Unknown type strings degrade to the wikilink kind.
        link_type: raw_type
            .parse()
            .unwrap_or(crate::model::LinkType::Wikilink),
        link_text: row.get(5)?,
        line_number: row.get(6)?,
        context: row.get(7)?,
        created_at: parse_datetime(&raw_created),
    })
}
// Helper function to parse a share row

View file

@ -98,6 +98,7 @@ pub async fn process_upload<R: AsyncRead + Unpin>(
created_at: now,
updated_at: now,
deleted_at: None,
links_extracted_at: None,
};
// Store the media item