various: markdown improvements

Signed-off-by: NotAShelf <raf@notashelf.dev>
Change-Id: I81fda8247814da19eed1e76dbe97bd5b6a6a6964
2026-02-05 15:39:05 +03:00 · 2026-02-05 15:39:05 +03:00 · 80a8b5c7ca
commit 80a8b5c7ca
parent 875bdf5ebc
23 changed files with 3458 additions and 30 deletions
--- a/crates/pinakes-core/src/import.rs
+++ b/crates/pinakes-core/src/import.rs
@@ -6,7 +6,8 @@ use tracing::info;
 use crate::audit;
 use crate::error::{PinakesError, Result};
 use crate::hash::compute_file_hash;
-use crate::media_type::MediaType;
+use crate::links;
+use crate::media_type::{BuiltinMediaType, MediaType};
 use crate::metadata;
 use crate::model::*;
 use crate::storage::DynStorageBackend;
@@ -168,6 +169,9 @@ pub async fn import_file_with_options(
        None
    };

+    // Check if this is a markdown file for link extraction
+    let is_markdown = media_type == MediaType::Builtin(BuiltinMediaType::Markdown);
+
    let item = MediaItem {
        id: media_id,
        path: path.clone(),
@@ -206,10 +210,25 @@ pub async fn import_file_with_options(

        // New items are not deleted
        deleted_at: None,
+
+        // Links will be extracted separately
+        links_extracted_at: None,
    };

    storage.insert_media(&item).await?;

+    // Extract and store markdown links for markdown files
+    if is_markdown {
+        if let Err(e) = extract_and_store_links(storage, media_id, &path).await {
+            tracing::warn!(
+                media_id = %media_id,
+                path = %path.display(),
+                error = %e,
+                "failed to extract markdown links"
+            );
+        }
+    }
+
    // Store extracted extra metadata as custom fields
    for (key, value) in &extracted.extra {
        let field = CustomField {
@@ -372,3 +391,44 @@ pub async fn import_directory_with_options(

    Ok(results)
 }
+
+/// Extract markdown links from a file and store them in the database.
+///
+/// Existing links for `media_id` are always cleared first, so a re-import
+/// whose content no longer has links does not leave stale rows behind.
+///
+/// Fails if the file cannot be read as text or any storage call fails.
+async fn extract_and_store_links(
+    storage: &DynStorageBackend,
+    media_id: MediaId,
+    path: &Path,
+) -> Result<()> {
+    // Keep the original `ErrorKind` (instead of flattening it to `Other`)
+    // so callers can still distinguish failure causes; only add context.
+    let content = tokio::fs::read_to_string(path).await.map_err(|e| {
+        PinakesError::Io(std::io::Error::new(
+            e.kind(),
+            format!("failed to read markdown file for link extraction: {e}"),
+        ))
+    })?;
+
+    let extracted_links = links::extract_links(media_id, &content);
+
+    // Clear unconditionally (re-import case), before the emptiness check.
+    storage.clear_links_for_media(media_id).await?;
+
+    if !extracted_links.is_empty() {
+        storage.save_markdown_links(media_id, &extracted_links).await?;
+    }
+
+    // Mark as extracted even when nothing was found.
+    storage.mark_links_extracted(media_id).await?;
+
+    tracing::debug!(
+        media_id = %media_id,
+        link_count = extracted_links.len(),
+        "extracted markdown links"
+    );
+
+    Ok(())
+}