various: markdown improvements

Signed-off-by: NotAShelf <raf@notashelf.dev>
Change-Id: I81fda8247814da19eed1e76dbe97bd5b6a6a6964
This commit is contained in:
raf 2026-02-05 15:39:05 +03:00
commit 80a8b5c7ca
Signed by: NotAShelf
GPG key ID: 29D95B64378DB4BF
23 changed files with 3458 additions and 30 deletions

View file

@ -6,7 +6,8 @@ use tracing::info;
use crate::audit;
use crate::error::{PinakesError, Result};
use crate::hash::compute_file_hash;
use crate::media_type::MediaType;
use crate::links;
use crate::media_type::{BuiltinMediaType, MediaType};
use crate::metadata;
use crate::model::*;
use crate::storage::DynStorageBackend;
@ -168,6 +169,9 @@ pub async fn import_file_with_options(
None
};
// Check if this is a markdown file for link extraction
let is_markdown = media_type == MediaType::Builtin(BuiltinMediaType::Markdown);
let item = MediaItem {
id: media_id,
path: path.clone(),
@ -206,10 +210,25 @@ pub async fn import_file_with_options(
// New items are not deleted
deleted_at: None,
// Links will be extracted separately
links_extracted_at: None,
};
storage.insert_media(&item).await?;
// Extract and store markdown links for markdown files
if is_markdown {
if let Err(e) = extract_and_store_links(storage, media_id, &path).await {
tracing::warn!(
media_id = %media_id,
path = %path.display(),
error = %e,
"failed to extract markdown links"
);
}
}
// Store extracted extra metadata as custom fields
for (key, value) in &extracted.extra {
let field = CustomField {
@ -372,3 +391,44 @@ pub async fn import_directory_with_options(
Ok(results)
}
/// Extract markdown links from a file and store them in the database.
///
/// Reads the file at `path` as text, runs `links::extract_links` over the
/// content, replaces the links stored for `media_id` with the result, and
/// finally marks the item as having had its links extracted.
///
/// # Errors
///
/// Returns `PinakesError::Io` if the file cannot be read into a string (the
/// underlying `std::io::ErrorKind` is preserved), and propagates any error
/// returned by the storage backend calls.
async fn extract_and_store_links(
    storage: &DynStorageBackend,
    media_id: MediaId,
    path: &Path,
) -> Result<()> {
    // Keep the original error kind (NotFound, InvalidData, ...) instead of
    // flattening everything to `Other`; only the message gains context.
    let content = tokio::fs::read_to_string(path).await.map_err(|e| {
        PinakesError::Io(std::io::Error::new(
            e.kind(),
            format!("failed to read markdown file for link extraction: {e}"),
        ))
    })?;

    let extracted_links = links::extract_links(media_id, &content);

    // Always clear previously stored links, even when the file now contains
    // none: otherwise a re-extraction of a file whose links were removed
    // would leave stale rows behind while still being marked as extracted.
    storage.clear_links_for_media(media_id).await?;

    // Nothing to persist when no links were found.
    if !extracted_links.is_empty() {
        storage.save_markdown_links(media_id, &extracted_links).await?;
    }

    // Record that extraction ran, regardless of how many links were found.
    storage.mark_links_extracted(media_id).await?;

    tracing::debug!(
        media_id = %media_id,
        link_count = extracted_links.len(),
        "extracted markdown links"
    );
    Ok(())
}