initial commit

Signed-off-by: NotAShelf <raf@notashelf.dev>
Change-Id: I4a6b498153eccd5407510dd541b7f4816a6a6964
This commit is contained in:
raf 2026-01-30 22:05:46 +03:00
commit 6a73d11c4b
Signed by: NotAShelf
GPG key ID: 29D95B64378DB4BF
124 changed files with 34856 additions and 0 deletions

View file

@ -0,0 +1,81 @@
use std::path::Path;
use lofty::file::{AudioFile, TaggedFileExt};
use lofty::tag::Accessor;
use crate::error::{PinakesError, Result};
use crate::media_type::MediaType;
use super::{ExtractedMetadata, MetadataExtractor};
/// Metadata extractor for audio files, backed by the `lofty` tag reader.
pub struct AudioExtractor;

impl MetadataExtractor for AudioExtractor {
    /// Reads tag values and stream properties from the audio file at `path`.
    ///
    /// Tag values are taken from the file's primary tag, falling back to the
    /// first tag present. Track number, disc number, comment, bitrate, sample
    /// rate and channel count are stored as strings in `extra`.
    ///
    /// # Errors
    ///
    /// Returns [`PinakesError::MetadataExtraction`] when `lofty` cannot read
    /// the file.
    fn extract(&self, path: &Path) -> Result<ExtractedMetadata> {
        let tagged_file = lofty::read_from_path(path)
            .map_err(|e| PinakesError::MetadataExtraction(format!("audio metadata: {e}")))?;
        let mut meta = ExtractedMetadata::default();

        // Resolve the tag once and read every tag-derived field from it.
        if let Some(tag) = tagged_file
            .primary_tag()
            .or_else(|| tagged_file.first_tag())
        {
            meta.title = tag.title().map(|s| s.to_string());
            meta.artist = tag.artist().map(|s| s.to_string());
            meta.album = tag.album().map(|s| s.to_string());
            meta.genre = tag.genre().map(|s| s.to_string());
            // Checked conversion: an out-of-range year is dropped instead of
            // silently wrapping to a negative number.
            meta.year = tag.year().and_then(|y| i32::try_from(y).ok());

            if let Some(track) = tag.track() {
                meta.extra
                    .insert("track_number".to_string(), track.to_string());
            }
            if let Some(disc) = tag.disk() {
                meta.extra
                    .insert("disc_number".to_string(), disc.to_string());
            }
            if let Some(comment) = tag.comment() {
                meta.extra
                    .insert("comment".to_string(), comment.to_string());
            }
        }

        let properties = tagged_file.properties();
        // A zero duration is treated as "unknown" and left unset.
        let duration = properties.duration();
        if !duration.is_zero() {
            meta.duration_secs = Some(duration.as_secs_f64());
        }
        if let Some(bitrate) = properties.audio_bitrate() {
            meta.extra
                .insert("bitrate".to_string(), format!("{bitrate} kbps"));
        }
        if let Some(sample_rate) = properties.sample_rate() {
            meta.extra
                .insert("sample_rate".to_string(), format!("{sample_rate} Hz"));
        }
        if let Some(channels) = properties.channels() {
            meta.extra
                .insert("channels".to_string(), channels.to_string());
        }
        Ok(meta)
    }

    /// Audio formats this extractor is registered for.
    fn supported_types(&self) -> &[MediaType] {
        &[
            MediaType::Mp3,
            MediaType::Flac,
            MediaType::Ogg,
            MediaType::Wav,
            MediaType::Aac,
            MediaType::Opus,
        ]
    }
}

View file

@ -0,0 +1,192 @@
use std::path::Path;
use crate::error::{PinakesError, Result};
use crate::media_type::MediaType;
use super::{ExtractedMetadata, MetadataExtractor};
pub struct DocumentExtractor;
impl MetadataExtractor for DocumentExtractor {
fn extract(&self, path: &Path) -> Result<ExtractedMetadata> {
match MediaType::from_path(path) {
Some(MediaType::Pdf) => extract_pdf(path),
Some(MediaType::Epub) => extract_epub(path),
Some(MediaType::Djvu) => extract_djvu(path),
_ => Ok(ExtractedMetadata::default()),
}
}
fn supported_types(&self) -> &[MediaType] {
&[MediaType::Pdf, MediaType::Epub, MediaType::Djvu]
}
}
fn extract_pdf(path: &Path) -> Result<ExtractedMetadata> {
let doc = lopdf::Document::load(path)
.map_err(|e| PinakesError::MetadataExtraction(format!("PDF load: {e}")))?;
let mut meta = ExtractedMetadata::default();
// Find the Info dictionary via the trailer
if let Ok(info_ref) = doc.trailer.get(b"Info") {
let info_obj = if let Ok(reference) = info_ref.as_reference() {
doc.get_object(reference).ok()
} else {
Some(info_ref)
};
if let Some(obj) = info_obj
&& let Ok(dict) = obj.as_dict()
{
if let Ok(title) = dict.get(b"Title") {
meta.title = pdf_object_to_string(title);
}
if let Ok(author) = dict.get(b"Author") {
meta.artist = pdf_object_to_string(author);
}
if let Ok(subject) = dict.get(b"Subject") {
meta.description = pdf_object_to_string(subject);
}
if let Ok(creator) = dict.get(b"Creator") {
meta.extra.insert(
"creator".to_string(),
pdf_object_to_string(creator).unwrap_or_default(),
);
}
if let Ok(producer) = dict.get(b"Producer") {
meta.extra.insert(
"producer".to_string(),
pdf_object_to_string(producer).unwrap_or_default(),
);
}
}
}
// Page count
let page_count = doc.get_pages().len();
if page_count > 0 {
meta.extra
.insert("page_count".to_string(), page_count.to_string());
}
Ok(meta)
}
/// Converts a PDF string or name object into a Rust `String`.
///
/// Returns `None` for every other object kind.
fn pdf_object_to_string(obj: &lopdf::Object) -> Option<String> {
    match obj {
        lopdf::Object::String(bytes, _) => Some(decode_pdf_text(bytes)),
        lopdf::Object::Name(name) => Some(String::from_utf8_lossy(name).into_owned()),
        _ => None,
    }
}

/// Decodes the raw bytes of a PDF text string.
///
/// A leading `FE FF` byte-order mark selects UTF-16BE and a leading
/// `EF BB BF` selects UTF-8; the mark itself is not part of the result.
/// Anything else is decoded as lossy UTF-8, which is exact for ASCII content.
fn decode_pdf_text(bytes: &[u8]) -> String {
    if let Some(utf16) = bytes.strip_prefix(b"\xFE\xFF") {
        // A trailing odd byte cannot form a code unit and is ignored.
        let units: Vec<u16> = utf16
            .chunks_exact(2)
            .map(|pair| u16::from_be_bytes([pair[0], pair[1]]))
            .collect();
        return String::from_utf16_lossy(&units);
    }
    let utf8 = bytes.strip_prefix(b"\xEF\xBB\xBF").unwrap_or(bytes);
    String::from_utf8_lossy(utf8).into_owned()
}
/// Extracts metadata from an EPUB package.
///
/// `title`, `creator` and `description` fill the matching top-level fields;
/// `language`, `publisher` and `date` are copied into `extra` when present.
///
/// # Errors
///
/// Returns [`PinakesError::MetadataExtraction`] when the file cannot be
/// parsed as an EPUB.
fn extract_epub(path: &Path) -> Result<ExtractedMetadata> {
    let book = epub::doc::EpubDoc::new(path)
        .map_err(|err| PinakesError::MetadataExtraction(format!("EPUB parse: {err}")))?;

    // Owned copy of a metadata entry's value, if the entry exists.
    let lookup = |name: &str| book.mdata(name).map(|entry| entry.value.clone());

    let mut meta = ExtractedMetadata {
        title: lookup("title"),
        artist: lookup("creator"),
        description: lookup("description"),
        ..Default::default()
    };
    // These entries keep their own name as the key in `extra`.
    for name in ["language", "publisher", "date"] {
        if let Some(value) = lookup(name) {
            meta.extra.insert(name.to_string(), value);
        }
    }
    Ok(meta)
}
/// Extracts metadata from a DjVu file by scanning its raw bytes.
///
/// DjVu stores metadata as S-expressions such as
/// `(metadata (title "Some Title") ...)` inside annotation chunks. The file
/// is scanned as lossily-decoded text for such a block. If no title is
/// found there, the first quoted string after `(bookmarks` is used as the
/// title.
///
/// Files that are too short or do not start with the `AT&T` magic yield
/// empty metadata rather than an error.
///
/// # Errors
///
/// Returns [`PinakesError::MetadataExtraction`] when the file cannot be read.
fn extract_djvu(path: &Path) -> Result<ExtractedMetadata> {
    let data = std::fs::read(path)
        .map_err(|e| PinakesError::MetadataExtraction(format!("DjVu read: {e}")))?;
    let mut meta = ExtractedMetadata::default();

    // DjVu files start with the "AT&T" magic followed by FORM:DJVU or
    // FORM:DJVM. Anything else is not DjVu, and scanning it for
    // S-expressions would only produce bogus matches.
    if data.len() < 16 || !data.starts_with(b"AT&T") {
        return Ok(meta);
    }

    let content = String::from_utf8_lossy(&data);

    // Look for a (metadata ...) block and pull known keys out of it.
    if let Some(meta_start) = content.find("(metadata") {
        let remainder = &content[meta_start..];
        extract_djvu_field(remainder, "title", &mut meta.title);
        extract_djvu_field(remainder, "author", &mut meta.artist);

        // "subject" is preferred; "description" is the fallback.
        extract_djvu_field(remainder, "subject", &mut meta.description);
        if meta.description.is_none() {
            extract_djvu_field(remainder, "description", &mut meta.description);
        }

        let mut year = None;
        extract_djvu_field(remainder, "year", &mut year);
        // Tolerate surrounding whitespace inside the quoted year.
        meta.year = year.and_then(|y| y.trim().parse().ok());

        let mut creator = None;
        extract_djvu_field(remainder, "creator", &mut creator);
        if let Some(c) = creator {
            meta.extra.insert("creator".to_string(), c);
        }
    }

    // Some encoders write no title into the metadata block; the first
    // bookmark title is often the document title.
    if meta.title.is_none()
        && let Some(bookmarks_start) = content.find("(bookmarks")
        && let Some((_, after_open)) = content[bookmarks_start..].split_once('"')
        && let Some((first_title, _)) = after_open.split_once('"')
        && !first_title.is_empty()
    {
        meta.title = Some(first_title.to_string());
    }
    Ok(meta)
}
/// Finds `(key "value")` in a DjVu S-expression and stores the value in `out`.
///
/// `out` is left untouched when the key is absent, has no quoted value, or
/// its value is empty.
///
/// Matching rules:
/// - The key must be followed, after optional whitespace, by the opening
///   quote. A longer key such as `(titlepage` therefore does not match
///   `title`, and a valueless `(title)` does not pick up the quoted string
///   of a later field.
/// - Inside the value, `\"` and `\\` are unescaped. Other backslash
///   sequences are kept verbatim.
/// - Candidates that do not satisfy these rules are skipped and the search
///   continues with the next occurrence.
fn extract_djvu_field(sexpr: &str, key: &str, out: &mut Option<String>) {
    let pattern = format!("({key}");
    let mut rest = sexpr;
    while let Some(pos) = rest.find(&pattern) {
        // Continue any later search after this occurrence.
        rest = &rest[pos + pattern.len()..];
        if let Some(body) = rest.trim_start().strip_prefix('"') {
            if let Some(value) = read_djvu_quoted(body) {
                if !value.is_empty() {
                    *out = Some(value);
                }
                return;
            }
        }
    }
}

/// Reads a quoted string body (the text after the opening quote) up to the
/// closing unescaped quote. Returns `None` when the string is unterminated.
fn read_djvu_quoted(body: &str) -> Option<String> {
    let mut value = String::new();
    let mut chars = body.chars();
    while let Some(c) = chars.next() {
        match c {
            '"' => return Some(value),
            '\\' => match chars.next()? {
                escaped @ ('"' | '\\') => value.push(escaped),
                other => {
                    value.push('\\');
                    value.push(other);
                }
            },
            _ => value.push(c),
        }
    }
    None
}

View file

@ -0,0 +1,213 @@
use std::path::Path;
use crate::error::Result;
use crate::media_type::MediaType;
use super::{ExtractedMetadata, MetadataExtractor};
pub struct ImageExtractor;
impl MetadataExtractor for ImageExtractor {
fn extract(&self, path: &Path) -> Result<ExtractedMetadata> {
let mut meta = ExtractedMetadata::default();
let file = std::fs::File::open(path)?;
let mut buf_reader = std::io::BufReader::new(&file);
let exif_data = match exif::Reader::new().read_from_container(&mut buf_reader) {
Ok(exif) => exif,
Err(_) => return Ok(meta),
};
// Image dimensions
if let Some(width) = exif_data
.get_field(exif::Tag::PixelXDimension, exif::In::PRIMARY)
.or_else(|| exif_data.get_field(exif::Tag::ImageWidth, exif::In::PRIMARY))
&& let Some(w) = field_to_u32(width)
{
meta.extra.insert("width".to_string(), w.to_string());
}
if let Some(height) = exif_data
.get_field(exif::Tag::PixelYDimension, exif::In::PRIMARY)
.or_else(|| exif_data.get_field(exif::Tag::ImageLength, exif::In::PRIMARY))
&& let Some(h) = field_to_u32(height)
{
meta.extra.insert("height".to_string(), h.to_string());
}
// Camera make and model
if let Some(make) = exif_data.get_field(exif::Tag::Make, exif::In::PRIMARY) {
let val = make.display_value().to_string();
if !val.is_empty() {
meta.extra.insert("camera_make".to_string(), val);
}
}
if let Some(model) = exif_data.get_field(exif::Tag::Model, exif::In::PRIMARY) {
let val = model.display_value().to_string();
if !val.is_empty() {
meta.extra.insert("camera_model".to_string(), val);
}
}
// Date taken
if let Some(date) = exif_data
.get_field(exif::Tag::DateTimeOriginal, exif::In::PRIMARY)
.or_else(|| exif_data.get_field(exif::Tag::DateTime, exif::In::PRIMARY))
{
let val = date.display_value().to_string();
if !val.is_empty() {
meta.extra.insert("date_taken".to_string(), val);
}
}
// GPS coordinates
if let (Some(lat), Some(lat_ref), Some(lon), Some(lon_ref)) = (
exif_data.get_field(exif::Tag::GPSLatitude, exif::In::PRIMARY),
exif_data.get_field(exif::Tag::GPSLatitudeRef, exif::In::PRIMARY),
exif_data.get_field(exif::Tag::GPSLongitude, exif::In::PRIMARY),
exif_data.get_field(exif::Tag::GPSLongitudeRef, exif::In::PRIMARY),
) && let (Some(lat_val), Some(lon_val)) =
(dms_to_decimal(lat, lat_ref), dms_to_decimal(lon, lon_ref))
{
meta.extra
.insert("gps_latitude".to_string(), format!("{lat_val:.6}"));
meta.extra
.insert("gps_longitude".to_string(), format!("{lon_val:.6}"));
}
// Exposure info
if let Some(iso) =
exif_data.get_field(exif::Tag::PhotographicSensitivity, exif::In::PRIMARY)
{
let val = iso.display_value().to_string();
if !val.is_empty() {
meta.extra.insert("iso".to_string(), val);
}
}
if let Some(exposure) = exif_data.get_field(exif::Tag::ExposureTime, exif::In::PRIMARY) {
let val = exposure.display_value().to_string();
if !val.is_empty() {
meta.extra.insert("exposure_time".to_string(), val);
}
}
if let Some(aperture) = exif_data.get_field(exif::Tag::FNumber, exif::In::PRIMARY) {
let val = aperture.display_value().to_string();
if !val.is_empty() {
meta.extra.insert("f_number".to_string(), val);
}
}
if let Some(focal) = exif_data.get_field(exif::Tag::FocalLength, exif::In::PRIMARY) {
let val = focal.display_value().to_string();
if !val.is_empty() {
meta.extra.insert("focal_length".to_string(), val);
}
}
// Lens model
if let Some(lens) = exif_data.get_field(exif::Tag::LensModel, exif::In::PRIMARY) {
let val = lens.display_value().to_string();
if !val.is_empty() && val != "\"\"" {
meta.extra
.insert("lens_model".to_string(), val.trim_matches('"').to_string());
}
}
// Flash
if let Some(flash) = exif_data.get_field(exif::Tag::Flash, exif::In::PRIMARY) {
let val = flash.display_value().to_string();
if !val.is_empty() {
meta.extra.insert("flash".to_string(), val);
}
}
// Orientation
if let Some(orientation) = exif_data.get_field(exif::Tag::Orientation, exif::In::PRIMARY) {
let val = orientation.display_value().to_string();
if !val.is_empty() {
meta.extra.insert("orientation".to_string(), val);
}
}
// Software
if let Some(software) = exif_data.get_field(exif::Tag::Software, exif::In::PRIMARY) {
let val = software.display_value().to_string();
if !val.is_empty() {
meta.extra.insert("software".to_string(), val);
}
}
// Image description as title
if let Some(desc) = exif_data.get_field(exif::Tag::ImageDescription, exif::In::PRIMARY) {
let val = desc.display_value().to_string();
if !val.is_empty() && val != "\"\"" {
meta.title = Some(val.trim_matches('"').to_string());
}
}
// Artist
if let Some(artist) = exif_data.get_field(exif::Tag::Artist, exif::In::PRIMARY) {
let val = artist.display_value().to_string();
if !val.is_empty() && val != "\"\"" {
meta.artist = Some(val.trim_matches('"').to_string());
}
}
// Copyright as description
if let Some(copyright) = exif_data.get_field(exif::Tag::Copyright, exif::In::PRIMARY) {
let val = copyright.display_value().to_string();
if !val.is_empty() && val != "\"\"" {
meta.description = Some(val.trim_matches('"').to_string());
}
}
Ok(meta)
}
fn supported_types(&self) -> &[MediaType] {
&[
MediaType::Jpeg,
MediaType::Png,
MediaType::Gif,
MediaType::Webp,
MediaType::Avif,
MediaType::Tiff,
MediaType::Bmp,
// RAW formats (TIFF-based, kamadak-exif handles these)
MediaType::Cr2,
MediaType::Nef,
MediaType::Arw,
MediaType::Dng,
MediaType::Orf,
MediaType::Rw2,
// HEIC
MediaType::Heic,
]
}
}
/// Returns the first component of a `Long` or `Short` EXIF field as `u32`.
///
/// Yields `None` for any other value type, or when the field has no
/// components.
fn field_to_u32(field: &exif::Field) -> Option<u32> {
    if let exif::Value::Long(longs) = &field.value {
        return longs.first().copied();
    }
    if let exif::Value::Short(shorts) = &field.value {
        return shorts.first().copied().map(u32::from);
    }
    None
}
fn dms_to_decimal(dms_field: &exif::Field, ref_field: &exif::Field) -> Option<f64> {
if let exif::Value::Rational(ref rationals) = dms_field.value
&& rationals.len() >= 3
{
let degrees = rationals[0].to_f64();
let minutes = rationals[1].to_f64();
let seconds = rationals[2].to_f64();
let mut decimal = degrees + minutes / 60.0 + seconds / 3600.0;
let ref_str = ref_field.display_value().to_string();
if ref_str.contains('S') || ref_str.contains('W') {
decimal = -decimal;
}
return Some(decimal);
}
None
}

View file

@ -0,0 +1,40 @@
use std::path::Path;
use crate::error::Result;
use crate::media_type::MediaType;
use super::{ExtractedMetadata, MetadataExtractor};
/// Metadata extractor for Markdown and plain-text files, reading YAML front
/// matter.
pub struct MarkdownExtractor;

impl MetadataExtractor for MarkdownExtractor {
    /// Parses the front matter of the file at `path`.
    ///
    /// String-valued `title`, `author` and `description` keys fill the
    /// matching fields (`author` goes to `artist`); a string-valued `date`
    /// is stored in `extra`. A file whose front matter is missing,
    /// unparsable or not a mapping yields empty metadata.
    ///
    /// # Errors
    ///
    /// Returns an error only when the file cannot be read as text.
    fn extract(&self, path: &Path) -> Result<ExtractedMetadata> {
        let text = std::fs::read_to_string(path)?;
        let matter = gray_matter::Matter::<gray_matter::engine::YAML>::new();
        let front_matter = matter.parse(&text).ok().and_then(|entity| entity.data);

        let mut meta = ExtractedMetadata::default();
        let Some(gray_matter::Pod::Hash(fields)) = front_matter else {
            return Ok(meta);
        };

        // Owned copy of a key's value, but only when it is a string.
        let string_at = |key: &str| match fields.get(key) {
            Some(gray_matter::Pod::String(value)) => Some(value.clone()),
            _ => None,
        };

        meta.title = string_at("title");
        meta.artist = string_at("author");
        meta.description = string_at("description");
        if let Some(date) = string_at("date") {
            meta.extra.insert("date".to_string(), date);
        }
        Ok(meta)
    }

    /// Text formats this extractor is registered for.
    fn supported_types(&self) -> &[MediaType] {
        const SUPPORTED: &[MediaType] = &[MediaType::Markdown, MediaType::PlainText];
        SUPPORTED
    }
}

View file

@ -0,0 +1,46 @@
pub mod audio;
pub mod document;
pub mod image;
pub mod markdown;
pub mod video;
use std::collections::HashMap;
use std::path::Path;
use crate::error::Result;
use crate::media_type::MediaType;
/// Format-independent metadata produced by a [`MetadataExtractor`].
///
/// Every field is optional; `Default` yields a value with nothing set and an
/// empty `extra` map.
#[derive(Debug, Clone, Default)]
pub struct ExtractedMetadata {
/// Title of the work.
pub title: Option<String>,
/// Creator of the work; the document extractors in this module also store
/// authors here.
pub artist: Option<String>,
/// Album name.
pub album: Option<String>,
/// Genre.
pub genre: Option<String>,
/// Year associated with the work.
pub year: Option<i32>,
/// Playback duration in seconds.
pub duration_secs: Option<f64>,
/// Free-form description.
pub description: Option<String>,
/// Additional format-specific values, keyed by name and stored as strings.
pub extra: HashMap<String, String>,
}
/// A metadata reader for one family of media formats.
///
/// Implementors must be `Send + Sync`.
pub trait MetadataExtractor: Send + Sync {
/// Extracts metadata from the file at `path`.
fn extract(&self, path: &Path) -> Result<ExtractedMetadata>;
/// Media types this extractor handles; `extract_metadata` uses this list to
/// pick an extractor.
fn supported_types(&self) -> &[MediaType];
}
/// Extracts metadata from `path` using the first registered extractor that
/// lists `media_type` among its supported types.
///
/// Extractors are consulted in a fixed order: audio, document, video,
/// markdown, image. When none claims the type, empty metadata is returned.
///
/// # Errors
///
/// Propagates whatever error the selected extractor returns.
pub fn extract_metadata(path: &Path, media_type: MediaType) -> Result<ExtractedMetadata> {
    let registry: [&dyn MetadataExtractor; 5] = [
        &audio::AudioExtractor,
        &document::DocumentExtractor,
        &video::VideoExtractor,
        &markdown::MarkdownExtractor,
        &image::ImageExtractor,
    ];
    let chosen = registry
        .iter()
        .find(|candidate| candidate.supported_types().contains(&media_type));
    match chosen {
        Some(extractor) => extractor.extract(path),
        None => Ok(ExtractedMetadata::default()),
    }
}

View file

@ -0,0 +1,120 @@
use std::path::Path;
use crate::error::{PinakesError, Result};
use crate::media_type::MediaType;
use super::{ExtractedMetadata, MetadataExtractor};
/// Metadata extractor for video containers.
pub struct VideoExtractor;

impl MetadataExtractor for VideoExtractor {
    /// Picks the container-specific reader from the media type derived from
    /// `path`.
    ///
    /// WebM is a restricted profile of Matroska, so it shares the Matroska
    /// reader instead of falling through to empty metadata. AVI has no
    /// reader here and yields empty metadata.
    fn extract(&self, path: &Path) -> Result<ExtractedMetadata> {
        match MediaType::from_path(path) {
            Some(MediaType::Mkv | MediaType::Webm) => extract_mkv(path),
            Some(MediaType::Mp4) => extract_mp4(path),
            _ => Ok(ExtractedMetadata::default()),
        }
    }

    /// Video formats this extractor is registered for.
    fn supported_types(&self) -> &[MediaType] {
        &[
            MediaType::Mp4,
            MediaType::Mkv,
            MediaType::Avi,
            MediaType::Webm,
        ]
    }
}
/// Extracts metadata from a Matroska container.
///
/// Title and duration come from the segment info. Each video track
/// contributes `resolution` and `video_codec`; each audio track contributes
/// `sample_rate`, `channels` and `audio_codec`. With several tracks of one
/// kind, later tracks overwrite the values of earlier ones.
///
/// # Errors
///
/// Returns an error when the file cannot be opened, or
/// [`PinakesError::MetadataExtraction`] when it cannot be parsed.
fn extract_mkv(path: &Path) -> Result<ExtractedMetadata> {
    let handle = std::fs::File::open(path)?;
    let container = matroska::Matroska::open(handle)
        .map_err(|err| PinakesError::MetadataExtraction(format!("MKV parse: {err}")))?;

    let mut meta = ExtractedMetadata {
        title: container.info.title.clone(),
        duration_secs: container.info.duration.map(|length| length.as_secs_f64()),
        ..Default::default()
    };

    for track in &container.tracks {
        // Record the per-kind settings and pick the key under which this
        // track's codec id is stored; other track kinds are skipped.
        let codec_key = match &track.settings {
            matroska::Settings::Video(video) => {
                let dimensions = format!("{}x{}", video.pixel_width, video.pixel_height);
                meta.extra.insert("resolution".to_string(), dimensions);
                "video_codec"
            }
            matroska::Settings::Audio(audio) => {
                let rate = format!("{} Hz", audio.sample_rate as u32);
                meta.extra.insert("sample_rate".to_string(), rate);
                meta.extra
                    .insert("channels".to_string(), audio.channels.to_string());
                "audio_codec"
            }
            _ => continue,
        };
        if !track.codec_id.is_empty() {
            meta.extra
                .insert(codec_key.to_string(), track.codec_id.clone());
        }
    }
    Ok(meta)
}
/// Extracts metadata from an MP4 file through `lofty`.
///
/// Tag values are taken from the primary tag, falling back to the first tag
/// present. Audio bitrate, sample rate and channel count are stored as
/// strings in `extra`.
///
/// # Errors
///
/// Returns [`PinakesError::MetadataExtraction`] when `lofty` cannot read
/// the file.
fn extract_mp4(path: &Path) -> Result<ExtractedMetadata> {
    use lofty::file::{AudioFile, TaggedFileExt};
    use lofty::tag::Accessor;
    use std::borrow::Cow;

    let tagged_file = lofty::read_from_path(path)
        .map_err(|e| PinakesError::MetadataExtraction(format!("MP4 metadata: {e}")))?;
    let mut meta = ExtractedMetadata::default();

    if let Some(tag) = tagged_file
        .primary_tag()
        .or_else(|| tagged_file.first_tag())
    {
        // `into_owned` reuses the allocation when the accessor already
        // returns an owned string.
        meta.title = tag.title().map(Cow::into_owned);
        meta.artist = tag.artist().map(Cow::into_owned);
        meta.album = tag.album().map(Cow::into_owned);
        meta.genre = tag.genre().map(Cow::into_owned);
        // Checked conversion: an out-of-range year is dropped instead of
        // silently wrapping to a negative number.
        meta.year = tag.year().and_then(|y| i32::try_from(y).ok());
    }

    let properties = tagged_file.properties();
    // A zero duration is treated as "unknown" and left unset.
    let duration = properties.duration();
    if !duration.is_zero() {
        meta.duration_secs = Some(duration.as_secs_f64());
    }
    if let Some(bitrate) = properties.audio_bitrate() {
        meta.extra
            .insert("audio_bitrate".to_string(), format!("{bitrate} kbps"));
    }
    if let Some(sample_rate) = properties.sample_rate() {
        meta.extra
            .insert("sample_rate".to_string(), format!("{sample_rate} Hz"));
    }
    if let Some(channels) = properties.channels() {
        meta.extra
            .insert("channels".to_string(), channels.to_string());
    }
    Ok(meta)
}