pinakes/crates/pinakes-metadata/src/image.rs
NotAShelf e955f167b9
treewide: extract various components from pinakes-core into their own crates
Signed-off-by: NotAShelf <raf@notashelf.dev>
Change-Id: Ida2b25c66d62b40e75eeee924fe9c39c6a6a6964
2026-05-24 14:25:54 +03:00

300 lines
9.1 KiB
Rust

use std::path::Path;
use pinakes_types::{
error::Result,
media_type::{BuiltinMediaType, MediaType},
};
use super::{ExtractedMetadata, MetadataExtractor};
/// Metadata extractor for image files.
///
/// Stateless unit type; its [`MetadataExtractor`] implementation reads EXIF
/// data from the file and maps it onto [`ExtractedMetadata`].
pub struct ImageExtractor;
impl MetadataExtractor for ImageExtractor {
  /// Extracts EXIF-derived metadata from the image at `path`.
  ///
  /// Extraction is best-effort: if the file opens but no EXIF data can be
  /// read from it, an otherwise-default [`ExtractedMetadata`] is returned
  /// instead of an error. Tags that are absent, render to an empty string, or
  /// have an unexpected value type are skipped individually.
  ///
  /// Text tags (make, model, lens model, software, image description, artist,
  /// copyright) are stored without the surrounding double quotes and padding
  /// that `display_value()` renders around ASCII values.
  ///
  /// # Errors
  ///
  /// Returns an error if the file at `path` cannot be opened.
  fn extract(&self, path: &Path) -> Result<ExtractedMetadata> {
    let mut meta = ExtractedMetadata::default();
    let file = std::fs::File::open(path)?;
    let mut buf_reader = std::io::BufReader::new(&file);
    let Ok(exif_data) =
      exif::Reader::new().read_from_container(&mut buf_reader)
    else {
      // Missing or unreadable EXIF is not treated as a failure.
      return Ok(meta);
    };

    // Image dimensions: prefer the EXIF pixel dimensions, fall back to the
    // TIFF width/length tags when the former are absent.
    if let Some(width) =
      primary_field(&exif_data, exif::Tag::PixelXDimension)
        .or_else(|| primary_field(&exif_data, exif::Tag::ImageWidth))
      && let Some(w) = field_to_u32(width)
    {
      meta.extra.insert("width".to_string(), w.to_string());
    }
    if let Some(height) =
      primary_field(&exif_data, exif::Tag::PixelYDimension)
        .or_else(|| primary_field(&exif_data, exif::Tag::ImageLength))
      && let Some(h) = field_to_u32(height)
    {
      meta.extra.insert("height".to_string(), h.to_string());
    }

    // Camera make and model - set both in top-level fields and extra
    if let Some(make) = ascii_text(&exif_data, exif::Tag::Make) {
      meta.camera_make = Some(make.clone());
      meta.extra.insert("camera_make".to_string(), make);
    }
    if let Some(model) = ascii_text(&exif_data, exif::Tag::Model) {
      meta.camera_model = Some(model.clone());
      meta.extra.insert("camera_model".to_string(), model);
    }

    // Date taken: the rendered value is kept verbatim in `extra`; the parsed
    // timestamp is only set when `parse_exif_datetime` understands it.
    if let Some(date) =
      primary_field(&exif_data, exif::Tag::DateTimeOriginal)
        .or_else(|| primary_field(&exif_data, exif::Tag::DateTime))
    {
      let val = date.display_value().to_string();
      if !val.is_empty() {
        if let Some(dt) = parse_exif_datetime(&val) {
          meta.date_taken = Some(dt);
        }
        meta.extra.insert("date_taken".to_string(), val);
      }
    }

    // GPS coordinates - set both in top-level fields and extra. All four
    // tags must be present and both conversions must succeed.
    if let (Some(lat), Some(lat_ref), Some(lon), Some(lon_ref)) = (
      primary_field(&exif_data, exif::Tag::GPSLatitude),
      primary_field(&exif_data, exif::Tag::GPSLatitudeRef),
      primary_field(&exif_data, exif::Tag::GPSLongitude),
      primary_field(&exif_data, exif::Tag::GPSLongitudeRef),
    ) && let (Some(lat_val), Some(lon_val)) =
      (dms_to_decimal(lat, lat_ref), dms_to_decimal(lon, lon_ref))
    {
      meta.latitude = Some(lat_val);
      meta.longitude = Some(lon_val);
      meta
        .extra
        .insert("gps_latitude".to_string(), format!("{lat_val:.6}"));
      meta
        .extra
        .insert("gps_longitude".to_string(), format!("{lon_val:.6}"));
    }

    // Exposure info (stored exactly as rendered by `display_value()`)
    for (tag, key) in [
      (exif::Tag::PhotographicSensitivity, "iso"),
      (exif::Tag::ExposureTime, "exposure_time"),
      (exif::Tag::FNumber, "f_number"),
      (exif::Tag::FocalLength, "focal_length"),
    ] {
      if let Some(val) = display_text(&exif_data, tag) {
        meta.extra.insert(key.to_string(), val);
      }
    }

    // Lens model
    if let Some(lens) = ascii_text(&exif_data, exif::Tag::LensModel) {
      meta.extra.insert("lens_model".to_string(), lens);
    }

    // Flash and orientation (stored exactly as rendered)
    for (tag, key) in [
      (exif::Tag::Flash, "flash"),
      (exif::Tag::Orientation, "orientation"),
    ] {
      if let Some(val) = display_text(&exif_data, tag) {
        meta.extra.insert(key.to_string(), val);
      }
    }

    // Software
    if let Some(software) = ascii_text(&exif_data, exif::Tag::Software) {
      meta.extra.insert("software".to_string(), software);
    }

    // Image description as title
    if let Some(title) = ascii_text(&exif_data, exif::Tag::ImageDescription) {
      meta.title = Some(title);
    }

    // Artist
    if let Some(artist) = ascii_text(&exif_data, exif::Tag::Artist) {
      meta.artist = Some(artist);
    }

    // Copyright as description
    if let Some(copyright) = ascii_text(&exif_data, exif::Tag::Copyright) {
      meta.description = Some(copyright);
    }

    Ok(meta)
  }

  /// Returns the media types this extractor declares support for.
  fn supported_types(&self) -> Vec<MediaType> {
    vec![
      MediaType::Builtin(BuiltinMediaType::Jpeg),
      MediaType::Builtin(BuiltinMediaType::Png),
      MediaType::Builtin(BuiltinMediaType::Gif),
      MediaType::Builtin(BuiltinMediaType::Webp),
      MediaType::Builtin(BuiltinMediaType::Avif),
      MediaType::Builtin(BuiltinMediaType::Tiff),
      MediaType::Builtin(BuiltinMediaType::Bmp),
      // RAW formats (TIFF-based, kamadak-exif handles these)
      MediaType::Builtin(BuiltinMediaType::Cr2),
      MediaType::Builtin(BuiltinMediaType::Nef),
      MediaType::Builtin(BuiltinMediaType::Arw),
      MediaType::Builtin(BuiltinMediaType::Dng),
      MediaType::Builtin(BuiltinMediaType::Orf),
      MediaType::Builtin(BuiltinMediaType::Rw2),
      // HEIC
      MediaType::Builtin(BuiltinMediaType::Heic),
    ]
  }
}

/// Looks up `tag` among the primary image's fields.
fn primary_field(
  exif_data: &exif::Exif,
  tag: exif::Tag,
) -> Option<&exif::Field> {
  exif_data.get_field(tag, exif::In::PRIMARY)
}

/// Renders `tag` with `display_value()`, returning `None` when the tag is
/// absent or renders to an empty string.
fn display_text(exif_data: &exif::Exif, tag: exif::Tag) -> Option<String> {
  let rendered = primary_field(exif_data, tag)?.display_value().to_string();
  (!rendered.is_empty()).then_some(rendered)
}

/// Renders a text tag and strips the surrounding double quotes and padding,
/// returning `None` when nothing but quotes/whitespace remains.
fn ascii_text(exif_data: &exif::Exif, tag: exif::Tag) -> Option<String> {
  let rendered = display_text(exif_data, tag)?;
  let cleaned = rendered.trim().trim_matches('"').trim();
  (!cleaned.is_empty()).then(|| cleaned.to_string())
}
/// Reads the first component of an integer-valued field as a `u32`.
///
/// Only `Long` and `Short` values are accepted; any other value type, or an
/// empty component list, yields `None`.
fn field_to_u32(field: &exif::Field) -> Option<u32> {
  if let exif::Value::Long(longs) = &field.value {
    return longs.first().copied();
  }
  if let exif::Value::Short(shorts) = &field.value {
    return shorts.first().copied().map(u32::from);
  }
  None
}
fn dms_to_decimal(
dms_field: &exif::Field,
ref_field: &exif::Field,
) -> Option<f64> {
if let exif::Value::Rational(ref rationals) = dms_field.value
&& rationals.len() >= 3
{
let degrees = rationals[0].to_f64();
let minutes = rationals[1].to_f64();
let seconds = rationals[2].to_f64();
let mut decimal = degrees + minutes / 60.0 + seconds / 3600.0;
let ref_str = ref_field.display_value().to_string();
if ref_str.contains('S') || ref_str.contains('W') {
decimal = -decimal;
}
return Some(decimal);
}
None
}
/// Parse EXIF datetime format: "YYYY:MM:DD HH:MM:SS"
fn parse_exif_datetime(s: &str) -> Option<chrono::DateTime<chrono::Utc>> {
use chrono::NaiveDateTime;
// EXIF format is "YYYY:MM:DD HH:MM:SS"
let s = s.trim().trim_matches('"');
// Try standard EXIF format
if let Ok(dt) = NaiveDateTime::parse_from_str(s, "%Y:%m:%d %H:%M:%S") {
return Some(dt.and_utc());
}
// Try ISO format as fallback
if let Ok(dt) = NaiveDateTime::parse_from_str(s, "%Y-%m-%d %H:%M:%S") {
return Some(dt.and_utc());
}
None
}
/// Generate a perceptual hash for an image file.
///
/// Uses `image_hasher`'s `DoubleGradient` algorithm with an 8x8 hash size; no
/// DCT preprocessing is configured. Returns the hash as a base64-encoded
/// string, or `None` if the image cannot be opened or decoded.
///
/// The algorithm, hash size and encoding determine the stored value, so
/// changing any of them makes new hashes incomparable with existing ones.
#[must_use]
pub fn generate_perceptual_hash(path: &Path) -> Option<String> {
  use image_hasher::{HashAlg, HasherConfig};

  // Open and decode the image; any failure simply yields `None`.
  let img = image::open(path).ok()?;

  let hasher = HasherConfig::new()
    .hash_alg(HashAlg::DoubleGradient)
    .hash_size(8, 8)
    .to_hasher();

  // Base64 is the string form used for storage.
  Some(hasher.hash_image(&img).to_base64())
}