pinakes-core: fix isbn regex, csv quoting, document extraction, and enrichment accuracy

Signed-off-by: NotAShelf <raf@notashelf.dev>
Change-Id: I974959e74d2b5b5591437daa0f29291a6a6a6964
This commit is contained in:
raf 2026-03-08 00:42:01 +03:00
commit d5be5026a7
Signed by: NotAShelf
GPG key ID: 29D95B64378DB4BF
5 changed files with 132 additions and 90 deletions

View file

@ -11,7 +11,8 @@ use crate::media_type::MediaType;
pub struct MediaId(pub Uuid);
impl MediaId {
/// Creates a new media ID using UUIDv7.
/// Creates a new media ID using `UUIDv7`.
#[must_use]
pub fn new() -> Self {
Self(Uuid::now_v7())
}
@ -25,7 +26,7 @@ impl fmt::Display for MediaId {
impl Default for MediaId {
fn default() -> Self {
Self::new()
Self(uuid::Uuid::nil())
}
}
@ -35,7 +36,8 @@ pub struct ContentHash(pub String);
impl ContentHash {
/// Creates a new content hash from a hex string.
pub fn new(hex: String) -> Self {
#[must_use]
pub const fn new(hex: String) -> Self {
Self(hex)
}
}
@ -75,7 +77,7 @@ impl std::str::FromStr for StorageMode {
match s.to_lowercase().as_str() {
"external" => Ok(Self::External),
"managed" => Ok(Self::Managed),
_ => Err(format!("unknown storage mode: {}", s)),
_ => Err(format!("unknown storage mode: {s}")),
}
}
}
@ -147,11 +149,11 @@ pub struct MediaItem {
#[serde(default)]
pub storage_mode: StorageMode,
/// Original filename for uploaded files (preserved separately from
/// file_name)
/// `file_name`)
pub original_filename: Option<String>,
/// When the file was uploaded to managed storage
pub uploaded_at: Option<DateTime<Utc>>,
/// Storage key for looking up the blob (usually same as content_hash)
/// Storage key for looking up the blob (usually same as `content_hash`)
pub storage_key: Option<String>,
pub created_at: DateTime<Utc>,
@ -182,7 +184,8 @@ pub enum CustomFieldType {
}
impl CustomFieldType {
pub fn as_str(&self) -> &'static str {
#[must_use]
pub const fn as_str(&self) -> &'static str {
match self {
Self::Text => "text",
Self::Number => "number",
@ -228,7 +231,8 @@ pub enum CollectionKind {
}
impl CollectionKind {
pub fn as_str(&self) -> &'static str {
#[must_use]
pub const fn as_str(&self) -> &'static str {
match self {
Self::Manual => "manual",
Self::Virtual => "virtual",
@ -380,7 +384,8 @@ pub struct Pagination {
impl Pagination {
/// Creates a new pagination instance.
pub fn new(offset: u64, limit: u64, sort: Option<String>) -> Self {
#[must_use]
pub const fn new(offset: u64, limit: u64, sort: Option<String>) -> Self {
Self {
offset,
limit,
@ -431,7 +436,7 @@ pub struct BookMetadata {
}
/// Information about a book author.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct AuthorInfo {
pub name: String,
pub role: String,
@ -441,6 +446,7 @@ pub struct AuthorInfo {
impl AuthorInfo {
/// Creates a new author with the given name.
#[must_use]
pub fn new(name: String) -> Self {
Self {
name,
@ -451,17 +457,20 @@ impl AuthorInfo {
}
/// Sets the author's role.
///
/// Takes `self` by value, overwrites the `role` field with the given
/// string, and returns the updated value. The result is marked
/// `#[must_use]`, so discarding it produces a compiler warning.
#[must_use]
pub fn with_role(mut self, role: String) -> Self {
self.role = role;
self
}
/// Sets the `file_as` field to `Some(file_as)`.
///
/// Takes `self` by value and returns the updated value. The result is
/// marked `#[must_use]`, so discarding it produces a compiler warning.
#[must_use]
pub fn with_file_as(mut self, file_as: String) -> Self {
self.file_as = Some(file_as);
self
}
pub fn with_position(mut self, position: i32) -> Self {
#[must_use]
pub const fn with_position(mut self, position: i32) -> Self {
self.position = position;
self
}
@ -496,21 +505,20 @@ pub struct ReadingProgress {
impl ReadingProgress {
/// Creates a new reading progress entry.
#[must_use]
pub fn new(
media_id: MediaId,
user_id: Uuid,
current_page: i32,
total_pages: Option<i32>,
) -> Self {
let progress_percent = if let Some(total) = total_pages {
let progress_percent = total_pages.map_or(0.0, |total| {
if total > 0 {
(current_page as f64 / total as f64 * 100.0).min(100.0)
(f64::from(current_page) / f64::from(total) * 100.0).min(100.0)
} else {
0.0
}
} else {
0.0
};
});
Self {
media_id,
@ -574,7 +582,7 @@ impl std::str::FromStr for LinkType {
"wikilink" => Ok(Self::Wikilink),
"markdown_link" => Ok(Self::MarkdownLink),
"embed" => Ok(Self::Embed),
_ => Err(format!("unknown link type: {}", s)),
_ => Err(format!("unknown link type: {s}")),
}
}
}
@ -586,7 +594,7 @@ pub struct MarkdownLink {
pub source_media_id: MediaId,
/// Raw link target as written in the source (wikilink name or path)
pub target_path: String,
/// Resolved target media_id (None if unresolved)
/// Resolved target `media_id` (None if unresolved)
pub target_media_id: Option<MediaId>,
pub link_type: LinkType,
/// Display text for the link