From 5a3e3333b369b2c3ad1e785d1e924f26de62cec3 Mon Sep 17 00:00:00 2001 From: "A.M. Rowsell" Date: Fri, 24 Apr 2026 10:26:26 -0400 Subject: [PATCH] hash: added FIFO for hashes, should reduce duplicates --- discorss.py | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/discorss.py b/discorss.py index 7e4f279..c880a98 100755 --- a/discorss.py +++ b/discorss.py @@ -27,6 +27,7 @@ import re class Discorss: FEED_TIMEOUT_SECONDS = 15 DRY_RUN = True + HASH_HISTORY_LIMIT = 10 def __init__(self): self.config_dir = os.environ.get("XDG_CONFIG_HOME") @@ -63,6 +64,20 @@ class Discorss: timeout=self.FEED_TIMEOUT_SECONDS, ) + def _get_hash_history(self, hook): + # now we store a list of hashes 10 long + # this function checks if it's the old format and updates it if needed + existing_hashes = hook.get("lasthash", []) + if isinstance(existing_hashes, str): + return [existing_hashes] + if isinstance(existing_hashes, list): + return [ + saved_hash + for saved_hash in existing_hashes + if isinstance(saved_hash, str) + ] + return [] + async def _process_feed(self, hook, last_check): self.logger.debug("Parsing feed %s...", hook["name"]) feeds = await self._fetch_feed(hook) @@ -102,7 +117,7 @@ class Discorss: self.logger.error("URL of %s isn't hashing correctly", hook["name"]) return None - if hook.get("lasthash") == new_hash: + if new_hash in self._get_hash_history(hook): return None # Generate the webhook @@ -200,7 +215,11 @@ class Discorss: self.logger.info( "Feed %s has no existing hash, likely a new feed!", hook["name"] ) - self.app_config["feeds"][i]["lasthash"] = result + hash_history = self._get_hash_history(hook) + hash_history.append(result) + if len(hash_history) > self.HASH_HISTORY_LIMIT: + hash_history = hash_history[-self.HASH_HISTORY_LIMIT :] + self.app_config["feeds"][i]["lasthash"] = hash_history # This function gets and formats the brief excerpt that goes in the embed # Different feeds put summaries in different fields, so we pick the best