hash: add a FIFO of recent entry hashes, reducing duplicate posts

This commit is contained in:
A.M. Rowsell 2026-04-24 10:26:26 -04:00
commit 5a3e3333b3
Signed by: amr
GPG key ID: E0879EDBDB0CA7B1

View file

@ -27,6 +27,7 @@ import re
class Discorss:
FEED_TIMEOUT_SECONDS = 15
DRY_RUN = True
HASH_HISTORY_LIMIT = 10
def __init__(self):
self.config_dir = os.environ.get("XDG_CONFIG_HOME")
@ -63,6 +64,20 @@ class Discorss:
timeout=self.FEED_TIMEOUT_SECONDS,
)
def _get_hash_history(self, hook):
# now we store a list of hashes 10 long
# this function checks if it's the old format and updates it if needed
existing_hashes = hook.get("lasthash", [])
if isinstance(existing_hashes, str):
return [existing_hashes]
if isinstance(existing_hashes, list):
return [
saved_hash
for saved_hash in existing_hashes
if isinstance(saved_hash, str)
]
return []
async def _process_feed(self, hook, last_check):
self.logger.debug("Parsing feed %s...", hook["name"])
feeds = await self._fetch_feed(hook)
@ -102,7 +117,7 @@ class Discorss:
self.logger.error("URL of %s isn't hashing correctly", hook["name"])
return None
if hook.get("lasthash") == new_hash:
if new_hash in self._get_hash_history(hook):
return None
# Generate the webhook
@ -200,7 +215,11 @@ class Discorss:
self.logger.info(
"Feed %s has no existing hash, likely a new feed!", hook["name"]
)
self.app_config["feeds"][i]["lasthash"] = result
hash_history = self._get_hash_history(hook)
hash_history.append(result)
if len(hash_history) > self.HASH_HISTORY_LIMIT:
hash_history = hash_history[-self.HASH_HISTORY_LIMIT :]
self.app_config["feeds"][i]["lasthash"] = hash_history
# This function gets and formats the brief excerpt that goes in the embed
# Different feeds put summaries in different fields, so we pick the best