hash: added FIFO for hashes, should reduce duplicates
This commit is contained in:
parent
d412c1a378
commit
5a3e3333b3
1 changed file with 21 additions and 2 deletions
23
discorss.py
23
discorss.py
|
|
@ -27,6 +27,7 @@ import re
|
|||
class Discorss:
|
||||
FEED_TIMEOUT_SECONDS = 15
|
||||
DRY_RUN = True
|
||||
HASH_HISTORY_LIMIT = 10
|
||||
|
||||
def __init__(self):
|
||||
self.config_dir = os.environ.get("XDG_CONFIG_HOME")
|
||||
|
|
@ -63,6 +64,20 @@ class Discorss:
|
|||
timeout=self.FEED_TIMEOUT_SECONDS,
|
||||
)
|
||||
|
||||
def _get_hash_history(self, hook):
|
||||
# now we store a list of hashes 10 long
|
||||
# this function checks if it's the old format and updates it if needed
|
||||
existing_hashes = hook.get("lasthash", [])
|
||||
if isinstance(existing_hashes, str):
|
||||
return [existing_hashes]
|
||||
if isinstance(existing_hashes, list):
|
||||
return [
|
||||
saved_hash
|
||||
for saved_hash in existing_hashes
|
||||
if isinstance(saved_hash, str)
|
||||
]
|
||||
return []
|
||||
|
||||
async def _process_feed(self, hook, last_check):
|
||||
self.logger.debug("Parsing feed %s...", hook["name"])
|
||||
feeds = await self._fetch_feed(hook)
|
||||
|
|
@ -102,7 +117,7 @@ class Discorss:
|
|||
self.logger.error("URL of %s isn't hashing correctly", hook["name"])
|
||||
return None
|
||||
|
||||
if hook.get("lasthash") == new_hash:
|
||||
if new_hash in self._get_hash_history(hook):
|
||||
return None
|
||||
|
||||
# Generate the webhook
|
||||
|
|
@ -200,7 +215,11 @@ class Discorss:
|
|||
self.logger.info(
|
||||
"Feed %s has no existing hash, likely a new feed!", hook["name"]
|
||||
)
|
||||
self.app_config["feeds"][i]["lasthash"] = result
|
||||
hash_history = self._get_hash_history(hook)
|
||||
hash_history.append(result)
|
||||
if len(hash_history) > self.HASH_HISTORY_LIMIT:
|
||||
hash_history = hash_history[-self.HASH_HISTORY_LIMIT :]
|
||||
self.app_config["feeds"][i]["lasthash"] = hash_history
|
||||
|
||||
# This function gets and formats the brief excerpt that goes in the embed
|
||||
# Different feeds put summaries in different fields, so we pick the best
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue