From 8129da759f1c74dee55e3ea3cdd1a6c3b83de844 Mon Sep 17 00:00:00 2001 From: "A.M. Rowsell" Date: Tue, 4 Mar 2025 16:27:06 -0500 Subject: [PATCH] Trying to make description cutoff smarter. Changed hashing. Hashing now takes the sha3_512 hash of both the title and the published time, because some feeds (like weather alerts) will have the same title all the time, for every entry. The description cutoff now goes backwards until it finds a space character, then it sets the cutoff there. Also, the length can now be passed as a parameter, with default value of 250. Might also add minimum length as a parameter too. --- discorss.py | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/discorss.py b/discorss.py index e7f27d5..d33f387 100755 --- a/discorss.py +++ b/discorss.py @@ -45,18 +45,32 @@ app_config = {} # Different feeds put summaries in different fields, so we pick the best # one and limit it to 250 characters. # TODO: make the character limit smarter, as to split at a natural point -def get_description(feed): +def get_description(feed, length=250): try: temporary_string = str(feed["summary_detail"]["value"]) temporary_string = html_filter.sub("", temporary_string) + while length > 150: + if temporary_string[length - 1 : length] == " ": + break + else: + length -= 1 desc = ( - temporary_string[:250] if len(temporary_string) > 250 else temporary_string + temporary_string[:length] + if len(temporary_string) > length + else temporary_string ) except KeyError: temporary_string = str(feed["description"]) temporary_string = html_filter.sub("", temporary_string) + while length > 150: + if temporary_string[length - 1 : length] == " ": + break + else: + length -= 1 desc = ( - temporary_string[:250] if len(temporary_string) > 250 else temporary_string + temporary_string[:length] + if len(temporary_string) > length + else temporary_string ) return desc @@ -135,8 +149,10 @@ def main(): "Feed %s doesn't supply a published time, using updated time instead", hook["name"], ) - # Hash the title of the latest post and use that to determine if it's been posted - new_hash = hashlib.sha3_512(bytes(latest_post["title"], "utf-8")).hexdigest() + # Hash the title and time of the latest post and use that to determine if it's been posted + new_hash = hashlib.sha3_512( + bytes(latest_post["title"] + str(published_time), "utf-8") + ).hexdigest() try: if hook["lasthash"] != new_hash: app_config["feeds"][i]["lasthash"] = new_hash