Trying to make description cutoff smarter. Changed hashing.

Hashing now takes the sha3_512 hash of both the title and
the published time, because some feeds (like weather alerts)
will have the same title all the time, for every entry.

The description cutoff now walks backwards from the length
limit until it finds a space character and sets the cutoff
there, giving up once it reaches 150 characters. Also, the
length can now be passed as a parameter, with a default value
of 250. A minimum length parameter might be added later.
This commit is contained in:
A.M. Rowsell 2025-03-04 16:27:06 -05:00
parent 8ff64608cd
commit 8129da759f
Signed by: amr
GPG key ID: 0B6E2D8375CF79A9

View file

@ -45,18 +45,32 @@ app_config = {}
# Different feeds put summaries in different fields, so we pick the best
# one and limit it to 250 characters.
# TODO: make the character limit smarter, as to split at a natural point
def get_description(feed, length=250, min_length=150):
    """Return a plain-text description for a feed entry, cut at a space.

    The text is taken from ``feed["summary_detail"]["value"]`` when that key
    path exists, otherwise from ``feed["description"]``. It is converted to
    ``str`` and passed through ``html_filter`` before any truncation.

    Text that already fits within ``length`` characters is returned whole.
    Longer text is cut just after the last space found between
    ``min_length`` and ``length``; if there is no space in that window the
    text is cut at ``min_length`` (or at ``length`` when ``length`` is the
    smaller of the two).

    Args:
        feed: Mapping-like feed entry.
        length: Maximum number of characters to return.
        min_length: Lowest position the backwards search for a space may
            reach.

    Returns:
        The filtered, possibly truncated, description string.

    Raises:
        KeyError: If the entry has neither a summary value nor a
            ``"description"`` key.
    """
    try:
        raw_text = feed["summary_detail"]["value"]
    except KeyError:
        raw_text = feed["description"]
    text = html_filter.sub("", str(raw_text))

    # Only look for a cut point when the text is actually too long;
    # otherwise a short description would be shortened for no reason.
    if len(text) <= length:
        return text

    # Last space whose index lies in [min_length, length); -1 if none.
    space_index = text.rfind(" ", min_length, length)
    if space_index != -1:
        # Keep everything up to and including the space.
        cut = space_index + 1
    else:
        # No natural break in the window: fall back to the floor, but
        # never exceed the requested maximum.
        cut = min(length, min_length)
    return text[:cut]
@ -135,8 +149,10 @@ def main():
"Feed %s doesn't supply a published time, using updated time instead",
hook["name"],
)
# Hash the title of the latest post and use that to determine if it's been posted
new_hash = hashlib.sha3_512(bytes(latest_post["title"], "utf-8")).hexdigest()
# Hash the title and time of the latest post and use that to determine if it's been posted
new_hash = hashlib.sha3_512(
bytes(latest_post["title"] + str(published_time), "utf-8")
).hexdigest()
try:
if hook["lasthash"] != new_hash:
app_config["feeds"][i]["lasthash"] = new_hash