Trying to make description cutoff smarter. Changed hashing.
Hashing now takes the sha3_512 hash of the title combined with the published time, because some feeds (like weather alerts) use the same title for every entry. The description cutoff now walks backwards from the length limit until it finds a space character and sets the cutoff there; it never moves the cutoff below 150 characters. Also, the length can now be passed as a parameter, with a default value of 250. A minimum length might also be added as a parameter later.
This commit is contained in:
parent
8ff64608cd
commit
8129da759f
1 changed files with 21 additions and 5 deletions
26
discorss.py
26
discorss.py
|
@ -45,18 +45,32 @@ app_config = {}
|
|||
# Different feeds put summaries in different fields, so we pick the best
|
||||
# one and limit it to `length` characters (250 by default).
|
||||
# TODO: make the character limit smarter, so that it splits at a natural point
|
||||
def get_description(feed):
|
||||
def get_description(feed, length=250):
|
||||
try:
|
||||
temporary_string = str(feed["summary_detail"]["value"])
|
||||
temporary_string = html_filter.sub("", temporary_string)
|
||||
while length > 150:
|
||||
if temporary_string[length - 1 : length] == " ":
|
||||
break
|
||||
else:
|
||||
length -= 1
|
||||
desc = (
|
||||
temporary_string[:250] if len(temporary_string) > 250 else temporary_string
|
||||
temporary_string[:length]
|
||||
if len(temporary_string) > length
|
||||
else temporary_string
|
||||
)
|
||||
except KeyError:
|
||||
temporary_string = str(feed["description"])
|
||||
temporary_string = html_filter.sub("", temporary_string)
|
||||
while length > 150:
|
||||
if temporary_string[length - 1 : length] == " ":
|
||||
break
|
||||
else:
|
||||
length -= 1
|
||||
desc = (
|
||||
temporary_string[:250] if len(temporary_string) > 250 else temporary_string
|
||||
temporary_string[:length]
|
||||
if len(temporary_string) > length
|
||||
else temporary_string
|
||||
)
|
||||
return desc
|
||||
|
||||
|
@ -135,8 +149,10 @@ def main():
|
|||
"Feed %s doesn't supply a published time, using updated time instead",
|
||||
hook["name"],
|
||||
)
|
||||
# Hash the title of the latest post and use that to determine if it's been posted
|
||||
new_hash = hashlib.sha3_512(bytes(latest_post["title"], "utf-8")).hexdigest()
|
||||
# Hash the title and time of the latest post and use that to determine if it's been posted
|
||||
new_hash = hashlib.sha3_512(
|
||||
bytes(latest_post["title"] + str(published_time), "utf-8")
|
||||
).hexdigest()
|
||||
try:
|
||||
if hook["lasthash"] != new_hash:
|
||||
app_config["feeds"][i]["lasthash"] = new_hash
|
||||
|
|
Loading…
Add table
Reference in a new issue