Added HTML filter, removed duplicate titles in embed, better output

This commit is contained in:
A.M. Rowsell 2025-02-03 14:34:15 -05:00
parent 5faf789e82
commit 30756fc462
Signed by: amr
GPG key ID: 0B6E2D8375CF79A9

View file

@@ -14,25 +14,30 @@ import requests
 import feedparser
 from pathlib import Path
 import json
+# import datetime
 import time
 import os
+import re
 config_file_path = r"/etc/discorss.conf"
 # config_file_path = r"discorss.conf"
-# log_file_path = r"/var/log/discorss"
-log_file_path = r"./log"
+log_file_path = r"/var/log/discorss"
+# log_file_path = r"./log"
 log_file_name = r"/app.log"
+# Yes, I know you "can't parse HTML with regex", but
+# just watch me.
+html_filter = re.compile(r"\<\/?([A-Za-z \"\=])*\>")
 def get_description(feed):
     try:
         temporary_string = str(feed.entries[0]["summary_detail"]["value"])
+        temporary_string = html_filter.sub("", temporary_string)
         desc = (
             temporary_string[:150] if len(temporary_string) > 150 else temporary_string
         )
     except KeyError:
         temporary_string = str(feed.entries[0]["description"])
+        temporary_string = html_filter.sub("", temporary_string)
         desc = (
             temporary_string[:150] if len(temporary_string) > 150 else temporary_string
         )
@@ -59,7 +64,7 @@ def main():
         feed = feedparser.parse(hook["url"])
         published_time = time.mktime(feed.entries[0]["published_parsed"])
         published_time = published_time + hook["offset"]
-        print(feed.entries[0]["published"], published_time, now)
+        print("Parsing feed {}...".format(hook["name"]))
         # Generate the webhook
         webhook = {
             "embeds": [
@@ -74,7 +79,7 @@ def main():
                 "author": {"name": str(hook["name"]), "url": str(hook["siteurl"])},
                 "fields": [
                     {
-                        "name": str(feed.entries[0]["title"]),
+                        "name": "Excerpt from post:",
                         "value": get_description(feed),
                     }
                 ],
@@ -92,8 +97,12 @@ def main():
             r = requests.post(
                 hook["webhook"], data=webhook_string, headers=custom_header
             )
-            if r.status_code != '200':
-                print("Error {} while trying to post {}".format(r.status_code, hook["webhook"]))
+            if r.status_code != "200":
+                print(
+                    "Error {} while trying to post {}".format(
+                        r.status_code, hook["webhook"]
+                    )
+                )
     app_config["lastupdate"] = now
     with open(config_file_path, "w") as config_file:
         json.dump(app_config, config_file, indent=4)