Added HTML filter, removed duplicate titles in embed, better output

This commit is contained in:
A.M. Rowsell 2025-02-03 14:34:15 -05:00
parent 5faf789e82
commit 30756fc462
Signed by: amr
GPG key ID: 0B6E2D8375CF79A9

View file

@ -14,25 +14,30 @@ import requests
import feedparser
from pathlib import Path
import json
# import datetime
import time
import os
import re
config_file_path = r"/etc/discorss.conf"
# config_file_path = r"discorss.conf"
# log_file_path = r"/var/log/discorss"
log_file_path = r"./log"
log_file_path = r"/var/log/discorss"
# log_file_path = r"./log"
log_file_name = r"/app.log"
# Best-effort tag stripper for feed summaries; this is not a real HTML parser.
# Matches an opening, closing or self-closing tag: "<" or "</", a tag name
# that starts with a letter, then everything up to the next ">".
# The previous character class only allowed letters, spaces, double quotes
# and "=", so tags whose attributes held digits, ":", "/", "-" or "." (any
# link or image) slipped through unfiltered, as did "<br/>".
# Requiring a letter right after "<" leaves plain text such as "1 < 2" alone.
html_filter = re.compile(r"</?[A-Za-z][^>]*>")
def get_description(feed):
try:
temporary_string = str(feed.entries[0]["summary_detail"]["value"])
temporary_string = html_filter.sub("", temporary_string)
desc = (
temporary_string[:150] if len(temporary_string) > 150 else temporary_string
)
except KeyError:
temporary_string = str(feed.entries[0]["description"])
temporary_string = html_filter.sub("", temporary_string)
desc = (
temporary_string[:150] if len(temporary_string) > 150 else temporary_string
)
@ -59,7 +64,7 @@ def main():
feed = feedparser.parse(hook["url"])
published_time = time.mktime(feed.entries[0]["published_parsed"])
published_time = published_time + hook["offset"]
print(feed.entries[0]["published"], published_time, now)
print("Parsing feed {}...".format(hook["name"]))
# Generate the webhook
webhook = {
"embeds": [
@ -74,7 +79,7 @@ def main():
"author": {"name": str(hook["name"]), "url": str(hook["siteurl"])},
"fields": [
{
"name": str(feed.entries[0]["title"]),
"name": "Excerpt from post:",
"value": get_description(feed),
}
],
@ -92,8 +97,12 @@ def main():
r = requests.post(
hook["webhook"], data=webhook_string, headers=custom_header
)
if r.status_code != '200':
print("Error {} while trying to post {}".format(r.status_code, hook["webhook"]))
# requests exposes status_code as an int, so comparing it with the string
# "200" was always unequal and this error was printed after every post,
# successful or not. Response.ok is False only for 4xx/5xx replies, so any
# 2xx reply (such as 204 No Content) counts as success.
if not r.ok:
    print(
        "Error {} while trying to post {}".format(
            r.status_code, hook["webhook"]
        )
    )
app_config["lastupdate"] = now
with open(config_file_path, "w") as config_file:
json.dump(app_config, config_file, indent=4)