From 8ff64608cdf612c9f0fe420a2dadcdde6b949e71 Mon Sep 17 00:00:00 2001 From: "A.M. Rowsell" Date: Mon, 3 Mar 2025 07:41:41 -0500 Subject: [PATCH] Fixed html regex filter. Edited some logging types. See full msg. The HTML regex wasn't working because its character class was missing some really obvious characters (digits and hyphens). The regex filter is really only for the kernel.org Releases Feed, just to make it look a bit cleaner. We don't actually need the direct links because the post's title already links directly to the front page. We mostly just want to know there's been a release. Some logging parameters were changed to make the log less cluttered by default. Going to write a logrotate config that will be included in the README or something to help people with rotating the logs automatically. Also added a few small clarifying comments, as well as an IDEA! --- discorss.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/discorss.py b/discorss.py index 55b0b98..e7f27d5 100755 --- a/discorss.py +++ b/discorss.py @@ -32,10 +32,14 @@ log_dir = r"/var/log/discorss" log_file_path = r"/app.log" # Yes, I know you "can't parse HTML with regex", but # just watch me. 
-html_filter = re.compile(r"\<\/?([A-Za-z \:\.\/\"\=])*\>") +html_filter = re.compile(r"\<\/?([A-Za-z0-9 \:\.\-\/\"\=])*\>") success_codes = [200, 201, 202, 203, 204, 205, 206] app_config = {} +# IDEA: Consider making this into a class-based program +# This would solve a couple issues around global variables and generally +# make things a bit neater + # This function gets and formats the brief excerpt that goes in the embed # Different feeds put summaries in different fields, so we pick the best @@ -109,7 +113,7 @@ def main(): except KeyError: last_check = now - 21600 # first run, no lastupdate, check up to 6 hours ago for i, hook in enumerate(app_config["feeds"]): # Feed loop start - logger.info("Parsing feed %s...", hook["name"]) + logger.debug("Parsing feed %s...", hook["name"]) feeds = feedparser.parse(hook["url"]) latest_post = [] prev_best = 0 @@ -183,10 +187,14 @@ def main(): r = requests.post(hook["webhook"], data=webhook_string, headers=custom_header) if r.status_code not in success_codes: logger.error( - "Error %d while trying to post %s", r.status_code, hook["webhook"] + "Error %d while trying to post %s", r.status_code, hook["name"] ) + else: + logger.debug("Got %d when posting %s", r.status_code, hook["name"]) # End of feed loop + + # Dump updated config back to json file app_config["lastupdate"] = now with open(config_file_path, "w") as config_file: json.dump(app_config, config_file, indent=4)