From 810cbd6f3d32ca0c7f74ec90f17c0011661d4785 Mon Sep 17 00:00:00 2001 From: "A.M. Rowsell" Date: Sun, 20 Apr 2025 13:51:41 -0400 Subject: [PATCH 01/24] refactor: transformed into class-based app --- discorss.py | 392 +++++++++++++++++++++++++++------------------------- 1 file changed, 204 insertions(+), 188 deletions(-) diff --git a/discorss.py b/discorss.py index 76f243d..12e3054 100755 --- a/discorss.py +++ b/discorss.py @@ -22,204 +22,220 @@ import sys import argparse import re -config_dir = os.environ.get("XDG_CONFIG_HOME") -home_dir = Path.home() -if config_dir is None: - config_file_path = str(home_dir) + "/.config/discorss/discorss.conf" - config_dir = str(home_dir) + "/.config/discorss" -else: - config_file_path = config_dir + r"/discorss/discorss.conf" -log_dir = r"/var/log/discorss" -log_file_path = r"/app.log" -# Yes, I know you "can't parse HTML with regex", but -# just watch me. -html_filter = re.compile(r"\<\/?([A-Za-z0-9 \:\.\-\/\"\=])*\>") -success_codes = [200, 201, 202, 203, 204, 205, 206] -app_config = {} -# IDEA: Consider making this into a class-based program -# This would solve a couple issues around global variables and generally -# make things a bit neater +class Discorss: + def __init__(self): + self.config_dir = os.environ.get("XDG_CONFIG_HOME") + home_dir = Path.home() + if self.config_dir is None: + self.config_file_path = str(home_dir) + "/.config/discorss/discorss.conf" + self.config_dir = str(home_dir) + "/.config/discorss" + else: + self.config_file_path = self.config_dir + r"/discorss/discorss.conf" + self.log_dir = r"/var/log/discorss" + self.log_file_path = r"/app.log" + # Yes, I know you "can't parse HTML with regex", but + # just watch me. + self.html_filter = re.compile(r"\<\/?([A-Za-z0-9 \:\.\-\/\"\=])*\>") + self.success_codes = [200, 201, 202, 203, 204, 205, 206] + self.app_config = {} - -# This function gets and formats the brief excerpt that goes in the embed -# Different feeds put summaries in different fields, so we pick the best -# one and limit it to 250 characters. -def get_description(feed, length=250, min_length=150, addons=None): - try: - temporary_string = str(feed["summary_detail"]["value"]) - temporary_string = html_filter.sub("", temporary_string) - while length > min_length: - if temporary_string[length - 1 : length] == " ": - break - else: - length -= 1 - except KeyError: - temporary_string = str(feed["description"]) - temporary_string = html_filter.sub("", temporary_string) - while length > min_length: - if temporary_string[length - 1 : length] == " ": - break - else: - length -= 1 - - desc = temporary_string[:length] - if addons is not None: - desc = desc + str(addons) - return desc - - -def setupPaths(): - global app_config - global logger - # Check for log and config files/paths, create empty directories if needed - # TODO: make this cleaner - if not Path(log_dir).exists(): - print("No log file path exists. Yark! We'll try and make {}...".format(log_dir)) + # This function gets and formats the brief excerpt that goes in the embed + # Different feeds put summaries in different fields, so we pick the best + # one and limit it to 250 characters. + def get_description(self, feed, length=250, min_length=150, addons=None): try: - Path(log_dir).mkdir(parents=True, exist_ok=True) - except FileExistsError: - print("The path {} already exists and is not a directory!".format(log_dir)) - if not Path(config_file_path).exists(): - print( - "No config file at {}! Snarf. We'll try and make {}...".format( - config_file_path, config_dir - ) - ) - try: - Path(config_dir).mkdir(parents=True, exist_ok=True) - except FileExistsError: + temporary_string = str(feed["summary_detail"]["value"]) + temporary_string = self.html_filter.sub("", temporary_string) + while length > min_length: + if temporary_string[length - 1 : length] == " ": + break + else: + length -= 1 + except KeyError: + temporary_string = str(feed["description"]) + temporary_string = self.html_filter.sub("", temporary_string) + while length > min_length: + if temporary_string[length - 1 : length] == " ": + break + else: + length -= 1 + + desc = temporary_string[:length] + if addons is not None: + desc = desc + str(addons) + return desc + + def setupPaths(self): + # Check for log and config files/paths, create empty directories if needed + # TODO: make this cleaner + if not Path(self.log_dir).exists(): print( - "The config dir {} already exists and is not a directory! Please fix manually. Quitting!".format( - config_dir + "No log file path exists. Yark! We'll try and make {}...".format( + self.log_dir ) ) - sys.exit(255) + try: + Path(self.log_dir).mkdir(parents=True, exist_ok=True) + except FileExistsError: + print( + "The path {} already exists and is not a directory!".format( + self.log_dir + ) + ) + if not Path(self.config_file_path).exists(): + print( + "No config file at {}! Snarf. We'll try and make {}...".format( + self.config_file_path, self.config_dir + ) + ) + try: + Path(self.config_dir).mkdir(parents=True, exist_ok=True) + except FileExistsError: + print( + "The config dir {} already exists and is not a directory! Please fix manually. Quitting!".format( + self.config_dir + ) + ) + sys.exit(255) + return + # Loading the config file + with open(self.config_file_path, "r") as config_file: + self.self.app_config = json.load(config_file) + # Set up logging + self.logger = logging.getself.logger(__name__) + logging.basicConfig( + filename=str(self.log_dir + self.log_file_path), + encoding="utf-8", + level=logging.DEBUG, + datefmt="%m/%d/%Y %H:%M:%S", + format="%(asctime)s: %(levelname)s: %(message)s", + ) return - # Loading the config file - with open(config_file_path, "r") as config_file: - app_config = json.load(config_file) - # Set up logging - logger = logging.getLogger(__name__) - logging.basicConfig( - filename=str(log_dir + log_file_path), - encoding="utf-8", - level=logging.DEBUG, - datefmt="%m/%d/%Y %H:%M:%S", - format="%(asctime)s: %(levelname)s: %(message)s", - ) - return + + def process(self): + os.environ["TZ"] = "America/Toronto" + time.tzset() + now = time.mktime(time.localtime()) + self.setupPaths() # Handle the config and log paths + try: + last_check = self.app_config["lastupdate"] + except KeyError: + last_check = ( + now - 21600 + ) # first run, no lastupdate, check up to 6 hours ago + for i, hook in enumerate(self.app_config["feeds"]): # Feed loop start + self.logger.debug("Parsing feed %s...", hook["name"]) + self.feeds = feedparser.parse(hook["url"]) + self.latest_post = [] + prev_best = 0 + self.logger.debug( + "About to sort through entries for feed %s ...", hook["name"] + ) + for feed in self.feeds["entries"]: + try: + bad_time = False + published_time = time.mktime(feed["published_parsed"]) + published_time = published_time + hook["offset"] + except KeyError: + published_time = time.mktime(feed["updated_parsed"]) + bad_time = True + if published_time > prev_best: + latest_post = feed + prev_best = published_time + else: + continue + if bad_time is True: + self.logger.debug( + "Feed %s doesn't supply a published time, using updated time instead", + hook["name"], + ) + # Hash the title and time of the latest post and use that to determine if it's been posted + # Yes, SHA3-512 is totally unnecessary for this purpose, but I love SHA3 + self.logger.debug("About to hash %s ...", latest_post["title"]) + try: + new_hash = hashlib.sha3_512( + bytes(latest_post["title"] + str(published_time), "utf-8") + ).hexdigest() + except TypeError: + self.logger.error("Title of %s isn't hashing correctly", hook["name"]) + continue + try: + if hook["lasthash"] != new_hash: + self.app_config["feeds"][i]["lasthash"] = new_hash + else: + continue + except KeyError: + self.app_config["feeds"][i]["lasthash"] = new_hash + self.logger.info( + "Feed %s has no existing hash, likely a new feed!", hook["name"] + ) + # Generate the webhook + self.logger.info( + "Publishing webhook for %s. Last check was %d, now is %d", + hook["name"], + last_check, + now, + ) + webhook = { + "embeds": [ + { + "title": str(latest_post["title"]), + "url": str(latest_post["link"]), + "color": 2123412, + "footer": { + "text": "DiscoRSS", + "icon_url": "https://frzn.dev/~amr/images/discorss.png", + }, + "author": { + "name": str(hook["name"]), + "url": str(hook["siteurl"]), + }, + "fields": [ + { + "name": "Excerpt from post:", + "value": self.get_description(latest_post), + } + ], + # "timestamp": str(now), + } + ], + "attachments": [], + } + custom_header = { + "user-agent": "DiscoRSS (https://git.frzn.dev/amr/discorss, 0.2rc3)", + "content-type": "application/json", + } + webhook_string = json.dumps(webhook) + + self.logger.debug("About to run POST for %s", hook["name"]) + r = requests.post( + hook["webhook"], data=webhook_string, headers=custom_header + ) + if r.status_code not in self.success_codes: + self.logger.error( + "Error %d while trying to post %s", r.status_code, hook["name"] + ) + else: + self.logger.debug("Got %d when posting %s", r.status_code, hook["name"]) + + # End of feed loop + + # Dump updated config back to json file + self.logger.debug("Dumping config back to %s", str(self.config_file_path)) + self.app_config["lastupdate"] = now + with open(self.config_file_path, "w") as config_file: + json.dump(self.app_config, config_file, indent=4) + + return + + +# end of Discorss class def main(): - os.environ["TZ"] = "America/Toronto" - time.tzset() - now = time.mktime(time.localtime()) - setupPaths() # Handle the config and log paths - try: - last_check = app_config["lastupdate"] - except KeyError: - last_check = now - 21600 # first run, no lastupdate, check up to 6 hours ago - for i, hook in enumerate(app_config["feeds"]): # Feed loop start - logger.debug("Parsing feed %s...", hook["name"]) - feeds = feedparser.parse(hook["url"]) - latest_post = [] - prev_best = 0 - logger.debug("About to sort through entries for feed %s ...", hook["name"]) - for feed in feeds["entries"]: - try: - bad_time = False - published_time = time.mktime(feed["published_parsed"]) - published_time = published_time + hook["offset"] - except KeyError: - published_time = time.mktime(feed["updated_parsed"]) - bad_time = True - if published_time > prev_best: - latest_post = feed - prev_best = published_time - else: - continue - if bad_time is True: - logger.debug( - "Feed %s doesn't supply a published time, using updated time instead", - hook["name"], - ) - # Hash the title and time of the latest post and use that to determine if it's been posted - # Yes, SHA3-512 is totally unnecessary for this purpose, but I love SHA3 - logger.debug("About to hash %s ...", latest_post["title"]) - try: - new_hash = hashlib.sha3_512( - bytes(latest_post["title"] + str(published_time), "utf-8") - ).hexdigest() - except TypeError: - logger.error("Title of %s isn't hashing correctly", hook["name"]) - continue - try: - if hook["lasthash"] != new_hash: - app_config["feeds"][i]["lasthash"] = new_hash - else: - continue - except KeyError: - app_config["feeds"][i]["lasthash"] = new_hash - logger.info( - "Feed %s has no existing hash, likely a new feed!", hook["name"] - ) - # Generate the webhook - logger.info( - "Publishing webhook for %s. Last check was %d, now is %d", - hook["name"], - last_check, - now, - ) - webhook = { - "embeds": [ - { - "title": str(latest_post["title"]), - "url": str(latest_post["link"]), - "color": 2123412, - "footer": { - "text": "DiscoRSS", - "icon_url": "https://frzn.dev/~amr/images/discorss.png", - }, - "author": { - "name": str(hook["name"]), - "url": str(hook["siteurl"]), - }, - "fields": [ - { - "name": "Excerpt from post:", - "value": get_description(latest_post), - } - ], - # "timestamp": str(now), - } - ], - "attachments": [], - } - custom_header = { - "user-agent": "DiscoRSS (https://git.frzn.dev/amr/discorss, 0.2rc3)", - "content-type": "application/json", - } - webhook_string = json.dumps(webhook) - - logger.debug("About to run POST for %s", hook["name"]) - r = requests.post(hook["webhook"], data=webhook_string, headers=custom_header) - if r.status_code not in success_codes: - logger.error( - "Error %d while trying to post %s", r.status_code, hook["name"] - ) - else: - logger.debug("Got %d when posting %s", r.status_code, hook["name"]) - - # End of feed loop - - # Dump updated config back to json file - logger.debug("Dumping config back to %s", str(config_file_path)) - app_config["lastupdate"] = now - with open(config_file_path, "w") as config_file: - json.dump(app_config, config_file, indent=4) - - return + app = Discorss() + app.process() if __name__ == "__main__": From 597f3837244cce6f501cf16f96e2cac3a81c8ab2 Mon Sep 17 00:00:00 2001 From: "A.M. Rowsell" Date: Sun, 20 Apr 2025 16:04:00 -0400 Subject: [PATCH 02/24] fix: typo in log setup, error when replacing w/self --- discorss.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/discorss.py b/discorss.py index 12e3054..d90c7fb 100755 --- a/discorss.py +++ b/discorss.py @@ -101,9 +101,9 @@ class Discorss: return # Loading the config file with open(self.config_file_path, "r") as config_file: - self.self.app_config = json.load(config_file) + self.app_config = json.load(config_file) # Set up logging - self.logger = logging.getself.logger(__name__) + self.logger = logging.getLogger(__name__) logging.basicConfig( filename=str(self.log_dir + self.log_file_path), encoding="utf-8", From 087a6339c8adf4df3c23d298fe1cbddcbdf23eef Mon Sep 17 00:00:00 2001 From: "A.M. Rowsell" Date: Sun, 20 Apr 2025 16:06:16 -0400 Subject: [PATCH 03/24] class refactor: Squashed commit of the following: commit 597f3837244cce6f501cf16f96e2cac3a81c8ab2 Author: A.M. Rowsell Date: Sun Apr 20 16:04:00 2025 -0400 fix: typo in log setup, error when replacing w/self commit 810cbd6f3d32ca0c7f74ec90f17c0011661d4785 Author: A.M. Rowsell Date: Sun Apr 20 13:51:41 2025 -0400 refactor: transformed into class-based app --- discorss.py | 392 +++++++++++++++++++++++++++------------------------- 1 file changed, 204 insertions(+), 188 deletions(-) diff --git a/discorss.py b/discorss.py index 76f243d..d90c7fb 100755 --- a/discorss.py +++ b/discorss.py @@ -22,204 +22,220 @@ import sys import argparse import re -config_dir = os.environ.get("XDG_CONFIG_HOME") -home_dir = Path.home() -if config_dir is None: - config_file_path = str(home_dir) + "/.config/discorss/discorss.conf" - config_dir = str(home_dir) + "/.config/discorss" -else: - config_file_path = config_dir + r"/discorss/discorss.conf" -log_dir = r"/var/log/discorss" -log_file_path = r"/app.log" -# Yes, I know you "can't parse HTML with regex", but -# just watch me. -html_filter = re.compile(r"\<\/?([A-Za-z0-9 \:\.\-\/\"\=])*\>") -success_codes = [200, 201, 202, 203, 204, 205, 206] -app_config = {} -# IDEA: Consider making this into a class-based program -# This would solve a couple issues around global variables and generally -# make things a bit neater +class Discorss: + def __init__(self): + self.config_dir = os.environ.get("XDG_CONFIG_HOME") + home_dir = Path.home() + if self.config_dir is None: + self.config_file_path = str(home_dir) + "/.config/discorss/discorss.conf" + self.config_dir = str(home_dir) + "/.config/discorss" + else: + self.config_file_path = self.config_dir + r"/discorss/discorss.conf" + self.log_dir = r"/var/log/discorss" + self.log_file_path = r"/app.log" + # Yes, I know you "can't parse HTML with regex", but + # just watch me. + self.html_filter = re.compile(r"\<\/?([A-Za-z0-9 \:\.\-\/\"\=])*\>") + self.success_codes = [200, 201, 202, 203, 204, 205, 206] + self.app_config = {} - -# This function gets and formats the brief excerpt that goes in the embed -# Different feeds put summaries in different fields, so we pick the best -# one and limit it to 250 characters. -def get_description(feed, length=250, min_length=150, addons=None): - try: - temporary_string = str(feed["summary_detail"]["value"]) - temporary_string = html_filter.sub("", temporary_string) - while length > min_length: - if temporary_string[length - 1 : length] == " ": - break - else: - length -= 1 - except KeyError: - temporary_string = str(feed["description"]) - temporary_string = html_filter.sub("", temporary_string) - while length > min_length: - if temporary_string[length - 1 : length] == " ": - break - else: - length -= 1 - - desc = temporary_string[:length] - if addons is not None: - desc = desc + str(addons) - return desc - - -def setupPaths(): - global app_config - global logger - # Check for log and config files/paths, create empty directories if needed - # TODO: make this cleaner - if not Path(log_dir).exists(): - print("No log file path exists. Yark! We'll try and make {}...".format(log_dir)) + # This function gets and formats the brief excerpt that goes in the embed + # Different feeds put summaries in different fields, so we pick the best + # one and limit it to 250 characters. + def get_description(self, feed, length=250, min_length=150, addons=None): try: - Path(log_dir).mkdir(parents=True, exist_ok=True) - except FileExistsError: - print("The path {} already exists and is not a directory!".format(log_dir)) - if not Path(config_file_path).exists(): - print( - "No config file at {}! Snarf. We'll try and make {}...".format( - config_file_path, config_dir - ) - ) - try: - Path(config_dir).mkdir(parents=True, exist_ok=True) - except FileExistsError: + temporary_string = str(feed["summary_detail"]["value"]) + temporary_string = self.html_filter.sub("", temporary_string) + while length > min_length: + if temporary_string[length - 1 : length] == " ": + break + else: + length -= 1 + except KeyError: + temporary_string = str(feed["description"]) + temporary_string = self.html_filter.sub("", temporary_string) + while length > min_length: + if temporary_string[length - 1 : length] == " ": + break + else: + length -= 1 + + desc = temporary_string[:length] + if addons is not None: + desc = desc + str(addons) + return desc + + def setupPaths(self): + # Check for log and config files/paths, create empty directories if needed + # TODO: make this cleaner + if not Path(self.log_dir).exists(): print( - "The config dir {} already exists and is not a directory! Please fix manually. Quitting!".format( - config_dir + "No log file path exists. Yark! We'll try and make {}...".format( + self.log_dir ) ) - sys.exit(255) + try: + Path(self.log_dir).mkdir(parents=True, exist_ok=True) + except FileExistsError: + print( + "The path {} already exists and is not a directory!".format( + self.log_dir + ) + ) + if not Path(self.config_file_path).exists(): + print( + "No config file at {}! Snarf. We'll try and make {}...".format( + self.config_file_path, self.config_dir + ) + ) + try: + Path(self.config_dir).mkdir(parents=True, exist_ok=True) + except FileExistsError: + print( + "The config dir {} already exists and is not a directory! Please fix manually. Quitting!".format( + self.config_dir + ) + ) + sys.exit(255) + return + # Loading the config file + with open(self.config_file_path, "r") as config_file: + self.app_config = json.load(config_file) + # Set up logging + self.logger = logging.getLogger(__name__) + logging.basicConfig( + filename=str(self.log_dir + self.log_file_path), + encoding="utf-8", + level=logging.DEBUG, + datefmt="%m/%d/%Y %H:%M:%S", + format="%(asctime)s: %(levelname)s: %(message)s", + ) return - # Loading the config file - with open(config_file_path, "r") as config_file: - app_config = json.load(config_file) - # Set up logging - logger = logging.getLogger(__name__) - logging.basicConfig( - filename=str(log_dir + log_file_path), - encoding="utf-8", - level=logging.DEBUG, - datefmt="%m/%d/%Y %H:%M:%S", - format="%(asctime)s: %(levelname)s: %(message)s", - ) - return + + def process(self): + os.environ["TZ"] = "America/Toronto" + time.tzset() + now = time.mktime(time.localtime()) + self.setupPaths() # Handle the config and log paths + try: + last_check = self.app_config["lastupdate"] + except KeyError: + last_check = ( + now - 21600 + ) # first run, no lastupdate, check up to 6 hours ago + for i, hook in enumerate(self.app_config["feeds"]): # Feed loop start + self.logger.debug("Parsing feed %s...", hook["name"]) + self.feeds = feedparser.parse(hook["url"]) + self.latest_post = [] + prev_best = 0 + self.logger.debug( + "About to sort through entries for feed %s ...", hook["name"] + ) + for feed in self.feeds["entries"]: + try: + bad_time = False + published_time = time.mktime(feed["published_parsed"]) + published_time = published_time + hook["offset"] + except KeyError: + published_time = time.mktime(feed["updated_parsed"]) + bad_time = True + if published_time > prev_best: + latest_post = feed + prev_best = published_time + else: + continue + if bad_time is True: + self.logger.debug( + "Feed %s doesn't supply a published time, using updated time instead", + hook["name"], + ) + # Hash the title and time of the latest post and use that to determine if it's been posted + # Yes, SHA3-512 is totally unnecessary for this purpose, but I love SHA3 + self.logger.debug("About to hash %s ...", latest_post["title"]) + try: + new_hash = hashlib.sha3_512( + bytes(latest_post["title"] + str(published_time), "utf-8") + ).hexdigest() + except TypeError: + self.logger.error("Title of %s isn't hashing correctly", hook["name"]) + continue + try: + if hook["lasthash"] != new_hash: + self.app_config["feeds"][i]["lasthash"] = new_hash + else: + continue + except KeyError: + self.app_config["feeds"][i]["lasthash"] = new_hash + self.logger.info( + "Feed %s has no existing hash, likely a new feed!", hook["name"] + ) + # Generate the webhook + self.logger.info( + "Publishing webhook for %s. Last check was %d, now is %d", + hook["name"], + last_check, + now, + ) + webhook = { + "embeds": [ + { + "title": str(latest_post["title"]), + "url": str(latest_post["link"]), + "color": 2123412, + "footer": { + "text": "DiscoRSS", + "icon_url": "https://frzn.dev/~amr/images/discorss.png", + }, + "author": { + "name": str(hook["name"]), + "url": str(hook["siteurl"]), + }, + "fields": [ + { + "name": "Excerpt from post:", + "value": self.get_description(latest_post), + } + ], + # "timestamp": str(now), + } + ], + "attachments": [], + } + custom_header = { + "user-agent": "DiscoRSS (https://git.frzn.dev/amr/discorss, 0.2rc3)", + "content-type": "application/json", + } + webhook_string = json.dumps(webhook) + + self.logger.debug("About to run POST for %s", hook["name"]) + r = requests.post( + hook["webhook"], data=webhook_string, headers=custom_header + ) + if r.status_code not in self.success_codes: + self.logger.error( + "Error %d while trying to post %s", r.status_code, hook["name"] + ) + else: + self.logger.debug("Got %d when posting %s", r.status_code, hook["name"]) + + # End of feed loop + + # Dump updated config back to json file + self.logger.debug("Dumping config back to %s", str(self.config_file_path)) + self.app_config["lastupdate"] = now + with open(self.config_file_path, "w") as config_file: + json.dump(self.app_config, config_file, indent=4) + + return + + +# end of Discorss class def main(): - os.environ["TZ"] = "America/Toronto" - time.tzset() - now = time.mktime(time.localtime()) - setupPaths() # Handle the config and log paths - try: - last_check = app_config["lastupdate"] - except KeyError: - last_check = now - 21600 # first run, no lastupdate, check up to 6 hours ago - for i, hook in enumerate(app_config["feeds"]): # Feed loop start - logger.debug("Parsing feed %s...", hook["name"]) - feeds = feedparser.parse(hook["url"]) - latest_post = [] - prev_best = 0 - logger.debug("About to sort through entries for feed %s ...", hook["name"]) - for feed in feeds["entries"]: - try: - bad_time = False - published_time = time.mktime(feed["published_parsed"]) - published_time = published_time + hook["offset"] - except KeyError: - published_time = time.mktime(feed["updated_parsed"]) - bad_time = True - if published_time > prev_best: - latest_post = feed - prev_best = published_time - else: - continue - if bad_time is True: - logger.debug( - "Feed %s doesn't supply a published time, using updated time instead", - hook["name"], - ) - # Hash the title and time of the latest post and use that to determine if it's been posted - # Yes, SHA3-512 is totally unnecessary for this purpose, but I love SHA3 - logger.debug("About to hash %s ...", latest_post["title"]) - try: - new_hash = hashlib.sha3_512( - bytes(latest_post["title"] + str(published_time), "utf-8") - ).hexdigest() - except TypeError: - logger.error("Title of %s isn't hashing correctly", hook["name"]) - continue - try: - if hook["lasthash"] != new_hash: - app_config["feeds"][i]["lasthash"] = new_hash - else: - continue - except KeyError: - app_config["feeds"][i]["lasthash"] = new_hash - logger.info( - "Feed %s has no existing hash, likely a new feed!", hook["name"] - ) - # Generate the webhook - logger.info( - "Publishing webhook for %s. Last check was %d, now is %d", - hook["name"], - last_check, - now, - ) - webhook = { - "embeds": [ - { - "title": str(latest_post["title"]), - "url": str(latest_post["link"]), - "color": 2123412, - "footer": { - "text": "DiscoRSS", - "icon_url": "https://frzn.dev/~amr/images/discorss.png", - }, - "author": { - "name": str(hook["name"]), - "url": str(hook["siteurl"]), - }, - "fields": [ - { - "name": "Excerpt from post:", - "value": get_description(latest_post), - } - ], - # "timestamp": str(now), - } - ], - "attachments": [], - } - custom_header = { - "user-agent": "DiscoRSS (https://git.frzn.dev/amr/discorss, 0.2rc3)", - "content-type": "application/json", - } - webhook_string = json.dumps(webhook) - - logger.debug("About to run POST for %s", hook["name"]) - r = requests.post(hook["webhook"], data=webhook_string, headers=custom_header) - if r.status_code not in success_codes: - logger.error( - "Error %d while trying to post %s", r.status_code, hook["name"] - ) - else: - logger.debug("Got %d when posting %s", r.status_code, hook["name"]) - - # End of feed loop - - # Dump updated config back to json file - logger.debug("Dumping config back to %s", str(config_file_path)) - app_config["lastupdate"] = now - with open(config_file_path, "w") as config_file: - json.dump(app_config, config_file, indent=4) - - return + app = Discorss() + app.process() if __name__ == "__main__": From 2b4e4216f4c285fdb3cf467e6238339f78846e10 Mon Sep 17 00:00:00 2001 From: "A.M. Rowsell" Date: Sun, 20 Apr 2025 16:10:08 -0400 Subject: [PATCH 04/24] release: bump version number to 0.2 --- discorss.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/discorss.py b/discorss.py index d90c7fb..99b4398 100755 --- a/discorss.py +++ b/discorss.py @@ -203,7 +203,7 @@ class Discorss: "attachments": [], } custom_header = { - "user-agent": "DiscoRSS (https://git.frzn.dev/amr/discorss, 0.2rc3)", + "user-agent": "DiscoRSS (https://git.frzn.dev/amr/discorss, 0.2)", "content-type": "application/json", } webhook_string = json.dumps(webhook) From 2e18ede6a84c18a6b895d6314fba16e7c5af8b26 Mon Sep 17 00:00:00 2001 From: "A.M. Rowsell" Date: Mon, 21 Apr 2025 19:29:56 -0400 Subject: [PATCH 05/24] docs/fix: Updated README.md and install.sh --- README.md | 20 +++++++++++++++----- install.sh | 1 + 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 06b28fe..48148d4 100644 --- a/README.md +++ b/README.md @@ -15,13 +15,15 @@ The remaining imports should all be part of the standard Python install. ## Important Notes -As it currently is written, the script uses the hash of the post title to prevent sending duplicates. However, a recent change to check for the publish time was added, only because some feeds are not in reverse chronological order (latest post at top of feed, ie, entry index 0). Because of this, we do actually need to check the publish times. This still needs some testing and things might be a bit broken because of it. If you see any issues please let me know. - -Logging was recently enabled. Make sure that the user running the script (especially when using systemd timers) has write access to the /var/log/discorss directory. The app will try and create the directory for you, but if your user doesn't have permissions to create directories in /var/log this will fail and this will probably crash the script as is. I will try and remember to catch that exception and exit gracefully with an error message to stdout. If you want the logs to go somewhere else, just edit the log_dir variable near the top of discorss.py. Choose a directory that makes sense. Unfortunately, as far as I know, the XDG standards don't have an equivalent to the /var/log directory in the user directory, so I wasn't sure what the best default was. In the future, we may switch to logging using systemd and journald directly, though it is nice to have a separate file. +The logger will try and put the logs in `/var/log/discorss`. Make sure to create this directory and give the user running the script write permissions there. If you want the logs to go somewhere else, just edit the log_dir variable near the top of discorss.py. Choose a directory that makes sense. Unfortunately, as far as I know, the XDG standards don't have an equivalent to the /var/log directory in the user directory, so I wasn't sure what the best default was. In the future, we may switch to logging using systemd and journald directly, though it is nice to have a separate file. ## How to setup -To configure the script, create ~/.config/discorss/discorss.conf with the following structure: +Note: see the Automation section below for info about using the `install.sh` script to help get all the files in the right places. + +### Config file format + +To configure the script, create `~/.config/discorss/discorss.conf` using JSON formatting like this: ```json { @@ -50,6 +52,13 @@ The offset should only be required if feeds aren't showing up. This is because f ## Automation +**New**: There is now `install.sh` in the repo which will automatically help you set up both the config file and the systemd unit files for the service and timer, using essentially the exact text below. It will copy them to the user systemd unit folder, `~/.config/systemd/user` and optionally enable the timer. It's a good idea to edit the configuration file at `~/.config/discorss/discorss.conf` and paste in your webhook URLs and add any other feeds you want before starting the timer, unless you can do it really quickly before the next 5 minute spot on the clock :) +Of course, if it fires with an invalid config, the script will just crash, and you'll probably just have to manually start the timer once the config is fixed, so not a big deal. + +_Remember to create `/var/log/discorss` and change it to be writeable by the user running the service!_ + +### Manual method + To automate feed posting, create a systemd service and timer to execute the script. Use the command `systemctl --user edit --full --force discorss.service` and then paste in something like this: @@ -61,13 +70,14 @@ Wants=discorss.timer [Service] Type=oneshot +TimeoutStartSec=120 ExecStart=/path/to/discorss.py [Install] WantedBy=default.target ``` -Make sure to edit the ExecStart to point to the correct location. Then we need a systemd timer to automatically fire the script. Run `systemctl --user edit --full --force discorss.timer` and then paste in this: +The TimeoutStartSec will catch any issues with the script locking up due to, e.g., DNS failures or RSS feeds being slow/unavailable. 2 minutes should be more than enough time unless you are running hundreds of feeds. Also make sure to edit the ExecStart to point to the correct location. Then we need a systemd timer to automatically fire the script. Run `systemctl --user edit --full --force discorss.timer` and then paste in this: ```systemd [Unit] Description=Timer for DiscoRSS diff --git a/install.sh b/install.sh index 59b3ea1..f69c1a2 100755 --- a/install.sh +++ b/install.sh @@ -16,6 +16,7 @@ Wants=discorss.timer [Service] Type=oneshot +TimeoutStartSec=120 ExecStart=/home/amr/workspace/python/discorss/discorss.py [Install] From e4539b5733c8bdc499398b4388b544587c989912 Mon Sep 17 00:00:00 2001 From: "A.M. Rowsell" Date: Tue, 22 Apr 2025 02:11:01 -0400 Subject: [PATCH 06/24] chore: rename function, move some init code Also switching logging back to ERROR from DEBUG. The solution to the lockups for now is to just use systemd timer timeouts. --- discorss.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/discorss.py b/discorss.py index 99b4398..9d001ce 100755 --- a/discorss.py +++ b/discorss.py @@ -66,7 +66,10 @@ class Discorss: desc = desc + str(addons) return desc - def setupPaths(self): + def setup(self): + os.environ["TZ"] = "America/Toronto" + time.tzset() + self.now = time.mktime(time.localtime()) # Check for log and config files/paths, create empty directories if needed # TODO: make this cleaner if not Path(self.log_dir).exists(): @@ -107,22 +110,19 @@ class Discorss: logging.basicConfig( filename=str(self.log_dir + self.log_file_path), encoding="utf-8", - level=logging.DEBUG, + level=logging.ERROR, datefmt="%m/%d/%Y %H:%M:%S", format="%(asctime)s: %(levelname)s: %(message)s", ) return def process(self): - os.environ["TZ"] = "America/Toronto" - time.tzset() - now = time.mktime(time.localtime()) - self.setupPaths() # Handle the config and log paths + self.setup() # Handle the config and log paths try: last_check = self.app_config["lastupdate"] except KeyError: last_check = ( - now - 21600 + self.now - 21600 ) # first run, no lastupdate, check up to 6 hours ago for i, hook in enumerate(self.app_config["feeds"]): # Feed loop start self.logger.debug("Parsing feed %s...", hook["name"]) @@ -172,10 +172,10 @@ class Discorss: ) # Generate the webhook self.logger.info( - "Publishing webhook for %s. Last check was %d, now is %d", + "Publishing webhook for %s. Last check was %d, self.now is %d", hook["name"], last_check, - now, + self.now, ) webhook = { "embeds": [ @@ -197,7 +197,7 @@ class Discorss: "value": self.get_description(latest_post), } ], - # "timestamp": str(now), + # "timestamp": str(self.now), } ], "attachments": [], @@ -223,7 +223,7 @@ class Discorss: # Dump updated config back to json file self.logger.debug("Dumping config back to %s", str(self.config_file_path)) - self.app_config["lastupdate"] = now + self.app_config["lastupdate"] = self.now with open(self.config_file_path, "w") as config_file: json.dump(self.app_config, config_file, indent=4) From b0f08c405bc78d77028bc5f3c9a5240f513adeba Mon Sep 17 00:00:00 2001 From: "A.M. Rowsell" Date: Tue, 22 Apr 2025 03:17:16 -0400 Subject: [PATCH 07/24] fix: install.sh works properly now (with colour!) --- install.sh | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/install.sh b/install.sh index f69c1a2..3c32200 100755 --- a/install.sh +++ b/install.sh @@ -9,6 +9,8 @@ # use systemctl --user edit --full discorss.service or discorss.timer # after installing them. +printf "\e[1;34mDisco\e[1;38;5;208mRSS\e[0m Install Helper Script\n\n" + cat << EOF > discorss.service [Unit] Description=Discord RSS feeder @@ -39,14 +41,16 @@ WantedBy=timers.target EOF +mkdir -p ~/.config/systemd/user/ + cp discorss.service ~/.config/systemd/user/ cp discorss.timer ~/.config/systemd/user/ systemctl --user daemon-reload -printf "Would you like a basic example config created for you? [y/n]" +printf "Would you like a basic example config created for you? [y/n]: " read answer1 -if [ "$answer1" =~ ^[yYnN]$ ]; then +if [[ "$answer1" =~ ^([yY])$ ]]; then mkdir -p -v ~/.config/discorss cat << EOF > ~/.config/discorss/discorss.conf { @@ -61,20 +65,20 @@ if [ "$answer1" =~ ^[yYnN]$ ]; then ], } EOF - printf "Make sure to edit ~/.config/discorss/discorss.conf and add in your custom feeds and webhook URLS! The script will just error out if you don't do this." + printf "\nMake sure to edit \e[1;34m~/.config/discorss/discorss.conf\e[0m and add in your custom feeds and webhook URLS! The script will just error out if you don't do this." else - printf "Make sure to create a config at ~/.config/discorss/discorss.conf and follow the pattern shown in the README." + printf "\nMake sure to create a config at \e[1;34m~/.config/discorss/discorss.conf\e[0m and follow the pattern shown in the README." fi -printf "Would you like to have the timer enabled and started now? [y/n]" +printf "\nWould you like to have the timer enabled and started now? [y/n]: " read answer -if [ "$answer" =~ ^[yYnN]$ ]; then +if [[ "$answer" =~ ^([yY])$ ]]; then systemctl --user enable --now discorss.timer - printf "discorss.timer enabled and started. Don't enable or start discorss.service -- the timer does this automatically." + printf "\ndiscorss.timer enabled and started. \e[1;31mDon't enable or start discorss.service\e[0m -- the timer does this automatically." else - printf "Don't forget to run systemctl --user enable --now discorss.timer when you are ready! Don't enable or start discorss.service -- the timer does this automatically." + printf "\nDon't forget to run \e[1;32msystemctl --user enable --now discorss.timer\e[0m when you are ready! \e[1;31mDon't enable or start discorss.service\e[0m -- the timer does this automatically." fi -printf "You should be almost ready to go! Double-check your config files, and check systemctl --user list-timers once the discorss.timer is enabled to see when it will fire next. The default is every 5 minutes." +printf "\n\nYou should be almost ready to go! Double-check your config files, and check \e[1;32msystemctl --user list-timers\e[0m once the discorss.timer is enabled to see when it will fire next. The default is every 5 minutes." -printf "Remember, if you need help or encounter any bugs, contact me via the issues tracker on the git repository where you got this from!" +printf "\nRemember, if you need help or encounter any bugs, contact me via the issues tracker on the git repository where you got this from!" From 9d2530ab02803d27be53dc55111f78fbaed0f192 Mon Sep 17 00:00:00 2001 From: "A.M. Rowsell" Date: Tue, 22 Apr 2025 04:26:49 -0400 Subject: [PATCH 08/24] fix: corrected errors in install.sh Also improved the script to actually use the script location in the discorss.service file... yeah I should have done that from the start, d'oh! --- install.sh | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/install.sh b/install.sh index 3c32200..d3ed185 100755 --- a/install.sh +++ b/install.sh @@ -11,6 +11,8 @@ printf "\e[1;34mDisco\e[1;38;5;208mRSS\e[0m Install Helper Script\n\n" +workingDir=$(pwd) + cat << EOF > discorss.service [Unit] Description=Discord RSS feeder @@ -19,7 +21,7 @@ Wants=discorss.timer [Service] Type=oneshot TimeoutStartSec=120 -ExecStart=/home/amr/workspace/python/discorss/discorss.py +ExecStart=$workingDir/discorss.py [Install] WantedBy=default.target @@ -45,7 +47,8 @@ mkdir -p ~/.config/systemd/user/ cp discorss.service ~/.config/systemd/user/ cp discorss.timer ~/.config/systemd/user/ - +rm -f discorss.service +rm -f discorss.timer systemctl --user daemon-reload printf "Would you like a basic example config created for you? [y/n]: " @@ -60,9 +63,9 @@ if [[ "$answer1" =~ ^([yY])$ ]]; then "siteurl": "https://www.phoronix.com/", "url": "http://www.phoronix.com/rss.php", "webhook": "PASTE WEBHOOK URL HERE", - "offset": 0, + "offset": 0 } - ], + ] } EOF printf "\nMake sure to edit \e[1;34m~/.config/discorss/discorss.conf\e[0m and add in your custom feeds and webhook URLS! The script will just error out if you don't do this." From b243bc7bb4991f85eab236fd7e9c6486bba39051 Mon Sep 17 00:00:00 2001 From: "A.M. Rowsell" Date: Tue, 22 Apr 2025 17:29:45 -0400 Subject: [PATCH 09/24] dev: Added more prompts to install script --- install.sh | 31 +++++++++++++++++++++---------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/install.sh b/install.sh index d3ed185..7a89398 100755 --- a/install.sh +++ b/install.sh @@ -13,7 +13,12 @@ printf "\e[1;34mDisco\e[1;38;5;208mRSS\e[0m Install Helper Script\n\n" workingDir=$(pwd) -cat << EOF > discorss.service +printf "Would you like the systemd service and timer files created for you? [y/n]: " +read answer +if [[ "$answer" =~ ^([yY])$ ]]; then + + cat << EOF > discorss.service +# Autogenerated by install.sh [Unit] Description=Discord RSS feeder Wants=discorss.timer @@ -28,7 +33,8 @@ WantedBy=default.target EOF -cat << EOF > discorss.timer + cat << EOF > discorss.timer +# Autogenerated by install.sh [Unit] Description=Timer for DiscoRSS Requires=discorss.service @@ -43,13 +49,18 @@ WantedBy=timers.target EOF -mkdir -p ~/.config/systemd/user/ - -cp discorss.service ~/.config/systemd/user/ -cp discorss.timer ~/.config/systemd/user/ -rm -f discorss.service -rm -f discorss.timer -systemctl --user daemon-reload + printf "Making ~/.config/systemd/user in case it doesn't exist ...\n" + mkdir -p -v ~/.config/systemd/user/ + printf "Copying service and timer files there ... \n" + cp discorss.service ~/.config/systemd/user/ + cp discorss.timer ~/.config/systemd/user/ + rm -f discorss.service + rm -f discorss.timer + printf "Reloading systemd daemon ... \n\n" + systemctl --user daemon-reload +else + printf "This script is intended to be automatically run. It's designed with systemd in mind, but you are free to use any automation tools. You can look at this script for examples of how to structure systemd user services and timers.\nOf course, you could always run it by hand, if you really want to :)\n\n" +fi printf "Would you like a basic example config created for you? [y/n]: " read answer1 @@ -84,4 +95,4 @@ fi printf "\n\nYou should be almost ready to go! Double-check your config files, and check \e[1;32msystemctl --user list-timers\e[0m once the discorss.timer is enabled to see when it will fire next. The default is every 5 minutes." -printf "\nRemember, if you need help or encounter any bugs, contact me via the issues tracker on the git repository where you got this from!" +printf "\nRemember, if you need help or encounter any bugs, contact me via the issues tracker on the git repository where you got this from!\n" From 1787d4da993beb0cfe5872360833637d1d224652 Mon Sep 17 00:00:00 2001 From: "A.M. Rowsell" Date: Fri, 25 Apr 2025 23:59:24 -0400 Subject: [PATCH 10/24] fix: bail install if on a non-systemd machine Instead suggest using cron. At some point I can probably write up some simple instructions for a basic cron setup. --- install.sh | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/install.sh b/install.sh index 7a89398..82a5fab 100755 --- a/install.sh +++ b/install.sh @@ -13,6 +13,14 @@ printf "\e[1;34mDisco\e[1;38;5;208mRSS\e[0m Install Helper Script\n\n" workingDir=$(pwd) +# bail if we're on a non-systemd system, suggest cron +if [[ -d /run/systemd/system ]]; then + printf "systemd detected..." +else + printf "This script and DiscoRSS in general are optimized for systemd! You can use cron as a substitute but I haven't written any documentation for it, so you're on your own for now!" + exit 127 # command not found exit code +fi + printf "Would you like the systemd service and timer files created for you? [y/n]: " read answer if [[ "$answer" =~ ^([yY])$ ]]; then From 1abda8d6e4c76e842e7e5cb01470ff339610a8c2 Mon Sep 17 00:00:00 2001 From: "A.M. Rowsell" Date: Fri, 20 Jun 2025 14:14:47 -0400 Subject: [PATCH 11/24] Small comment updates --- discorss.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/discorss.py b/discorss.py index 9d001ce..f6e5962 100755 --- a/discorss.py +++ b/discorss.py @@ -66,12 +66,13 @@ class Discorss: desc = desc + str(addons) return desc + # Some of this could go in __init__ def setup(self): os.environ["TZ"] = "America/Toronto" time.tzset() self.now = time.mktime(time.localtime()) # Check for log and config files/paths, create empty directories if needed - # TODO: make this cleaner + # TODO: change output to log file, as warning/error if not Path(self.log_dir).exists(): print( "No log file path exists. Yark! We'll try and make {}...".format( From c385b3266cfaf8512b3904bdb5f56ba1c5d7a62e Mon Sep 17 00:00:00 2001 From: "A.M. Rowsell" Date: Sun, 12 Apr 2026 14:37:39 -0400 Subject: [PATCH 12/24] async: complete rewrite as async code --- discorss.py | 253 ++++++++++++++++++++++++++++++++-------------------- 1 file changed, 157 insertions(+), 96 deletions(-) diff --git a/discorss.py b/discorss.py index f6e5962..dfe04b4 100755 --- a/discorss.py +++ b/discorss.py @@ -14,6 +14,7 @@ import requests import feedparser import hashlib import logging +import asyncio from pathlib import Path import json import time @@ -24,6 +25,8 @@ import re class Discorss: + FEED_TIMEOUT_SECONDS = 15 + def __init__(self): self.config_dir = os.environ.get("XDG_CONFIG_HOME") home_dir = Path.home() @@ -40,6 +43,159 @@ class Discorss: self.success_codes = [200, 201, 202, 203, 204, 205, 206] self.app_config = {} + async def _fetch_feed(self, hook): + response = await asyncio.to_thread( + requests.get, + hook["url"], + headers={"user-agent": "DiscoRSS (https://git.frzn.dev/amr/discorss, 0.2)"}, + timeout=self.FEED_TIMEOUT_SECONDS, + ) + response.raise_for_status() + return await asyncio.to_thread(feedparser.parse, response.content) + + async def _post_webhook(self, hook, webhook_string, custom_header): + return await asyncio.to_thread( + requests.post, + hook["webhook"], + data=webhook_string, + headers=custom_header, + timeout=self.FEED_TIMEOUT_SECONDS, + ) + + async def _process_feed(self, hook, last_check): + self.logger.debug("Parsing feed %s...", hook["name"]) + feeds = await self._fetch_feed(hook) + latest_post = None + latest_post_time = None + prev_best = 0 + bad_time = False + self.logger.debug("About to sort through entries for feed %s ...", hook["name"]) + for feed in feeds["entries"]: + try: + published_time = time.mktime(feed["published_parsed"]) + published_time = published_time + hook["offset"] + except KeyError: + published_time = time.mktime(feed["updated_parsed"]) + bad_time = True + if published_time > prev_best: + latest_post = feed + latest_post_time = published_time + prev_best = published_time + + if latest_post is None: + self.logger.warning("Feed %s had no entries to process", hook["name"]) + return None + + if bad_time is True: + self.logger.debug( + "Feed %s doesn't supply a published time, using updated time instead", + hook["name"], + ) + # Hash the title and time of the latest post and use that to determine if it's been posted + # Yes, SHA3-512 is totally unnecessary for this purpose, but I love SHA3 + self.logger.debug("About to hash %s ...", latest_post["title"]) + try: + new_hash = hashlib.sha3_512( + bytes(latest_post["title"] + str(latest_post_time), "utf-8") + ).hexdigest() + except TypeError: + self.logger.error("Title of %s isn't hashing correctly", hook["name"]) + return None + + if hook.get("lasthash") == new_hash: + return None + + # Generate the webhook + self.logger.info( + "Publishing webhook for %s. Last check was %d, self.now is %d", + hook["name"], + last_check, + self.now, + ) + webhook = { + "embeds": [ + { + "title": str(latest_post["title"]), + "url": str(latest_post["link"]), + "color": 2123412, + "footer": { + "text": "DiscoRSS", + "icon_url": "https://frzn.dev/~amr/images/discorss.png", + }, + "author": { + "name": str(hook["name"]), + "url": str(hook["siteurl"]), + }, + "fields": [ + { + "name": "Excerpt from post:", + "value": self.get_description(latest_post), + } + ], + # "timestamp": str(self.now), + } + ], + "attachments": [], + } + custom_header = { + "user-agent": "DiscoRSS (https://git.frzn.dev/amr/discorss, 0.2)", + "content-type": "application/json", + } + webhook_string = json.dumps(webhook) + + self.logger.debug("About to run POST for %s", hook["name"]) + response = await self._post_webhook(hook, webhook_string, custom_header) + if response.status_code not in self.success_codes: + self.logger.error( + "Error %d while trying to post %s", response.status_code, hook["name"] + ) + return None + + self.logger.debug("Got %d when posting %s", response.status_code, hook["name"]) + return new_hash + + async def _process_feeds(self, last_check): + tasks = [ + asyncio.create_task( + asyncio.wait_for( + self._process_feed(hook, last_check), + timeout=self.FEED_TIMEOUT_SECONDS, + ) + ) + for hook in self.app_config["feeds"] + ] + results = await asyncio.gather(*tasks, return_exceptions=True) + for i, result in enumerate(results): + hook = self.app_config["feeds"][i] + if isinstance(result, asyncio.TimeoutError): + self.logger.error( + "Timed out processing feed %s after %d seconds", + hook["name"], + self.FEED_TIMEOUT_SECONDS, + ) + continue + if isinstance(result, requests.RequestException): + self.logger.error( + "Network error while processing feed %s: %s", + hook["name"], + result, + ) + continue + if isinstance(result, Exception): + self.logger.error( + "Unhandled error while processing feed %s: %s", + hook["name"], + result, + ) + continue + if result is None: + continue + if "lasthash" not in hook: + self.logger.info( + "Feed %s has no existing hash, likely a new feed!", hook["name"] + ) + self.app_config["feeds"][i]["lasthash"] = result + # This function gets and formats the brief excerpt that goes in the embed # Different feeds put summaries in different fields, so we pick the best # one and limit it to 250 characters. @@ -125,102 +281,7 @@ class Discorss: last_check = ( self.now - 21600 ) # first run, no lastupdate, check up to 6 hours ago - for i, hook in enumerate(self.app_config["feeds"]): # Feed loop start - self.logger.debug("Parsing feed %s...", hook["name"]) - self.feeds = feedparser.parse(hook["url"]) - self.latest_post = [] - prev_best = 0 - self.logger.debug( - "About to sort through entries for feed %s ...", hook["name"] - ) - for feed in self.feeds["entries"]: - try: - bad_time = False - published_time = time.mktime(feed["published_parsed"]) - published_time = published_time + hook["offset"] - except KeyError: - published_time = time.mktime(feed["updated_parsed"]) - bad_time = True - if published_time > prev_best: - latest_post = feed - prev_best = published_time - else: - continue - if bad_time is True: - self.logger.debug( - "Feed %s doesn't supply a published time, using updated time instead", - hook["name"], - ) - # Hash the title and time of the latest post and use that to determine if it's been posted - # Yes, SHA3-512 is totally unnecessary for this purpose, but I love SHA3 - self.logger.debug("About to hash %s ...", latest_post["title"]) - try: - new_hash = hashlib.sha3_512( - bytes(latest_post["title"] + str(published_time), "utf-8") - ).hexdigest() - except TypeError: - self.logger.error("Title of %s isn't hashing correctly", hook["name"]) - continue - try: - if hook["lasthash"] != new_hash: - self.app_config["feeds"][i]["lasthash"] = new_hash - else: - continue - except KeyError: - self.app_config["feeds"][i]["lasthash"] = new_hash - self.logger.info( - "Feed %s has no existing hash, likely a new feed!", hook["name"] - ) - # Generate the webhook - self.logger.info( - "Publishing webhook for %s. Last check was %d, self.now is %d", - hook["name"], - last_check, - self.now, - ) - webhook = { - "embeds": [ - { - "title": str(latest_post["title"]), - "url": str(latest_post["link"]), - "color": 2123412, - "footer": { - "text": "DiscoRSS", - "icon_url": "https://frzn.dev/~amr/images/discorss.png", - }, - "author": { - "name": str(hook["name"]), - "url": str(hook["siteurl"]), - }, - "fields": [ - { - "name": "Excerpt from post:", - "value": self.get_description(latest_post), - } - ], - # "timestamp": str(self.now), - } - ], - "attachments": [], - } - custom_header = { - "user-agent": "DiscoRSS (https://git.frzn.dev/amr/discorss, 0.2)", - "content-type": "application/json", - } - webhook_string = json.dumps(webhook) - - self.logger.debug("About to run POST for %s", hook["name"]) - r = requests.post( - hook["webhook"], data=webhook_string, headers=custom_header - ) - if r.status_code not in self.success_codes: - self.logger.error( - "Error %d while trying to post %s", r.status_code, hook["name"] - ) - else: - self.logger.debug("Got %d when posting %s", r.status_code, hook["name"]) - - # End of feed loop + asyncio.run(self._process_feeds(last_check)) # Dump updated config back to json file self.logger.debug("Dumping config back to %s", str(self.config_file_path)) From 02aa1aa11b369387b70ff3e46cd9cfae1b608a91 Mon Sep 17 00:00:00 2001 From: "A.M. Rowsell" Date: Mon, 13 Apr 2026 15:26:35 -0400 Subject: [PATCH 13/24] hash: remove time from hash, now title only --- .gitignore | 177 +++++++++++++++++++++++++++++++++++++++++++++++++++- discorss.py | 4 +- 2 files changed, 178 insertions(+), 3 deletions(-) diff --git a/.gitignore b/.gitignore index fed12ea..ffcb59c 100644 --- a/.gitignore +++ b/.gitignore @@ -4,4 +4,179 @@ log/ *.bak bin/ lib/ -*.cfg \ No newline at end of file +*.cfg# Created by https://www.toptal.com/developers/gitignore/api/python +# Edit at https://www.toptal.com/developers/gitignore?templates=python + +### Python ### +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +#.idea/ + +### Python Patch ### +# Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration +poetry.toml + +# ruff +.ruff_cache/ + +# LSP config files +pyrightconfig.json + +# End of https://www.toptal.com/developers/gitignore/api/python diff --git a/discorss.py b/discorss.py index dfe04b4..c4dc604 100755 --- a/discorss.py +++ b/discorss.py @@ -91,12 +91,12 @@ class Discorss: "Feed %s doesn't supply a published time, using updated time instead", hook["name"], ) - # Hash the title and time of the latest post and use that to determine if it's been posted + # Hash the title of the latest post and use that to determine if it's been posted # Yes, SHA3-512 is totally unnecessary for this purpose, but I love SHA3 self.logger.debug("About to hash %s ...", latest_post["title"]) try: new_hash = hashlib.sha3_512( - bytes(latest_post["title"] + str(latest_post_time), "utf-8") + bytes(latest_post["title"], "utf-8") # Removed time from hash ).hexdigest() except TypeError: self.logger.error("Title of %s isn't hashing correctly", hook["name"]) From d412c1a378d81b49bff406b179fdd073d0fff74a Mon Sep 17 00:00:00 2001 From: "A.M. Rowsell" Date: Fri, 24 Apr 2026 10:23:18 -0400 Subject: [PATCH 14/24] hash: updated hash to use URL. also added DRY_RUN option --- discorss.py | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/discorss.py b/discorss.py index c4dc604..7e4f279 100755 --- a/discorss.py +++ b/discorss.py @@ -26,6 +26,7 @@ import re class Discorss: FEED_TIMEOUT_SECONDS = 15 + DRY_RUN = True def __init__(self): self.config_dir = os.environ.get("XDG_CONFIG_HOME") @@ -66,7 +67,6 @@ class Discorss: self.logger.debug("Parsing feed %s...", hook["name"]) feeds = await self._fetch_feed(hook) latest_post = None - latest_post_time = None prev_best = 0 bad_time = False self.logger.debug("About to sort through entries for feed %s ...", hook["name"]) @@ -79,7 +79,6 @@ class Discorss: bad_time = True if published_time > prev_best: latest_post = feed - latest_post_time = published_time prev_best = published_time if latest_post is None: @@ -91,15 +90,16 @@ class Discorss: "Feed %s doesn't supply a published time, using updated time instead", hook["name"], ) - # Hash the title of the latest post and use that to determine if it's been posted + self.logger.debug("Feed url is %s", latest_post["url"]) + # Hash the url of the latest post and use that to determine if it's been posted # Yes, SHA3-512 is totally unnecessary for this purpose, but I love SHA3 - self.logger.debug("About to hash %s ...", latest_post["title"]) + self.logger.debug("About to hash %s ...", latest_post["url"]) try: new_hash = hashlib.sha3_512( - bytes(latest_post["title"], "utf-8") # Removed time from hash + bytes(latest_post["url"], "utf-8") # Removed time from hash ).hexdigest() except TypeError: - self.logger.error("Title of %s isn't hashing correctly", hook["name"]) + self.logger.error("URL of %s isn't hashing correctly", hook["name"]) return None if hook.get("lasthash") == new_hash: @@ -144,7 +144,13 @@ class Discorss: webhook_string = json.dumps(webhook) self.logger.debug("About to run POST for %s", hook["name"]) - response = await self._post_webhook(hook, webhook_string, custom_header) + if not self.DRY_RUN: + response = await self._post_webhook(hook, webhook_string, custom_header) + else: + self.logger.debug( + "Dry run, not actually posting to webhook, faking return code 200" + ) + response.status_code = 200 if response.status_code not in self.success_codes: self.logger.error( "Error %d while trying to post %s", response.status_code, hook["name"] @@ -267,9 +273,9 @@ class Discorss: logging.basicConfig( filename=str(self.log_dir + self.log_file_path), encoding="utf-8", - level=logging.ERROR, + level=logging.DEBUG, datefmt="%m/%d/%Y %H:%M:%S", - format="%(asctime)s: %(levelname)s: %(message)s", + format="%(asctime)s -> %(levelname)s: %(message)s", ) return From 5a3e3333b369b2c3ad1e785d1e924f26de62cec3 Mon Sep 17 00:00:00 2001 From: "A.M. Rowsell" Date: Fri, 24 Apr 2026 10:26:26 -0400 Subject: [PATCH 15/24] hash: added FIFO for hashes, should reduce duplicates --- discorss.py | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/discorss.py b/discorss.py index 7e4f279..c880a98 100755 --- a/discorss.py +++ b/discorss.py @@ -27,6 +27,7 @@ import re class Discorss: FEED_TIMEOUT_SECONDS = 15 DRY_RUN = True + HASH_HISTORY_LIMIT = 10 def __init__(self): self.config_dir = os.environ.get("XDG_CONFIG_HOME") @@ -63,6 +64,20 @@ class Discorss: timeout=self.FEED_TIMEOUT_SECONDS, ) + def _get_hash_history(self, hook): + # now we store a list of hashes 10 long + # this function checks if it's the old format and updates it if needed + existing_hashes = hook.get("lasthash", []) + if isinstance(existing_hashes, str): + return [existing_hashes] + if isinstance(existing_hashes, list): + return [ + saved_hash + for saved_hash in existing_hashes + if isinstance(saved_hash, str) + ] + return [] + async def _process_feed(self, hook, last_check): self.logger.debug("Parsing feed %s...", hook["name"]) feeds = await self._fetch_feed(hook) @@ -102,7 +117,7 @@ class Discorss: self.logger.error("URL of %s isn't hashing correctly", hook["name"]) return None - if hook.get("lasthash") == new_hash: + if new_hash in self._get_hash_history(hook): return None # Generate the webhook @@ -200,7 +215,11 @@ class Discorss: self.logger.info( "Feed %s has no existing hash, likely a new feed!", hook["name"] ) - self.app_config["feeds"][i]["lasthash"] = result + hash_history = self._get_hash_history(hook) + hash_history.append(result) + if len(hash_history) > self.HASH_HISTORY_LIMIT: + hash_history = hash_history[-self.HASH_HISTORY_LIMIT :] + self.app_config["feeds"][i]["lasthash"] = hash_history # This function gets and formats the brief excerpt that goes in the embed # Different feeds put summaries in different fields, so we pick the best From 98d1a3ba450bd781a4c0bca7fd3d10bd9d89ed8f Mon Sep 17 00:00:00 2001 From: "A.M. Rowsell" Date: Fri, 24 Apr 2026 10:31:07 -0400 Subject: [PATCH 16/24] args: implemented argparse for config file, log, and dry run --- discorss.py | 54 +++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 44 insertions(+), 10 deletions(-) diff --git a/discorss.py b/discorss.py index c880a98..438a799 100755 --- a/discorss.py +++ b/discorss.py @@ -22,23 +22,34 @@ import os import sys import argparse import re +from types import SimpleNamespace class Discorss: FEED_TIMEOUT_SECONDS = 15 - DRY_RUN = True HASH_HISTORY_LIMIT = 10 - def __init__(self): + def __init__(self, args=None): + if args is None: + args = SimpleNamespace( + dry_run=False, + config_file=None, + log_file=None, + ) + + self.DRY_RUN = args.dry_run self.config_dir = os.environ.get("XDG_CONFIG_HOME") home_dir = Path.home() if self.config_dir is None: - self.config_file_path = str(home_dir) + "/.config/discorss/discorss.conf" - self.config_dir = str(home_dir) + "/.config/discorss" + default_config_file_path = str(home_dir) + "/.config/discorss/discorss.conf" else: - self.config_file_path = self.config_dir + r"/discorss/discorss.conf" - self.log_dir = r"/var/log/discorss" - self.log_file_path = r"/app.log" + default_config_file_path = self.config_dir + r"/discorss/discorss.conf" + self.config_file_path = args.config_file or default_config_file_path + self.config_dir = str(Path(self.config_file_path).parent) + + default_log_file_path = "/var/log/discorss/app.log" + self.log_file_path = args.log_file or default_log_file_path + self.log_dir = str(Path(self.log_file_path).parent) # Yes, I know you "can't parse HTML with regex", but # just watch me. self.html_filter = re.compile(r"\<\/?([A-Za-z0-9 \:\.\-\/\"\=])*\>") @@ -165,7 +176,7 @@ class Discorss: self.logger.debug( "Dry run, not actually posting to webhook, faking return code 200" ) - response.status_code = 200 + response = SimpleNamespace(status_code=200) if response.status_code not in self.success_codes: self.logger.error( "Error %d while trying to post %s", response.status_code, hook["name"] @@ -290,7 +301,7 @@ class Discorss: # Set up logging self.logger = logging.getLogger(__name__) logging.basicConfig( - filename=str(self.log_dir + self.log_file_path), + filename=self.log_file_path, encoding="utf-8", level=logging.DEBUG, datefmt="%m/%d/%Y %H:%M:%S", @@ -321,7 +332,30 @@ class Discorss: def main(): - app = Discorss() + parser = argparse.ArgumentParser( + description="DiscoRSS: publish feed updates to Discord webhooks." + ) + parser.add_argument( + "-d", + "--dry-run", + action="store_true", + help="Parse feeds and update state without posting to Discord.", + ) + parser.add_argument( + "-c", + "--config-file", + default=None, + help="Alternate config file path. Defaults to the existing config location.", + ) + parser.add_argument( + "-l", + "--log-file", + default=None, + help="Alternate log file path. Defaults to the existing log location.", + ) + args = parser.parse_args() + + app = Discorss(args) app.process() From 63300e6012983e271f7006f3aa761eb6ced4ea14 Mon Sep 17 00:00:00 2001 From: "A.M. Rowsell" Date: Fri, 24 Apr 2026 10:51:59 -0400 Subject: [PATCH 17/24] logging: update logging levels and added new messages --- discorss.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/discorss.py b/discorss.py index 438a799..a77dd48 100755 --- a/discorss.py +++ b/discorss.py @@ -116,16 +116,15 @@ class Discorss: "Feed %s doesn't supply a published time, using updated time instead", hook["name"], ) - self.logger.debug("Feed url is %s", latest_post["url"]) # Hash the url of the latest post and use that to determine if it's been posted # Yes, SHA3-512 is totally unnecessary for this purpose, but I love SHA3 - self.logger.debug("About to hash %s ...", latest_post["url"]) + self.logger.debug("About to hash %s ...", latest_post["link"]) try: new_hash = hashlib.sha3_512( - bytes(latest_post["url"], "utf-8") # Removed time from hash + bytes(latest_post["link"], "utf-8") # Removed time from hash ).hexdigest() except TypeError: - self.logger.error("URL of %s isn't hashing correctly", hook["name"]) + self.logger.error("URL %s isn't hashing correctly", hook["link"]) return None if new_hash in self._get_hash_history(hook): @@ -173,7 +172,7 @@ class Discorss: if not self.DRY_RUN: response = await self._post_webhook(hook, webhook_string, custom_header) else: - self.logger.debug( + self.logger.info( "Dry run, not actually posting to webhook, faking return code 200" ) response = SimpleNamespace(status_code=200) @@ -200,14 +199,14 @@ class Discorss: for i, result in enumerate(results): hook = self.app_config["feeds"][i] if isinstance(result, asyncio.TimeoutError): - self.logger.error( + self.logger.critical( "Timed out processing feed %s after %d seconds", hook["name"], self.FEED_TIMEOUT_SECONDS, ) continue if isinstance(result, requests.RequestException): - self.logger.error( + self.logger.critical( "Network error while processing feed %s: %s", hook["name"], result, @@ -223,7 +222,7 @@ class Discorss: if result is None: continue if "lasthash" not in hook: - self.logger.info( + self.logger.debug( "Feed %s has no existing hash, likely a new feed!", hook["name"] ) hash_history = self._get_hash_history(hook) @@ -303,10 +302,11 @@ class Discorss: logging.basicConfig( filename=self.log_file_path, encoding="utf-8", - level=logging.DEBUG, + level=logging.INFO, datefmt="%m/%d/%Y %H:%M:%S", - format="%(asctime)s -> %(levelname)s: %(message)s", + format="%(asctime)s [%(threadName)s] -> %(levelname)s: %(message)s", ) + self.logger.info("========= Started discorss.py ==========") return def process(self): @@ -324,7 +324,7 @@ class Discorss: self.app_config["lastupdate"] = self.now with open(self.config_file_path, "w") as config_file: json.dump(self.app_config, config_file, indent=4) - + self.logger.info("========= Ended discord.py =========") return From cdd7a2569f94e327083b8600a77c6f36cb266e54 Mon Sep 17 00:00:00 2001 From: "A.M. Rowsell" Date: Fri, 24 Apr 2026 15:53:02 -0400 Subject: [PATCH 18/24] log: change error printing to logging --- discorss.py | 37 ++++++++++++++----------------------- 1 file changed, 14 insertions(+), 23 deletions(-) diff --git a/discorss.py b/discorss.py index a77dd48..3419962 100755 --- a/discorss.py +++ b/discorss.py @@ -262,24 +262,25 @@ class Discorss: os.environ["TZ"] = "America/Toronto" time.tzset() self.now = time.mktime(time.localtime()) + # Set up logging + self.logger = logging.getLogger(__name__) + logging.basicConfig( + filename=self.log_file_path, + encoding="utf-8", + level=logging.WARNING, + datefmt="%m/%d/%Y %H:%M:%S", + format="%(asctime)s [%(threadName)s] -> %(levelname)s: %(message)s", + ) # Check for log and config files/paths, create empty directories if needed # TODO: change output to log file, as warning/error if not Path(self.log_dir).exists(): - print( - "No log file path exists. Yark! We'll try and make {}...".format( - self.log_dir - ) - ) + self.logger.warning("No log file path exists. Yark! We'll try and make %s...", self.log_dir) try: Path(self.log_dir).mkdir(parents=True, exist_ok=True) except FileExistsError: - print( - "The path {} already exists and is not a directory!".format( - self.log_dir - ) - ) + self.logger.critical("The path {} already exists and is not a directory!".format(self.log_dir)) if not Path(self.config_file_path).exists(): - print( + self.logger.warning( "No config file at {}! Snarf. We'll try and make {}...".format( self.config_file_path, self.config_dir ) @@ -287,7 +288,7 @@ class Discorss: try: Path(self.config_dir).mkdir(parents=True, exist_ok=True) except FileExistsError: - print( + self.warning.critical( "The config dir {} already exists and is not a directory! Please fix manually. Quitting!".format( self.config_dir ) @@ -297,20 +298,11 @@ class Discorss: # Loading the config file with open(self.config_file_path, "r") as config_file: self.app_config = json.load(config_file) - # Set up logging - self.logger = logging.getLogger(__name__) - logging.basicConfig( - filename=self.log_file_path, - encoding="utf-8", - level=logging.INFO, - datefmt="%m/%d/%Y %H:%M:%S", - format="%(asctime)s [%(threadName)s] -> %(levelname)s: %(message)s", - ) - self.logger.info("========= Started discorss.py ==========") return def process(self): self.setup() # Handle the config and log paths + self.logger.info("Starting DiscoRSS run...") try: last_check = self.app_config["lastupdate"] except KeyError: @@ -324,7 +316,6 @@ class Discorss: self.app_config["lastupdate"] = self.now with open(self.config_file_path, "w") as config_file: json.dump(self.app_config, config_file, indent=4) - self.logger.info("========= Ended discord.py =========") return From 85d13b930923781143c510de8bb248f7265ba430 Mon Sep 17 00:00:00 2001 From: "A.M. Rowsell" Date: Fri, 24 Apr 2026 16:02:56 -0400 Subject: [PATCH 19/24] bump version to 0.3rc1 --- discorss.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/discorss.py b/discorss.py index 3419962..fc15bb8 100755 --- a/discorss.py +++ b/discorss.py @@ -28,6 +28,7 @@ from types import SimpleNamespace class Discorss: FEED_TIMEOUT_SECONDS = 15 HASH_HISTORY_LIMIT = 10 + APP_VERSION = "0.3rc1" def __init__(self, args=None): if args is None: @@ -60,7 +61,11 @@ class Discorss: response = await asyncio.to_thread( requests.get, hook["url"], - headers={"user-agent": "DiscoRSS (https://git.frzn.dev/amr/discorss, 0.2)"}, + headers={ + "user-agent": "DiscoRSS (https://git.frzn.dev/amr/discorss, {})".format( + self.APP_VERSION + ) + }, timeout=self.FEED_TIMEOUT_SECONDS, ) response.raise_for_status() @@ -274,11 +279,17 @@ class Discorss: # Check for log and config files/paths, create empty directories if needed # TODO: change output to log file, as warning/error if not Path(self.log_dir).exists(): - self.logger.warning("No log file path exists. Yark! We'll try and make %s...", self.log_dir) + self.logger.warning( + "No log file path exists. Yark! We'll try and make %s...", self.log_dir + ) try: Path(self.log_dir).mkdir(parents=True, exist_ok=True) except FileExistsError: - self.logger.critical("The path {} already exists and is not a directory!".format(self.log_dir)) + self.logger.critical( + "The path {} already exists and is not a directory!".format( + self.log_dir + ) + ) if not Path(self.config_file_path).exists(): self.logger.warning( "No config file at {}! Snarf. We'll try and make {}...".format( @@ -302,7 +313,7 @@ class Discorss: def process(self): self.setup() # Handle the config and log paths - self.logger.info("Starting DiscoRSS run...") + self.logger.info("Starting DiscoRSS version {}...".format(self.APP_VERSION)) try: last_check = self.app_config["lastupdate"] except KeyError: From 9c81fb0c81691241d60e97786406d1e3202efa1e Mon Sep 17 00:00:00 2001 From: "A.M. Rowsell" Date: Sat, 25 Apr 2026 22:37:28 -0400 Subject: [PATCH 20/24] readme: update readme with info about recent changes --- README.md | 74 ++++++++++++++++++++++++++++++------------------------- 1 file changed, 40 insertions(+), 34 deletions(-) diff --git a/README.md b/README.md index 48148d4..772dc73 100644 --- a/README.md +++ b/README.md @@ -11,46 +11,23 @@ requests >= 2.4.2 feedparser ``` -The remaining imports should all be part of the standard Python install. +The remaining imports should all be part of the standard Python install: hashlib, logging, asyncio, pathlib, json, time, os, sys, argparse, re, types. To install the required ones, use your distro's package manager -- don't use pip unless you want to run the entire script in a virtualenv (which would probably make the systemd unit file a bit more complex, I think you'd have to add a PreExec to activate the virtualenv and a PostExec to disable it?). ## Important Notes -The logger will try and put the logs in `/var/log/discorss`. Make sure to create this directory and give the user running the script write permissions there. If you want the logs to go somewhere else, just edit the log_dir variable near the top of discorss.py. Choose a directory that makes sense. Unfortunately, as far as I know, the XDG standards don't have an equivalent to the /var/log directory in the user directory, so I wasn't sure what the best default was. In the future, we may switch to logging using systemd and journald directly, though it is nice to have a separate file. +By default, DiscoRSS will try and put the logs in `/var/log/discorss`. Make sure to create this directory and give the user running the script write permissions there. If you want the logs to go somewhere else, just give the directory as an argument (shown below). Choose a directory that makes sense. Unfortunately, as far as I know, the XDG standards don't have an equivalent to the /var/log directory in the user directory, so I wasn't sure what the best default was. In the future, we may switch to logging using systemd and journald directly, though it is nice to have a separate file. + +### Script Arguments + +The script has a few different arguments that make it easy to customize certain things: + +* `-d / --dry-run`: Just like it says on the tin -- run the script, pull feeds, but don't post anything to Discord +* `-c / --config-file`: Give a path to an alternate location for the config file. The default is ~/.config/discorss/discorss.conf and that should be fine for the vast majority of users. +* `-l / --log-file`: Give a path to where you want the log file stored. The default is /var/log/discorss/app.log but you will have to create the /var/log/discorss directory (or have install.sh do it for you) and make it writeable by whatever user will be running the script. ## How to setup -Note: see the Automation section below for info about using the `install.sh` script to help get all the files in the right places. - -### Config file format - -To configure the script, create `~/.config/discorss/discorss.conf` using JSON formatting like this: - -```json -{ - "feeds": [ - { - "name": "Phoronix", - "siteurl": "https://www.phoronix.com/", - "url": "http://www.phoronix.com/rss.php", - "webhook": "webhook url", - "offset": -18000 - }, - { - "name": "Pagetable", - "siteurl": "https://pagetable.com", - "url": "https://www.pagetable.com/?feed=rss2", - "webhook": "webhook url", - "offset": -18000 - } - ] -} -``` - -Create a webhook for each feed (unless you want them all to show as the same webhook for whatever reason) and make sure to add it in to the config. I have it set up with a webhook for each site, each with the site's icon and name set for the webhook which makes the messages look really nice. - -The offset should only be required if feeds aren't showing up. This is because feedparser, in its infinite wisdom, just ignores the timezone when converting publish dates from feeds. So most feeds end up with an epoch in UTC. The offset should be the number of seconds between your time zone and UTC. This will eventually be fixed in a future update, I just need to sit down and wrangle with feedparser and datetime some more. All fields are mandatory, if you want to have no offset for example, set it to 0. The name and siteurl are used to create the "author" field in the Discord embed. - -## Automation +### Automation **New**: There is now `install.sh` in the repo which will automatically help you set up both the config file and the systemd unit files for the service and timer, using essentially the exact text below. It will copy them to the user systemd unit folder, `~/.config/systemd/user` and optionally enable the timer. It's a good idea to edit the configuration file at `~/.config/discorss/discorss.conf` and paste in your webhook URLs and add any other feeds you want before starting the timer, unless you can do it really quickly before the next 5 minute spot on the clock :) Of course, if it fires with an invalid config, the script will just crash, and you'll probably just have to manually start the timer once the config is fixed, so not a big deal. @@ -94,6 +71,35 @@ WantedBy=timers.target To change how often this fires, edit the OnCalendar parameter. The config above has it firing every 15 minutes at half past the minute. Look at the systemd timer man pages for help if you want to tweak it. +### Config file format + +To configure the script, create `~/.config/discorss/discorss.conf` (or have install.sh create it for you) using JSON formatting like this: + +```json +{ + "feeds": [ + { + "name": "Phoronix", + "siteurl": "https://www.phoronix.com/", + "url": "http://www.phoronix.com/rss.php", + "webhook": "webhook url", + "offset": -18000 + }, + { + "name": "Pagetable", + "siteurl": "https://pagetable.com", + "url": "https://www.pagetable.com/?feed=rss2", + "webhook": "webhook url", + "offset": -18000 + } + ] +} +``` + +Create a webhook for each feed (unless you want them all to show as the same webhook for whatever reason) and make sure to add it in to the config. I have it set up with a webhook for each site, each with the site's icon and name set for the webhook which makes the messages look really nice. + +The offset should only be required if feeds from the previous 6 hours aren't showing up when you first start the script. This is because feedparser, in its infinite wisdom, just ignores the timezone when converting publish dates from feeds. So most feeds end up with an epoch in UTC. The offset should be the number of seconds between your time zone and UTC. This will eventually be fixed in a future update, I just need to sit down and wrangle with feedparser and datetime some more. All fields are mandatory, if you want to have no offset for example, set it to 0. The name and siteurl are used to create the "author" field in the Discord embed. + ## Contributing Want to fix something or make a suggestion? Feel free! If you want to send a pull request, you *must* run the Python `black` formatter on the source code before committing. I have this set up in my editor to automatically run every time I save the file, but you could have it run as part of a git hook or something. For non-format stuff, please just follow the code style as best you can. For Python code, I separate multi-word variable names with underscores. So it should be `feed_time`, not `feedTime` or `FeedTime` or `feed-time`. Don't ask me why, but I use camelCase for other languages... but in Python I've switched to underscores. From 4128e7808c1056f027e669d393851ab2f9e3b1fe Mon Sep 17 00:00:00 2001 From: "A.M. Rowsell" Date: Sat, 25 Apr 2026 22:40:17 -0400 Subject: [PATCH 21/24] readme: fix small error --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 772dc73..2f50663 100644 --- a/README.md +++ b/README.md @@ -15,7 +15,7 @@ The remaining imports should all be part of the standard Python install: hashlib ## Important Notes -By default, DiscoRSS will try and put the logs in `/var/log/discorss`. Make sure to create this directory and give the user running the script write permissions there. If you want the logs to go somewhere else, just give the directory as an argument (shown below). Choose a directory that makes sense. Unfortunately, as far as I know, the XDG standards don't have an equivalent to the /var/log directory in the user directory, so I wasn't sure what the best default was. In the future, we may switch to logging using systemd and journald directly, though it is nice to have a separate file. +By default, DiscoRSS will try and put the logs in `/var/log/discorss`. Make sure to create this directory and give the user running the script write permissions there. If you want the logs to go somewhere else, just give the path as an argument (shown below). Choose a directory that makes sense. Unfortunately, as far as I know, the XDG standards don't have an equivalent to the /var/log directory in the user directory, so I wasn't sure what the best default was. In the future, we may switch to logging using systemd and journald directly, though it is nice to have a separate file. ### Script Arguments @@ -23,7 +23,7 @@ The script has a few different arguments that make it easy to customize certain * `-d / --dry-run`: Just like it says on the tin -- run the script, pull feeds, but don't post anything to Discord * `-c / --config-file`: Give a path to an alternate location for the config file. The default is ~/.config/discorss/discorss.conf and that should be fine for the vast majority of users. -* `-l / --log-file`: Give a path to where you want the log file stored. The default is /var/log/discorss/app.log but you will have to create the /var/log/discorss directory (or have install.sh do it for you) and make it writeable by whatever user will be running the script. +* `-l / --log-file`: Give a path to where you want the log file stored. The default is /var/log/discorss/app.log but you will have to create the /var/log/discorss directory and make it writeable by whatever user will be running the script. ## How to setup From a34cc1ac8e46735c81f6a09762dc3f08aa4380f7 Mon Sep 17 00:00:00 2001 From: "A.M. Rowsell" Date: Sat, 25 Apr 2026 22:47:37 -0400 Subject: [PATCH 22/24] readme: add demo screenshot --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index 2f50663..81256c7 100644 --- a/README.md +++ b/README.md @@ -13,6 +13,10 @@ feedparser The remaining imports should all be part of the standard Python install: hashlib, logging, asyncio, pathlib, json, time, os, sys, argparse, re, types. To install the required ones, use your distro's package manager -- don't use pip unless you want to run the entire script in a virtualenv (which would probably make the systemd unit file a bit more complex, I think you'd have to add a PreExec to activate the virtualenv and a PostExec to disable it?). +

+![DiscoRSS Demo Screenshot](https://frzn.dev/~amr/images/Screenshot_224228_1.png) +

+ ## Important Notes By default, DiscoRSS will try and put the logs in `/var/log/discorss`. Make sure to create this directory and give the user running the script write permissions there. If you want the logs to go somewhere else, just give the path as an argument (shown below). Choose a directory that makes sense. Unfortunately, as far as I know, the XDG standards don't have an equivalent to the /var/log directory in the user directory, so I wasn't sure what the best default was. In the future, we may switch to logging using systemd and journald directly, though it is nice to have a separate file. From e5a5f4f7d28bf42c214e9c1a6ca60e04b40624a3 Mon Sep 17 00:00:00 2001 From: "A.M. Rowsell" Date: Sat, 25 Apr 2026 22:48:54 -0400 Subject: [PATCH 23/24] readme: fix URL --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 81256c7..8faf1be 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,7 @@ feedparser The remaining imports should all be part of the standard Python install: hashlib, logging, asyncio, pathlib, json, time, os, sys, argparse, re, types. To install the required ones, use your distro's package manager -- don't use pip unless you want to run the entire script in a virtualenv (which would probably make the systemd unit file a bit more complex, I think you'd have to add a PreExec to activate the virtualenv and a PostExec to disable it?).

-![DiscoRSS Demo Screenshot](https://frzn.dev/~amr/images/Screenshot_224228_1.png) +

## Important Notes From 0e8dba9d6dc5c4deeef1fa12e2d9a22732b11518 Mon Sep 17 00:00:00 2001 From: "A.M. Rowsell" Date: Mon, 4 May 2026 11:48:29 -0400 Subject: [PATCH 24/24] chore: fix discord webhook call to include APP_VERSION --- discorss.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/discorss.py b/discorss.py index fc15bb8..bf431b9 100755 --- a/discorss.py +++ b/discorss.py @@ -168,7 +168,9 @@ class Discorss: "attachments": [], } custom_header = { - "user-agent": "DiscoRSS (https://git.frzn.dev/amr/discorss, 0.2)", + "user-agent": "DiscoRSS (https://git.frzn.dev/amr/discorss, {})".format( + self.APP_VERSION + ), "content-type": "application/json", } webhook_string = json.dumps(webhook)