diff --git a/discorss.py b/discorss.py
index be76e69..4fe079b 100755
--- a/discorss.py
+++ b/discorss.py
@@ -19,10 +19,11 @@ import time
 import os
 import re
 
-config_dir = os.environ.get('XDG_CONFIG_HOME')
+config_dir = os.environ.get("XDG_CONFIG_HOME")
+home_dir = Path.home()
 if config_dir is None:
-    config_file_path = r"~/.config/discorss/discorss.conf"
-    config_dir = r"~/.config/discorss"
+    config_file_path = str(home_dir) + "/.config/discorss/discorss.conf"
+    config_dir = str(home_dir) + "/.config/discorss"
 else:
     config_file_path = config_dir + r"/discorss/discorss.conf"
 log_file_path = r"/var/log/discorss"
@@ -30,30 +31,26 @@ log_file_path = r"/var/log/discorss"
 log_file_name = r"/app.log"
 # Yes, I know you "can't parse HTML with regex", but
 # just watch me.
-html_filter = re.compile(r"\<\/?([A-Za-z \"\=])*\>")
+html_filter = re.compile(r"\<\/?([A-Za-z \:\.\/\"\=])*\>")
 success_codes = ["200", "201", "202", "203", "204", "205", "206"]
 
 
 # This function gets and formats the brief excerpt that goes in the embed
 # Different feeds put summaries in different fields, so we pick the best
-# one and limit it to 150 characters.
+# one and limit it to 250 characters.
 # TODO: make the character limit smarter, as to split at a natural point
 def get_description(feed):
     try:
         temporary_string = str(feed.entries[0]["summary_detail"]["value"])
         temporary_string = html_filter.sub("", temporary_string)
         desc = (
-            temporary_string[:150]
-            if len(temporary_string) > 150
-            else temporary_string
+            temporary_string[:250] if len(temporary_string) > 250 else temporary_string
         )
     except KeyError:
         temporary_string = str(feed.entries[0]["description"])
         temporary_string = html_filter.sub("", temporary_string)
         desc = (
-            temporary_string[:150]
-            if len(temporary_string) > 150
-            else temporary_string
+            temporary_string[:250] if len(temporary_string) > 250 else temporary_string
         )
     return desc
 
@@ -62,13 +59,22 @@ def main():
     os.environ["TZ"] = "America/Toronto"
     time.tzset()
     # Check for log and config files/paths, create empty directories if needed
+    # TODO: make this cleaner
     try:
         Path(log_file_path).mkdir(parents=True, exist_ok=True)
     except FileExistsError:
-        print("The logfile path {} already exists and is not a directory!".format(log_file_path))
+        print(
+            "The logfile path {} already exists and is not a directory!".format(
+                log_file_path
+            )
+        )
     if not Path(config_file_path).exists():
-        print("No config file at {}! Snarf.\n{} was created for you.".format(config_file_path, config_dir))
-        Path(config_file_path).mkdir(parents=True, exist_ok=True)
+        print(
+            "No config file at {}! Snarf.\n{} was created for you.".format(
+                config_file_path, config_dir
+            )
+        )
+        Path(config_dir).mkdir(parents=True, exist_ok=True)
         return
     with open(config_file_path, "r") as config_file:
         app_config = json.load(config_file)
@@ -76,15 +82,16 @@ def main():
     try:
         last_check = app_config["lastupdate"]
     except KeyError:
-        last_check = (
-            now - 21600
-        )  # first run, no lastupdate, check up to 6 hours ago
+        last_check = now - 21600  # first run, no lastupdate, check up to 6 hours ago
     for i, hook in enumerate(app_config["feeds"]):
         # Get the feed
-        feed = feedparser.parse(hook["url"])
-        published_time = time.mktime(feed.entries[0]["published_parsed"])
-        published_time = published_time + hook["offset"]
         print("Parsing feed {}...".format(hook["name"]))
+        feed = feedparser.parse(hook["url"])
+        try:
+            published_time = time.mktime(feed.entries[0]["published_parsed"])
+            published_time = published_time + hook["offset"]
+        except KeyError:
+            published_time = now - 10  # Not sure what a sensible default here is
         # Hash the title of the latest post and use that to determine if it's been posted
         new_hash = hashlib.sha3_512(
             bytes(feed.entries[0]["title"], "utf-8")
@@ -122,7 +129,7 @@ def main():
             "attachments": [],
         }
         custom_header = {
-            "user-agent": "DiscoRSS (https://git.frzn.dev/amr/discorss, 0.2rc1)",
+            "user-agent": "DiscoRSS (https://git.frzn.dev/amr/discorss, 0.2rc2)",
             "content-type": "application/json",
         }
         webhook_string = json.dumps(webhook)