discorss/discorss.py
A.M. Rowsell 8229a14cfe
Cleaned up file/dir paths a bit, make app_config global
app_config is global pending the next commit which will
separate out all the config file handling from main() as the
LSP was saying the main function had become too complex.
2025-02-25 18:13:01 -05:00

156 lines
5.5 KiB
Python
Executable file

#!/usr/bin/env python
# -*- coding: UTF-8 -*-
# SPDX-License-Identifier: MPL-2.0
# SPDX-FileCopyrightText: © 2025 A.M. Rowsell <https://frzn.dev/~amr>
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
# DiscoRSS: A simple RSS feed reader for Discord. Takes RSS feeds and then sends them to
# webhooks. Intended to run using systemd timers.
import requests
import feedparser
import hashlib
from pathlib import Path
import json
import time
import os
import sys
import re
# Locate the configuration following the XDG convention: use
# $XDG_CONFIG_HOME when it is set and non-empty, otherwise ~/.config.
# Both values are kept as plain strings.
home_dir = Path.home()
config_dir = os.environ.get("XDG_CONFIG_HOME")
if not config_dir:
    # Unset *or* empty: fall back to the default location.
    config_dir = str(home_dir) + "/.config"
# config_dir always names the application's own directory (not the XDG
# root), so that creating it yields the directory the config file lives in.
config_dir = config_dir + "/discorss"
config_file_path = config_dir + "/discorss.conf"

# Log location. log_file_path starts with a slash and is presumably meant
# to be appended to log_dir -- TODO confirm once file logging is wired up.
log_dir = r"/var/log/discorss"
log_file_path = r"/app.log"

# Yes, I know you "can't parse HTML with regex", but
# just watch me.
# Matches an opening or closing tag: "<", optional "/", a letter, then
# anything up to the next ">". Requiring a letter right after "<" keeps
# plain text such as "1 < 2 and 3 > 2" intact, while still stripping tags
# whose names/attributes contain digits, hyphens, "?" and so on.
html_filter = re.compile(r"</?[A-Za-z][^<>]*>")

# HTTP status codes treated as a successful webhook delivery.
# NOTE: these are strings; an integer status code must be converted with
# str() before testing membership.
success_codes = ["200", "201", "202", "203", "204", "205", "206"]

# Parsed contents of the JSON config file; empty until it has been loaded.
app_config = {}
# This function gets and formats the brief excerpt that goes in the embed
# Different feeds put summaries in different fields, so we pick the best
# one and limit it to max_length (250 by default) characters.
# TODO: make the character limit smarter, as to split at a natural point
def get_description(feed, max_length=250):
    """Return a short, tag-stripped excerpt of the newest entry in *feed*.

    The text is taken from ``entries[0]["summary_detail"]["value"]`` when
    that exists, otherwise from ``entries[0]["description"]``. HTML tags are
    removed with the module-level ``html_filter`` pattern and the result is
    cut to at most ``max_length`` characters.

    Args:
        feed: A parsed feed exposing an ``entries`` sequence of mappings.
        max_length: Maximum number of characters to return.

    Returns:
        The excerpt, or a short placeholder when the feed has no entries,
        the entry carries neither field, or the excerpt is empty.
    """
    # A placeholder is returned instead of "" because the result is used as
    # an embed field value, which Discord presumably rejects when empty --
    # TODO confirm against the Discord embed documentation.
    placeholder = "No description available."

    try:
        entry = feed.entries[0]
    except IndexError:
        return placeholder

    try:
        raw_text = entry["summary_detail"]["value"]
    except KeyError:
        try:
            raw_text = entry["description"]
        except KeyError:
            return placeholder

    # Slicing already copes with strings shorter than max_length.
    excerpt = html_filter.sub("", str(raw_text))[:max_length]
    return excerpt or placeholder
def _ensure_log_dir():
    """Create the log directory if needed; failures are reported, not fatal."""
    try:
        Path(log_dir).mkdir(parents=True, exist_ok=True)
    except FileExistsError:
        print(
            "The logfile path {} already exists and is not a directory!".format(
                log_dir
            )
        )
    except OSError as err:
        # Typically a permission problem. Nothing below writes to the log
        # directory, so carry on rather than abort the whole run.
        print("Could not create the log directory {}: {}".format(log_dir, err))


def _build_webhook(hook, feed):
    """Build the Discord webhook payload (one embed) for the newest entry."""
    entry = feed.entries[0]
    return {
        "embeds": [
            {
                "title": str(entry["title"]),
                "url": str(entry["link"]),
                "color": 216128,
                # Discord's embed footer carries its label under "text".
                "footer": {
                    "text": "DiscoRSS",
                    # "url": "https://git.frzn.dev/amr/discorss",
                },
                "author": {
                    "name": str(hook["name"]),
                    "url": str(hook["siteurl"]),
                },
                "fields": [
                    {
                        "name": "Excerpt from post:",
                        "value": get_description(feed),
                    }
                ],
            }
        ],
        "attachments": [],
    }


def _post_webhook(url, payload):
    """POST *payload* as JSON to *url*; return True if it was accepted."""
    custom_header = {
        "user-agent": "DiscoRSS (https://git.frzn.dev/amr/discorss, 0.2rc2)",
        "content-type": "application/json",
    }
    try:
        # requests applies no timeout unless one is given, and this script
        # is meant to run unattended, so never wait forever.
        r = requests.post(
            url, data=json.dumps(payload), headers=custom_header, timeout=30
        )
    except requests.RequestException as err:
        print("Error {} while trying to post {}".format(err, url))
        return False
    # success_codes holds strings while status_code is an int.
    if str(r.status_code) not in success_codes:
        print("Error {} while trying to post {}".format(r.status_code, url))
        return False
    return True


def _process_feed(hook, now, last_check):
    """Fetch one configured feed and post its newest entry if it is new.

    *hook* is the feed's entry from the config; its "lasthash" is updated in
    place whenever the newest entry's title hash differs from the stored one.
    """
    print("Parsing feed {}...".format(hook["name"]))
    feed = feedparser.parse(hook["url"])
    if not feed.entries:
        # Fetch failed or the feed is empty: skip it instead of crashing
        # the run for every other feed.
        print("Feed {} has no entries, skipping.".format(hook["name"]))
        return
    entry = feed.entries[0]

    published = entry.get("published_parsed")
    if published is None:
        published_time = now - 10  # Not sure what a sensible default here is
    else:
        # NOTE(review): feedparser documents *_parsed values as UTC while
        # time.mktime() interprets its argument as local time; the per-feed
        # "offset" (seconds, optional, default 0) presumably compensates --
        # confirm before changing this.
        published_time = time.mktime(published) + hook.get("offset", 0)

    # Hash the title of the latest post and use that to determine if it's
    # been posted
    new_hash = hashlib.sha3_512(bytes(entry["title"], "utf-8")).hexdigest()
    if hook.get("lasthash") == new_hash:
        return
    hook["lasthash"] = new_hash

    # Entries older than the previous run are recorded but not posted.
    if published_time > last_check:
        _post_webhook(hook["webhook"], _build_webhook(hook, feed))


def main():
    """Check every configured feed once and post new entries to Discord.

    Loads the JSON config from ``config_file_path``, processes each item of
    its "feeds" list, then writes the config back with refreshed "lasthash"
    values and a new "lastupdate" timestamp. If the config file does not
    exist, the config directory is created and the function returns.
    """
    # The loaded config is published through the module-level name.
    global app_config

    os.environ["TZ"] = "America/Toronto"
    time.tzset()

    # Check for log and config files/paths, create empty directories if needed
    _ensure_log_dir()
    if not Path(config_file_path).exists():
        print(
            "No config file at {}! Snarf.\n{} was created for you.".format(
                config_file_path, config_dir
            )
        )
        Path(config_dir).mkdir(parents=True, exist_ok=True)
        return

    with open(config_file_path, "r", encoding="utf-8") as config_file:
        app_config = json.load(config_file)

    now = time.mktime(time.localtime())
    # first run, no lastupdate, check up to 6 hours ago
    last_check = app_config.get("lastupdate", now - 21600)

    for hook in app_config["feeds"]:
        _process_feed(hook, now, last_check)

    app_config["lastupdate"] = now
    with open(config_file_path, "w", encoding="utf-8") as config_file:
        json.dump(app_config, config_file, indent=4)
    return
# Run the feed check only when this file is executed as a script, not when
# it is imported.
if __name__ == "__main__":
    main()