Compare commits

..

2 commits

Author SHA1 Message Date
c385b3266c
async: complete rewrite as async code 2026-04-12 14:37:39 -04:00
A.M. Rowsell
1abda8d6e4
Small comment updates 2025-06-20 14:14:47 -04:00

View file

@ -14,6 +14,7 @@ import requests
import feedparser import feedparser
import hashlib import hashlib
import logging import logging
import asyncio
from pathlib import Path from pathlib import Path
import json import json
import time import time
@ -24,6 +25,8 @@ import re
class Discorss: class Discorss:
FEED_TIMEOUT_SECONDS = 15
def __init__(self): def __init__(self):
self.config_dir = os.environ.get("XDG_CONFIG_HOME") self.config_dir = os.environ.get("XDG_CONFIG_HOME")
home_dir = Path.home() home_dir = Path.home()
@ -40,6 +43,159 @@ class Discorss:
self.success_codes = [200, 201, 202, 203, 204, 205, 206] self.success_codes = [200, 201, 202, 203, 204, 205, 206]
self.app_config = {} self.app_config = {}
async def _fetch_feed(self, hook):
    """Download the feed at hook["url"] and return it parsed.

    The blocking requests.get call and the feedparser.parse call are each
    handed to asyncio.to_thread. The GET uses FEED_TIMEOUT_SECONDS as its
    requests timeout.

    Args:
        hook: Feed configuration dict; only "url" is read here.

    Returns:
        The value returned by feedparser.parse for the response body.

    Raises:
        requests.HTTPError: via raise_for_status() on an error status.
        Any other exception raised by requests.get propagates unchanged.
    """
    request_headers = {
        "user-agent": "DiscoRSS (https://git.frzn.dev/amr/discorss, 0.2)"
    }
    http_reply = await asyncio.to_thread(
        requests.get,
        hook["url"],
        headers=request_headers,
        timeout=self.FEED_TIMEOUT_SECONDS,
    )
    # Turn HTTP error statuses into exceptions before trying to parse.
    http_reply.raise_for_status()
    parsed_feed = await asyncio.to_thread(feedparser.parse, http_reply.content)
    return parsed_feed
async def _post_webhook(self, hook, webhook_string, custom_header):
    """POST an already-serialised payload to hook["webhook"].

    The blocking requests.post call is handed to asyncio.to_thread and
    uses FEED_TIMEOUT_SECONDS as its requests timeout.

    Args:
        hook: Feed configuration dict; only "webhook" is read here.
        webhook_string: Request body, sent as-is via ``data=``.
        custom_header: Headers dict passed to requests.post.

    Returns:
        The response object returned by requests.post. The status code is
        not inspected here; that is left to the caller.
    """
    target_url = hook["webhook"]
    request_options = {
        "data": webhook_string,
        "headers": custom_header,
        "timeout": self.FEED_TIMEOUT_SECONDS,
    }
    reply = await asyncio.to_thread(requests.post, target_url, **request_options)
    return reply
async def _process_feed(self, hook, last_check):
    """Fetch one feed and post its newest entry to the feed's webhook.

    Args:
        hook: Feed configuration dict. "name" and "siteurl" are read here,
            "offset" (seconds added to published times, default 0) and
            "lasthash" are optional; the dict is also handed to
            _fetch_feed and _post_webhook.
        last_check: Timestamp of the previous run; only used in a log line.

    Returns:
        The hex SHA3-512 digest of the newest entry's title and timestamp
        when that entry was posted with a success status. None when the
        feed has no usable entry, the title cannot be hashed, the digest
        equals hook["lasthash"], or the webhook answered with a status
        outside self.success_codes.

    Raises:
        Whatever _fetch_feed or _post_webhook raise, and KeyError when the
        hook or the chosen entry lacks a required key such as "link".
    """
    self.logger.debug("Parsing feed %s...", hook["name"])
    feeds = await self._fetch_feed(hook)

    # Looked up once, outside the per-entry try, so a hook without an
    # "offset" is not mistaken for an entry without a published time.
    offset = hook.get("offset", 0)
    # A missing key, a None value, or a value time.mktime() rejects.
    unusable_time = (KeyError, TypeError, ValueError, OverflowError)

    latest_post = None
    latest_post_time = 0
    bad_time = False
    self.logger.debug("About to sort through entries for feed %s ...", hook["name"])
    for entry in feeds["entries"]:
        try:
            published_time = time.mktime(entry["published_parsed"])
        except unusable_time:
            try:
                published_time = time.mktime(entry["updated_parsed"])
            except unusable_time:
                # No usable timestamp at all: skip this entry instead of
                # failing the whole feed.
                continue
            bad_time = True
        else:
            published_time += offset
        if published_time > latest_post_time:
            latest_post = entry
            latest_post_time = published_time

    if latest_post is None:
        self.logger.warning("Feed %s had no entries to process", hook["name"])
        return None
    if bad_time:
        self.logger.debug(
            "Feed %s doesn't supply a published time, using updated time instead",
            hook["name"],
        )

    # Hash the title and time of the latest post and use that to determine if it's been posted
    # Yes, SHA3-512 is totally unnecessary for this purpose, but I love SHA3
    try:
        # The debug line sits inside the try so a missing title is reported
        # the same way as an unhashable one.
        self.logger.debug("About to hash %s ...", latest_post["title"])
        new_hash = hashlib.sha3_512(
            bytes(latest_post["title"] + str(latest_post_time), "utf-8")
        ).hexdigest()
    except (KeyError, TypeError):
        self.logger.error("Title of %s isn't hashing correctly", hook["name"])
        return None

    if hook.get("lasthash") == new_hash:
        # Newest entry was already posted on an earlier run.
        return None

    # Generate the webhook
    self.logger.info(
        "Publishing webhook for %s. Last check was %d, self.now is %d",
        hook["name"],
        last_check,
        self.now,
    )
    webhook = {
        "embeds": [
            {
                "title": str(latest_post["title"]),
                "url": str(latest_post["link"]),
                "color": 2123412,
                "footer": {
                    "text": "DiscoRSS",
                    "icon_url": "https://frzn.dev/~amr/images/discorss.png",
                },
                "author": {
                    "name": str(hook["name"]),
                    "url": str(hook["siteurl"]),
                },
                "fields": [
                    {
                        "name": "Excerpt from post:",
                        "value": self.get_description(latest_post),
                    }
                ],
            }
        ],
        "attachments": [],
    }
    custom_header = {
        "user-agent": "DiscoRSS (https://git.frzn.dev/amr/discorss, 0.2)",
        "content-type": "application/json",
    }
    webhook_string = json.dumps(webhook)
    self.logger.debug("About to run POST for %s", hook["name"])
    response = await self._post_webhook(hook, webhook_string, custom_header)
    if response.status_code not in self.success_codes:
        self.logger.error(
            "Error %d while trying to post %s", response.status_code, hook["name"]
        )
        # Returning None leaves the stored hash untouched.
        return None
    self.logger.debug("Got %d when posting %s", response.status_code, hook["name"])
    return new_hash
async def _process_feeds(self, last_check):
tasks = [
asyncio.create_task(
asyncio.wait_for(
self._process_feed(hook, last_check),
timeout=self.FEED_TIMEOUT_SECONDS,
)
)
for hook in self.app_config["feeds"]
]
results = await asyncio.gather(*tasks, return_exceptions=True)
for i, result in enumerate(results):
hook = self.app_config["feeds"][i]
if isinstance(result, asyncio.TimeoutError):
self.logger.error(
"Timed out processing feed %s after %d seconds",
hook["name"],
self.FEED_TIMEOUT_SECONDS,
)
continue
if isinstance(result, requests.RequestException):
self.logger.error(
"Network error while processing feed %s: %s",
hook["name"],
result,
)
continue
if isinstance(result, Exception):
self.logger.error(
"Unhandled error while processing feed %s: %s",
hook["name"],
result,
)
continue
if result is None:
continue
if "lasthash" not in hook:
self.logger.info(
"Feed %s has no existing hash, likely a new feed!", hook["name"]
)
self.app_config["feeds"][i]["lasthash"] = result
# This function gets and formats the brief excerpt that goes in the embed # This function gets and formats the brief excerpt that goes in the embed
# Different feeds put summaries in different fields, so we pick the best # Different feeds put summaries in different fields, so we pick the best
# one and limit it to 250 characters. # one and limit it to 250 characters.
@ -66,12 +222,13 @@ class Discorss:
desc = desc + str(addons) desc = desc + str(addons)
return desc return desc
# Some of this could go in __init__
def setup(self): def setup(self):
os.environ["TZ"] = "America/Toronto" os.environ["TZ"] = "America/Toronto"
time.tzset() time.tzset()
self.now = time.mktime(time.localtime()) self.now = time.mktime(time.localtime())
# Check for log and config files/paths, create empty directories if needed # Check for log and config files/paths, create empty directories if needed
# TODO: make this cleaner # TODO: change output to log file, as warning/error
if not Path(self.log_dir).exists(): if not Path(self.log_dir).exists():
print( print(
"No log file path exists. Yark! We'll try and make {}...".format( "No log file path exists. Yark! We'll try and make {}...".format(
@ -124,102 +281,7 @@ class Discorss:
last_check = ( last_check = (
self.now - 21600 self.now - 21600
) # first run, no lastupdate, check up to 6 hours ago ) # first run, no lastupdate, check up to 6 hours ago
for i, hook in enumerate(self.app_config["feeds"]): # Feed loop start asyncio.run(self._process_feeds(last_check))
self.logger.debug("Parsing feed %s...", hook["name"])
self.feeds = feedparser.parse(hook["url"])
self.latest_post = []
prev_best = 0
self.logger.debug(
"About to sort through entries for feed %s ...", hook["name"]
)
for feed in self.feeds["entries"]:
try:
bad_time = False
published_time = time.mktime(feed["published_parsed"])
published_time = published_time + hook["offset"]
except KeyError:
published_time = time.mktime(feed["updated_parsed"])
bad_time = True
if published_time > prev_best:
latest_post = feed
prev_best = published_time
else:
continue
if bad_time is True:
self.logger.debug(
"Feed %s doesn't supply a published time, using updated time instead",
hook["name"],
)
# Hash the title and time of the latest post and use that to determine if it's been posted
# Yes, SHA3-512 is totally unnecessary for this purpose, but I love SHA3
self.logger.debug("About to hash %s ...", latest_post["title"])
try:
new_hash = hashlib.sha3_512(
bytes(latest_post["title"] + str(published_time), "utf-8")
).hexdigest()
except TypeError:
self.logger.error("Title of %s isn't hashing correctly", hook["name"])
continue
try:
if hook["lasthash"] != new_hash:
self.app_config["feeds"][i]["lasthash"] = new_hash
else:
continue
except KeyError:
self.app_config["feeds"][i]["lasthash"] = new_hash
self.logger.info(
"Feed %s has no existing hash, likely a new feed!", hook["name"]
)
# Generate the webhook
self.logger.info(
"Publishing webhook for %s. Last check was %d, self.now is %d",
hook["name"],
last_check,
self.now,
)
webhook = {
"embeds": [
{
"title": str(latest_post["title"]),
"url": str(latest_post["link"]),
"color": 2123412,
"footer": {
"text": "DiscoRSS",
"icon_url": "https://frzn.dev/~amr/images/discorss.png",
},
"author": {
"name": str(hook["name"]),
"url": str(hook["siteurl"]),
},
"fields": [
{
"name": "Excerpt from post:",
"value": self.get_description(latest_post),
}
],
# "timestamp": str(self.now),
}
],
"attachments": [],
}
custom_header = {
"user-agent": "DiscoRSS (https://git.frzn.dev/amr/discorss, 0.2)",
"content-type": "application/json",
}
webhook_string = json.dumps(webhook)
self.logger.debug("About to run POST for %s", hook["name"])
r = requests.post(
hook["webhook"], data=webhook_string, headers=custom_header
)
if r.status_code not in self.success_codes:
self.logger.error(
"Error %d while trying to post %s", r.status_code, hook["name"]
)
else:
self.logger.debug("Got %d when posting %s", r.status_code, hook["name"])
# End of feed loop
# Dump updated config back to json file # Dump updated config back to json file
self.logger.debug("Dumping config back to %s", str(self.config_file_path)) self.logger.debug("Dumping config back to %s", str(self.config_file_path))