async: complete rewrite as async code

This commit is contained in:
A.M. Rowsell 2026-04-12 14:37:39 -04:00
commit c385b3266c
Signed by: amr
GPG key ID: E0879EDBDB0CA7B1

View file

@ -14,6 +14,7 @@ import requests
import feedparser
import hashlib
import logging
import asyncio
from pathlib import Path
import json
import time
@ -24,6 +25,8 @@ import re
class Discorss:
    FEED_TIMEOUT_SECONDS = 15

    def __init__(self):
        self.config_dir = os.environ.get("XDG_CONFIG_HOME")
        home_dir = Path.home()
@ -40,6 +43,159 @@ class Discorss:
        self.success_codes = [200, 201, 202, 203, 204, 205, 206]
        self.app_config = {}
async def _fetch_feed(self, hook):
response = await asyncio.to_thread(
requests.get,
hook["url"],
headers={"user-agent": "DiscoRSS (https://git.frzn.dev/amr/discorss, 0.2)"},
timeout=self.FEED_TIMEOUT_SECONDS,
)
response.raise_for_status()
return await asyncio.to_thread(feedparser.parse, response.content)
async def _post_webhook(self, hook, webhook_string, custom_header):
return await asyncio.to_thread(
requests.post,
hook["webhook"],
data=webhook_string,
headers=custom_header,
timeout=self.FEED_TIMEOUT_SECONDS,
)
async def _process_feed(self, hook, last_check):
self.logger.debug("Parsing feed %s...", hook["name"])
feeds = await self._fetch_feed(hook)
latest_post = None
latest_post_time = None
prev_best = 0
bad_time = False
self.logger.debug("About to sort through entries for feed %s ...", hook["name"])
for feed in feeds["entries"]:
try:
published_time = time.mktime(feed["published_parsed"])
published_time = published_time + hook["offset"]
except KeyError:
published_time = time.mktime(feed["updated_parsed"])
bad_time = True
if published_time > prev_best:
latest_post = feed
latest_post_time = published_time
prev_best = published_time
if latest_post is None:
self.logger.warning("Feed %s had no entries to process", hook["name"])
return None
if bad_time is True:
self.logger.debug(
"Feed %s doesn't supply a published time, using updated time instead",
hook["name"],
)
# Hash the title and time of the latest post and use that to determine if it's been posted
# Yes, SHA3-512 is totally unnecessary for this purpose, but I love SHA3
self.logger.debug("About to hash %s ...", latest_post["title"])
try:
new_hash = hashlib.sha3_512(
bytes(latest_post["title"] + str(latest_post_time), "utf-8")
).hexdigest()
except TypeError:
self.logger.error("Title of %s isn't hashing correctly", hook["name"])
return None
if hook.get("lasthash") == new_hash:
return None
# Generate the webhook
self.logger.info(
"Publishing webhook for %s. Last check was %d, self.now is %d",
hook["name"],
last_check,
self.now,
)
webhook = {
"embeds": [
{
"title": str(latest_post["title"]),
"url": str(latest_post["link"]),
"color": 2123412,
"footer": {
"text": "DiscoRSS",
"icon_url": "https://frzn.dev/~amr/images/discorss.png",
},
"author": {
"name": str(hook["name"]),
"url": str(hook["siteurl"]),
},
"fields": [
{
"name": "Excerpt from post:",
"value": self.get_description(latest_post),
}
],
# "timestamp": str(self.now),
}
],
"attachments": [],
}
custom_header = {
"user-agent": "DiscoRSS (https://git.frzn.dev/amr/discorss, 0.2)",
"content-type": "application/json",
}
webhook_string = json.dumps(webhook)
self.logger.debug("About to run POST for %s", hook["name"])
response = await self._post_webhook(hook, webhook_string, custom_header)
if response.status_code not in self.success_codes:
self.logger.error(
"Error %d while trying to post %s", response.status_code, hook["name"]
)
return None
self.logger.debug("Got %d when posting %s", response.status_code, hook["name"])
return new_hash
async def _process_feeds(self, last_check):
tasks = [
asyncio.create_task(
asyncio.wait_for(
self._process_feed(hook, last_check),
timeout=self.FEED_TIMEOUT_SECONDS,
)
)
for hook in self.app_config["feeds"]
]
results = await asyncio.gather(*tasks, return_exceptions=True)
for i, result in enumerate(results):
hook = self.app_config["feeds"][i]
if isinstance(result, asyncio.TimeoutError):
self.logger.error(
"Timed out processing feed %s after %d seconds",
hook["name"],
self.FEED_TIMEOUT_SECONDS,
)
continue
if isinstance(result, requests.RequestException):
self.logger.error(
"Network error while processing feed %s: %s",
hook["name"],
result,
)
continue
if isinstance(result, Exception):
self.logger.error(
"Unhandled error while processing feed %s: %s",
hook["name"],
result,
)
continue
if result is None:
continue
if "lasthash" not in hook:
self.logger.info(
"Feed %s has no existing hash, likely a new feed!", hook["name"]
)
self.app_config["feeds"][i]["lasthash"] = result
# This function gets and formats the brief excerpt that goes in the embed
# Different feeds put summaries in different fields, so we pick the best
# one and limit it to 250 characters.
@ -125,102 +281,7 @@ class Discorss:
last_check = (
    self.now - 21600
)  # first run, no lastupdate, check up to 6 hours ago
for i, hook in enumerate(self.app_config["feeds"]): # Feed loop start asyncio.run(self._process_feeds(last_check))
self.logger.debug("Parsing feed %s...", hook["name"])
self.feeds = feedparser.parse(hook["url"])
self.latest_post = []
prev_best = 0
self.logger.debug(
"About to sort through entries for feed %s ...", hook["name"]
)
for feed in self.feeds["entries"]:
try:
bad_time = False
published_time = time.mktime(feed["published_parsed"])
published_time = published_time + hook["offset"]
except KeyError:
published_time = time.mktime(feed["updated_parsed"])
bad_time = True
if published_time > prev_best:
latest_post = feed
prev_best = published_time
else:
continue
if bad_time is True:
self.logger.debug(
"Feed %s doesn't supply a published time, using updated time instead",
hook["name"],
)
# Hash the title and time of the latest post and use that to determine if it's been posted
# Yes, SHA3-512 is totally unnecessary for this purpose, but I love SHA3
self.logger.debug("About to hash %s ...", latest_post["title"])
try:
new_hash = hashlib.sha3_512(
bytes(latest_post["title"] + str(published_time), "utf-8")
).hexdigest()
except TypeError:
self.logger.error("Title of %s isn't hashing correctly", hook["name"])
continue
try:
if hook["lasthash"] != new_hash:
self.app_config["feeds"][i]["lasthash"] = new_hash
else:
continue
except KeyError:
self.app_config["feeds"][i]["lasthash"] = new_hash
self.logger.info(
"Feed %s has no existing hash, likely a new feed!", hook["name"]
)
# Generate the webhook
self.logger.info(
"Publishing webhook for %s. Last check was %d, self.now is %d",
hook["name"],
last_check,
self.now,
)
webhook = {
"embeds": [
{
"title": str(latest_post["title"]),
"url": str(latest_post["link"]),
"color": 2123412,
"footer": {
"text": "DiscoRSS",
"icon_url": "https://frzn.dev/~amr/images/discorss.png",
},
"author": {
"name": str(hook["name"]),
"url": str(hook["siteurl"]),
},
"fields": [
{
"name": "Excerpt from post:",
"value": self.get_description(latest_post),
}
],
# "timestamp": str(self.now),
}
],
"attachments": [],
}
custom_header = {
"user-agent": "DiscoRSS (https://git.frzn.dev/amr/discorss, 0.2)",
"content-type": "application/json",
}
webhook_string = json.dumps(webhook)
self.logger.debug("About to run POST for %s", hook["name"])
r = requests.post(
hook["webhook"], data=webhook_string, headers=custom_header
)
if r.status_code not in self.success_codes:
self.logger.error(
"Error %d while trying to post %s", r.status_code, hook["name"]
)
else:
self.logger.debug("Got %d when posting %s", r.status_code, hook["name"])
# End of feed loop
# Dump updated config back to json file
self.logger.debug("Dumping config back to %s", str(self.config_file_path))