Compare commits

...

12 commits

3 changed files with 74 additions and 20 deletions

7
.gitignore vendored Normal file
View file

@ -0,0 +1,7 @@
*.conf
*.txt
log/
*.bak
bin/
lib/
*.cfg

View file

@ -1,5 +1,7 @@
# DiscoRSS # DiscoRSS
![DiscoRSS Logo](https://frzn.dev/~amr/images/discorss.png)
## What is it? ## What is it?
DiscoRSS is a simple Python script to send RSS feeds to Discord webhooks. It was created because existing bots that did this set limits on the number of feeds, and self-hosting stuff is easier and better anyway. To get this working, you will require the following Python libraries: DiscoRSS is a simple Python script to send RSS feeds to Discord webhooks. It was created because existing bots that did this set limits on the number of feeds, and self-hosting stuff is easier and better anyway. To get this working, you will require the following Python libraries:

View file

@ -12,29 +12,48 @@
import requests import requests
import feedparser import feedparser
import hashlib
from pathlib import Path from pathlib import Path
import json import json
import datetime
import time import time
import os import os
import re
config_file_path = r"/etc/discorss.conf" config_dir = os.environ.get('XDG_CONFIG_HOME')
# config_file_path = r"discorss.conf" if config_dir is None:
# log_file_path = r"/var/log/discorss" config_file_path = r"~/.config/discorss/discorss.conf"
log_file_path = r"./log" config_dir = r"~/.config/discorss"
else:
config_file_path = config_dir + r"/discorss/discorss.conf"
log_file_path = r"/var/log/discorss"
# log_file_path = r"./log"
log_file_name = r"/app.log" log_file_name = r"/app.log"
# Yes, I know you "can't parse HTML with regex", but
# just watch me.
html_filter = re.compile(r"\<\/?([A-Za-z \"\=])*\>")
success_codes = ["200", "201", "202", "203", "204", "205", "206"]
# This function gets and formats the brief excerpt that goes in the embed
# Different feeds put summaries in different fields, so we pick the best
# one and limit it to 150 characters.
# TODO: make the character limit smarter, so that it splits at a natural point
def get_description(feed): def get_description(feed):
try: try:
temporary_string = str(feed.entries[0]["summary_detail"]["value"]) temporary_string = str(feed.entries[0]["summary_detail"]["value"])
temporary_string = html_filter.sub("", temporary_string)
desc = ( desc = (
temporary_string[:150] if len(temporary_string) > 150 else temporary_string temporary_string[:150]
if len(temporary_string) > 150
else temporary_string
) )
except KeyError: except KeyError:
temporary_string = str(feed.entries[0]["description"]) temporary_string = str(feed.entries[0]["description"])
temporary_string = html_filter.sub("", temporary_string)
desc = ( desc = (
temporary_string[:150] if len(temporary_string) > 150 else temporary_string temporary_string[:150]
if len(temporary_string) > 150
else temporary_string
) )
return desc return desc
@ -42,39 +61,59 @@ def get_description(feed):
def main(): def main():
os.environ["TZ"] = "America/Toronto" os.environ["TZ"] = "America/Toronto"
time.tzset() time.tzset()
# Check for log and config files/paths, create empty directories if needed
try: try:
Path(log_file_path).mkdir(parents=True, exist_ok=True) Path(log_file_path).mkdir(parents=True, exist_ok=True)
except FileExistsError: except FileExistsError:
print("This path already exists and is not a directory!") print("The logfile path {} already exists and is not a directory!".format(log_file_path))
# Load and read the config file
if not Path(config_file_path).exists(): if not Path(config_file_path).exists():
print("No config file! Snarf. Directories were created for you.") print("No config file at {}! Snarf.\n{} was created for you.".format(config_file_path, config_dir))
Path(config_file_path).mkdir(parents=True, exist_ok=True)
return return
with open(config_file_path, "r") as config_file: with open(config_file_path, "r") as config_file:
app_config = json.load(config_file) app_config = json.load(config_file)
now = time.mktime(time.localtime()) now = time.mktime(time.localtime())
last_check = app_config["lastupdate"] try:
for hook in app_config["feeds"]: last_check = app_config["lastupdate"]
except KeyError:
last_check = (
now - 21600
) # first run, no lastupdate, check up to 6 hours ago
for i, hook in enumerate(app_config["feeds"]):
# Get the feed # Get the feed
feed = feedparser.parse(hook["url"]) feed = feedparser.parse(hook["url"])
published_time = time.mktime(feed.entries[0]["published_parsed"]) published_time = time.mktime(feed.entries[0]["published_parsed"])
published_time = published_time + hook["offset"] published_time = published_time + hook["offset"]
print(feed.entries[0]["published"], published_time, now) print("Parsing feed {}...".format(hook["name"]))
# Hash the title of the latest post and use that to determine if it's been posted
new_hash = hashlib.sha3_512(
bytes(feed.entries[0]["title"], "utf-8")
).hexdigest()
try:
if hook["lasthash"] != new_hash:
app_config["feeds"][i]["lasthash"] = new_hash
else:
continue
except KeyError:
app_config["feeds"][i]["lasthash"] = new_hash
# Generate the webhook # Generate the webhook
webhook = { webhook = {
"embeds": [ "embeds": [
{ {
"title": str(feed.entries[0]["title"]), "title": str(feed.entries[0]["title"]),
"url": str(feed.entries[0]["link"]), "url": str(feed.entries[0]["link"]),
"color": 5814783, "color": 216128,
"provider": { "footer": {
"name": "DiscoRSS", "name": "DiscoRSS",
"url": "https://git.frzn.dev/amr/discorss", # "url": "https://git.frzn.dev/amr/discorss",
},
"author": {
"name": str(hook["name"]),
"url": str(hook["siteurl"]),
}, },
"author": {"name": str(hook["name"]), "url": str(hook["siteurl"])},
"fields": [ "fields": [
{ {
"name": str(feed.entries[0]["title"]), "name": "Excerpt from post:",
"value": get_description(feed), "value": get_description(feed),
} }
], ],
@ -87,11 +126,17 @@ def main():
"content-type": "application/json", "content-type": "application/json",
} }
webhook_string = json.dumps(webhook) webhook_string = json.dumps(webhook)
# print(webhook_string)
if published_time > last_check and published_time < now: if published_time > last_check:
r = requests.post( r = requests.post(
hook["webhook"], data=webhook_string, headers=custom_header hook["webhook"], data=webhook_string, headers=custom_header
) )
if r.status_code not in success_codes:
print(
"Error {} while trying to post {}".format(
r.status_code, hook["webhook"]
)
)
app_config["lastupdate"] = now app_config["lastupdate"] = now
with open(config_file_path, "w") as config_file: with open(config_file_path, "w") as config_file:
json.dump(app_config, config_file, indent=4) json.dump(app_config, config_file, indent=4)