Compare commits

..

No commits in common. "main" and "v0.2rc1" have entirely different histories.

3 changed files with 20 additions and 74 deletions

7
.gitignore vendored
View file

@@ -1,7 +0,0 @@
*.conf
*.txt
log/
*.bak
bin/
lib/
*.cfg

View file

@@ -1,7 +1,5 @@
# DiscoRSS # DiscoRSS
![DiscoRSS Logo](https://frzn.dev/~amr/images/discorss.png)
## What is it? ## What is it?
DiscoRSS is a simple Python script to send RSS feeds to Discord webhooks. It was created because existing bots that did this set limits on the number of feeds, and self-hosting stuff is easier and better anyway. To get this working, you will require the following Python libraries: DiscoRSS is a simple Python script to send RSS feeds to Discord webhooks. It was created because existing bots that did this set limits on the number of feeds, and self-hosting stuff is easier and better anyway. To get this working, you will require the following Python libraries:

View file

@@ -12,48 +12,29 @@
import requests import requests
import feedparser import feedparser
import hashlib
from pathlib import Path from pathlib import Path
import json import json
import datetime
import time import time
import os import os
import re
config_dir = os.environ.get('XDG_CONFIG_HOME') config_file_path = r"/etc/discorss.conf"
if config_dir is None: # config_file_path = r"discorss.conf"
config_file_path = r"~/.config/discorss/discorss.conf" # log_file_path = r"/var/log/discorss"
config_dir = r"~/.config/discorss" log_file_path = r"./log"
else:
config_file_path = config_dir + r"/discorss/discorss.conf"
log_file_path = r"/var/log/discorss"
# log_file_path = r"./log"
log_file_name = r"/app.log" log_file_name = r"/app.log"
# Yes, I know you "can't parse HTML with regex", but
# just watch me.
html_filter = re.compile(r"\<\/?([A-Za-z \"\=])*\>")
success_codes = ["200", "201", "202", "203", "204", "205", "206"]
# This function gets and formats the brief excerpt that goes in the embed
# Different feeds put summaries in different fields, so we pick the best
# one and limit it to 150 characters.
# TODO: make the character limit smarter, as to split at a natural point
def get_description(feed): def get_description(feed):
try: try:
temporary_string = str(feed.entries[0]["summary_detail"]["value"]) temporary_string = str(feed.entries[0]["summary_detail"]["value"])
temporary_string = html_filter.sub("", temporary_string)
desc = ( desc = (
temporary_string[:150] temporary_string[:150] if len(temporary_string) > 150 else temporary_string
if len(temporary_string) > 150
else temporary_string
) )
except KeyError: except KeyError:
temporary_string = str(feed.entries[0]["description"]) temporary_string = str(feed.entries[0]["description"])
temporary_string = html_filter.sub("", temporary_string)
desc = ( desc = (
temporary_string[:150] temporary_string[:150] if len(temporary_string) > 150 else temporary_string
if len(temporary_string) > 150
else temporary_string
) )
return desc return desc
@@ -61,59 +42,39 @@ def get_description(feed):
def main(): def main():
os.environ["TZ"] = "America/Toronto" os.environ["TZ"] = "America/Toronto"
time.tzset() time.tzset()
# Check for log and config files/paths, create empty directories if needed
try: try:
Path(log_file_path).mkdir(parents=True, exist_ok=True) Path(log_file_path).mkdir(parents=True, exist_ok=True)
except FileExistsError: except FileExistsError:
print("The logfile path {} already exists and is not a directory!".format(log_file_path)) print("This path already exists and is not a directory!")
# Load and read the config file
if not Path(config_file_path).exists(): if not Path(config_file_path).exists():
print("No config file at {}! Snarf.\n{} was created for you.".format(config_file_path, config_dir)) print("No config file! Snarf. Directories were created for you.")
Path(config_file_path).mkdir(parents=True, exist_ok=True)
return return
with open(config_file_path, "r") as config_file: with open(config_file_path, "r") as config_file:
app_config = json.load(config_file) app_config = json.load(config_file)
now = time.mktime(time.localtime()) now = time.mktime(time.localtime())
try:
last_check = app_config["lastupdate"] last_check = app_config["lastupdate"]
except KeyError: for hook in app_config["feeds"]:
last_check = (
now - 21600
) # first run, no lastupdate, check up to 6 hours ago
for i, hook in enumerate(app_config["feeds"]):
# Get the feed # Get the feed
feed = feedparser.parse(hook["url"]) feed = feedparser.parse(hook["url"])
published_time = time.mktime(feed.entries[0]["published_parsed"]) published_time = time.mktime(feed.entries[0]["published_parsed"])
published_time = published_time + hook["offset"] published_time = published_time + hook["offset"]
print("Parsing feed {}...".format(hook["name"])) print(feed.entries[0]["published"], published_time, now)
# Hash the title of the latest post and use that to determine if it's been posted
new_hash = hashlib.sha3_512(
bytes(feed.entries[0]["title"], "utf-8")
).hexdigest()
try:
if hook["lasthash"] != new_hash:
app_config["feeds"][i]["lasthash"] = new_hash
else:
continue
except KeyError:
app_config["feeds"][i]["lasthash"] = new_hash
# Generate the webhook # Generate the webhook
webhook = { webhook = {
"embeds": [ "embeds": [
{ {
"title": str(feed.entries[0]["title"]), "title": str(feed.entries[0]["title"]),
"url": str(feed.entries[0]["link"]), "url": str(feed.entries[0]["link"]),
"color": 216128, "color": 5814783,
"footer": { "provider": {
"name": "DiscoRSS", "name": "DiscoRSS",
# "url": "https://git.frzn.dev/amr/discorss", "url": "https://git.frzn.dev/amr/discorss",
},
"author": {
"name": str(hook["name"]),
"url": str(hook["siteurl"]),
}, },
"author": {"name": str(hook["name"]), "url": str(hook["siteurl"])},
"fields": [ "fields": [
{ {
"name": "Excerpt from post:", "name": str(feed.entries[0]["title"]),
"value": get_description(feed), "value": get_description(feed),
} }
], ],
@@ -126,17 +87,11 @@ def main():
"content-type": "application/json", "content-type": "application/json",
} }
webhook_string = json.dumps(webhook) webhook_string = json.dumps(webhook)
# print(webhook_string)
if published_time > last_check: if published_time > last_check and published_time < now:
r = requests.post( r = requests.post(
hook["webhook"], data=webhook_string, headers=custom_header hook["webhook"], data=webhook_string, headers=custom_header
) )
if r.status_code not in success_codes:
print(
"Error {} while trying to post {}".format(
r.status_code, hook["webhook"]
)
)
app_config["lastupdate"] = now app_config["lastupdate"] = now
with open(config_file_path, "w") as config_file: with open(config_file_path, "w") as config_file:
json.dump(app_config, config_file, indent=4) json.dump(app_config, config_file, indent=4)