Compare commits

...

7 commits

Author SHA1 Message Date
ce71ef1e81
chore: Changed warning text, and some logging values 2025-03-16 02:20:10 -04:00
b72f1d7291
docs: add syntax highlighting to README.md 2025-03-14 10:50:45 +00:00
457e2c3315
feat: rewrote get_description to allow extra text
Now if you want to add something to the end of the description,
you can pass it via the addons parameter. Also moved a line
that was duplicated to reduce the function length.
2025-03-06 20:11:51 -05:00
9a5c4616e3
Finished description cutoff detection.
Added min_length parameter, as well as an addons parameter that
might be used in the future to add extra text to the description
where needed.

Next up will be checking for media in the entry and adding a
second embed field or attachment so the media can be previewed
or listened to/watched right in the Discord post.
2025-03-04 16:36:01 -05:00
8129da759f
Trying to make description cutoff smarter. Changed hashing.
Hashing now takes the sha3_512 hash of both the title and
the published time, because some feeds (like weather alerts)
will have the same title all the time, for every entry.

The description cutoff now goes backwards until it finds a
space character, then it sets the cutoff there. Also, the
length can now be passed as a parameter, with default value
of 250. Might add minimum length as a parameter too.
2025-03-04 16:27:06 -05:00
8ff64608cd
Fixed HTML regex filter. Edited some logging levels. See full msg.
The HTML regex wasn't working because I was missing some
really obvious capture groups. The regex filter is really only
for the kernel.org Releases Feed, just to make it look a bit
cleaner. We don't actually need the direct links because the
post's title already links directly to the front page. We mostly
just want to know there's been a release.

Some logging parameters were changed to make the log less
cluttered by default. Going to write a logrotate config that
will be included in the README or something to help people with
rotating the logs automatically. Also added a few small
clarifying comments, as well as an IDEA!
2025-03-03 07:41:41 -05:00
1c78edd38e
Fixed status code checking 2025-02-26 20:12:29 -05:00
2 changed files with 38 additions and 21 deletions

View file

@@ -23,7 +23,7 @@ Logging was recently enabled. Make sure that the user running the script (especi
To configure the script, create ~/.config/discorss/discorss.conf with the following structure: To configure the script, create ~/.config/discorss/discorss.conf with the following structure:
``` ```json
{ {
"feeds": [ "feeds": [
{ {
@@ -54,7 +54,7 @@ To automate feed posting, create a systemd service and timer to execute the scri
Use the command `systemctl --user edit --full --force discorss.service` and then paste in something like this: Use the command `systemctl --user edit --full --force discorss.service` and then paste in something like this:
``` ```systemd
[Unit] [Unit]
Description=Discord RSS feeder Description=Discord RSS feeder
Wants=discorss.timer Wants=discorss.timer
@@ -68,7 +68,7 @@ WantedBy=default.target
``` ```
Make sure to edit the ExecStart to point to the correct location. Then we need a systemd timer to automatically fire the script. Run `systemctl --user edit --full --force discorss.timer` and then paste in this: Make sure to edit the ExecStart to point to the correct location. Then we need a systemd timer to automatically fire the script. Run `systemctl --user edit --full --force discorss.timer` and then paste in this:
``` ```systemd
[Unit] [Unit]
Description=Timer for DiscoRSS Description=Timer for DiscoRSS
Requires=discorss.service Requires=discorss.service

View file

@@ -32,28 +32,39 @@ log_dir = r"/var/log/discorss"
log_file_path = r"/app.log" log_file_path = r"/app.log"
# Yes, I know you "can't parse HTML with regex", but # Yes, I know you "can't parse HTML with regex", but
# just watch me. # just watch me.
html_filter = re.compile(r"\<\/?([A-Za-z \:\.\/\"\=])*\>") html_filter = re.compile(r"\<\/?([A-Za-z0-9 \:\.\-\/\"\=])*\>")
success_codes = ["200", "201", "202", "203", "204", "205", "206"] success_codes = [200, 201, 202, 203, 204, 205, 206]
app_config = {} app_config = {}
# IDEA: Consider making this into a class-based program
# This would solve a couple issues around global variables and generally
# make things a bit neater
# This function gets and formats the brief excerpt that goes in the embed # This function gets and formats the brief excerpt that goes in the embed
# Different feeds put summaries in different fields, so we pick the best # Different feeds put summaries in different fields, so we pick the best
# one and limit it to 250 characters. # one and limit it to 250 characters.
# TODO: make the character limit smarter, as to split at a natural point def get_description(feed, length=250, min_length=150, addons=None):
def get_description(feed):
try: try:
temporary_string = str(feed["summary_detail"]["value"]) temporary_string = str(feed["summary_detail"]["value"])
temporary_string = html_filter.sub("", temporary_string) temporary_string = html_filter.sub("", temporary_string)
desc = ( while length > min_length:
temporary_string[:250] if len(temporary_string) > 250 else temporary_string if temporary_string[length - 1 : length] == " ":
) break
else:
length -= 1
except KeyError: except KeyError:
temporary_string = str(feed["description"]) temporary_string = str(feed["description"])
temporary_string = html_filter.sub("", temporary_string) temporary_string = html_filter.sub("", temporary_string)
desc = ( while length > min_length:
temporary_string[:250] if len(temporary_string) > 250 else temporary_string if temporary_string[length - 1 : length] == " ":
) break
else:
length -= 1
desc = temporary_string[:length]
if addons is not None:
desc = desc + str(addons)
return desc return desc
@@ -78,7 +89,7 @@ def setupPaths():
Path(config_dir).mkdir(parents=True, exist_ok=True) Path(config_dir).mkdir(parents=True, exist_ok=True)
except FileExistsError: except FileExistsError:
print( print(
"The config dir {} already exists and is not a directory! Please fix manually.".format( "The config dir {} already exists and is not a directory! Please fix manually. Quitting!".format(
config_dir config_dir
) )
) )
@@ -108,9 +119,8 @@ def main():
last_check = app_config["lastupdate"] last_check = app_config["lastupdate"]
except KeyError: except KeyError:
last_check = now - 21600 # first run, no lastupdate, check up to 6 hours ago last_check = now - 21600 # first run, no lastupdate, check up to 6 hours ago
for i, hook in enumerate(app_config["feeds"]): for i, hook in enumerate(app_config["feeds"]): # Feed loop start
# Get the feed logger.debug("Parsing feed %s...", hook["name"])
logger.info("Parsing feed %s...", hook["name"])
feeds = feedparser.parse(hook["url"]) feeds = feedparser.parse(hook["url"])
latest_post = [] latest_post = []
prev_best = 0 prev_best = 0
@@ -128,12 +138,15 @@ def main():
else: else:
continue continue
if bad_time is True: if bad_time is True:
logger.warning( logger.debug(
"Feed %s doesn't supply a published time, using updated time instead", "Feed %s doesn't supply a published time, using updated time instead",
hook["name"], hook["name"],
) )
# Hash the title of the latest post and use that to determine if it's been posted # Hash the title and time of the latest post and use that to determine if it's been posted
new_hash = hashlib.sha3_512(bytes(latest_post["title"], "utf-8")).hexdigest() # Yes, SHA3-512 is totally unnecessary for this purpose, but I love SHA3
new_hash = hashlib.sha3_512(
bytes(latest_post["title"] + str(published_time), "utf-8")
).hexdigest()
try: try:
if hook["lasthash"] != new_hash: if hook["lasthash"] != new_hash:
app_config["feeds"][i]["lasthash"] = new_hash app_config["feeds"][i]["lasthash"] = new_hash
@@ -184,10 +197,14 @@ def main():
r = requests.post(hook["webhook"], data=webhook_string, headers=custom_header) r = requests.post(hook["webhook"], data=webhook_string, headers=custom_header)
if r.status_code not in success_codes: if r.status_code not in success_codes:
logger.error( logger.error(
"Error %d while trying to post %s", r.status_code, hook["webhook"] "Error %d while trying to post %s", r.status_code, hook["name"]
) )
else:
logger.debug("Got %d when posting %s", r.status_code, hook["name"])
# End of feed loop # End of feed loop
# Dump updated config back to json file
app_config["lastupdate"] = now app_config["lastupdate"] = now
with open(config_file_path, "w") as config_file: with open(config_file_path, "w") as config_file:
json.dump(app_config, config_file, indent=4) json.dump(app_config, config_file, indent=4)