Compare commits

...

7 commits

Author SHA1 Message Date
ce71ef1e81
chore: Changed warning text, and some logging values 2025-03-16 02:20:10 -04:00
b72f1d7291
docs: add syntax highlighting to README.md 2025-03-14 10:50:45 +00:00
457e2c3315
feat: rewrote get_description to allow extra text
Now if you want to add something to the end of the description,
you can pass it via the addons parameter. Also moved a line
that was duplicated to reduce the function length.
2025-03-06 20:11:51 -05:00
9a5c4616e3
Finished description cutoff detection.
Added min_length parameter, as well as an addon parameter that
might be used in the future to add extra text to the description
where needed.

Next up will be checking for media in the entry and adding a
second embed field or attachment so the media can be previewed
or listened to/watched right in the Discord post.
2025-03-04 16:36:01 -05:00
8129da759f
Trying to make description cutoff smarter. Changed hashing.
Hashing now takes the sha3_512 hash of both the title and
the published time, because some feeds (like weather alerts)
will have the same title all the time, for every entry.

The description cutoff now goes backwards until it finds a
space character, then it sets the cutoff there. Also, the
length can now be passed as a parameter, with default value
of 250. Might also add minimum length as a parameter too.
2025-03-04 16:27:06 -05:00
8ff64608cd
Fixed html regex filter. Edited some logging types. See full msg.
The HTML regex wasn't working because I was missing some
really obvious capture groups. The regex filter is really only
for the kernel.org Releases Feed, just to make it look a bit
cleaner. We don't actually need the direct links because the
post's title already links directly to the front page. We mostly
just want to know there's been a release.

Some logging parameters were changed to make the log less
cluttered by default. Going to write a logrotate config that
will be included in the README or something to help people with
rotating the logs automatically. Also added a few small
clarifying comments, as well as an IDEA!
2025-03-03 07:41:41 -05:00
1c78edd38e
Fixed status code checking 2025-02-26 20:12:29 -05:00
2 changed files with 38 additions and 21 deletions

View file

@ -23,7 +23,7 @@ Logging was recently enabled. Make sure that the user running the script (especi
To configure the script, create ~/.config/discorss/discorss.conf with the following structure:
```
```json
{
"feeds": [
{
@ -54,7 +54,7 @@ To automate feed posting, create a systemd service and timer to execute the scri
Use the command `systemctl --user edit --full --force discorss.service` and then paste in something like this:
```
```systemd
[Unit]
Description=Discord RSS feeder
Wants=discorss.timer
@ -68,7 +68,7 @@ WantedBy=default.target
```
Make sure to edit the ExecStart to point to the correct location. Then we need a systemd timer to automatically fire the script. Run `systemctl --user edit --full --force discorss.timer` and then paste in this:
```
```systemd
[Unit]
Description=Timer for DiscoRSS
Requires=discorss.service

View file

@ -32,28 +32,39 @@ log_dir = r"/var/log/discorss"
log_file_path = r"/app.log"
# Yes, I know you "can't parse HTML with regex", but
# just watch me.
html_filter = re.compile(r"\<\/?([A-Za-z \:\.\/\"\=])*\>")
success_codes = ["200", "201", "202", "203", "204", "205", "206"]
html_filter = re.compile(r"\<\/?([A-Za-z0-9 \:\.\-\/\"\=])*\>")
success_codes = [200, 201, 202, 203, 204, 205, 206]
app_config = {}
# IDEA: Consider making this into a class-based program
# This would solve a couple issues around global variables and generally
# make things a bit neater
def get_description(feed, length=250, min_length=150, addons=None):
    """Return a short plain-text excerpt of a feed entry for the embed.

    Different feeds put their summary in different fields, so the text is
    read from ``feed["summary_detail"]["value"]`` when that key exists and
    from ``feed["description"]`` otherwise. The module-level ``html_filter``
    regex is then applied to strip HTML tags.

    Args:
        feed: Mapping-like feed entry (indexed by string keys).
        length: Maximum number of characters to keep from the cleaned text.
        min_length: Lower bound for the cutoff when searching backwards
            for a space to cut at.
        addons: Optional extra value appended (via ``str``) to the end of
            the excerpt; it does not count toward ``length``.

    Returns:
        The excerpt string, followed by ``str(addons)`` if ``addons`` is
        not None.

    Raises:
        KeyError: If neither the summary nor the description key exists.
    """
    try:
        raw_text = str(feed["summary_detail"]["value"])
    except KeyError:
        raw_text = str(feed["description"])
    cleaned = html_filter.sub("", raw_text)

    # Only look for a cut point when the text is actually too long.
    # Walking backwards unconditionally would chop the last word(s) off a
    # description that already fits within ``length``.
    if len(cleaned) > length:
        # Move the cutoff back until it lands right after a space, but
        # never go below min_length; if no space is found in that window,
        # the text is cut at min_length.
        while length > min_length and cleaned[length - 1 : length] != " ":
            length -= 1

    desc = cleaned[:length]
    if addons is not None:
        desc = desc + str(addons)
    return desc
@ -78,7 +89,7 @@ def setupPaths():
Path(config_dir).mkdir(parents=True, exist_ok=True)
except FileExistsError:
print(
"The config dir {} already exists and is not a directory! Please fix manually.".format(
"The config dir {} already exists and is not a directory! Please fix manually. Quitting!".format(
config_dir
)
)
@ -108,9 +119,8 @@ def main():
last_check = app_config["lastupdate"]
except KeyError:
last_check = now - 21600 # first run, no lastupdate, check up to 6 hours ago
for i, hook in enumerate(app_config["feeds"]):
# Get the feed
logger.info("Parsing feed %s...", hook["name"])
for i, hook in enumerate(app_config["feeds"]): # Feed loop start
logger.debug("Parsing feed %s...", hook["name"])
feeds = feedparser.parse(hook["url"])
latest_post = []
prev_best = 0
@ -128,12 +138,15 @@ def main():
else:
continue
if bad_time is True:
logger.warning(
logger.debug(
"Feed %s doesn't supply a published time, using updated time instead",
hook["name"],
)
# Hash the title of the latest post and use that to determine if it's been posted
new_hash = hashlib.sha3_512(bytes(latest_post["title"], "utf-8")).hexdigest()
# Hash the title and time of the latest post and use that to determine if it's been posted
# Yes, SHA3-512 is totally unnecessary for this purpose, but I love SHA3
new_hash = hashlib.sha3_512(
bytes(latest_post["title"] + str(published_time), "utf-8")
).hexdigest()
try:
if hook["lasthash"] != new_hash:
app_config["feeds"][i]["lasthash"] = new_hash
@ -184,10 +197,14 @@ def main():
r = requests.post(hook["webhook"], data=webhook_string, headers=custom_header)
if r.status_code not in success_codes:
logger.error(
"Error %d while trying to post %s", r.status_code, hook["webhook"]
"Error %d while trying to post %s", r.status_code, hook["name"]
)
else:
logger.debug("Got %d when posting %s", r.status_code, hook["name"])
# End of feed loop
# Dump updated config back to json file
app_config["lastupdate"] = now
with open(config_file_path, "w") as config_file:
json.dump(app_config, config_file, indent=4)