Compare commits: 3def57a933...dd6553a6f1

3 commits

Author | SHA1 | Date
---|---|---
 | dd6553a6f1 |
 | 0a22cfe4ee |
 | a263f5cb93 |

2 changed files with 51 additions and 24 deletions
@@ -13,6 +13,12 @@ feedparser
 
 The remaining imports should all be part of the standard Python install.
 
+## Important Notes
+
+As currently written, the script uses a hash of the post title to prevent sending duplicates. However, a check of the publish time was recently added, because some feeds are not in reverse chronological order (latest post at the top of the feed, i.e. entry index 0), so we do actually need to compare publish times. This still needs some testing and things might be a bit broken because of it; if you see any issues, please let me know.
+
+Logging was recently enabled. Make sure that the user running the script (especially when using systemd timers) has write access to the /var/log/discorss directory. The app will try to create the directory for you, but if your user doesn't have permission to create directories in /var/log this will fail and will probably crash the script as it stands. I will try to remember to catch that exception and exit gracefully with an error message to stdout. If you want the logs to go somewhere else, just edit the log_dir variable near the top of discorss.py and choose a directory that makes sense. Unfortunately, as far as I know, the XDG standards don't have an equivalent of /var/log for the user directory, so I wasn't sure what the best default was. In the future we may switch to logging through systemd and journald directly, though it is nice to have a separate file.
+
 ## How to setup
 
 To configure the script, create /etc/discorss.conf with the following structure:
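The example structure itself is cut off by the hunk above. As a rough sketch only, inferred from the keys discorss.py reads (hook["name"], hook["url"], hook["webhook"], hook["offset"]) rather than copied from the repository, a config could be generated like this; every value shown is a placeholder:

```python
import json

# Hypothetical example config; the field names are inferred from how
# discorss.py reads its config, and all values are placeholders.
example_config = {
    "feeds": [
        {
            "name": "Example Blog",
            "url": "https://example.com/feed.xml",
            "webhook": "https://discord.com/api/webhooks/<id>/<token>",
            "offset": 0,  # seconds added to each entry's publish time
        }
    ]
}

# Write the file locally, then copy it to /etc/discorss.conf as root.
with open("discorss.conf", "w") as config_file:
    json.dump(example_config, config_file, indent=4)
```

The script writes lasthash and lastupdate back into the same file on each run, so those keys don't need to be supplied up front.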
discorss.py (57)
@@ -13,6 +13,7 @@
 import requests
 import feedparser
 import hashlib
+import logging
 from pathlib import Path
 import json
 import time
@@ -42,13 +43,13 @@ app_config = {}
 # TODO: make the character limit smarter, as to split at a natural point
 def get_description(feed):
     try:
-        temporary_string = str(feed.entries[0]["summary_detail"]["value"])
+        temporary_string = str(feed["summary_detail"]["value"])
         temporary_string = html_filter.sub("", temporary_string)
         desc = (
             temporary_string[:250] if len(temporary_string) > 250 else temporary_string
         )
     except KeyError:
-        temporary_string = str(feed.entries[0]["description"])
+        temporary_string = str(feed["description"])
         temporary_string = html_filter.sub("", temporary_string)
         desc = (
             temporary_string[:250] if len(temporary_string) > 250 else temporary_string
@@ -58,10 +59,11 @@ def get_description(feed):
 
 def setupPaths():
     global app_config
+    global logger
     # Check for log and config files/paths, create empty directories if needed
     # TODO: make this cleaner
-    if not Path(log_file_path).exists():
-        print("No log file path exists. Yark! We'll try and make {}...", log_dir)
+    if not Path(log_dir).exists():
+        print("No log file path exists. Yark! We'll try and make {}...".format(log_dir))
         try:
             Path(log_dir).mkdir(parents=True, exist_ok=True)
         except FileExistsError:
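Related to the note above about the script crashing when it can't create /var/log/discorss: a minimal sketch of catching that failure and exiting with a message on stdout, assuming the same log_dir default; this handling is not part of this changeset:

```python
import sys
from pathlib import Path

log_dir = "/var/log/discorss"  # assumed default from the README

try:
    Path(log_dir).mkdir(parents=True, exist_ok=True)
except PermissionError:
    # The user running the script can't create directories under /var/log,
    # so report it and exit instead of crashing when logging opens its file.
    print("Cannot create {}: permission denied. Edit log_dir in discorss.py.".format(log_dir))
    sys.exit(1)
```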
@@ -85,6 +87,15 @@ def setupPaths():
     # Loading the config file
     with open(config_file_path, "r") as config_file:
         app_config = json.load(config_file)
+    # Set up logging
+    logger = logging.getLogger(__name__)
+    logging.basicConfig(
+        filename=str(log_dir + log_file_path),
+        encoding="utf-8",
+        level=logging.INFO,
+        datefmt="%m/%d/%Y %H:%M:%S",
+        format="%(asctime)s: %(levelname)s: %(message)s",
+    )
     return
 
 
@@ -99,24 +110,28 @@ def main():
         last_check = now - 21600  # first run, no lastupdate, check up to 6 hours ago
     for i, hook in enumerate(app_config["feeds"]):
         # Get the feed
-        print("Parsing feed {}...".format(hook["name"]))
+        logger.info("Parsing feed %s...", hook["name"])
         feeds = feedparser.parse(hook["url"])
         latest_post = []
        prev_best = 0
-        for feed in feeds:
+        for feed in feeds["entries"]:
             try:
+                bad_time = False
                 published_time = time.mktime(feed["published_parsed"])
                 published_time = published_time + hook["offset"]
             except KeyError:
-                published_time = feed["published"]
-                print(published_time)
-                sys.exit(254)
+                published_time = time.mktime(feed["updated_parsed"])
+                bad_time = True
             if published_time > prev_best:
                 latest_post = feed
                 prev_best = published_time
             else:
                 continue
+        if bad_time is True:
+            logger.warning(
+                "Feed %s doesn't supply a published time, using updated time instead",
+                hook["name"],
+            )
         # Hash the title of the latest post and use that to determine if it's been posted
         new_hash = hashlib.sha3_512(bytes(latest_post["title"], "utf-8")).hexdigest()
         try:
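The loop above implements the behaviour described in the Important Notes: pick the newest entry even when the feed isn't in reverse chronological order, then hash its title to decide whether it was already posted. A standalone sketch of that idea, with feed_entries and last_hash as hypothetical stand-ins for the parsed feed and the stored hash:

```python
import hashlib
import time

def pick_latest(feed_entries):
    # Newest entry wins, regardless of the order the feed lists them in.
    latest, best = None, 0
    for entry in feed_entries:
        published = time.mktime(entry["published_parsed"])
        if published > best:
            latest, best = entry, published
    return latest

def is_new_post(entry, last_hash):
    # Hash the title and compare with the hash stored from the previous run.
    new_hash = hashlib.sha3_512(bytes(entry["title"], "utf-8")).hexdigest()
    return new_hash != last_hash, new_hash
```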
@@ -126,7 +141,16 @@ def main():
                 continue
         except KeyError:
             app_config["feeds"][i]["lasthash"] = new_hash
+            logger.info(
+                "Feed %s has no existing hash, likely a new feed!", hook["name"]
+            )
         # Generate the webhook
+        logger.info(
+            "Publishing webhook for %s. Last check was %d, now is %d",
+            hook["name"],
+            last_check,
+            now,
+        )
         webhook = {
             "embeds": [
                 {
@@ -157,16 +181,13 @@ def main():
         }
         webhook_string = json.dumps(webhook)
 
-        if published_time > last_check:
-            r = requests.post(
-                hook["webhook"], data=webhook_string, headers=custom_header
-            )
-            if r.status_code not in success_codes:
-                print(
-                    "Error {} while trying to post {}".format(
-                        r.status_code, hook["webhook"]
-                    )
-                )
+        r = requests.post(hook["webhook"], data=webhook_string, headers=custom_header)
+        if r.status_code not in success_codes:
+            logger.error(
+                "Error %d while trying to post %s", r.status_code, hook["webhook"]
+            )
+
+    # End of feed loop
     app_config["lastupdate"] = now
     with open(config_file_path, "w") as config_file:
         json.dump(app_config, config_file, indent=4)
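For reference, the requests.post call above sends the serialized webhook to Discord. A minimal standalone sketch of that kind of request; the URL and embed values are placeholders, and the embed fields are only an assumption about what discorss.py includes:

```python
import json
import requests

webhook_url = "https://discord.com/api/webhooks/<id>/<token>"  # placeholder

payload = {
    "embeds": [
        {
            "title": "Example post title",
            "description": "First ~250 characters of the post summary...",
            "url": "https://example.com/latest-post",
        }
    ]
}

r = requests.post(
    webhook_url,
    data=json.dumps(payload),
    headers={"Content-Type": "application/json"},
)
print(r.status_code)  # Discord replies 204 No Content on success
```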