Compare commits
2 commits
main
...
feature/rs
| Author | SHA1 | Date | |
|---|---|---|---|
|
91c39042c8 |
|||
|
acb03cff27 |
1 changed files with 84 additions and 22 deletions
106
discorss.py
106
discorss.py
|
|
@ -28,7 +28,8 @@ from types import SimpleNamespace
|
||||||
class Discorss:
|
class Discorss:
|
||||||
FEED_TIMEOUT_SECONDS = 15
|
FEED_TIMEOUT_SECONDS = 15
|
||||||
HASH_HISTORY_LIMIT = 10
|
HASH_HISTORY_LIMIT = 10
|
||||||
APP_VERSION = "0.3rc1"
|
APP_VERSION = "0.3rc2"
|
||||||
|
IMAGE_EXTENSIONS = (".jpg", ".jpeg", ".png", ".gif", ".webp")
|
||||||
|
|
||||||
def __init__(self, args=None):
|
def __init__(self, args=None):
|
||||||
if args is None:
|
if args is None:
|
||||||
|
|
@ -54,6 +55,7 @@ class Discorss:
|
||||||
# Yes, I know you "can't parse HTML with regex", but
|
# Yes, I know you "can't parse HTML with regex", but
|
||||||
# just watch me.
|
# just watch me.
|
||||||
self.html_filter = re.compile(r"\<\/?([A-Za-z0-9 \:\.\-\/\"\=])*\>")
|
self.html_filter = re.compile(r"\<\/?([A-Za-z0-9 \:\.\-\/\"\=])*\>")
|
||||||
|
self.img_src_filter = re.compile(r'<img[^>]+src=["\']([^"\']+)["\']', re.I)
|
||||||
self.success_codes = [200, 201, 202, 203, 204, 205, 206]
|
self.success_codes = [200, 201, 202, 203, 204, 205, 206]
|
||||||
self.app_config = {}
|
self.app_config = {}
|
||||||
|
|
||||||
|
|
@ -142,29 +144,37 @@ class Discorss:
|
||||||
last_check,
|
last_check,
|
||||||
self.now,
|
self.now,
|
||||||
)
|
)
|
||||||
webhook = {
|
embed = {
|
||||||
"embeds": [
|
"title": str(latest_post["title"]),
|
||||||
|
"url": str(latest_post["link"]),
|
||||||
|
"color": 2123412,
|
||||||
|
"footer": {
|
||||||
|
"text": "DiscoRSS",
|
||||||
|
"icon_url": "https://frzn.dev/~amr/images/discorss.png",
|
||||||
|
},
|
||||||
|
"author": {
|
||||||
|
"name": str(hook["name"]),
|
||||||
|
"url": str(hook["siteurl"]),
|
||||||
|
},
|
||||||
|
"fields": [
|
||||||
{
|
{
|
||||||
"title": str(latest_post["title"]),
|
"name": "Excerpt from post:",
|
||||||
"url": str(latest_post["link"]),
|
"value": self.get_description(latest_post),
|
||||||
"color": 2123412,
|
|
||||||
"footer": {
|
|
||||||
"text": "DiscoRSS",
|
|
||||||
"icon_url": "https://frzn.dev/~amr/images/discorss.png",
|
|
||||||
},
|
|
||||||
"author": {
|
|
||||||
"name": str(hook["name"]),
|
|
||||||
"url": str(hook["siteurl"]),
|
|
||||||
},
|
|
||||||
"fields": [
|
|
||||||
{
|
|
||||||
"name": "Excerpt from post:",
|
|
||||||
"value": self.get_description(latest_post),
|
|
||||||
}
|
|
||||||
],
|
|
||||||
# "timestamp": str(self.now),
|
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
|
# "timestamp": str(self.now),
|
||||||
|
}
|
||||||
|
self.logger.debug(
|
||||||
|
"Checking for images in post %s from %s...",
|
||||||
|
latest_post["title"],
|
||||||
|
hook["name"],
|
||||||
|
)
|
||||||
|
image_url = self.get_image_url(latest_post)
|
||||||
|
if image_url is not None:
|
||||||
|
embed["thumbnail"] = {"url": image_url}
|
||||||
|
|
||||||
|
webhook = {
|
||||||
|
"embeds": [embed],
|
||||||
"attachments": [],
|
"attachments": [],
|
||||||
}
|
}
|
||||||
custom_header = {
|
custom_header = {
|
||||||
|
|
@ -264,6 +274,58 @@ class Discorss:
|
||||||
desc = desc + str(addons)
|
desc = desc + str(addons)
|
||||||
return desc
|
return desc
|
||||||
|
|
||||||
|
# attempting to extract image previews from feeds which primarily feature
|
||||||
|
# images, like NASA's Picture of the Day feed
|
||||||
|
def get_image_url(self, feed):
    """Return the first image URL found in a feed entry, or None.

    Candidates are collected in priority order: the structured fields
    ``media_content``, ``enclosures``, ``links`` and ``media_thumbnail``
    first, then the first ``<img src=...>`` found in each HTML body under
    ``summary_detail`` and ``content``.  Every candidate is validated with
    ``self.is_image_url``.

    Args:
        feed: mapping-like entry supporting ``.get()``; the structured
            fields are expected to hold lists of dict-like items.

    Returns:
        The first candidate URL, or ``None`` when nothing looked like an
        image.
    """
    # (entry field, key holding the URL inside each item), in priority order.
    structured_sources = (
        ("media_content", "url"),
        ("enclosures", "href"),
        ("links", "href"),
        ("media_thumbnail", "url"),
    )

    image_candidates = []
    for field, url_key in structured_sources:
        for item in feed.get(field) or []:
            url = item.get(url_key)
            if self.is_image_url(url, item.get("type")):
                image_candidates.append(url)

    # Fall back to scraping the first <img> tag out of the HTML bodies.
    for field in ("summary_detail", "content"):
        value = feed.get(field)
        if isinstance(value, dict):
            value = [value]
        elif not isinstance(value, list):
            continue
        for item in value:
            match = self.img_src_filter.search(str(item.get("value", "")))
            if match and self.is_image_url(match.group(1)):
                image_candidates.append(match.group(1))

    # Logging arguments are evaluated even when DEBUG is disabled, so a
    # plain feed["name"] would raise KeyError for entries without that key.
    # Look it up safely and fall back to the title.
    entry_label = feed.get("name") or feed.get("title") or "<untitled>"
    self.logger.debug("Found the following image candidates in %s...", entry_label)
    for candidate in image_candidates:
        self.logger.debug("%s", candidate)

    return image_candidates[0] if image_candidates else None
|
||||||
|
|
||||||
|
# a silly little helper just to validate image links
|
||||||
|
# this isn't 100% foolproof but it should work for the
|
||||||
|
# vast majority of feeds out there, unless they use some
|
||||||
|
# really weird image type like image/bpg
|
||||||
|
def is_image_url(self, url, mime_type=None):
    """Return True when ``url`` appears to point at an image.

    A declared MIME type starting with ``image/`` is accepted outright.
    Otherwise the URL, with any query string and fragment removed, must
    end with one of ``self.IMAGE_EXTENSIONS`` (compared case-insensitively).

    Args:
        url: candidate URL; ``None`` or an empty value is never an image.
        mime_type: optional MIME type advertised alongside the URL.

    Returns:
        bool: whether the URL should be treated as an image.
    """
    if not url:
        return False
    if mime_type and str(mime_type).lower().startswith("image/"):
        return True
    # Drop both "?query" and "#fragment" so that e.g. "pic.png#preview"
    # is still recognised by its extension.
    path = str(url).lower().split("?", 1)[0].split("#", 1)[0]
    return path.endswith(self.IMAGE_EXTENSIONS)
|
||||||
|
|
||||||
# Some of this could go in __init__
|
# Some of this could go in __init__
|
||||||
def setup(self):
|
def setup(self):
|
||||||
os.environ["TZ"] = "America/Toronto"
|
os.environ["TZ"] = "America/Toronto"
|
||||||
|
|
@ -274,7 +336,7 @@ class Discorss:
|
||||||
logging.basicConfig(
|
logging.basicConfig(
|
||||||
filename=self.log_file_path,
|
filename=self.log_file_path,
|
||||||
encoding="utf-8",
|
encoding="utf-8",
|
||||||
level=logging.WARNING,
|
level=logging.ERROR,
|
||||||
datefmt="%m/%d/%Y %H:%M:%S",
|
datefmt="%m/%d/%Y %H:%M:%S",
|
||||||
format="%(asctime)s [%(threadName)s] -> %(levelname)s: %(message)s",
|
format="%(asctime)s [%(threadName)s] -> %(levelname)s: %(message)s",
|
||||||
)
|
)
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue