Compare commits
2 commits
main
...
feature/rs
| Author | SHA1 | Date | |
|---|---|---|---|
|
91c39042c8 |
|||
|
acb03cff27 |
1 changed files with 84 additions and 22 deletions
106
discorss.py
106
discorss.py
|
|
@ -28,7 +28,8 @@ from types import SimpleNamespace
|
|||
class Discorss:
|
||||
FEED_TIMEOUT_SECONDS = 15
|
||||
HASH_HISTORY_LIMIT = 10
|
||||
APP_VERSION = "0.3rc1"
|
||||
APP_VERSION = "0.3rc2"
|
||||
IMAGE_EXTENSIONS = (".jpg", ".jpeg", ".png", ".gif", ".webp")
|
||||
|
||||
def __init__(self, args=None):
|
||||
if args is None:
|
||||
|
|
@ -54,6 +55,7 @@ class Discorss:
|
|||
# Yes, I know you "can't parse HTML with regex", but
|
||||
# just watch me.
|
||||
self.html_filter = re.compile(r"\<\/?([A-Za-z0-9 \:\.\-\/\"\=])*\>")
|
||||
self.img_src_filter = re.compile(r'<img[^>]+src=["\']([^"\']+)["\']', re.I)
|
||||
self.success_codes = [200, 201, 202, 203, 204, 205, 206]
|
||||
self.app_config = {}
|
||||
|
||||
|
|
@ -142,29 +144,37 @@ class Discorss:
|
|||
last_check,
|
||||
self.now,
|
||||
)
|
||||
webhook = {
|
||||
"embeds": [
|
||||
embed = {
|
||||
"title": str(latest_post["title"]),
|
||||
"url": str(latest_post["link"]),
|
||||
"color": 2123412,
|
||||
"footer": {
|
||||
"text": "DiscoRSS",
|
||||
"icon_url": "https://frzn.dev/~amr/images/discorss.png",
|
||||
},
|
||||
"author": {
|
||||
"name": str(hook["name"]),
|
||||
"url": str(hook["siteurl"]),
|
||||
},
|
||||
"fields": [
|
||||
{
|
||||
"title": str(latest_post["title"]),
|
||||
"url": str(latest_post["link"]),
|
||||
"color": 2123412,
|
||||
"footer": {
|
||||
"text": "DiscoRSS",
|
||||
"icon_url": "https://frzn.dev/~amr/images/discorss.png",
|
||||
},
|
||||
"author": {
|
||||
"name": str(hook["name"]),
|
||||
"url": str(hook["siteurl"]),
|
||||
},
|
||||
"fields": [
|
||||
{
|
||||
"name": "Excerpt from post:",
|
||||
"value": self.get_description(latest_post),
|
||||
}
|
||||
],
|
||||
# "timestamp": str(self.now),
|
||||
"name": "Excerpt from post:",
|
||||
"value": self.get_description(latest_post),
|
||||
}
|
||||
],
|
||||
# "timestamp": str(self.now),
|
||||
}
|
||||
self.logger.debug(
|
||||
"Checking for images in post %s from %s...",
|
||||
latest_post["title"],
|
||||
hook["name"],
|
||||
)
|
||||
image_url = self.get_image_url(latest_post)
|
||||
if image_url is not None:
|
||||
embed["thumbnail"] = {"url": image_url}
|
||||
|
||||
webhook = {
|
||||
"embeds": [embed],
|
||||
"attachments": [],
|
||||
}
|
||||
custom_header = {
|
||||
|
|
@ -264,6 +274,58 @@ class Discorss:
|
|||
desc = desc + str(addons)
|
||||
return desc
|
||||
|
||||
# attempting to extract image previews from feeds which primarily feature
|
||||
# images, like NASA's Picture of the Day feed
|
||||
def get_image_url(self, feed):
    """Return the first image URL found in a post entry, or None.

    Candidates are collected in priority order: ``media_content``,
    ``enclosures``, ``links``, ``media_thumbnail``, then the first
    ``<img src=...>`` match inside ``summary_detail`` / ``content``.
    Each candidate is validated with ``self.is_image_url``.

    Args:
        feed: Mapping-like post entry exposing ``.get()``. The caller
            passes the latest post of a feed (presumably a feedparser
            entry -- confirm against the caller).

    Returns:
        The first candidate URL, or ``None`` when no image was found.
    """
    # (entry field, key holding the URL) pairs, checked in this order.
    # These cover the most common places feeds embed image URLs.
    structured_sources = (
        ("media_content", "url"),
        ("enclosures", "href"),
        ("links", "href"),
        ("media_thumbnail", "url"),
    )

    image_candidates = []
    for field, url_key in structured_sources:
        # ``or []`` tolerates a field that is present but set to None.
        for item in feed.get(field) or []:
            url = item.get(url_key)
            if self.is_image_url(url, item.get("type")):
                image_candidates.append(url)

    # Fall back to scanning HTML bodies for an inline <img> tag.
    for field in ("summary_detail", "content"):
        value = feed.get(field)
        if isinstance(value, list):
            values = [item.get("value", "") for item in value]
        elif isinstance(value, dict):
            values = [value.get("value", "")]
        else:
            values = []
        for text in values:
            match = self.img_src_filter.search(str(text))
            if match and self.is_image_url(match.group(1)):
                image_candidates.append(match.group(1))

    # Logging arguments are evaluated even when DEBUG is disabled, so a
    # plain feed["name"] lookup would raise KeyError for posts that only
    # carry a "title". Look the label up defensively instead.
    label = feed.get("name") or feed.get("title") or "<untitled post>"
    self.logger.debug("Found the following image candidates in %s...", label)
    for candidate in image_candidates:
        self.logger.debug("%s", candidate)

    if image_candidates:
        return image_candidates[0]
    return None
|
||||
|
||||
# a silly little helper just to validate image links
|
||||
# this isn't 100% foolproof but it should work for the
|
||||
# vast majority of feeds out there, unless they use some
|
||||
# really weird image type like image/bpg
|
||||
def is_image_url(self, url, mime_type=None):
    """Return True when ``url`` looks like it points at an image.

    Args:
        url: Candidate URL; any falsy value is rejected.
        mime_type: Optional MIME type advertised alongside the URL. A
            value starting with ``image/`` (case-insensitive) is
            accepted without inspecting the URL itself.

    Returns:
        True if the MIME type is an image type, or if the URL path ends
        with one of ``self.IMAGE_EXTENSIONS``; otherwise False.
    """
    if not url:
        return False
    if mime_type and str(mime_type).lower().startswith("image/"):
        return True
    # Strip both the query string and the fragment before checking the
    # extension: "a.png#top" is an image, "page.html#a.png" is not.
    path = str(url).lower().split("?", 1)[0].split("#", 1)[0]
    return path.endswith(self.IMAGE_EXTENSIONS)
|
||||
|
||||
# Some of this could go in __init__
|
||||
def setup(self):
|
||||
os.environ["TZ"] = "America/Toronto"
|
||||
|
|
@ -274,7 +336,7 @@ class Discorss:
|
|||
logging.basicConfig(
|
||||
filename=self.log_file_path,
|
||||
encoding="utf-8",
|
||||
level=logging.WARNING,
|
||||
level=logging.ERROR,
|
||||
datefmt="%m/%d/%Y %H:%M:%S",
|
||||
format="%(asctime)s [%(threadName)s] -> %(levelname)s: %(message)s",
|
||||
)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue