diff --git a/discorss.py b/discorss.py index 8513a97..bf431b9 100755 --- a/discorss.py +++ b/discorss.py @@ -28,8 +28,7 @@ from types import SimpleNamespace class Discorss: FEED_TIMEOUT_SECONDS = 15 HASH_HISTORY_LIMIT = 10 - APP_VERSION = "0.3rc2" - IMAGE_EXTENSIONS = (".jpg", ".jpeg", ".png", ".gif", ".webp") + APP_VERSION = "0.3rc1" def __init__(self, args=None): if args is None: @@ -55,7 +54,6 @@ class Discorss: # Yes, I know you "can't parse HTML with regex", but # just watch me. self.html_filter = re.compile(r"\<\/?([A-Za-z0-9 \:\.\-\/\"\=])*\>") - self.img_src_filter = re.compile(r']+src=["\']([^"\']+)["\']', re.I) self.success_codes = [200, 201, 202, 203, 204, 205, 206] self.app_config = {} @@ -144,37 +142,29 @@ class Discorss: last_check, self.now, ) - embed = { - "title": str(latest_post["title"]), - "url": str(latest_post["link"]), - "color": 2123412, - "footer": { - "text": "DiscoRSS", - "icon_url": "https://frzn.dev/~amr/images/discorss.png", - }, - "author": { - "name": str(hook["name"]), - "url": str(hook["siteurl"]), - }, - "fields": [ + webhook = { + "embeds": [ { - "name": "Excerpt from post:", - "value": self.get_description(latest_post), + "title": str(latest_post["title"]), + "url": str(latest_post["link"]), + "color": 2123412, + "footer": { + "text": "DiscoRSS", + "icon_url": "https://frzn.dev/~amr/images/discorss.png", + }, + "author": { + "name": str(hook["name"]), + "url": str(hook["siteurl"]), + }, + "fields": [ + { + "name": "Excerpt from post:", + "value": self.get_description(latest_post), + } + ], + # "timestamp": str(self.now), } ], - # "timestamp": str(self.now), - } - self.logger.debug( - "Checking for images in post %s from %s...", - latest_post["title"], - hook["name"], - ) - image_url = self.get_image_url(latest_post) - if image_url is not None: - embed["thumbnail"] = {"url": image_url} - - webhook = { - "embeds": [embed], "attachments": [], } custom_header = { @@ -274,58 +264,6 @@ class Discorss: desc = desc + str(addons) return desc - # attempting to extract image previews from feeds which primarily feature - # images, like NASA's Picture of the Day feed - def get_image_url(self, feed): - image_candidates = [] - # check the most common fields, this should catch the majority of image - # feeds' embedded urls - for media in feed.get("media_content", []): - if self.is_image_url(media.get("url"), media.get("type")): - image_candidates.append(media["url"]) - - for enclosure in feed.get("enclosures", []): - if self.is_image_url(enclosure.get("href"), enclosure.get("type")): - image_candidates.append(enclosure["href"]) - - for link in feed.get("links", []): - if self.is_image_url(link.get("href"), link.get("type")): - image_candidates.append(link["href"]) - - for media in feed.get("media_thumbnail", []): - if self.is_image_url(media.get("url"), media.get("type")): - image_candidates.append(media["url"]) - - for field in ["summary_detail", "content"]: - value = feed.get(field) - if isinstance(value, list): - values = [item.get("value", "") for item in value] - elif isinstance(value, dict): - values = [value.get("value", "")] - else: - values = [] - for text in values: - match = self.img_src_filter.search(str(text)) - if match and self.is_image_url(match.group(1)): - image_candidates.append(match.group(1)) - self.logger.debug("Found the following image candidates in %s...", feed["name"]) - for i in image_candidates: - self.logger.debug("%s", i) - if len(image_candidates) > 0: - return image_candidates[0] - return None - - # a silly little helper just to validate image links - # this isn't 100% foolproof but it should work for the - # vast majority of feeds out there, unless they use some - # really weird image type like image/bpg - def is_image_url(self, url, mime_type=None): - if not url: - return False - if mime_type and str(mime_type).lower().startswith("image/"): - return True - return str(url).lower().split("?", 1)[0].endswith(self.IMAGE_EXTENSIONS) - # Some of this could go in __init__ def setup(self): os.environ["TZ"] = "America/Toronto" @@ -336,7 +274,7 @@ class Discorss: logging.basicConfig( filename=self.log_file_path, encoding="utf-8", - level=logging.ERROR, + level=logging.WARNING, datefmt="%m/%d/%Y %H:%M:%S", format="%(asctime)s [%(threadName)s] -> %(levelname)s: %(message)s", )