From acb03cff271cfd434469d382eadb1e71377c0a46 Mon Sep 17 00:00:00 2001
From: "A.M. Rowsell" <amr@frzn.dev>
Date: Tue, 9 Jun 2026 18:10:22 -0400
Subject: [PATCH 1/2] dev: attempting to add image thumbnail support

---
 discorss.py | 97 +++++++++++++++++++++++++++++++++++++++++------------
 1 file changed, 76 insertions(+), 21 deletions(-)

diff --git a/discorss.py b/discorss.py
index bf431b9..49a7cfd 100755
--- a/discorss.py
+++ b/discorss.py
@@ -28,7 +28,8 @@ from types import SimpleNamespace
 class Discorss:
     FEED_TIMEOUT_SECONDS = 15
     HASH_HISTORY_LIMIT = 10
-    APP_VERSION = "0.3rc1"
+    APP_VERSION = "0.3rc2"
+    IMAGE_EXTENSIONS = (".jpg", ".jpeg", ".png", ".gif", ".webp")
 
     def __init__(self, args=None):
         if args is None:
@@ -54,6 +55,7 @@ class Discorss:
         # Yes, I know you "can't parse HTML with regex", but
         # just watch me.
         self.html_filter = re.compile(r"\<\/?([A-Za-z0-9 \:\.\-\/\"\=])*\>")
+        self.img_src_filter = re.compile(r'<img[^>]+src=["\']([^"\']+)["\']', re.I)
         self.success_codes = [200, 201, 202, 203, 204, 205, 206]
         self.app_config = {}
 
@@ -142,29 +144,32 @@ class Discorss:
             last_check,
             self.now,
         )
-        webhook = {
-            "embeds": [
+        embed = {
+            "title": str(latest_post["title"]),
+            "url": str(latest_post["link"]),
+            "color": 2123412,
+            "footer": {
+                "text": "DiscoRSS",
+                "icon_url": "https://frzn.dev/~amr/images/discorss.png",
+            },
+            "author": {
+                "name": str(hook["name"]),
+                "url": str(hook["siteurl"]),
+            },
+            "fields": [
                 {
-                    "title": str(latest_post["title"]),
-                    "url": str(latest_post["link"]),
-                    "color": 2123412,
-                    "footer": {
-                        "text": "DiscoRSS",
-                        "icon_url": "https://frzn.dev/~amr/images/discorss.png",
-                    },
-                    "author": {
-                        "name": str(hook["name"]),
-                        "url": str(hook["siteurl"]),
-                    },
-                    "fields": [
-                        {
-                            "name": "Excerpt from post:",
-                            "value": self.get_description(latest_post),
-                        }
-                    ],
-                    # "timestamp": str(self.now),
+                    "name": "Excerpt from post:",
+                    "value": self.get_description(latest_post),
                 }
             ],
+            # "timestamp": str(self.now),
+        }
+        image_url = self.get_image_url(latest_post)
+        if image_url is not None:
+            embed["thumbnail"] = {"url": image_url}
+
+        webhook = {
+            "embeds": [embed],
             "attachments": [],
         }
         custom_header = {
@@ -264,6 +269,56 @@ class Discorss:
             desc = desc + str(addons)
         return desc
 
+    # attempting to extract image previews from feeds which primarily feature
+    # images, like NASA's Picture of the Day feed
+    def get_image_url(self, feed):
+        image_candidates = []
+        # check the most common fields, this should catch the majority of image
+        # feeds' embedded urls
+        for media in feed.get("media_content", []):
+            if self.is_image_url(media.get("url"), media.get("type")):
+                image_candidates.append(media["url"])
+
+        for enclosure in feed.get("enclosures", []):
+            if self.is_image_url(enclosure.get("href"), enclosure.get("type")):
+                image_candidates.append(enclosure["href"])
+
+        for link in feed.get("links", []):
+            if self.is_image_url(link.get("href"), link.get("type")):
+                image_candidates.append(link["href"])
+
+        for media in feed.get("media_thumbnail", []):
+            if self.is_image_url(media.get("url"), media.get("type")):
+                image_candidates.append(media["url"])
+
+        for field in ["summary_detail", "content"]:
+            value = feed.get(field)
+            if isinstance(value, list):
+                values = [item.get("value", "") for item in value]
+            elif isinstance(value, dict):
+                values = [value.get("value", "")]
+            else:
+                values = []
+            for text in values:
+                match = self.img_src_filter.search(str(text))
+                if match and self.is_image_url(match.group(1)):
+                    image_candidates.append(match.group(1))
+
+        if len(image_candidates) > 0:
+            return image_candidates[0]
+        return None
+
+    # Helper that decides whether a link points at an image. Any "image/*"
+    # MIME type is accepted; without one, the URL (query string stripped)
+    # must end in one of IMAGE_EXTENSIONS. Not foolproof: image URLs with no
+    # MIME type and no (or an unusual, e.g. .bpg) extension are rejected.
+    def is_image_url(self, url, mime_type=None):
+        if not url:
+            return False
+        if mime_type and str(mime_type).lower().startswith("image/"):
+            return True
+        return str(url).lower().split("?", 1)[0].endswith(self.IMAGE_EXTENSIONS)
+
     # Some of this could go in __init__
     def setup(self):
         os.environ["TZ"] = "America/Toronto"

From 91c39042c80021a3ce03e8f870dd52dc11d2e9b8 Mon Sep 17 00:00:00 2001
From: "A.M. Rowsell" <amr@frzn.dev>
Date: Tue, 9 Jun 2026 18:36:31 -0400
Subject: [PATCH 2/2] logging: added some more logging statements

---
 discorss.py | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/discorss.py b/discorss.py
index 49a7cfd..8513a97 100755
--- a/discorss.py
+++ b/discorss.py
@@ -164,6 +164,11 @@ class Discorss:
             ],
             # "timestamp": str(self.now),
         }
+        self.logger.debug(
+            "Checking for images in post %s from %s...",
+            latest_post["title"],
+            hook["name"],
+        )
         image_url = self.get_image_url(latest_post)
         if image_url is not None:
             embed["thumbnail"] = {"url": image_url}
@@ -303,7 +308,9 @@ class Discorss:
                 match = self.img_src_filter.search(str(text))
                 if match and self.is_image_url(match.group(1)):
                     image_candidates.append(match.group(1))
-
+        self.logger.debug("Found the following image candidates in %s...", feed.get("title"))
+        for i in image_candidates:
+            self.logger.debug("%s", i)
         if len(image_candidates) > 0:
             return image_candidates[0]
         return None
@@ -329,7 +336,7 @@ class Discorss:
         logging.basicConfig(
             filename=self.log_file_path,
             encoding="utf-8",
-            level=logging.WARNING,
+            level=logging.ERROR,
             datefmt="%m/%d/%Y %H:%M:%S",
             format="%(asctime)s [%(threadName)s] -> %(levelname)s: %(message)s",
         )