logging: added some more logging statements

dev: attempting to add image thumbnail support
2026-06-09 18:36:31 -04:00 · 2026-06-09 18:10:22 -04:00
1 changed files with 84 additions and 22 deletions
--- a/discorss.py
+++ b/discorss.py
@ -28,7 +28,8 @@ from types import SimpleNamespace
 class Discorss:
    FEED_TIMEOUT_SECONDS = 15
    HASH_HISTORY_LIMIT = 10
-    APP_VERSION = "0.3rc1"
+    APP_VERSION = "0.3rc2"
    IMAGE_EXTENSIONS = (".jpg", ".jpeg", ".png", ".gif", ".webp")
    def __init__(self, args=None):
        if args is None:
@ -54,6 +55,7 @@ class Discorss:
        # Yes, I know you "can't parse HTML with regex", but
        # just watch me.
        self.html_filter = re.compile(r"\<\/?([A-Za-z0-9 \:\.\-\/\"\=])*\>")
        self.img_src_filter = re.compile(r'<img[^>]+src=["\']([^"\']+)["\']', re.I)
        self.success_codes = [200, 201, 202, 203, 204, 205, 206]
        self.app_config = {}
@ -142,29 +144,37 @@ class Discorss:
            last_check,
            self.now,
        )
-        webhook = {
+        embed = {
-            "embeds": [
+            "title": str(latest_post["title"]),
            "url": str(latest_post["link"]),
            "color": 2123412,
            "footer": {
                "text": "DiscoRSS",
                "icon_url": "https://frzn.dev/~amr/images/discorss.png",
            },
            "author": {
                "name": str(hook["name"]),
                "url": str(hook["siteurl"]),
            },
            "fields": [
                {
-                    "title": str(latest_post["title"]),
+                    "name": "Excerpt from post:",
-                    "url": str(latest_post["link"]),
+                    "value": self.get_description(latest_post),
                    "color": 2123412,
                    "footer": {
                        "text": "DiscoRSS",
                        "icon_url": "https://frzn.dev/~amr/images/discorss.png",
                    },
                    "author": {
                        "name": str(hook["name"]),
                        "url": str(hook["siteurl"]),
                    },
                    "fields": [
                        {
                            "name": "Excerpt from post:",
                            "value": self.get_description(latest_post),
                        }
                    ],
                    # "timestamp": str(self.now),
                }
            ],
            # "timestamp": str(self.now),
        }
        self.logger.debug(
            "Checking for images in post %s from %s...",
            latest_post["title"],
            hook["name"],
        )
        image_url = self.get_image_url(latest_post)
        if image_url is not None:
            embed["thumbnail"] = {"url": image_url}
        webhook = {
            "embeds": [embed],
            "attachments": [],
        }
        custom_header = {
@ -264,6 +274,58 @@ class Discorss:
            desc = desc + str(addons)
        return desc
    # attempting to extract image previews from feeds which primarily feature
    # images, like NASA's Picture of the Day feed
    def get_image_url(self, feed):
        """Return the first image URL found in a feed entry, or ``None``.

        Candidates are gathered in a fixed order: ``media_content``,
        ``enclosures``, ``links``, ``media_thumbnail``, and finally any
        ``<img src=...>`` tags inside ``summary_detail`` and ``content``.
        Every candidate is logged at debug level; the first one wins.

        Args:
            feed: Mapping-like entry that supports ``.get()``. Presumably a
                feedparser entry -- confirm against the caller.

        Returns:
            The first candidate URL, or ``None`` when nothing image-like
            was found.
        """
        image_candidates = []

        # (entry field, key holding the URL) pairs, in priority order. These
        # are the most common places for image feeds to put their URLs.
        url_sources = (
            ("media_content", "url"),
            ("enclosures", "href"),
            ("links", "href"),
            ("media_thumbnail", "url"),
        )
        for field, url_key in url_sources:
            # "or []" also covers a field that is present but set to None.
            for item in feed.get(field) or []:
                url = item.get(url_key)
                if self.is_image_url(url, item.get("type")):
                    image_candidates.append(url)

        # Fall back to scraping <img> tags out of the HTML body fields.
        for field in ("summary_detail", "content"):
            value = feed.get(field)
            if isinstance(value, list):
                texts = [item.get("value", "") for item in value]
            elif isinstance(value, dict):
                texts = [value.get("value", "")]
            else:
                texts = []
            for text in texts:
                # Look at every <img>, not just the first: a leading tag whose
                # src is not recognisably an image should not hide later ones.
                for match in self.img_src_filter.finditer(str(text)):
                    if self.is_image_url(match.group(1)):
                        image_candidates.append(match.group(1))

        # The entry may not carry a "name" key, so never index it directly:
        # a debug log line must not be able to raise KeyError.
        entry_label = feed.get("name") or feed.get("title") or "<untitled>"
        self.logger.debug(
            "Found the following image candidates in %s...", entry_label
        )
        for candidate in image_candidates:
            self.logger.debug("%s", candidate)

        return image_candidates[0] if image_candidates else None
    # a silly little helper just to validate image links
    # this isn't 100% foolproof but it should work for the
    # vast majority of feeds out there, unless they use some
    # really weird image type like image/bpg
    def is_image_url(self, url, mime_type=None):
        """Return True when ``url`` looks like it points at an image.

        A declared ``image/*`` MIME type is trusted outright. Otherwise the
        URL's path (ignoring any query string or fragment) must end with one
        of ``self.IMAGE_EXTENSIONS``, compared case-insensitively.

        Args:
            url: Candidate URL; any falsy value yields ``False``.
            mime_type: Optional MIME type advertised alongside the URL.

        Returns:
            ``True`` if the URL is accepted as an image, else ``False``.
        """
        if not url:
            return False
        if mime_type and str(mime_type).lower().startswith("image/"):
            return True
        # Strip the fragment first, then the query string, so that
        # "pic.png?w=1#top" is accepted and "page#pic.png" is not.
        path = str(url).split("#", 1)[0].split("?", 1)[0]
        return path.lower().endswith(self.IMAGE_EXTENSIONS)
    # Some of this could go in __init__
    def setup(self):
        os.environ["TZ"] = "America/Toronto"
@ -274,7 +336,7 @@ class Discorss:
        logging.basicConfig(
            filename=self.log_file_path,
            encoding="utf-8",
-            level=logging.WARNING,
+            level=logging.ERROR,
            datefmt="%m/%d/%Y %H:%M:%S",
            format="%(asctime)s [%(threadName)s] -> %(levelname)s: %(message)s",
        )
Author	SHA1	Message	Date
A.M. Rowsell	91c39042c8	logging: added some more logging statements	2026-06-09 18:36:31 -04:00
A.M. Rowsell	acb03cff27	dev: attempting to add image thumbnail support	2026-06-09 18:10:22 -04:00