Skip to content

Commit 47764fd

Browse files
committed
rss-bot: Add option to convert body to Markdown
1 parent 1ae6dbe commit 47764fd

3 files changed

Lines changed: 22 additions & 1 deletion

File tree

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -68,6 +68,7 @@ module = [
68 68
"google_auth_oauthlib.*",
69 69
"googleapiclient.*",
70 70
"irc.*",
71+
"markdownify.*",
71 72
"mercurial.*",
72 73
"nio.*",
73 74
"oauth2client.*",
Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1 +1,2 @@
1 1
feedparser>=6.0.10
2+
markdownify>=0.11.6

zulip/integrations/rss/rss-bot

Lines changed: 20 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -13,10 +13,12 @@ import re
13 13
import sys
14 14
import time
15 15
import urllib.parse
16+
from collections.abc import Callable
16 17
from html.parser import HTMLParser
17 18
from typing import Any, Dict, List
18 19

19 20
import feedparser
21+
from markdownify import markdownify
20 22
from typing_extensions import override
21 23

22 24
import zulip
@@ -106,6 +108,18 @@ parser.add_argument(
106 108
help="The earliest date (relative to today) you want to process entries from (in days)",
107 109
default=30,
108 110
action="store",
111+
body = parser.add_mutually_exclusive_group()
112+
body.add_argument(
113+
"--strip",
114+
dest="strip",
115+
action="store_true",
116+
help="Strip HTML tags from body",
117+
)
118+
body.add_argument(
119+
"--markdownify",
120+
dest="strip",
121+
action="store_false",
122+
help="Convert body from HTML to Markdown",
109 123
)
110 124

111 125
opts = parser.parse_args()
@@ -198,7 +212,12 @@ def send_zulip(entry: Any, feed_name: str) -> Dict[str, Any]:
198 212
body = unwrap_text(body)
199 213

200 214
title = f"**[{entry.title}]({entry.link})**\n" if hasattr(entry, "title") else ""
201-
content = f"{title}{strip_tags(body)}\n{entry.link}"
215+
216+
def md(html: str) -> str:
217+
return markdownify(html, escape_underscores=False)
218+
219+
convert: Callable[[str], str] = strip_tags if opts.strip else md
220+
content = f"{title}{convert(body)}\n{entry.link}"
202 221

203 222
if opts.math:
204 223
content = content.replace("$", "$$")

0 commit comments

Comments
 (0)