Fix #29: sanitize HTML so that only the tags allowed by the Telegram Bot API remain.
This commit is contained in:
parent
7c373c8f78
commit
ecc97fa2a0
|
@ -1,3 +1,4 @@
|
||||||
|
bleach==5.0.1
|
||||||
certifi==2021.10.8
|
certifi==2021.10.8
|
||||||
charset-normalizer==2.0.12
|
charset-normalizer==2.0.12
|
||||||
decorator==5.1.1
|
decorator==5.1.1
|
||||||
|
@ -7,5 +8,7 @@ pyTelegramBotAPI==4.5.0
|
||||||
python-dotenv==0.20.0
|
python-dotenv==0.20.0
|
||||||
requests==2.27.1
|
requests==2.27.1
|
||||||
sgmllib3k==1.0.0
|
sgmllib3k==1.0.0
|
||||||
|
six==1.16.0
|
||||||
urllib3==1.26.9
|
urllib3==1.26.9
|
||||||
validators==0.19.0
|
validators==0.19.0
|
||||||
|
webencodings==0.5.1
|
||||||
|
|
20
telegram.py
20
telegram.py
|
@ -1,5 +1,6 @@
|
||||||
import time
|
import time
|
||||||
|
|
||||||
|
from bleach.sanitizer import Cleaner
|
||||||
from telebot import TeleBot
|
from telebot import TeleBot
|
||||||
from telebot.handler_backends import BaseMiddleware
|
from telebot.handler_backends import BaseMiddleware
|
||||||
from telebot.types import Message
|
from telebot.types import Message
|
||||||
|
@ -96,6 +97,12 @@ class Notifier:
|
||||||
|
|
||||||
def __init__(self, token: str):
|
def __init__(self, token: str):
|
||||||
self.bot: TeleBot = TeleBot(token)
|
self.bot: TeleBot = TeleBot(token)
|
||||||
|
self.html_sanitizer: Cleaner = Cleaner(
|
||||||
|
tags=['b', 'strong', 'i', 'em', 'u', 'ins', 's', 'strike', 'del', 'span', 'tg-spoiler', 'a', 'code', 'pre'],
|
||||||
|
attributes={"a": ["href", "title"]},
|
||||||
|
protocols=['http', 'https'],
|
||||||
|
strip=True,
|
||||||
|
)
|
||||||
|
|
||||||
def send_updates(self, chat_ids: list[int], updates: list[FeedItem], feed_title: str):
|
def send_updates(self, chat_ids: list[int], updates: list[FeedItem], feed_title: str):
|
||||||
"""Send notification about new items to the user"""
|
"""Send notification about new items to the user"""
|
||||||
|
@ -127,14 +134,19 @@ class Notifier:
|
||||||
self.sent_counter = 0
|
self.sent_counter = 0
|
||||||
self.sent_counter += 1
|
self.sent_counter += 1
|
||||||
|
|
||||||
@staticmethod
|
def __format_message(self, item: FeedItem) -> str:
|
||||||
def __format_message(item: FeedItem) -> str:
|
|
||||||
return (
|
return (
|
||||||
f"<strong><a href=\"{item.url}\">{item.title}</a></strong>\n\n"
|
f"<strong><a href=\"{item.url}\">{item.title}</a></strong>\n\n"
|
||||||
f"{item.date}\n"
|
# TODO: format properly when FeedItem starts to return proper datetime object
|
||||||
# f"{item.description}"
|
#f"{item.date}\n"
|
||||||
|
f"{self.__sanitize_html(item.description)}"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def __sanitize_html(self, html: str) -> str:
|
||||||
|
if not html:
|
||||||
|
return ''
|
||||||
|
return self.html_sanitizer.clean(html)
|
||||||
|
|
||||||
|
|
||||||
class UserAuthMiddleware(BaseMiddleware):
|
class UserAuthMiddleware(BaseMiddleware):
|
||||||
"""Transparently authenticates and registers the user if needed."""
|
"""Transparently authenticates and registers the user if needed."""
|
||||||
|
|
Loading…
Reference in a new issue