From ecc97fa2a06f1eb29d407b431c61fa3acc5069d8 Mon Sep 17 00:00:00 2001 From: Alexey Skobkin Date: Sat, 9 Jul 2022 23:13:53 +0300 Subject: [PATCH] Fix #29. Sanitizing HTML to leave only HTML tags allowed by Telegram Bot API. --- requirements.txt | 3 +++ telegram.py | 20 ++++++++++++++++---- 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/requirements.txt b/requirements.txt index 46d6009..ec28d3f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ +bleach==5.0.1 certifi==2021.10.8 charset-normalizer==2.0.12 decorator==5.1.1 @@ -7,5 +8,7 @@ pyTelegramBotAPI==4.5.0 python-dotenv==0.20.0 requests==2.27.1 sgmllib3k==1.0.0 +six==1.16.0 urllib3==1.26.9 validators==0.19.0 +webencodings==0.5.1 diff --git a/telegram.py b/telegram.py index 6ed57d2..cf3cd63 100644 --- a/telegram.py +++ b/telegram.py @@ -1,5 +1,6 @@ import time +from bleach.sanitizer import Cleaner from telebot import TeleBot from telebot.handler_backends import BaseMiddleware from telebot.types import Message @@ -96,6 +97,12 @@ class Notifier: def __init__(self, token: str): self.bot: TeleBot = TeleBot(token) + self.html_sanitizer: Cleaner = Cleaner( + tags=['b', 'strong', 'i', 'em', 'u', 'ins', 's', 'strike', 'del', 'span', 'tg-spoiler', 'a', 'code', 'pre'], + attributes={"a": ["href", "title"]}, + protocols=['http', 'https'], + strip=True, + ) def send_updates(self, chat_ids: list[int], updates: list[FeedItem], feed_title: str): """Send notification about new items to the user""" @@ -127,14 +134,19 @@ class Notifier: self.sent_counter = 0 self.sent_counter += 1 - @staticmethod - def __format_message(item: FeedItem) -> str: + def __format_message(self, item: FeedItem) -> str: return ( f"{item.title}\n\n" - f"{item.date}\n" - # f"{item.description}" + # TODO: format properly when FeedItem starts to return proper datetime object + #f"{item.date}\n" + f"{self.__sanitize_html(item.description)}" ) + def __sanitize_html(self, html: str) -> str: + if not html: + return '' + return self.html_sanitizer.clean(html) + class UserAuthMiddleware(BaseMiddleware): """Transparently authenticates and registers the user if needed."""