Sanitizing HTML to leave only HTML tags allowed by Telegram Bot API. (#33)
All checks were successful
continuous-integration/drone/push Build is passing

Reviewed-on: #33
Reviewed-by: Miroslavsckaya <miroslavsckaya@noreply.git.skobk.in>
Co-authored-by: Alexey Skobkin <skobkin-ru@ya.ru>
Co-committed-by: Alexey Skobkin <skobkin-ru@ya.ru>
This commit is contained in:
Alexey Skobkin 2022-07-10 13:22:44 +03:00 committed by Miroslavsckaya
parent 7c373c8f78
commit 56cb4138b5
2 changed files with 19 additions and 4 deletions

View file

@ -1,3 +1,4 @@
bleach==5.0.1
certifi==2021.10.8 certifi==2021.10.8
charset-normalizer==2.0.12 charset-normalizer==2.0.12
decorator==5.1.1 decorator==5.1.1
@ -7,5 +8,7 @@ pyTelegramBotAPI==4.5.0
python-dotenv==0.20.0 python-dotenv==0.20.0
requests==2.27.1 requests==2.27.1
sgmllib3k==1.0.0 sgmllib3k==1.0.0
six==1.16.0
urllib3==1.26.9 urllib3==1.26.9
validators==0.19.0 validators==0.19.0
webencodings==0.5.1

View file

@ -1,5 +1,6 @@
import time import time
from bleach.sanitizer import Cleaner
from telebot import TeleBot from telebot import TeleBot
from telebot.handler_backends import BaseMiddleware from telebot.handler_backends import BaseMiddleware
from telebot.types import Message from telebot.types import Message
@ -32,6 +33,7 @@ class CommandProcessor:
self.bot.infinity_polling() self.bot.infinity_polling()
def __command_help(self, message: Message, data: dict): def __command_help(self, message: Message, data: dict):
# pylint: disable=unused-argument
self.bot.reply_to( self.bot.reply_to(
message, message,
'Supported commands:\n' 'Supported commands:\n'
@ -96,6 +98,12 @@ class Notifier:
def __init__(self, token: str):
    """Create the bot client and the HTML sanitizer used for message bodies.

    :param token: value handed to ``TeleBot`` unchanged.
    """
    self.bot: TeleBot = TeleBot(token)
    # 'span' is intentionally NOT whitelisted. The sanitizer keeps no
    # attributes on it, so every feed <span> would be emitted bare, and a bare
    # <span> (without class="tg-spoiler") is not valid Telegram HTML markup.
    # With strip=True the tag is dropped and its inner text is preserved.
    # Spoilers remain expressible through the dedicated <tg-spoiler> tag.
    self.html_sanitizer: Cleaner = Cleaner(
        tags=[
            'b', 'strong',
            'i', 'em',
            'u', 'ins',
            's', 'strike', 'del',
            'tg-spoiler',
            'a',
            'code', 'pre',
        ],
        attributes={"a": ["href", "title"]},
        protocols=['http', 'https'],
        # Remove disallowed tags instead of escaping them into visible text.
        strip=True,
    )
def send_updates(self, chat_ids: list[int], updates: list[FeedItem], feed_title: str): def send_updates(self, chat_ids: list[int], updates: list[FeedItem], feed_title: str):
"""Send notification about new items to the user""" """Send notification about new items to the user"""
@ -127,14 +135,18 @@ class Notifier:
self.sent_counter = 0 self.sent_counter = 0
self.sent_counter += 1 self.sent_counter += 1
def __format_message(self, item: FeedItem) -> str:
    """Build the HTML text of a notification for one feed item.

    The result is a bold link (item title pointing at the item URL), a blank
    line, and the item description. Every feed-supplied value is neutralised
    before it is placed into the markup: the URL is escaped for use inside a
    double-quoted attribute, and the title and description both go through
    ``__sanitize_html``.

    :param item: feed item providing ``url``, ``title`` and ``description``.
    :return: message text containing HTML markup.
    """
    # Function-scope import keeps this change independent of the module's
    # import block.
    from html import escape

    # A raw '"' in the URL would otherwise terminate the href attribute early.
    link = escape(str(item.url), quote=True)
    # Same treatment as the description, so stray '<' / '&' or unsupported
    # tags in a title cannot break the surrounding <strong><a> markup.
    # NOTE(review): assumes item.title is a str or None - confirm in FeedItem.
    title = self.__sanitize_html(item.title)
    body = self.__sanitize_html(item.description)

    # TODO: Return date when FeedItem starts to return formattable datetime object
    return f"<strong><a href=\"{link}\">{title}</a></strong>\n\n{body}"
def __sanitize_html(self, html: str) -> str:
    """Return `html` cleaned by ``self.html_sanitizer``.

    A falsy value (empty string or ``None``) produces an empty string and
    never reaches the sanitizer.
    """
    return self.html_sanitizer.clean(html) if html else ''
class UserAuthMiddleware(BaseMiddleware): class UserAuthMiddleware(BaseMiddleware):
"""Transparently authenticates and registers the user if needed.""" """Transparently authenticates and registers the user if needed."""