Sanitizing HTML to leave only HTML tags allowed by Telegram Bot API. (#33)
All checks were successful
continuous-integration/drone/push Build is passing
All checks were successful
continuous-integration/drone/push Build is passing
Reviewed-on: #33 Reviewed-by: Miroslavsckaya <miroslavsckaya@noreply.git.skobk.in> Co-authored-by: Alexey Skobkin <skobkin-ru@ya.ru> Co-committed-by: Alexey Skobkin <skobkin-ru@ya.ru>
This commit is contained in:
parent
7c373c8f78
commit
56cb4138b5
|
@ -1,3 +1,4 @@
|
|||
bleach==5.0.1
|
||||
certifi==2021.10.8
|
||||
charset-normalizer==2.0.12
|
||||
decorator==5.1.1
|
||||
|
@ -7,5 +8,7 @@ pyTelegramBotAPI==4.5.0
|
|||
python-dotenv==0.20.0
|
||||
requests==2.27.1
|
||||
sgmllib3k==1.0.0
|
||||
six==1.16.0
|
||||
urllib3==1.26.9
|
||||
validators==0.19.0
|
||||
webencodings==0.5.1
|
||||
|
|
20
telegram.py
20
telegram.py
|
@ -1,5 +1,6 @@
|
|||
import time
|
||||
|
||||
from bleach.sanitizer import Cleaner
|
||||
from telebot import TeleBot
|
||||
from telebot.handler_backends import BaseMiddleware
|
||||
from telebot.types import Message
|
||||
|
@ -32,6 +33,7 @@ class CommandProcessor:
|
|||
self.bot.infinity_polling()
|
||||
|
||||
def __command_help(self, message: Message, data: dict):
|
||||
# pylint: disable=unused-argument
|
||||
self.bot.reply_to(
|
||||
message,
|
||||
'Supported commands:\n'
|
||||
|
@ -96,6 +98,12 @@ class Notifier:
|
|||
|
||||
def __init__(self, token: str):
    """Create the Telegram bot client and the HTML sanitizer for outgoing messages.

    :param token: token passed straight to ``TeleBot``.
    """
    # Allow-lists handed to the sanitizer; anything outside them is removed
    # from the markup (strip=True) rather than escaped.
    allowed_tags = [
        'b', 'strong', 'i', 'em', 'u', 'ins', 's', 'strike', 'del',
        'span', 'tg-spoiler', 'a', 'code', 'pre',
    ]
    allowed_attributes = {"a": ["href", "title"]}
    allowed_protocols = ['http', 'https']

    self.bot: TeleBot = TeleBot(token)
    self.html_sanitizer: Cleaner = Cleaner(
        tags=allowed_tags,
        attributes=allowed_attributes,
        protocols=allowed_protocols,
        strip=True,
    )
|
||||
|
||||
def send_updates(self, chat_ids: list[int], updates: list[FeedItem], feed_title: str):
|
||||
"""Send notification about new items to the user"""
|
||||
|
@ -127,14 +135,18 @@ class Notifier:
|
|||
self.sent_counter = 0
|
||||
self.sent_counter += 1
|
||||
|
||||
def __format_message(self, item: FeedItem) -> str:
    """Build the HTML text of a notification for a single feed item.

    The title and URL are HTML-escaped before being placed into the markup,
    and the description is passed through the HTML sanitizer, so characters
    such as ``<``, ``&`` or ``"`` coming from the feed cannot break the
    message markup.

    :param item: feed item providing ``url``, ``title``, ``date`` and ``description``.
    :return: message text containing HTML markup.
    """
    # Function-scope import keeps this block self-contained.
    from html import escape

    # quote=True for the URL because it is emitted inside a double-quoted
    # attribute; the title is element text, so quotes can stay as they are.
    url = escape(str(item.url), quote=True)
    title = escape(str(item.title), quote=False)

    return (
        # TODO: Return date when FeedItem starts to return formattable datetime object
        f"<strong><a href=\"{url}\">{title}</a></strong>\n\n"
        f"{item.date}\n"
        f"{self.__sanitize_html(item.description)}"
    )
|
||||
|
||||
def __sanitize_html(self, html: str) -> str:
    """Return ``html`` cleaned by ``self.html_sanitizer``.

    Any falsy input (``None`` or an empty string) yields an empty string
    without calling the sanitizer.
    """
    return self.html_sanitizer.clean(html) if html else ''
|
||||
|
||||
|
||||
class UserAuthMiddleware(BaseMiddleware):
|
||||
"""Transparently authenticates and registers the user if needed."""
|
||||
|
|
Loading…
Reference in a new issue