Update manager #23

Merged
Miroslavsckaya merged 20 commits from feature_rss_update into master 2022-07-08 19:23:12 +00:00
7 changed files with 131 additions and 54 deletions

1
.gitignore vendored
View file

@ -3,6 +3,7 @@
# Python
/.venv
/__pycache__
skobkin marked this conversation as resolved
Review

Why? What is it for?

Why? What is it for?
# Database
/*.db

View file

@ -1,6 +1,7 @@
import sqlite3
from exceptions import DisplayableException
from rss import FeedItem
class Database:
@ -10,6 +11,7 @@ class Database:
"""Create a database file if not exists."""
# TODO: think about removing check_same_thread=False
self.conn = sqlite3.connect(path, check_same_thread=False)
self.conn.row_factory = sqlite3.Row
self.cur = self.conn.cursor()
self.__init_schema()
@ -25,7 +27,7 @@ class Database:
row = self.cur.fetchone()
if row is None:
return None
return row[0]
return row['id']
def add_feed(self, url: str) -> int:
"""Add a feed to the database and return its id."""
@ -39,7 +41,7 @@ class Database:
row = self.cur.fetchone()
if row is None:
return None
return row[0]
return row['id']
def subscribe_user_by_url(self, user_id: int, url: str) -> None:
"""Subscribe user to the feed creating it if does not exist yet."""
@ -58,6 +60,7 @@ class Database:
self.conn.commit()
def unsubscribe_user_by_url(self, user_id: int, url: str) -> None:
"""Subscribe a user to the feed by url."""
feed_id = self.find_feed_by_url(url)
if feed_id is None:
raise DisplayableException('Feed does not exist')
@ -91,36 +94,48 @@ class Database:
def get_feed_subscribers_count(self, feed_id: int) -> int:
"""Count feed subscribers."""
self.cur.execute('SELECT COUNT(user_id) FROM subscriptions WHERE feed_id = ?', [feed_id])
self.cur.execute('SELECT COUNT(user_id) AS amount_subscribers FROM subscriptions WHERE feed_id = ?', [feed_id])
row = self.cur.fetchone()
if row is None:
return 0
return int(row[0])
return row['amount_subscribers']
def find_feeds(self) -> list:
def find_feed_subscribers(self, feed_id: int) -> list[int]:
skobkin marked this conversation as resolved Outdated

Maybe we can use more specific type-hint here.

Maybe we can use more specific type-hint here.
"""Return feed subscribers"""
self.cur.execute('SELECT telegram_id FROM users WHERE id IN (SELECT user_id FROM subscriptions WHERE feed_id = ?)',
[feed_id])
subscribers = self.cur.fetchall()
return list(map(lambda x: x['telegram_id'], subscribers))
def find_feeds(self) -> list[sqlite3.Row]:
"""Get a list of feeds."""
self.cur.execute('SELECT * FROM feeds')
return self.cur.fetchall()
def find_user_feeds(self, user_id: int) -> list:
def find_user_feeds(self, user_id: int) -> list[sqlite3.Row]:
"""Return a list of feeds the user is subscribed to."""
self.cur.execute('SELECT * FROM feeds WHERE id IN (SELECT feed_id FROM subscriptions WHERE user_id = ?)',
[user_id])
return self.cur.fetchall()
def find_feed_items(self, feed_id: int) -> list:
def find_feed_items(self, feed_id: int) -> list[sqlite3.Row]:
"""Get last feed items."""
self.cur.execute('SELECT * FROM feeds_last_items WHERE feed_id = ?', [feed_id])
return self.cur.fetchall()
def update_feed_items(self, feed_id: int, new_items: list) -> None:
def find_feed_items_urls(self, feed_id: int) -> list[str]:
"""Return urls last feed items"""
items = self.find_feed_items(feed_id)
if not items:
return items
return list(map(lambda x: x['url'], items))
def update_feed_items(self, feed_id: int, new_items: list[FeedItem]) -> None:
"""Replace last feed items with a list items that receive."""
for i in range(len(new_items)):
new_items[i] = (feed_id,) + new_items[i]
for i, _ in enumerate(new_items):
new_items[i] = [feed_id] + list(new_items[i].__dict__.values())[:-1]
self.cur.execute('DELETE FROM feeds_last_items WHERE feed_id = ?', [feed_id])
self.cur.executemany(
'INSERT INTO feeds_last_items (feed_id, url, title, description, date) VALUES (?, ?, ?, ?, ?)', new_items)
'INSERT INTO feeds_last_items (feed_id, url, title, description) VALUES (?, ?, ?, ?)', new_items)
self.conn.commit()
def __init_schema(self):
@ -144,7 +159,6 @@ class Database:
' url TEXT NOT NULL UNIQUE,'
' title TEXT,'
' description TEXT,'
' date NUMERIC,'
skobkin marked this conversation as resolved Outdated

Now as we don't have migrations, to apply this change we either need to remove the database or execute the change statement manually.

It's just a note, not a change request.

Now as we don't have migrations, to apply this change we either need to remove the database or execute the change statement manually. It's just a note, not a change request.
' FOREIGN KEY(feed_id) REFERENCES feeds(id)'
')'
)

View file

@ -3,12 +3,12 @@
format = pylint
skip = .venv/*
linters = pyflakes,pylint,pycodestyle
ignore = F0401,C0114,R0903
ignore = F0401,C0114,R0903,C0115,C0116,W0511
[pylama:pylint]
max_line_length = 120
max_line_length = 130
score = yes
[pylama:pycodestyle]
# Maximum length of each line
max_line_length = 120
max_line_length = 130

49
rss.py
View file

@ -1,35 +1,26 @@
import feedparser
from feedparser import FeedParserDict, parse
class FeedItem():
def __init__(self, url: str, title: str, description: str) -> None:
class FeedItem:
def __init__(self, item: FeedParserDict) -> None:
skobkin marked this conversation as resolved Outdated

Just import FeedParserDict.

Just import `FeedParserDict`.
self.url = item.get('link', '')
self.title = item.get('title', '')
self.description = item.get('summary', '')
if 'published' in item:
skobkin marked this conversation as resolved Outdated

Do you really need a date here?

Do you really need a date here?
self.date = item.published_parsed()
else:
self.date = None
skobkin marked this conversation as resolved Outdated

None could probably be better here if we hadn't parse the date.

`None` could probably be better here if we hadn't parse the date.
skobkin marked this conversation as resolved Outdated

By the way, you can move all logic from RssReader.__get_items() and RssReader.__convert_to_feed_item() to the FeedItem itself.
If Feed knows what to do with URL, FeedItem could've known how to initialize itself from the dictionary.

// It's not a problem, just a suggestion of an alternative.
// You can resolve this thread yourself if you don't want to implement that this way.

By the way, you can move all logic from `RssReader.__get_items()` and `RssReader.__convert_to_feed_item()` to the `FeedItem` itself. If `Feed` knows what to do with URL, `FeedItem` could've known how to initialize itself from the dictionary. // It's not a problem, just a suggestion of an alternative. // You can **resolve** this thread yourself if you don't want to implement that this way.
class Feed:
def __init__(self, url: str, feed: FeedParserDict) -> None:
skobkin marked this conversation as resolved Outdated

It's just 3 lines. You can just do that in the constructor.

It's just 3 lines. You can just do that in the constructor.
self.url = url
self.title = title
self.description = description
self.items = []
self.title = feed.feed.get('title', '')
for item in feed.entries:
self.items.append(FeedItem(item))
class Feed():
def __init__(self, url: str, items: list[FeedItem]) -> None:
self.url = url
self.items = items
class RssReader():
class RssReader:
def get_feed(self, url: str) -> Feed:
f = feedparser.parse(url)
items = self.__get_items(f.entries)
return Feed(url, items)
def __convert_to_feed_item(self, item: dict) -> FeedItem:
if 'title' in item:
title = item['title']
if 'link' in item:
url = item['link']
if 'summary' in item:
description = item['summary']
return FeedItem(url, title, description)
def __get_items(self, items: list) -> list:
list_items = []
for item in items:
list_items.append(self.__convert_to_feed_item(item))
return list_items
return Feed(url, parse(url))

View file

@ -58,8 +58,8 @@ class CommandProcessor:
feeds = self.database.find_user_feeds(data['user_id'])
feed_list = ''
for feed in feeds:
feed_list += '* ' + str(feed[0]) + ': ' + feed[1] + '\n'
for index, feed in enumerate(feeds, start=1):
skobkin marked this conversation as resolved Outdated

count

Count Douku

It's better to use "number" or "index" here :)

> count ![Count Douku](https://d33wubrfki0l68.cloudfront.net/87c3b00eba91caf42e44d3d875fee707029779de/c8598/assets/img/products/hot-toys-mms-496-star-wars-ii-aotc-count-dooku/13.jpg) It's better to use "number" or "index" here :)
feed_list += '* ' + str(index) + ': ' + f'''<a href="{feed['url']}">{feed['title']}</a>''' + '\n'
skobkin marked this conversation as resolved Outdated

If we're using title now, we can show the title here and make it a link to the feed URL.

If we're using title now, we can show the title here and make it a link to the feed URL.
skobkin marked this conversation as resolved Outdated

You forgot the quotes surrounding href attribute value here.

http://htmlbook.ru/html/a/href

You forgot the quotes surrounding `href` attribute value here. http://htmlbook.ru/html/a/href
self.bot.reply_to(message, 'Your feeds:\n' + feed_list)
@ -97,16 +97,21 @@ class Notifier:
def __init__(self, token: str):
self.bot: TeleBot = TeleBot(token)
def send_updates(self, chat_ids: list[int], updates: list[FeedItem]):
def send_updates(self, chat_ids: list[int], updates: list[FeedItem], feed_title: str):
"""Send notification about new items to the user"""
if not updates:
return
skobkin marked this conversation as resolved Outdated

Do not forget about incrementing sent messages counter after sending the message.

Do not forget about incrementing sent messages counter after sending the message.
for chat_id in chat_ids:
self.__count_request_and_wait()
self.bot.send_message(
chat_id=chat_id,
text=f'Updates from the {feed_title} feed:'
)
for update in updates:
self.__count_request_and_wait()
self.__send_update(chat_id, update)
self.sent_counter += 1
if self.sent_counter >= self.BATCH_LIMIT:
# TODO: probably implement better later
time.sleep(1)
self.sent_counter = 0
def __send_update(self, chat_id: int, update: FeedItem):
self.bot.send_message(
@ -115,10 +120,18 @@ class Notifier:
parse_mode='HTML'
)
def __count_request_and_wait(self):
if self.sent_counter >= self.BATCH_LIMIT:
# TODO: probably implement better later
time.sleep(1)
self.sent_counter = 0

I'd suggest to place a date after the feed item title:

[title](https://skobk.in/some/post) [2022-06-12]

Once upon a time...
I'd suggest to place a date after the feed item title: ```markdown [title](https://skobk.in/some/post) [2022-06-12] Once upon a time... ```

Or HTML in our case... We've switched to it some time ago.

Or HTML in our case... We've switched to it some time ago.
self.sent_counter += 1
@staticmethod
def __format_message(item: FeedItem) -> str:
return (
f"<strong><a href=\"{item.url}\">{item.title}</a></strong>\n\n"
f"{item.date}\n"
f"{item.description}"
)

15
update.py Normal file
View file

@ -0,0 +1,15 @@
import os
from rss import RssReader
from update_manager import UpdateManager
from database import Database
from telegram import Notifier
token = os.getenv('TELEGRAM_TOKEN')
db_path = os.getenv('DATABASE_PATH')
db = Database(db_path)
notifier = Notifier(token)
rss_reader = RssReader()
updater = UpdateManager(db, notifier, rss_reader)
updater.update()

43
update_manager.py Normal file
View file

@ -0,0 +1,43 @@
from rss import RssReader, FeedItem
from database import Database
from telegram import Notifier
class UpdateManager:
"""Implement the feed update."""
skobkin marked this conversation as resolved Outdated

Typehints for properties would also be good too.

Typehints for properties would also be good too.
def __init__(self, database: Database, notifier: Notifier, rss_reader: RssReader) -> None:
self.database: Database = database
self.notifier: Notifier = notifier
self.rss_reader: RssReader = rss_reader
def update(self):
"""Send new feed items to the user."""
feeds = self.database.find_feeds()
skobkin marked this conversation as resolved Outdated

We're forgetting about the feed itself creating small UX problem for the bot user here. But it can be addressed later.

We're forgetting about the feed itself creating small UX problem for the bot user here. But it can be addressed later.
skobkin marked this conversation as resolved Outdated

old_items_urls (plural, same goes for method name)

`old_items_urls` (plural, same goes for method name)
for feed_id, feed_url in feeds:
feed = self.rss_reader.get_feed(feed_url)
new_items = feed.items
old_items_urls = self.database.find_feed_items_urls(feed_id)
diff = self.__calculate_difference(new_items, old_items_urls)
if not diff:
skobkin marked this conversation as resolved
Review

Why would you do that instead of just returning ready-for-use list of ID's from the Database?

Why would you do that instead of just returning ready-for-use list of ID's from the `Database`?
continue
chat_ids = self.database.find_feed_subscribers(feed_id)
self.notifier.send_updates(chat_ids, diff, feed.title)
self.database.update_feed_items(feed_id, new_items)
def __calculate_difference(self, new_items: list[FeedItem], old_items_urls: list[str]) -> list[FeedItem]:
"""Calculate new feed items."""
if not old_items_urls:
skobkin marked this conversation as resolved Outdated

The same as above goes here.

The same as above goes here.
return new_items
diff = []
for item in new_items:
if item.url not in old_items_urls:
diff.append(item)
skobkin marked this conversation as resolved Outdated

So many space in the end of the file. Is it reserved for future improvements? 😄

So many space in the end of the file. Is it reserved for future improvements? 😄
return diff