From 1bdb00e8e7cb0ce27e61f31276dfc8daef88bda2 Mon Sep 17 00:00:00 2001 From: mitsuha_s Date: Mon, 11 Jul 2022 00:25:44 +0000 Subject: [PATCH 01/16] WIP: migrate to PostgreSQL --- README.md | 8 +++---- bot.py | 6 ++--- database.py | 58 +++++++++++++++++++++++------------------------- requirements.txt | 1 + telegram.py | 7 +++--- update.py | 6 ++--- 6 files changed, 43 insertions(+), 43 deletions(-) diff --git a/README.md b/README.md index 2439a59..64300b7 100644 --- a/README.md +++ b/README.md @@ -33,14 +33,14 @@ the `requirements.txt` file. Otherwise old dependencies could be lost. ## Running the bot ```shell -export TELEGRAM_TOKEN=xxx -export DATABASE_PATH=./database.db +export RSSBOT_TG_TOKEN=xxx +export RSSBOT_DSN=xxx python bot.py ``` ## Running the update ```shell -export TELEGRAM_TOKEN=xxx -export DATABASE_PATH=./database.db +export RSSBOT_TG_TOKEN=xxx +export RSSBOT_DSN=xxx python update.py ``` \ No newline at end of file diff --git a/bot.py b/bot.py index f215ab1..254f022 100644 --- a/bot.py +++ b/bot.py @@ -8,8 +8,8 @@ from telegram import CommandProcessor load_dotenv() -token = os.getenv('TELEGRAM_TOKEN') -db_path = os.getenv('DATABASE_PATH', './bot.db') +token = os.getenv('RSSBOT_TG_TOKEN') +db_dsn = os.getenv('RSSBOT_DSN',) log_level = os.getenv('LOG_LEVEL', 'INFO') print('Starting the bot with logging level', log_level.upper()) @@ -19,7 +19,7 @@ logging.basicConfig( datefmt='%Y-%m-%d %H:%M:%S', ) -db = Database(db_path, logging.getLogger('Database')) +db = Database(db_dsn, logging.getLogger('Database')) bot = CommandProcessor(token, db, logging.getLogger('CommandProcessor')) bot.run() diff --git a/database.py b/database.py index 83908b3..ce25864 100644 --- a/database.py +++ b/database.py @@ -1,4 +1,5 @@ -import sqlite3 +import psycopg2 +import psycopg2.extras from logging import Logger @@ -9,27 +10,25 @@ from rss import FeedItem class Database: """Implement interaction with the database.""" - def __init__(self, path: str, log: Logger) -> None: + def 
__init__(self, dsn: str, log: Logger) -> None: """Create a database file if not exists.""" self.log: Logger = log - self.log.debug('Database.__init__(path=\'%s\')', path) - # TODO: think about removing check_same_thread=False - self.conn = sqlite3.connect(path, check_same_thread=False) - self.conn.row_factory = sqlite3.Row - self.cur = self.conn.cursor() + self.log.debug('Database.__init__(DSN=\'%s\')', dsn) + self.conn = psycopg2.connect(dsn) + self.cur = self.conn.cursor(cursor_factory=psycopg2.extras.DictCursor) self.__init_schema() def add_user(self, telegram_id: int) -> int: """Add a user's telegram id to the database and return its database id.""" self.log.debug('add_user(telegram_id=\'%s\')', telegram_id) - self.cur.execute('INSERT INTO users (telegram_id) VALUES (?)', [telegram_id]) + self.cur.execute('INSERT INTO users (telegram_id) VALUES (%s)', [telegram_id]) self.conn.commit() - return self.cur.lastrowid + return self.find_user(telegram_id) def find_user(self, telegram_id: int) -> int | None: """Get a user's telegram id and return its database id.""" self.log.debug('find_user(telegram_id=\'%s\')', telegram_id) - self.cur.execute('SELECT id FROM users WHERE telegram_id = ?', [telegram_id]) + self.cur.execute('SELECT id FROM users WHERE telegram_id = %s', [telegram_id]) row = self.cur.fetchone() if row is None: return None @@ -38,14 +37,14 @@ class Database: def add_feed(self, url: str) -> int: """Add a feed to the database and return its id.""" self.log.debug('add_feed(url=\'%s\')', url) - self.cur.execute('INSERT INTO feeds (url) VALUES (?)', [url]) + self.cur.execute('INSERT INTO feeds (url) VALUES (%s)', [url]) self.conn.commit() - return self.cur.lastrowid + return self.find_feed_by_url(url) def find_feed_by_url(self, url: str) -> int | None: """Find feed ID by url.""" self.log.debug('find_feed_by_url(url=\'%s\')', url) - self.cur.execute('SELECT id FROM feeds WHERE url = ?', [url]) + self.cur.execute('SELECT id FROM feeds WHERE url = %s', [url]) row 
= self.cur.fetchone() if row is None: return None @@ -66,7 +65,7 @@ class Database: def subscribe_user(self, user_id: int, feed_id: int) -> None: """Subscribe a user to the feed.""" self.log.debug('subscribe_user(user_id=\'%s\', feed_id=\'%s\')', user_id, feed_id) - self.cur.execute('INSERT INTO subscriptions (user_id, feed_id) VALUES (?, ?)', [user_id, feed_id]) + self.cur.execute('INSERT INTO subscriptions (user_id, feed_id) VALUES (%s, %s)', [user_id, feed_id]) self.conn.commit() def unsubscribe_user_by_url(self, user_id: int, url: str) -> None: @@ -88,13 +87,13 @@ class Database: def unsubscribe_user(self, user_id: int, feed_id: int) -> None: """Unsubscribe a user from the feed.""" self.log.debug('unsubscribe_user(user_id=\'%s\', feed_id=\'%s\')', user_id, feed_id) - self.cur.execute('DELETE FROM subscriptions WHERE feed_id = ? AND user_id = ?', [feed_id, user_id]) + self.cur.execute('DELETE FROM subscriptions WHERE feed_id = %s AND user_id = %s', [feed_id, user_id]) self.conn.commit() def is_user_subscribed(self, user_id: int, feed_id: int) -> bool: """Check if user subscribed to specific feed.""" self.log.debug('is_user_subscribed(user_id=\'%s\', feed_id=\'%s\')', user_id, feed_id) - self.cur.execute('SELECT 1 FROM subscriptions WHERE user_id = ? 
AND feed_id = ?', [user_id, feed_id]) + self.cur.execute('SELECT 1 FROM subscriptions WHERE user_id = %s AND feed_id = %s', [user_id, feed_id]) row = self.cur.fetchone() if row is None: return False @@ -103,41 +102,41 @@ class Database: def delete_feed(self, feed_id: int) -> None: """Delete a feed.""" self.log.debug('delete_feed(feed_id=\'%s\')', feed_id) - self.cur.execute('DELETE FROM feeds WHERE id = ?', [feed_id]) + self.cur.execute('DELETE FROM feeds WHERE id = %s', [feed_id]) self.conn.commit() def get_feed_subscribers_count(self, feed_id: int) -> int: """Count feed subscribers.""" self.log.debug('get_feed_subscribers_count(feed_id=\'%s\')', feed_id) - self.cur.execute('SELECT COUNT(user_id) AS amount_subscribers FROM subscriptions WHERE feed_id = ?', [feed_id]) + self.cur.execute('SELECT COUNT(user_id) AS amount_subscribers FROM subscriptions WHERE feed_id = %s', [feed_id]) row = self.cur.fetchone() return row['amount_subscribers'] def find_feed_subscribers(self, feed_id: int) -> list[int]: """Return feed subscribers""" self.log.debug('find_feed_subscribers(feed_id=\'%s\')', feed_id) - self.cur.execute('SELECT telegram_id FROM users WHERE id IN (SELECT user_id FROM subscriptions WHERE feed_id = ?)', + self.cur.execute('SELECT telegram_id FROM users WHERE id IN (SELECT user_id FROM subscriptions WHERE feed_id = %s)', [feed_id]) subscribers = self.cur.fetchall() return list(map(lambda x: x['telegram_id'], subscribers)) - def find_feeds(self) -> list[sqlite3.Row]: + def find_feeds(self) -> list[psycopg2.extras.DictRow]: """Get a list of feeds.""" self.log.debug('find_feeds()') self.cur.execute('SELECT * FROM feeds') return self.cur.fetchall() - def find_user_feeds(self, user_id: int) -> list[sqlite3.Row]: + def find_user_feeds(self, user_id: int) -> list[psycopg2.extras.DictRow]: """Return a list of feeds the user is subscribed to.""" self.log.debug('find_user_feeds(user_id=\'%s\')', user_id) - self.cur.execute('SELECT * FROM feeds WHERE id IN (SELECT feed_id 
FROM subscriptions WHERE user_id = ?)', + self.cur.execute('SELECT * FROM feeds WHERE id IN (SELECT feed_id FROM subscriptions WHERE user_id = %s)', [user_id]) return self.cur.fetchall() - def find_feed_items(self, feed_id: int) -> list[sqlite3.Row]: + def find_feed_items(self, feed_id: int) -> list[psycopg2.extras.DictRow]: """Get last feed items.""" self.log.debug('find_feed_items(feed_id=\'%s\')', feed_id) - self.cur.execute('SELECT * FROM feeds_last_items WHERE feed_id = ?', [feed_id]) + self.cur.execute('SELECT * FROM feeds_last_items WHERE feed_id = %s', [feed_id]) return self.cur.fetchall() def find_feed_items_urls(self, feed_id: int) -> list[str]: @@ -154,18 +153,17 @@ class Database: for i, _ in enumerate(new_items): new_items[i] = [feed_id] + list(new_items[i].__dict__.values())[:-1] - self.cur.execute('DELETE FROM feeds_last_items WHERE feed_id = ?', [feed_id]) + self.cur.execute('DELETE FROM feeds_last_items WHERE feed_id = %s', [feed_id]) self.cur.executemany( - 'INSERT INTO feeds_last_items (feed_id, url, title, description) VALUES (?, ?, ?, ?)', new_items) + 'INSERT INTO feeds_last_items (feed_id, url, title, description) VALUES (%s, %s, %s, %s)', new_items) self.conn.commit() def __init_schema(self) -> None: self.log.debug('__init_schema()') - # TODO: Change to migrations self.cur.execute( - 'CREATE TABLE IF NOT EXISTS users (id INTEGER, telegram_id INTEGER NOT NULL UNIQUE, PRIMARY KEY(id))' + 'CREATE TABLE IF NOT EXISTS users (id SERIAL PRIMARY KEY, telegram_id INTEGER NOT NULL UNIQUE)' ) - self.cur.execute('CREATE TABLE IF NOT EXISTS feeds (id INTEGER, url TEXT NOT NULL UNIQUE, PRIMARY KEY(id))') + self.cur.execute('CREATE TABLE IF NOT EXISTS feeds (id SERIAL PRIMARY KEY, url TEXT NOT NULL UNIQUE)') self.cur.execute( 'CREATE TABLE IF NOT EXISTS subscriptions (' ' user_id INTEGER,' @@ -178,7 +176,7 @@ class Database: self.cur.execute( 'CREATE TABLE IF NOT EXISTS feeds_last_items (' ' feed_id INTEGER,' - ' url TEXT NOT NULL UNIQUE,' + ' url TEXT NOT 
NULL,' ' title TEXT,' ' description TEXT,' ' FOREIGN KEY(feed_id) REFERENCES feeds(id)' diff --git a/requirements.txt b/requirements.txt index ec28d3f..cc5e556 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,6 +4,7 @@ charset-normalizer==2.0.12 decorator==5.1.1 feedparser==6.0.2 idna==3.3 +psycopg2-binary==2.9.3 pyTelegramBotAPI==4.5.0 python-dotenv==0.20.0 requests==2.27.1 diff --git a/telegram.py b/telegram.py index 27e44dd..c86289d 100644 --- a/telegram.py +++ b/telegram.py @@ -106,7 +106,8 @@ class CommandProcessor: class Notifier: """Sends notifications to users about new RSS feed items.""" - BATCH_LIMIT: int = 30 + # https://core.telegram.org/bots/faq#my-bot-is-hitting-limits-how-do-i-avoid-this + BATCH_LIMIT: int = 25 sent_counter: int = 0 @@ -115,7 +116,7 @@ class Notifier: self.log.debug('Notifier.__init__(token=\'%s\', logger=%s)', token[:8] + '...', logger) self.bot: TeleBot = TeleBot(token) self.html_sanitizer: Cleaner = Cleaner( - tags=['b', 'strong', 'i', 'em', 'u', 'ins', 's', 'strike', 'del', 'span', 'tg-spoiler', 'a', 'code', 'pre'], + tags=['b', 'strong', 'i', 'em', 'u', 'ins', 's', 'strike', 'del', 'tg-spoiler', 'a', 'code', 'pre'], attributes={"a": ["href", "title"]}, protocols=['http', 'https'], strip=True, @@ -156,7 +157,7 @@ class Notifier: self.log.debug('__count_request_and_wait()') if self.sent_counter >= self.BATCH_LIMIT: self.log.debug('Requests limit exceeded, sleeping for a second') - time.sleep(1) + time.sleep(2) self.log.debug('Resetting counter') self.sent_counter = 0 self.sent_counter += 1 diff --git a/update.py b/update.py index 4714c24..d679170 100644 --- a/update.py +++ b/update.py @@ -10,8 +10,8 @@ from telegram import Notifier load_dotenv() -token = os.getenv('TELEGRAM_TOKEN') -db_path = os.getenv('DATABASE_PATH', './bot.db') +token = os.getenv('RSSBOT_TG_TOKEN') +db_dsn = os.getenv('RSSBOT_DSN') log_level = os.getenv('LOG_LEVEL', 'INFO') print('Starting the updater with logging level', log_level.upper()) @@ -21,7 
+21,7 @@ logging.basicConfig( datefmt='%Y-%m-%d %H:%M:%S' ) -db = Database(db_path, logging.getLogger('Database')) +db = Database(db_dsn, logging.getLogger('Database')) notifier = Notifier(token, logging.getLogger('Notifier')) rss_reader = RssReader(logging.getLogger('RssReader')) -- 2.43.5 From f11b0b91af67eefb3da7c0a1dad27ae2b850e622 Mon Sep 17 00:00:00 2001 From: mitsuha_s Date: Wed, 13 Jul 2022 19:19:54 +0000 Subject: [PATCH 02/16] fix removal feed, refactor return id and add type hints --- database.py | 34 ++++++++++++++++------------------ 1 file changed, 16 insertions(+), 18 deletions(-) diff --git a/database.py b/database.py index ce25864..9a20480 100644 --- a/database.py +++ b/database.py @@ -1,5 +1,6 @@ -import psycopg2 -import psycopg2.extras +import psycopg2 +from psycopg2.extensions import connection +from psycopg2.extras import DictCursor, DictRow from logging import Logger @@ -14,16 +15,16 @@ class Database: """Create a database file if not exists.""" self.log: Logger = log self.log.debug('Database.__init__(DSN=\'%s\')', dsn) - self.conn = psycopg2.connect(dsn) - self.cur = self.conn.cursor(cursor_factory=psycopg2.extras.DictCursor) + self.conn: connection = psycopg2.connect(dsn) + self.cur: DictCursor = self.conn.cursor(cursor_factory=DictCursor) self.__init_schema() def add_user(self, telegram_id: int) -> int: """Add a user's telegram id to the database and return its database id.""" self.log.debug('add_user(telegram_id=\'%s\')', telegram_id) - self.cur.execute('INSERT INTO users (telegram_id) VALUES (%s)', [telegram_id]) + id = self.cur.execute('INSERT INTO users (telegram_id) VALUES (%s) RETURNING id', [telegram_id]) self.conn.commit() - return self.find_user(telegram_id) + return id def find_user(self, telegram_id: int) -> int | None: """Get a user's telegram id and return its database id.""" @@ -37,9 +38,9 @@ class Database: def add_feed(self, url: str) -> int: """Add a feed to the database and return its id.""" 
self.log.debug('add_feed(url=\'%s\')', url) - self.cur.execute('INSERT INTO feeds (url) VALUES (%s)', [url]) + id = self.cur.execute('INSERT INTO feeds (url) VALUES (%s) RETURNING id', [url]) self.conn.commit() - return self.find_feed_by_url(url) + return id def find_feed_by_url(self, url: str) -> int | None: """Find feed ID by url.""" @@ -126,14 +127,14 @@ class Database: self.cur.execute('SELECT * FROM feeds') return self.cur.fetchall() - def find_user_feeds(self, user_id: int) -> list[psycopg2.extras.DictRow]: + def find_user_feeds(self, user_id: int) -> list[DictRow]: """Return a list of feeds the user is subscribed to.""" self.log.debug('find_user_feeds(user_id=\'%s\')', user_id) self.cur.execute('SELECT * FROM feeds WHERE id IN (SELECT feed_id FROM subscriptions WHERE user_id = %s)', [user_id]) return self.cur.fetchall() - def find_feed_items(self, feed_id: int) -> list[psycopg2.extras.DictRow]: + def find_feed_items(self, feed_id: int) -> list[DictRow]: """Get last feed items.""" self.log.debug('find_feed_items(feed_id=\'%s\')', feed_id) self.cur.execute('SELECT * FROM feeds_last_items WHERE feed_id = %s', [feed_id]) @@ -166,19 +167,16 @@ class Database: self.cur.execute('CREATE TABLE IF NOT EXISTS feeds (id SERIAL PRIMARY KEY, url TEXT NOT NULL UNIQUE)') self.cur.execute( 'CREATE TABLE IF NOT EXISTS subscriptions (' - ' user_id INTEGER,' - ' feed_id INTEGER,' - ' UNIQUE (user_id, feed_id),' - ' FOREIGN KEY(user_id) REFERENCES users(id),' - ' FOREIGN KEY(feed_id) REFERENCES feeds(id)' + ' user_id INTEGER REFERENCES users,' + ' feed_id INTEGER REFERENCES feeds,' + ' UNIQUE (user_id, feed_id)' ')' ) self.cur.execute( 'CREATE TABLE IF NOT EXISTS feeds_last_items (' - ' feed_id INTEGER,' + ' feed_id INTEGER REFERENCES feeds ON DELETE CASCADE,' ' url TEXT NOT NULL,' ' title TEXT,' - ' description TEXT,' - ' FOREIGN KEY(feed_id) REFERENCES feeds(id)' + ' description TEXT' ')' ) -- 2.43.5 From 5d0a6843b67aa96ee904a18393aa64402f9b755f Mon Sep 17 00:00:00 2001 From: 
mitsuha_s Date: Wed, 13 Jul 2022 20:15:11 +0000 Subject: [PATCH 03/16] fix return values in add_user and add_feed methods --- database.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/database.py b/database.py index 9a20480..6d8c947 100644 --- a/database.py +++ b/database.py @@ -22,9 +22,9 @@ class Database: def add_user(self, telegram_id: int) -> int: """Add a user's telegram id to the database and return its database id.""" self.log.debug('add_user(telegram_id=\'%s\')', telegram_id) - id = self.cur.execute('INSERT INTO users (telegram_id) VALUES (%s) RETURNING id', [telegram_id]) + self.cur.execute('INSERT INTO users (telegram_id) VALUES (%s) RETURNING id', [telegram_id]) self.conn.commit() - return id + return self.cur.fetchone()[0] def find_user(self, telegram_id: int) -> int | None: """Get a user's telegram id and return its database id.""" @@ -38,9 +38,9 @@ class Database: def add_feed(self, url: str) -> int: """Add a feed to the database and return its id.""" self.log.debug('add_feed(url=\'%s\')', url) - id = self.cur.execute('INSERT INTO feeds (url) VALUES (%s) RETURNING id', [url]) + self.cur.execute('INSERT INTO feeds (url) VALUES (%s) RETURNING id', [url]) self.conn.commit() - return id + return self.cur.fetchone()[0] def find_feed_by_url(self, url: str) -> int | None: """Find feed ID by url.""" @@ -121,7 +121,7 @@ class Database: subscribers = self.cur.fetchall() return list(map(lambda x: x['telegram_id'], subscribers)) - def find_feeds(self) -> list[psycopg2.extras.DictRow]: + def find_feeds(self) -> list[DictRow]: """Get a list of feeds.""" self.log.debug('find_feeds()') self.cur.execute('SELECT * FROM feeds') -- 2.43.5 From feef909c61859e21e75a08a2dcca41fbd7071fc7 Mon Sep 17 00:00:00 2001 From: mitsuha_s Date: Wed, 13 Jul 2022 20:39:17 +0000 Subject: [PATCH 04/16] add __convert_to_list_of_dicts method --- database.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/database.py 
b/database.py index 6d8c947..357bcc8 100644 --- a/database.py +++ b/database.py @@ -121,24 +121,24 @@ class Database: subscribers = self.cur.fetchall() return list(map(lambda x: x['telegram_id'], subscribers)) - def find_feeds(self) -> list[DictRow]: + def find_feeds(self) -> list[dict]: """Get a list of feeds.""" self.log.debug('find_feeds()') self.cur.execute('SELECT * FROM feeds') - return self.cur.fetchall() + return self.__convert_to_list_of_dicts(self.cur.fetchall()) - def find_user_feeds(self, user_id: int) -> list[DictRow]: + def find_user_feeds(self, user_id: int) -> list[dict]: """Return a list of feeds the user is subscribed to.""" self.log.debug('find_user_feeds(user_id=\'%s\')', user_id) self.cur.execute('SELECT * FROM feeds WHERE id IN (SELECT feed_id FROM subscriptions WHERE user_id = %s)', - [user_id]) - return self.cur.fetchall() + [user_id]) + return self.__convert_to_list_of_dicts(self.cur.fetchall()) - def find_feed_items(self, feed_id: int) -> list[DictRow]: + def find_feed_items(self, feed_id: int) -> list[dict]: """Get last feed items.""" self.log.debug('find_feed_items(feed_id=\'%s\')', feed_id) self.cur.execute('SELECT * FROM feeds_last_items WHERE feed_id = %s', [feed_id]) - return self.cur.fetchall() + return self.__convert_to_list_of_dicts(self.cur.fetchall()) def find_feed_items_urls(self, feed_id: int) -> list[str]: """Return urls last feed items""" @@ -180,3 +180,8 @@ class Database: ' description TEXT' ')' ) + + @staticmethod + def __convert_to_list_of_dicts(rows: list[DictRow]) -> list[dict]: + """Convert list of DictRows to list of dicts""" + return list(map(lambda x: dict(x), rows)) -- 2.43.5 From 50ac34409544e1ec346266183f9a35860a62d81b Mon Sep 17 00:00:00 2001 From: mitsuha_s Date: Wed, 13 Jul 2022 20:50:03 +0000 Subject: [PATCH 05/16] rename __convert_to_list_of_dicts method to __dictrow_to_dict_list --- database.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/database.py b/database.py index 
357bcc8..a336317 100644 --- a/database.py +++ b/database.py @@ -125,20 +125,20 @@ class Database: """Get a list of feeds.""" self.log.debug('find_feeds()') self.cur.execute('SELECT * FROM feeds') - return self.__convert_to_list_of_dicts(self.cur.fetchall()) + return self.__dictrow_to_dict_list(self.cur.fetchall()) def find_user_feeds(self, user_id: int) -> list[dict]: """Return a list of feeds the user is subscribed to.""" self.log.debug('find_user_feeds(user_id=\'%s\')', user_id) self.cur.execute('SELECT * FROM feeds WHERE id IN (SELECT feed_id FROM subscriptions WHERE user_id = %s)', [user_id]) - return self.__convert_to_list_of_dicts(self.cur.fetchall()) + return self.__dictrow_to_dict_list(self.cur.fetchall()) def find_feed_items(self, feed_id: int) -> list[dict]: """Get last feed items.""" self.log.debug('find_feed_items(feed_id=\'%s\')', feed_id) self.cur.execute('SELECT * FROM feeds_last_items WHERE feed_id = %s', [feed_id]) - return self.__convert_to_list_of_dicts(self.cur.fetchall()) + return self.__dictrow_to_dict_list(self.cur.fetchall()) def find_feed_items_urls(self, feed_id: int) -> list[str]: """Return urls last feed items""" @@ -182,6 +182,6 @@ class Database: ) @staticmethod - def __convert_to_list_of_dicts(rows: list[DictRow]) -> list[dict]: + def __dictrow_to_dict_list(rows: list[DictRow]) -> list[dict]: """Convert list of DictRows to list of dicts""" return list(map(lambda x: dict(x), rows)) -- 2.43.5 From 2dde80f71552837d7cf7cd66b94d6e59238159b5 Mon Sep 17 00:00:00 2001 From: mitsuha_s Date: Wed, 13 Jul 2022 21:54:15 +0000 Subject: [PATCH 06/16] code style changes --- database.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/database.py b/database.py index a336317..2e0a58f 100644 --- a/database.py +++ b/database.py @@ -1,9 +1,7 @@ -import psycopg2 +from logging import Logger +import psycopg2 from psycopg2.extensions import connection from psycopg2.extras import DictCursor, DictRow - -from logging import Logger - 
from exceptions import DisplayableException from rss import FeedItem @@ -131,7 +129,7 @@ class Database: """Return a list of feeds the user is subscribed to.""" self.log.debug('find_user_feeds(user_id=\'%s\')', user_id) self.cur.execute('SELECT * FROM feeds WHERE id IN (SELECT feed_id FROM subscriptions WHERE user_id = %s)', - [user_id]) + [user_id]) return self.__dictrow_to_dict_list(self.cur.fetchall()) def find_feed_items(self, feed_id: int) -> list[dict]: @@ -184,4 +182,4 @@ class Database: @staticmethod def __dictrow_to_dict_list(rows: list[DictRow]) -> list[dict]: """Convert list of DictRows to list of dicts""" - return list(map(lambda x: dict(x), rows)) + return list(dict, rows) -- 2.43.5 From 6759e61bc27ce0b001b3a85a625a7d988efbe0f6 Mon Sep 17 00:00:00 2001 From: mitsuha_s Date: Wed, 13 Jul 2022 22:09:04 +0000 Subject: [PATCH 07/16] fix __dictrow_to_dict_list --- database.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/database.py b/database.py index 2e0a58f..e094b6f 100644 --- a/database.py +++ b/database.py @@ -182,4 +182,4 @@ class Database: @staticmethod def __dictrow_to_dict_list(rows: list[DictRow]) -> list[dict]: """Convert list of DictRows to list of dicts""" - return list(dict, rows) + return list(map(dict, rows)) -- 2.43.5 From ae31be33b67f54c187f6e0ef0ed1c02ce46cf573 Mon Sep 17 00:00:00 2001 From: mitsuha_s Date: Wed, 13 Jul 2022 22:32:47 +0000 Subject: [PATCH 08/16] fix update method in UpdateManager --- update_manager.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/update_manager.py b/update_manager.py index 9ff5218..ab0ea0d 100644 --- a/update_manager.py +++ b/update_manager.py @@ -24,7 +24,8 @@ class UpdateManager: feeds = self.database.find_feeds() self.log.info('Feeds to update: %d', len(feeds)) - for feed_id, feed_url in feeds: + for feed in feeds: + feed_id, feed_url = feed.values() self.log.info('Processing [%d] %s', feed_id, feed_url) feed = self.rss_reader.get_feed(feed_url) new_items = 
feed.items -- 2.43.5 From 65350552680796c7da9cd1bc68aaf3e929ebd0fd Mon Sep 17 00:00:00 2001 From: mitsuha_s Date: Wed, 13 Jul 2022 23:03:01 +0000 Subject: [PATCH 09/16] refuse to unpack the dict in UpdateManager --- update_manager.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/update_manager.py b/update_manager.py index ab0ea0d..abe94bf 100644 --- a/update_manager.py +++ b/update_manager.py @@ -25,20 +25,19 @@ class UpdateManager: self.log.info('Feeds to update: %d', len(feeds)) for feed in feeds: - feed_id, feed_url = feed.values() - self.log.info('Processing [%d] %s', feed_id, feed_url) - feed = self.rss_reader.get_feed(feed_url) - new_items = feed.items - old_items_urls = self.database.find_feed_items_urls(feed_id) + self.log.info('Processing [%d] %s', feed['id'], feed['url']) + feed_obj = self.rss_reader.get_feed(feed['url']) + new_items = feed_obj.items + old_items_urls = self.database.find_feed_items_urls(feed['id']) diff = self.__calculate_difference(new_items, old_items_urls) if not diff: continue - chat_ids = self.database.find_feed_subscribers(feed_id) - self.notifier.send_updates(chat_ids, diff, feed.title) - self.database.update_feed_items(feed_id, new_items) + chat_ids = self.database.find_feed_subscribers(feed['id']) + self.notifier.send_updates(chat_ids, diff, feed_obj.title) + self.database.update_feed_items(feed['id'], new_items) def __calculate_difference(self, new_items: list[FeedItem], old_items_urls: list[str]) -> list[FeedItem]: """Calculate new feed items.""" -- 2.43.5 From c91549a3111ea8545f73c50eb608f00a6f4a54f3 Mon Sep 17 00:00:00 2001 From: mitsuha_s Date: Thu, 14 Jul 2022 00:41:58 +0000 Subject: [PATCH 10/16] bug fix. 
add comparison by guid instead of url --- database.py | 5 +++-- rss.py | 1 + update_manager.py | 19 ++++++++++++------- 3 files changed, 16 insertions(+), 9 deletions(-) diff --git a/database.py b/database.py index e094b6f..e577fba 100644 --- a/database.py +++ b/database.py @@ -154,7 +154,7 @@ class Database: self.cur.execute('DELETE FROM feeds_last_items WHERE feed_id = %s', [feed_id]) self.cur.executemany( - 'INSERT INTO feeds_last_items (feed_id, url, title, description) VALUES (%s, %s, %s, %s)', new_items) + 'INSERT INTO feeds_last_items (feed_id, url, title, description, guid) VALUES (%s, %s, %s, %s, %s)', new_items) self.conn.commit() def __init_schema(self) -> None: @@ -175,7 +175,8 @@ class Database: ' feed_id INTEGER REFERENCES feeds ON DELETE CASCADE,' ' url TEXT NOT NULL,' ' title TEXT,' - ' description TEXT' + ' description TEXT,' + ' guid TEXT' ')' ) diff --git a/rss.py b/rss.py index d591843..74cd94f 100644 --- a/rss.py +++ b/rss.py @@ -9,6 +9,7 @@ class FeedItem: self.url = item.get('link', '') self.title = item.get('title', '') self.description = item.get('summary', '') + self.guid = item.get('id', '') if 'published' in item: self.date = datetime.fromtimestamp(mktime(item.published_parsed)) else: diff --git a/update_manager.py b/update_manager.py index abe94bf..863a9c0 100644 --- a/update_manager.py +++ b/update_manager.py @@ -28,9 +28,9 @@ class UpdateManager: self.log.info('Processing [%d] %s', feed['id'], feed['url']) feed_obj = self.rss_reader.get_feed(feed['url']) new_items = feed_obj.items - old_items_urls = self.database.find_feed_items_urls(feed['id']) + old_items = self.database.find_feed_items(feed['id']) - diff = self.__calculate_difference(new_items, old_items_urls) + diff = self.__calculate_difference(new_items, old_items) if not diff: continue @@ -39,20 +39,25 @@ class UpdateManager: self.notifier.send_updates(chat_ids, diff, feed_obj.title) self.database.update_feed_items(feed['id'], new_items) - def __calculate_difference(self, 
new_items: list[FeedItem], old_items_urls: list[str]) -> list[FeedItem]: + def __calculate_difference(self, new_items: list[FeedItem], old_items: list[dict]) -> list[FeedItem]: """Calculate new feed items.""" self.log.debug( - '__calculate_difference(new_items=list(%d), old_items_urls=list(%d))', len(new_items), len(old_items_urls) + '__calculate_difference(new_items=list(%d), old_items=list(%d))', len(new_items), len(old_items) ) - if not old_items_urls: + if not old_items: self.log.debug('Old items are empty, returning new') return new_items diff = [] + guids = [item['guid'] for item in old_items if item['guid']] + urls = [item['url'] for item in old_items] - self.log.debug('Comparing %d new items with %d old', len(new_items), len(old_items_urls)) + self.log.debug('Comparing %d new items with %d old', len(new_items), len(old_items)) for item in new_items: - if item.url not in old_items_urls: + if not guids and item.url not in urls: + diff.append(item) + continue + if item.guid not in guids: diff.append(item) self.log.debug('%d updates found', len(diff)) -- 2.43.5 From 037272a167dc1c9f8f71c00802e220d1aa2a0c76 Mon Sep 17 00:00:00 2001 From: mitsuha_s Date: Thu, 14 Jul 2022 01:11:45 +0000 Subject: [PATCH 11/16] not saving description in the database anymore --- database.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/database.py b/database.py index e577fba..5abb15b 100644 --- a/database.py +++ b/database.py @@ -150,11 +150,10 @@ class Database: """Replace last feed items with a list items that receive.""" self.log.debug('update_feed_items(feed_id=\'%s\', new_items=list(%d))', feed_id, len(new_items)) for i, _ in enumerate(new_items): - new_items[i] = [feed_id] + list(new_items[i].__dict__.values())[:-1] - + new_items[i] = [feed_id] + [new_items[i].url, new_items[i].title, new_items[i].guid] self.cur.execute('DELETE FROM feeds_last_items WHERE feed_id = %s', [feed_id]) self.cur.executemany( - 'INSERT INTO feeds_last_items (feed_id, url, 
title, description, guid) VALUES (%s, %s, %s, %s, %s)', new_items) + 'INSERT INTO feeds_last_items (feed_id, url, title, guid) VALUES (%s, %s, %s, %s)', new_items) self.conn.commit() def __init_schema(self) -> None: @@ -175,7 +174,6 @@ class Database: ' feed_id INTEGER REFERENCES feeds ON DELETE CASCADE,' ' url TEXT NOT NULL,' ' title TEXT,' - ' description TEXT,' ' guid TEXT' ')' ) -- 2.43.5 From 61a2d8d2159e9a8169f8449ce429fa1ca4bff490 Mon Sep 17 00:00:00 2001 From: mitsuha_s Date: Thu, 14 Jul 2022 01:21:40 +0000 Subject: [PATCH 12/16] not saving feed items title in the database anymore --- database.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/database.py b/database.py index 5abb15b..d0c1c48 100644 --- a/database.py +++ b/database.py @@ -150,10 +150,10 @@ class Database: """Replace last feed items with a list items that receive.""" self.log.debug('update_feed_items(feed_id=\'%s\', new_items=list(%d))', feed_id, len(new_items)) for i, _ in enumerate(new_items): - new_items[i] = [feed_id] + [new_items[i].url, new_items[i].title, new_items[i].guid] + new_items[i] = [feed_id] + [new_items[i].url, new_items[i].guid] self.cur.execute('DELETE FROM feeds_last_items WHERE feed_id = %s', [feed_id]) self.cur.executemany( - 'INSERT INTO feeds_last_items (feed_id, url, title, guid) VALUES (%s, %s, %s, %s)', new_items) + 'INSERT INTO feeds_last_items (feed_id, url, guid) VALUES (%s, %s, %s)', new_items) self.conn.commit() def __init_schema(self) -> None: @@ -173,7 +173,6 @@ class Database: 'CREATE TABLE IF NOT EXISTS feeds_last_items (' ' feed_id INTEGER REFERENCES feeds ON DELETE CASCADE,' ' url TEXT NOT NULL,' - ' title TEXT,' ' guid TEXT' ')' ) -- 2.43.5 From 292a7a3c88a98348948913e30334cbc88adf670f Mon Sep 17 00:00:00 2001 From: mitsuha_s Date: Thu, 14 Jul 2022 01:29:37 +0000 Subject: [PATCH 13/16] little refactor in update_feed_items method --- database.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/database.py 
b/database.py index d0c1c48..4ea6a67 100644 --- a/database.py +++ b/database.py @@ -150,7 +150,7 @@ class Database: """Replace last feed items with a list items that receive.""" self.log.debug('update_feed_items(feed_id=\'%s\', new_items=list(%d))', feed_id, len(new_items)) for i, _ in enumerate(new_items): - new_items[i] = [feed_id] + [new_items[i].url, new_items[i].guid] + new_items[i] = [feed_id, new_items[i].url, new_items[i].guid] self.cur.execute('DELETE FROM feeds_last_items WHERE feed_id = %s', [feed_id]) self.cur.executemany( 'INSERT INTO feeds_last_items (feed_id, url, guid) VALUES (%s, %s, %s)', new_items) -- 2.43.5 From 2206c6299d8753d151b9d5852c7f16138a93fd2e Mon Sep 17 00:00:00 2001 From: mitsuha_s Date: Thu, 14 Jul 2022 01:37:18 +0000 Subject: [PATCH 14/16] change time.sleep to 1 --- telegram.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/telegram.py b/telegram.py index c86289d..63f783e 100644 --- a/telegram.py +++ b/telegram.py @@ -157,7 +157,7 @@ class Notifier: self.log.debug('__count_request_and_wait()') if self.sent_counter >= self.BATCH_LIMIT: self.log.debug('Requests limit exceeded, sleeping for a second') - time.sleep(2) + time.sleep(1) self.log.debug('Resetting counter') self.sent_counter = 0 self.sent_counter += 1 -- 2.43.5 From ab77d44ba3b9b01354ee4c59c5fba1ea0d252f2a Mon Sep 17 00:00:00 2001 From: mitsuha_s Date: Thu, 14 Jul 2022 01:41:57 +0000 Subject: [PATCH 15/16] rename db_dsn to dsn variable in bot.py and update.py --- bot.py | 2 +- update.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/bot.py b/bot.py index 254f022..ef89fd6 100644 --- a/bot.py +++ b/bot.py @@ -9,7 +9,7 @@ from telegram import CommandProcessor load_dotenv() token = os.getenv('RSSBOT_TG_TOKEN') -db_dsn = os.getenv('RSSBOT_DSN',) +dsn = os.getenv('RSSBOT_DSN',) log_level = os.getenv('LOG_LEVEL', 'INFO') print('Starting the bot with logging level', log_level.upper()) diff --git a/update.py b/update.py index d679170..2a97740 
100644 --- a/update.py +++ b/update.py @@ -11,7 +11,7 @@ from telegram import Notifier load_dotenv() token = os.getenv('RSSBOT_TG_TOKEN') -db_dsn = os.getenv('RSSBOT_DSN') +dsn = os.getenv('RSSBOT_DSN') log_level = os.getenv('LOG_LEVEL', 'INFO') print('Starting the updater with logging level', log_level.upper()) -- 2.43.5 From 2bb167fef534bd3b2c741070537cadfd05402ab8 Mon Sep 17 00:00:00 2001 From: mitsuha_s Date: Thu, 14 Jul 2022 01:48:20 +0000 Subject: [PATCH 16/16] little fix in bot.py and update.py --- bot.py | 4 ++-- update.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/bot.py b/bot.py index ef89fd6..d56ab3c 100644 --- a/bot.py +++ b/bot.py @@ -9,7 +9,7 @@ from telegram import CommandProcessor load_dotenv() token = os.getenv('RSSBOT_TG_TOKEN') -dsn = os.getenv('RSSBOT_DSN',) +dsn = os.getenv('RSSBOT_DSN') log_level = os.getenv('LOG_LEVEL', 'INFO') print('Starting the bot with logging level', log_level.upper()) @@ -19,7 +19,7 @@ logging.basicConfig( datefmt='%Y-%m-%d %H:%M:%S', ) -db = Database(db_dsn, logging.getLogger('Database')) +db = Database(dsn, logging.getLogger('Database')) bot = CommandProcessor(token, db, logging.getLogger('CommandProcessor')) bot.run() diff --git a/update.py b/update.py index 2a97740..19e1976 100644 --- a/update.py +++ b/update.py @@ -21,7 +21,7 @@ logging.basicConfig( datefmt='%Y-%m-%d %H:%M:%S' ) -db = Database(db_dsn, logging.getLogger('Database')) +db = Database(dsn, logging.getLogger('Database')) notifier = Notifier(token, logging.getLogger('Notifier')) rss_reader = RssReader(logging.getLogger('RssReader')) -- 2.43.5