From c91549a3111ea8545f73c50eb608f00a6f4a54f3 Mon Sep 17 00:00:00 2001 From: mitsuha_s Date: Thu, 14 Jul 2022 00:41:58 +0000 Subject: [PATCH] bug fix. add comparison by guid instead of url --- database.py | 5 +++-- rss.py | 1 + update_manager.py | 19 ++++++++++++------- 3 files changed, 16 insertions(+), 9 deletions(-) diff --git a/database.py b/database.py index e094b6f..e577fba 100644 --- a/database.py +++ b/database.py @@ -154,7 +154,7 @@ class Database: self.cur.execute('DELETE FROM feeds_last_items WHERE feed_id = %s', [feed_id]) self.cur.executemany( - 'INSERT INTO feeds_last_items (feed_id, url, title, description) VALUES (%s, %s, %s, %s)', new_items) + 'INSERT INTO feeds_last_items (feed_id, url, title, description, guid) VALUES (%s, %s, %s, %s, %s)', new_items) self.conn.commit() def __init_schema(self) -> None: @@ -175,7 +175,8 @@ class Database: ' feed_id INTEGER REFERENCES feeds ON DELETE CASCADE,' ' url TEXT NOT NULL,' ' title TEXT,' - ' description TEXT' + ' description TEXT,' + ' guid TEXT' ')' ) diff --git a/rss.py b/rss.py index d591843..74cd94f 100644 --- a/rss.py +++ b/rss.py @@ -9,6 +9,7 @@ class FeedItem: self.url = item.get('link', '') self.title = item.get('title', '') self.description = item.get('summary', '') + self.guid = item.get('id', '') if 'published' in item: self.date = datetime.fromtimestamp(mktime(item.published_parsed)) else: diff --git a/update_manager.py b/update_manager.py index abe94bf..863a9c0 100644 --- a/update_manager.py +++ b/update_manager.py @@ -28,9 +28,9 @@ class UpdateManager: self.log.info('Processing [%d] %s', feed['id'], feed['url']) feed_obj = self.rss_reader.get_feed(feed['url']) new_items = feed_obj.items - old_items_urls = self.database.find_feed_items_urls(feed['id']) + old_items = self.database.find_feed_items(feed['id']) - diff = self.__calculate_difference(new_items, old_items_urls) + diff = self.__calculate_difference(new_items, old_items) if not diff: continue @@ -39,20 +39,25 @@ class UpdateManager: self.notifier.send_updates(chat_ids, diff, feed_obj.title) self.database.update_feed_items(feed['id'], new_items) - def __calculate_difference(self, new_items: list[FeedItem], old_items_urls: list[str]) -> list[FeedItem]: + def __calculate_difference(self, new_items: list[FeedItem], old_items: list[dict]) -> list[FeedItem]: """Calculate new feed items.""" self.log.debug( - '__calculate_difference(new_items=list(%d), old_items_urls=list(%d))', len(new_items), len(old_items_urls) + '__calculate_difference(new_items=list(%d), old_items=list(%d))', len(new_items), len(old_items) ) - if not old_items_urls: + if not old_items: self.log.debug('Old items are empty, returning new') return new_items diff = [] + guids = [item['guid'] for item in old_items if item['guid']] + urls = [item['url'] for item in old_items] - self.log.debug('Comparing %d new items with %d old', len(new_items), len(old_items_urls)) + self.log.debug('Comparing %d new items with %d old', len(new_items), len(old_items)) for item in new_items: - if item.url not in old_items_urls: + if not guids and item.url not in urls: + diff.append(item) + continue + if item.guid not in guids: diff.append(item) self.log.debug('%d updates found', len(diff))