Bug fix: compare feed items by GUID instead of URL
continuous-integration/drone/push Build is passing Details
continuous-integration/drone/pr Build is passing Details

This commit is contained in:
mitsuha_s 2022-07-14 00:41:58 +00:00
parent 6535055268
commit c91549a311
3 changed files with 16 additions and 9 deletions

View File

@ -154,7 +154,7 @@ class Database:
self.cur.execute('DELETE FROM feeds_last_items WHERE feed_id = %s', [feed_id])
self.cur.executemany(
'INSERT INTO feeds_last_items (feed_id, url, title, description) VALUES (%s, %s, %s, %s)', new_items)
'INSERT INTO feeds_last_items (feed_id, url, title, description, guid) VALUES (%s, %s, %s, %s, %s)', new_items)
self.conn.commit()
def __init_schema(self) -> None:
@ -175,7 +175,8 @@ class Database:
' feed_id INTEGER REFERENCES feeds ON DELETE CASCADE,'
' url TEXT NOT NULL,'
' title TEXT,'
' description TEXT'
' description TEXT,'
' guid TEXT'
')'
)

1
rss.py
View File

@ -9,6 +9,7 @@ class FeedItem:
self.url = item.get('link', '')
self.title = item.get('title', '')
self.description = item.get('summary', '')
self.guid = item.get('id', '')
if 'published' in item:
self.date = datetime.fromtimestamp(mktime(item.published_parsed))
else:

View File

@ -28,9 +28,9 @@ class UpdateManager:
self.log.info('Processing [%d] %s', feed['id'], feed['url'])
feed_obj = self.rss_reader.get_feed(feed['url'])
new_items = feed_obj.items
old_items_urls = self.database.find_feed_items_urls(feed['id'])
old_items = self.database.find_feed_items(feed['id'])
diff = self.__calculate_difference(new_items, old_items_urls)
diff = self.__calculate_difference(new_items, old_items)
if not diff:
continue
@ -39,20 +39,25 @@ class UpdateManager:
self.notifier.send_updates(chat_ids, diff, feed_obj.title)
self.database.update_feed_items(feed['id'], new_items)
def __calculate_difference(self, new_items: list[FeedItem], old_items_urls: list[str]) -> list[FeedItem]:
def __calculate_difference(self, new_items: list[FeedItem], old_items: list[dict]) -> list[FeedItem]:
"""Calculate new feed items."""
self.log.debug(
'__calculate_difference(new_items=list(%d), old_items_urls=list(%d))', len(new_items), len(old_items_urls)
'__calculate_difference(new_items=list(%d), old_items=list(%d))', len(new_items), len(old_items)
)
if not old_items_urls:
if not old_items:
self.log.debug('Old items are empty, returning new')
return new_items
diff = []
guids = [item['guid'] for item in old_items if item['guid']]
urls = [item['url'] for item in old_items]
self.log.debug('Comparing %d new items with %d old', len(new_items), len(old_items_urls))
self.log.debug('Comparing %d new items with %d old', len(new_items), len(old_items))
for item in new_items:
if item.url not in old_items_urls:
if not guids and item.url not in urls:
diff.append(item)
continue
if item.guid not in guids:
diff.append(item)
self.log.debug('%d updates found', len(diff))