Bug fix: compare feed items by GUID instead of by URL
All checks were successful
continuous-integration/drone/push Build is passing
continuous-integration/drone/pr Build is passing

This commit is contained in:
mitsuha_s 2022-07-14 00:41:58 +00:00
parent 6535055268
commit c91549a311
3 changed files with 16 additions and 9 deletions

View file

@ -154,7 +154,7 @@ class Database:
self.cur.execute('DELETE FROM feeds_last_items WHERE feed_id = %s', [feed_id]) self.cur.execute('DELETE FROM feeds_last_items WHERE feed_id = %s', [feed_id])
self.cur.executemany( self.cur.executemany(
'INSERT INTO feeds_last_items (feed_id, url, title, description) VALUES (%s, %s, %s, %s)', new_items) 'INSERT INTO feeds_last_items (feed_id, url, title, description, guid) VALUES (%s, %s, %s, %s, %s)', new_items)
self.conn.commit() self.conn.commit()
def __init_schema(self) -> None: def __init_schema(self) -> None:
@ -175,7 +175,8 @@ class Database:
' feed_id INTEGER REFERENCES feeds ON DELETE CASCADE,' ' feed_id INTEGER REFERENCES feeds ON DELETE CASCADE,'
' url TEXT NOT NULL,' ' url TEXT NOT NULL,'
' title TEXT,' ' title TEXT,'
' description TEXT' ' description TEXT,'
' guid TEXT'
')' ')'
) )

1
rss.py
View file

@ -9,6 +9,7 @@ class FeedItem:
self.url = item.get('link', '') self.url = item.get('link', '')
self.title = item.get('title', '') self.title = item.get('title', '')
self.description = item.get('summary', '') self.description = item.get('summary', '')
self.guid = item.get('id', '')
if 'published' in item: if 'published' in item:
self.date = datetime.fromtimestamp(mktime(item.published_parsed)) self.date = datetime.fromtimestamp(mktime(item.published_parsed))
else: else:

View file

@ -28,9 +28,9 @@ class UpdateManager:
self.log.info('Processing [%d] %s', feed['id'], feed['url']) self.log.info('Processing [%d] %s', feed['id'], feed['url'])
feed_obj = self.rss_reader.get_feed(feed['url']) feed_obj = self.rss_reader.get_feed(feed['url'])
new_items = feed_obj.items new_items = feed_obj.items
old_items_urls = self.database.find_feed_items_urls(feed['id']) old_items = self.database.find_feed_items(feed['id'])
diff = self.__calculate_difference(new_items, old_items_urls) diff = self.__calculate_difference(new_items, old_items)
if not diff: if not diff:
continue continue
@ -39,20 +39,25 @@ class UpdateManager:
self.notifier.send_updates(chat_ids, diff, feed_obj.title) self.notifier.send_updates(chat_ids, diff, feed_obj.title)
self.database.update_feed_items(feed['id'], new_items) self.database.update_feed_items(feed['id'], new_items)
def __calculate_difference(self, new_items: list[FeedItem], old_items_urls: list[str]) -> list[FeedItem]: def __calculate_difference(self, new_items: list[FeedItem], old_items: list[dict]) -> list[FeedItem]:
"""Calculate new feed items.""" """Calculate new feed items."""
self.log.debug( self.log.debug(
'__calculate_difference(new_items=list(%d), old_items_urls=list(%d))', len(new_items), len(old_items_urls) '__calculate_difference(new_items=list(%d), old_items=list(%d))', len(new_items), len(old_items)
) )
if not old_items_urls: if not old_items:
self.log.debug('Old items are empty, returning new') self.log.debug('Old items are empty, returning new')
return new_items return new_items
diff = [] diff = []
guids = [item['guid'] for item in old_items if item['guid']]
urls = [item['url'] for item in old_items]
self.log.debug('Comparing %d new items with %d old', len(new_items), len(old_items_urls)) self.log.debug('Comparing %d new items with %d old', len(new_items), len(old_items))
for item in new_items: for item in new_items:
if item.url not in old_items_urls: if not guids and item.url not in urls:
diff.append(item)
continue
if item.guid not in guids:
diff.append(item) diff.append(item)
self.log.debug('%d updates found', len(diff)) self.log.debug('%d updates found', len(diff))