Handle the fact that Pullpush can return duplicate posts by deduplicating on reddit_id before the upsert
This commit is contained in:
@@ -70,6 +70,11 @@ def _parse_post(post_data: dict, subreddit_id: int, db: Session, hot_rank: int |
|
|||||||
def _upsert_posts(db: Session, posts: list[dict], update_hot_rank: bool = False):
|
def _upsert_posts(db: Session, posts: list[dict], update_hot_rank: bool = False):
|
||||||
if not posts:
|
if not posts:
|
||||||
return
|
return
|
||||||
|
# Deduplicate by reddit_id (Pullpush can return dupes)
|
||||||
|
seen = {}
|
||||||
|
for p in posts:
|
||||||
|
seen[p["reddit_id"]] = p
|
||||||
|
posts = list(seen.values())
|
||||||
update_set = {
|
update_set = {
|
||||||
"score": insert(Post).excluded.score,
|
"score": insert(Post).excluded.score,
|
||||||
"upvote_ratio": insert(Post).excluded.upvote_ratio,
|
"upvote_ratio": insert(Post).excluded.upvote_ratio,
|
||||||
|
|||||||
Reference in New Issue
Block a user