import logging
from datetime import datetime, timezone, timedelta, date

from sqlalchemy import select, func, create_engine
from sqlalchemy.orm import sessionmaker

from backend.config import settings
from backend.models.subreddit import MonitoredSubreddit
from backend.models.post import Post
from backend.models.comment import Comment
from backend.models.author import Author
from backend.models.daily_digest import DailyDigest

logger = logging.getLogger(__name__)

# Dedicated synchronous engine/session factory used by this module.
_engine = create_engine(settings.database_url_sync, pool_size=2, pool_recycle=3600)
SyncSession = sessionmaker(_engine)


def _render_digest(
    sub_name: str,
    digest_date: date,
    post_count: int,
    comment_count: int,
    avg_score,
    top_posts,
    top_authors,
) -> str:
    """Render the markdown body of a daily digest.

    Args:
        sub_name: Subreddit name, shown as ``r/<name>`` in the heading.
        digest_date: Calendar day the digest covers.
        post_count: Number of posts counted for the day.
        comment_count: Number of comments counted for the day.
        avg_score: Average post score, or ``None`` when there is no average
            (e.g. no posts). A genuine average of zero is still displayed.
        top_posts: Iterable of ``(title, score, num_comments, permalink)``.
        top_authors: Iterable of ``(username, comment_count)``.

    Returns:
        The digest as a newline-joined markdown string.
    """
    summary = f"**Posts:** {post_count} | **Comments:** {comment_count}"
    # Compare against None explicitly: an average of exactly 0 is falsy but is
    # still a real value that should be reported.
    if avg_score is not None:
        summary += f" | **Avg Score:** {avg_score:.1f}"

    lines = [
        f"# r/{sub_name} — Daily Digest for {digest_date}",
        "",
        summary,
        "",
    ]

    if top_posts:
        lines.append("## Top Posts")
        for rank, (title, score, num_comments, _permalink) in enumerate(top_posts, 1):
            lines.append(f"{rank}. **{title}** — {score} pts, {num_comments} comments")
        lines.append("")

    if top_authors:
        lines.append("## Most Active Users")
        for username, cnt in top_authors:
            lines.append(f"- u/{username}: {cnt} comments")
        lines.append("")

    return "\n".join(lines)


def _build_metadata(post_count: int, comment_count: int, avg_score, top_posts, top_authors) -> dict:
    """Build the structured metadata stored alongside the markdown digest.

    ``avg_score`` is converted to ``float``; when it is ``None`` the stored
    value falls back to ``0``.
    """
    return {
        "post_count": post_count,
        "comment_count": comment_count,
        "avg_score": float(avg_score) if avg_score is not None else 0,
        "top_posts": [
            {"title": t, "score": s, "num_comments": n}
            for t, s, n, _ in top_posts
        ],
        "top_authors": [
            {"username": u, "comment_count": c}
            for u, c in top_authors
        ],
    }


def generate_daily_digests():
    """Generate yesterday's daily digest for each active subreddit.

    "Yesterday" is the previous calendar day in UTC, matching the UTC-aware
    ``[day_start, day_end)`` window used to filter ``created_utc`` values.
    Subreddits that already have a ``DailyDigest`` row for that date are
    skipped. For every other active subreddit the function gathers post and
    comment counts, the five highest-scoring posts, the five authors with the
    most comments and the average post score, then stores a markdown digest
    plus a metadata dict. Each digest is committed individually.
    """
    # Take the date from a UTC clock: date.today() returns the *local* date,
    # which would not line up with the UTC window built below on hosts whose
    # timezone is not UTC.
    yesterday = datetime.now(timezone.utc).date() - timedelta(days=1)
    day_start = datetime(yesterday.year, yesterday.month, yesterday.day, tzinfo=timezone.utc)
    day_end = day_start + timedelta(days=1)

    with SyncSession() as db:
        subs = db.execute(
            select(MonitoredSubreddit).where(MonitoredSubreddit.is_active == True)  # noqa: E712
        ).scalars().all()

        for sub in subs:
            # Skip subreddits whose digest for this date already exists.
            existing = db.execute(
                select(DailyDigest).where(
                    DailyDigest.subreddit_id == sub.id,
                    DailyDigest.digest_date == yesterday,
                )
            ).scalar_one_or_none()
            if existing:
                continue

            # Filters shared by the queries below: posts (resp. comments)
            # created inside the half-open window for this subreddit.
            posts_in_window = (
                Post.subreddit_id == sub.id,
                Post.created_utc >= day_start,
                Post.created_utc < day_end,
            )
            comments_in_window = (
                Post.subreddit_id == sub.id,
                Comment.created_utc >= day_start,
                Comment.created_utc < day_end,
            )

            # Gather stats
            post_count = db.execute(
                select(func.count(Post.id)).where(*posts_in_window)
            ).scalar() or 0

            comment_count = db.execute(
                select(func.count(Comment.id))
                .join(Post)
                .where(*comments_in_window)
            ).scalar() or 0

            # Top posts by score
            top_posts = db.execute(
                select(Post.title, Post.score, Post.num_comments, Post.permalink)
                .where(*posts_in_window)
                .order_by(Post.score.desc())
                .limit(5)
            ).all()

            # Top authors by number of comments in the window
            top_authors = db.execute(
                select(Author.username, func.count(Comment.id).label("cnt"))
                .join(Comment, Comment.author_id == Author.id)
                .join(Post, Comment.post_id == Post.id)
                .where(*comments_in_window)
                .group_by(Author.username)
                .order_by(func.count(Comment.id).desc())
                .limit(5)
            ).all()

            # None when no posts matched the window.
            avg_score = db.execute(
                select(func.avg(Post.score)).where(*posts_in_window)
            ).scalar()

            content = _render_digest(
                sub.name,
                yesterday,
                post_count,
                comment_count,
                avg_score,
                top_posts,
                top_authors,
            )
            metadata = _build_metadata(
                post_count, comment_count, avg_score, top_posts, top_authors
            )

            digest = DailyDigest(
                subreddit_id=sub.id,
                digest_date=yesterday,
                content=content,
                metadata_=metadata,
            )
            db.add(digest)
            db.commit()
            logger.info("Generated daily digest for r/%s on %s", sub.name, yesterday)