Python/FastAPI backend with PostgreSQL for collecting Reddit data via public .json endpoints. React/Vite dashboard for analytics. Docker Compose setup with API and worker services connecting to shared PostgreSQL. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
141 lines
5.0 KiB
Python
141 lines
5.0 KiB
Python
import logging
|
|
from datetime import datetime, timezone, timedelta, date
|
|
|
|
from sqlalchemy import select, func, create_engine
|
|
from sqlalchemy.orm import sessionmaker
|
|
|
|
from backend.config import settings
|
|
from backend.models.subreddit import MonitoredSubreddit
|
|
from backend.models.post import Post
|
|
from backend.models.comment import Comment
|
|
from backend.models.author import Author
|
|
from backend.models.daily_digest import DailyDigest
|
|
|
|
# Logger named after this module so records can be filtered per module.
logger = logging.getLogger(__name__)

# Dedicated synchronous engine for this module, built from the sync database
# URL in settings. The pool holds 2 persistent connections and recycles any
# connection older than 3600 seconds.
_engine = create_engine(
    settings.database_url_sync,
    pool_size=2,
    pool_recycle=3600,
)

# Factory for synchronous sessions bound to the engine above.
SyncSession = sessionmaker(bind=_engine)
|
|
|
|
|
|
def _format_digest_markdown(
    sub_name,
    digest_date,
    post_count,
    comment_count,
    avg_score,
    top_posts,
    top_authors,
):
    """Render the markdown text of one subreddit's daily digest.

    Args:
        sub_name: Subreddit name, shown as ``r/<name>`` in the heading.
        digest_date: The day the digest covers.
        post_count: Number of posts counted for that day.
        comment_count: Number of comments counted for that day.
        avg_score: Average post score, or ``None`` when there is none.
        top_posts: Rows of ``(title, score, num_comments, permalink)``.
        top_authors: Rows of ``(username, comment_count)``.

    Returns:
        The digest as a single newline-joined markdown string.
    """
    summary = f"**Posts:** {post_count} | **Comments:** {comment_count}"
    # Compare against None instead of relying on truthiness: an average of
    # exactly 0 is a real value and must still be reported.
    if avg_score is not None:
        summary += f" | **Avg Score:** {avg_score:.1f}"

    lines = [
        f"# r/{sub_name} — Daily Digest for {digest_date}",
        "",
        summary,
        "",
    ]

    if top_posts:
        lines.append("## Top Posts")
        # The permalink is selected by the query but not rendered here.
        for rank, (title, score, num_comments, _permalink) in enumerate(top_posts, 1):
            lines.append(f"{rank}. **{title}** — {score} pts, {num_comments} comments")
        lines.append("")

    if top_authors:
        lines.append("## Most Active Users")
        for username, cnt in top_authors:
            lines.append(f"- u/{username}: {cnt} comments")
        lines.append("")

    return "\n".join(lines)


def _build_digest_metadata(post_count, comment_count, avg_score, top_posts, top_authors):
    """Build the structured metadata dict stored alongside the digest text.

    ``avg_score`` is converted to ``float``; when it is ``None`` the stored
    value is ``0``.
    """
    return {
        "post_count": post_count,
        "comment_count": comment_count,
        "avg_score": float(avg_score) if avg_score is not None else 0,
        "top_posts": [
            {"title": title, "score": score, "num_comments": num_comments}
            for title, score, num_comments, _permalink in top_posts
        ],
        "top_authors": [
            {"username": username, "comment_count": cnt}
            for username, cnt in top_authors
        ],
    }


def generate_daily_digests() -> None:
    """Generate yesterday's daily digest for each active subreddit.

    "Yesterday" is the previous calendar day in UTC, and the statistics cover
    the half-open window ``[00:00 UTC yesterday, 00:00 UTC today)``.

    For every active ``MonitoredSubreddit`` that does not already have a
    ``DailyDigest`` for that date, this gathers the post count, comment
    count, average post score, the five highest-scoring posts and the five
    authors with the most comments, then stores a markdown digest plus a
    metadata dict. Each digest is committed on its own, so digests written
    earlier in the loop are kept if a later subreddit fails.
    """
    # date.today() would use the process-local timezone, while the window
    # bounds below are UTC; derive the date from UTC so both always agree.
    yesterday = datetime.now(timezone.utc).date() - timedelta(days=1)
    day_start = datetime(yesterday.year, yesterday.month, yesterday.day, tzinfo=timezone.utc)
    day_end = day_start + timedelta(days=1)

    with SyncSession() as db:
        subs = db.execute(
            select(MonitoredSubreddit).where(MonitoredSubreddit.is_active == True)  # noqa: E712
        ).scalars().all()

        for sub in subs:
            # Skip subreddits whose digest for this date already exists.
            existing = db.execute(
                select(DailyDigest).where(
                    DailyDigest.subreddit_id == sub.id,
                    DailyDigest.digest_date == yesterday,
                )
            ).scalar_one_or_none()
            if existing:
                continue

            # Filters shared by the queries below: posts created in the
            # window, and comments created in the window on this
            # subreddit's posts.
            post_window = (
                Post.subreddit_id == sub.id,
                Post.created_utc >= day_start,
                Post.created_utc < day_end,
            )
            comment_window = (
                Post.subreddit_id == sub.id,
                Comment.created_utc >= day_start,
                Comment.created_utc < day_end,
            )

            post_count = db.execute(
                select(func.count(Post.id)).where(*post_window)
            ).scalar() or 0

            # Explicit join condition, matching the top-authors query below.
            comment_count = db.execute(
                select(func.count(Comment.id))
                .join(Post, Comment.post_id == Post.id)
                .where(*comment_window)
            ).scalar() or 0

            # Five highest-scoring posts of the day.
            top_posts = db.execute(
                select(Post.title, Post.score, Post.num_comments, Post.permalink)
                .where(*post_window)
                .order_by(Post.score.desc())
                .limit(5)
            ).all()

            # Five authors with the most comments in the window.
            top_authors = db.execute(
                select(Author.username, func.count(Comment.id).label("cnt"))
                .join(Comment, Comment.author_id == Author.id)
                .join(Post, Comment.post_id == Post.id)
                .where(*comment_window)
                .group_by(Author.username)
                .order_by(func.count(Comment.id).desc())
                .limit(5)
            ).all()

            # None when no post matched the window.
            avg_score = db.execute(
                select(func.avg(Post.score)).where(*post_window)
            ).scalar()

            content = _format_digest_markdown(
                sub.name,
                yesterday,
                post_count,
                comment_count,
                avg_score,
                top_posts,
                top_authors,
            )
            metadata = _build_digest_metadata(
                post_count, comment_count, avg_score, top_posts, top_authors
            )

            db.add(
                DailyDigest(
                    subreddit_id=sub.id,
                    digest_date=yesterday,
                    content=content,
                    metadata_=metadata,
                )
            )
            db.commit()
            logger.info("Generated daily digest for r/%s on %s", sub.name, yesterday)