Add Reddit monitoring bot — backend, frontend, and Docker config

Python/FastAPI backend with PostgreSQL for collecting Reddit data via
public .json endpoints. React/Vite dashboard for analytics. Docker Compose
setup with API and worker services connecting to shared PostgreSQL.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-09 19:29:58 -05:00
parent aaa240dbf0
commit bc2203524f
76 changed files with 7570 additions and 0 deletions

View File

View File

@@ -0,0 +1,231 @@
from datetime import datetime, timedelta, timezone
from sqlalchemy import select, func, case, text
from sqlalchemy.ext.asyncio import AsyncSession
from backend.models.post import Post
from backend.models.comment import Comment
from backend.models.author import Author
from backend.models.subreddit import MonitoredSubreddit
async def get_engagement(
    db: AsyncSession,
    subreddit_id: int | None = None,
    granularity: str = "day",
    since: datetime | None = None,
    until: datetime | None = None,
) -> list[dict]:
    """Summarise post and comment activity per time bucket.

    Posts are bucketed with ``date_trunc(granularity, created_utc)``.
    Comments are bucketed the same way in a second query and merged in by
    bucket. Only buckets that contain at least one post are returned; a
    bucket without matching comments reports ``0`` comments.

    Args:
        db: Async session used to run both queries.
        subreddit_id: When truthy, restrict posts (and the posts that
            comments are joined to) to this subreddit.
        granularity: Unit handed to ``date_trunc``.
        since: Inclusive lower bound; defaults to 30 days before now (UTC).
        until: Inclusive upper bound; defaults to now (UTC).

    Returns:
        One dict per bucket, ordered by bucket, with keys ``period`` (the
        bucket rendered via ``str``), ``posts``, ``comments`` and
        ``avg_score`` (rounded to one decimal place).
    """
    since = since or datetime.now(timezone.utc) - timedelta(days=30)
    until = until or datetime.now(timezone.utc)

    post_filters = [Post.created_utc >= since, Post.created_utc <= until]
    comment_filters = [Comment.created_utc >= since, Comment.created_utc <= until]
    if subreddit_id:
        post_filters.append(Post.subreddit_id == subreddit_id)
        comment_filters.append(Post.subreddit_id == subreddit_id)

    # Posts: count and average score per bucket.
    post_bucket = func.date_trunc(granularity, Post.created_utc)
    post_query = (
        select(
            post_bucket.label("period"),
            func.count(Post.id).label("posts"),
            func.coalesce(func.avg(Post.score), 0).label("avg_score"),
        )
        .where(*post_filters)
        .group_by("period")
        .order_by("period")
    )
    post_rows = await db.execute(post_query)

    # Comments: count per bucket, keyed by the stringified bucket so it can
    # be matched against the post buckets below.
    comment_bucket = func.date_trunc(granularity, Comment.created_utc)
    comment_query = (
        select(
            comment_bucket.label("period"),
            func.count(Comment.id).label("comments"),
        )
        .join(Post)
        .where(*comment_filters)
        .group_by("period")
    )
    comment_rows = await db.execute(comment_query)
    comments_by_period = {}
    for comment_row in comment_rows:
        comments_by_period[str(comment_row.period)] = comment_row.comments

    engagement = []
    for post_row in post_rows:
        period_key = str(post_row.period)
        engagement.append(
            {
                "period": period_key,
                "posts": post_row.posts,
                "comments": comments_by_period.get(period_key, 0),
                "avg_score": round(float(post_row.avg_score), 1),
            }
        )
    return engagement
async def get_top_posts(
    db: AsyncSession,
    subreddit_id: int | None = None,
    metric: str = "score",
    since: datetime | None = None,
    until: datetime | None = None,
    limit: int = 10,
) -> list[dict]:
    """Return the highest-ranked posts in a time window.

    Args:
        db: Async session used to run the query.
        subreddit_id: When truthy, restrict results to this subreddit.
        metric: ``"score"`` ranks by post score; any other value ranks by
            number of comments.
        since: Inclusive lower bound; defaults to 7 days before now (UTC).
        until: Optional inclusive upper bound.
        limit: Maximum number of posts to return.

    Returns:
        Dicts with keys ``id``, ``title``, ``score``, ``num_comments``,
        ``author_name``, ``subreddit_name``, ``created_utc`` and
        ``permalink``, in descending order of the chosen metric.
    """
    if not since:
        since = datetime.now(timezone.utc) - timedelta(days=7)

    conditions = [Post.created_utc >= since]
    if until:
        conditions.append(Post.created_utc <= until)
    if subreddit_id:
        conditions.append(Post.subreddit_id == subreddit_id)

    if metric == "score":
        ranking = Post.score
    else:
        ranking = Post.num_comments

    query = (
        select(Post, MonitoredSubreddit.name, Author.username)
        .join(MonitoredSubreddit)
        # Outer join so posts without a linked author are still returned.
        .outerjoin(Author)
        .where(*conditions)
        .order_by(ranking.desc())
        .limit(limit)
    )
    rows = (await db.execute(query)).all()

    top_posts = []
    for post, subreddit_name, username in rows:
        top_posts.append(
            {
                "id": post.id,
                "title": post.title,
                "score": post.score,
                "num_comments": post.num_comments,
                "author_name": username,
                "subreddit_name": subreddit_name,
                "created_utc": post.created_utc,
                "permalink": post.permalink,
            }
        )
    return top_posts
async def get_top_authors(
    db: AsyncSession,
    subreddit_id: int | None = None,
    since: datetime | None = None,
    until: datetime | None = None,
    limit: int = 10,
) -> list[dict]:
    """Return authors ranked by combined post and comment count in a window.

    The counts are computed as scalar subqueries correlated on ``Author``;
    the outer query itself applies no filter to authors.

    Args:
        db: Async session used to run the query.
        subreddit_id: When truthy, count only posts in this subreddit and
            comments on posts in this subreddit.
        since: Inclusive lower bound; defaults to 7 days before now (UTC).
        until: Optional inclusive upper bound.
        limit: Maximum number of authors to return.

    Returns:
        Dicts with keys ``id``, ``username``, ``post_count``,
        ``comment_count`` and ``total_activity``, ordered by total activity
        descending.
    """
    if not since:
        since = datetime.now(timezone.utc) - timedelta(days=7)

    post_filters = [Post.author_id == Author.id, Post.created_utc >= since]
    comment_filters = [Comment.author_id == Author.id, Comment.created_utc >= since]
    if until:
        post_filters.append(Post.created_utc <= until)
        comment_filters.append(Comment.created_utc <= until)

    comment_query = select(func.count(Comment.id))
    if subreddit_id:
        post_filters.append(Post.subreddit_id == subreddit_id)
        # Comments carry no subreddit of their own here; reach it via Post.
        comment_query = comment_query.join(Post)
        comment_filters.append(Post.subreddit_id == subreddit_id)

    posts_sq = (
        select(func.count(Post.id))
        .where(*post_filters)
        .correlate(Author)
        .scalar_subquery()
        .label("post_count")
    )
    comments_sq = (
        comment_query.where(*comment_filters)
        .correlate(Author)
        .scalar_subquery()
        .label("comment_count")
    )

    query = select(Author, posts_sq, comments_sq)
    query = query.order_by((posts_sq + comments_sq).desc())
    query = query.limit(limit)
    rows = (await db.execute(query)).all()

    ranked = []
    for author, n_posts, n_comments in rows:
        n_posts = n_posts or 0
        n_comments = n_comments or 0
        ranked.append(
            {
                "id": author.id,
                "username": author.username,
                "post_count": n_posts,
                "comment_count": n_comments,
                "total_activity": n_posts + n_comments,
            }
        )
    return ranked
async def get_subreddit_summary(
    db: AsyncSession,
    since: datetime | None = None,
    until: datetime | None = None,
) -> list[dict]:
    """Return per-subreddit totals for every active monitored subreddit.

    Every active subreddit is listed, including those with no posts in the
    window (reported with ``total_posts == 0`` and ``avg_score == 0.0``).
    The same ``since``/``until`` window is applied to the post totals, the
    comment count and the top-flair lookup.

    Args:
        db: Async session used to run the queries.
        since: Inclusive lower bound; defaults to 7 days before now (UTC).
        until: Optional inclusive upper bound.

    Returns:
        Dicts ordered by subreddit name with keys ``subreddit_id``,
        ``subreddit_name``, ``total_posts``, ``total_comments``,
        ``avg_score`` (rounded to one decimal place) and ``top_flair``
        (``None`` when no post in the window has a flair).
    """
    if not since:
        since = datetime.now(timezone.utc) - timedelta(days=7)

    # The time window must live in the JOIN condition, not in WHERE: a WHERE
    # predicate on the outer-joined Post rejects the NULL-extended rows and
    # would silently drop subreddits that have no posts in the window.
    join_on = (Post.subreddit_id == MonitoredSubreddit.id) & (
        Post.created_utc >= since
    )
    post_window = [Post.created_utc >= since]
    comment_window = [Comment.created_utc >= since]
    if until:
        join_on = join_on & (Post.created_utc <= until)
        post_window.append(Post.created_utc <= until)
        comment_window.append(Comment.created_utc <= until)

    stmt = (
        select(
            MonitoredSubreddit.id,
            MonitoredSubreddit.name,
            func.count(Post.id).label("total_posts"),
            func.coalesce(func.avg(Post.score), 0).label("avg_score"),
        )
        .outerjoin(Post, join_on)
        .where(MonitoredSubreddit.is_active == True)  # noqa: E712
        .group_by(MonitoredSubreddit.id)
        .order_by(MonitoredSubreddit.name)
    )
    result = await db.execute(stmt)

    summaries = []
    # NOTE(review): this issues two extra queries per subreddit; acceptable
    # for a small number of monitored subreddits, worth batching otherwise.
    for sub_id, sub_name, total_posts, avg_score in result.all():
        # Comments on this subreddit's posts, within the same window.
        comment_stmt = (
            select(func.count(Comment.id))
            .join(Post)
            .where(Post.subreddit_id == sub_id, *comment_window)
        )
        comment_count = (await db.execute(comment_stmt)).scalar() or 0

        # Most frequent non-null flair; the flair itself breaks ties so the
        # result is deterministic.
        flair_stmt = (
            select(Post.flair, func.count(Post.id).label("cnt"))
            .where(
                Post.subreddit_id == sub_id,
                Post.flair.isnot(None),
                *post_window,
            )
            .group_by(Post.flair)
            .order_by(func.count(Post.id).desc(), Post.flair)
            .limit(1)
        )
        top_flair_row = (await db.execute(flair_stmt)).first()

        summaries.append(
            {
                "subreddit_id": sub_id,
                "subreddit_name": sub_name,
                "total_posts": total_posts,
                "total_comments": comment_count,
                "avg_score": round(float(avg_score), 1),
                "top_flair": top_flair_row[0] if top_flair_row else None,
            }
        )
    return summaries
async def get_flair_distribution(
    db: AsyncSession,
    subreddit_id: int,
    since: datetime | None = None,
    until: datetime | None = None,
) -> list[dict]:
    """Count posts per flair for one subreddit.

    Posts without a flair are grouped under a ``None`` flair.

    Args:
        db: Async session used to run the query.
        subreddit_id: Subreddit whose posts are counted.
        since: Inclusive lower bound; defaults to 30 days before now (UTC).
        until: Optional inclusive upper bound.

    Returns:
        Dicts with keys ``flair`` and ``count``, most frequent flair first.
    """
    if not since:
        since = datetime.now(timezone.utc) - timedelta(days=30)

    conditions = [Post.subreddit_id == subreddit_id, Post.created_utc >= since]
    if until:
        conditions.append(Post.created_utc <= until)

    per_flair = func.count(Post.id)
    query = (
        select(Post.flair, per_flair.label("count"))
        .where(*conditions)
        .group_by(Post.flair)
        .order_by(func.count(Post.id).desc())
    )
    rows = (await db.execute(query)).all()

    distribution = []
    for flair, count in rows:
        distribution.append({"flair": flair, "count": count})
    return distribution

View File

@@ -0,0 +1,79 @@
from datetime import datetime
from sqlalchemy import select, func
from sqlalchemy.ext.asyncio import AsyncSession
from backend.models.author import Author
from backend.models.post import Post
from backend.models.comment import Comment
async def list_authors(
    db: AsyncSession,
    subreddit_id: int | None = None,
    sort_by: str = "total_comments",
    sort_order: str = "desc",
    since: datetime | None = None,
    until: datetime | None = None,
    page: int = 1,
    per_page: int = 25,
) -> tuple[list[dict], int]:
    """Return one page of authors and the total number of rows.

    Args:
        db: Async session used to run the queries.
        subreddit_id: When truthy, the filtered activity subqueries count
            only posts in, and comments on posts in, this subreddit.
        sort_by: Name of an ``Author`` table column to sort on; any other
            value falls back to ``total_comments``.
        sort_order: ``"asc"`` for ascending; anything else sorts descending.
        since: Optional inclusive lower bound for the filtered subqueries.
        until: Optional inclusive upper bound for the filtered subqueries.
        page: 1-based page number.
        per_page: Page size.

    Returns:
        ``(authors, total)`` where each author is a dict of the ``Author``
        table's column values and ``total`` is the row count before
        pagination.
    """
    if subreddit_id or since or until:
        # Activity counts restricted by the given filters, correlated on the
        # outer Author row.
        post_count = select(func.count(Post.id)).where(Post.author_id == Author.id)
        comment_count = select(func.count(Comment.id)).where(
            Comment.author_id == Author.id
        )
        if subreddit_id:
            post_count = post_count.where(Post.subreddit_id == subreddit_id)
            comment_count = comment_count.join(
                Post, Comment.post_id == Post.id
            ).where(Post.subreddit_id == subreddit_id)
        if since:
            post_count = post_count.where(Post.created_utc >= since)
            comment_count = comment_count.where(Comment.created_utc >= since)
        if until:
            post_count = post_count.where(Post.created_utc <= until)
            comment_count = comment_count.where(Comment.created_utc <= until)
        # NOTE(review): the filtered counts are selected but are not included
        # in the returned dicts, nor used to filter or sort authors. Confirm
        # whether callers should receive them before changing the payload.
        base = select(
            Author,
            post_count.correlate(Author).scalar_subquery().label("filtered_posts"),
            comment_count.correlate(Author)
            .scalar_subquery()
            .label("filtered_comments"),
        )
    else:
        base = select(Author)

    count_stmt = select(func.count()).select_from(base.subquery())
    total = (await db.execute(count_stmt)).scalar() or 0

    # Only real table columns are sortable. A bare getattr() would also accept
    # names such as "metadata", which have no .asc()/.desc() and would crash.
    if sort_by in Author.__table__.columns.keys():
        sort_col = getattr(Author, sort_by)
    else:
        sort_col = Author.total_comments
    direction = sort_col.asc() if sort_order == "asc" else sort_col.desc()
    # Author.id is a unique tiebreaker, so rows with equal sort values cannot
    # be repeated or skipped across pages.
    base = base.order_by(direction, Author.id)
    base = base.offset((page - 1) * per_page).limit(per_page)

    result = await db.execute(base)
    authors = []
    for row in result.all():
        # Every result row is a SQLAlchemy Row (not a tuple instance), in both
        # branches above; the Author entity is always its first element.
        author = row[0]
        authors.append(
            {c.name: getattr(author, c.name) for c in author.__table__.columns}
        )
    return authors, total
async def get_author(db: AsyncSession, author_id: int) -> dict | None:
    """Fetch a single author by primary key.

    Args:
        db: Async session used for the lookup.
        author_id: Primary key of the author.

    Returns:
        A dict of the author's table column values, or ``None`` when no
        author with that id exists.
    """
    record = await db.get(Author, author_id)
    if not record:
        return None
    payload = {}
    for column in record.__table__.columns:
        payload[column.name] = getattr(record, column.name)
    return payload

View File

@@ -0,0 +1,57 @@
from datetime import datetime
from sqlalchemy import select, func
from sqlalchemy.ext.asyncio import AsyncSession
from backend.models.comment import Comment
from backend.models.post import Post
from backend.models.author import Author
async def list_comments(
    db: AsyncSession,
    post_id: int | None = None,
    subreddit_id: int | None = None,
    author: str | None = None,
    sort_by: str = "created_utc",
    sort_order: str = "desc",
    since: datetime | None = None,
    until: datetime | None = None,
    page: int = 1,
    per_page: int = 25,
) -> tuple[list[dict], int]:
    """Return one page of comments and the total number of matches.

    Args:
        db: Async session used to run the queries.
        post_id: When truthy, keep only comments on this post.
        subreddit_id: When truthy, keep only comments whose post belongs to
            this subreddit.
        author: When truthy, keep only comments by this exact username.
        sort_by: Name of a ``Comment`` table column to sort on; any other
            value falls back to ``created_utc``.
        sort_order: ``"asc"`` for ascending; anything else sorts descending.
        since: Optional inclusive lower bound on the comment timestamp.
        until: Optional inclusive upper bound on the comment timestamp.
        page: 1-based page number.
        per_page: Page size.

    Returns:
        ``(comments, total)`` where each comment is a dict of the
        ``Comment`` table's column values plus ``author_name`` (``None``
        when the comment has no linked author), and ``total`` is the match
        count before pagination.
    """
    # Explicit ON clauses: with chained joins the ON clause for Post would
    # otherwise be inferred against a join that already contains Author, and
    # Post is linked to Author as well as to Comment. Each comment must be
    # matched to its own post, never to posts by the same author.
    base = (
        select(Comment, Author.username)
        .outerjoin(Author, Comment.author_id == Author.id)
        .join(Post, Comment.post_id == Post.id)
    )

    filters = []
    if post_id:
        filters.append(Comment.post_id == post_id)
    if subreddit_id:
        filters.append(Post.subreddit_id == subreddit_id)
    if author:
        filters.append(Author.username == author)
    if since:
        filters.append(Comment.created_utc >= since)
    if until:
        filters.append(Comment.created_utc <= until)
    if filters:
        base = base.where(*filters)

    count_stmt = select(func.count()).select_from(base.subquery())
    total = (await db.execute(count_stmt)).scalar() or 0

    # Only real table columns are sortable. A bare getattr() would also accept
    # names such as "metadata", which have no .asc()/.desc() and would crash.
    if sort_by in Comment.__table__.columns.keys():
        sort_col = getattr(Comment, sort_by)
    else:
        sort_col = Comment.created_utc
    direction = sort_col.asc() if sort_order == "asc" else sort_col.desc()
    # Comment.id is a unique tiebreaker, so rows with equal sort values cannot
    # be repeated or skipped across pages.
    base = base.order_by(direction, Comment.id)
    base = base.offset((page - 1) * per_page).limit(per_page)

    result = await db.execute(base)
    comments = []
    for comment, author_name in result.all():
        data = {c.name: getattr(comment, c.name) for c in comment.__table__.columns}
        data["author_name"] = author_name
        comments.append(data)
    return comments, total

View File

@@ -0,0 +1,102 @@
from datetime import datetime
from sqlalchemy import select, func
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.orm import joinedload
from backend.models.post import Post
from backend.models.subreddit import MonitoredSubreddit
from backend.models.author import Author
from backend.models.comment import Comment
async def list_posts(
    db: AsyncSession,
    subreddit_id: int | None = None,
    author: str | None = None,
    flair: str | None = None,
    sort_by: str = "created_utc",
    sort_order: str = "desc",
    since: datetime | None = None,
    until: datetime | None = None,
    page: int = 1,
    per_page: int = 25,
) -> tuple[list[dict], int]:
    """Return one page of posts and the total number of matches.

    Args:
        db: Async session used to run the queries.
        subreddit_id: When truthy, keep only posts in this subreddit.
        author: When truthy, keep only posts by this exact username.
        flair: When truthy, keep only posts with this exact flair.
        sort_by: Name of a ``Post`` table column to sort on; any other value
            falls back to ``created_utc``.
        sort_order: ``"asc"`` for ascending; anything else sorts descending.
        since: Optional inclusive lower bound on the post timestamp.
        until: Optional inclusive upper bound on the post timestamp.
        page: 1-based page number.
        per_page: Page size.

    Returns:
        ``(posts, total)`` where each post is a dict of the ``Post`` table's
        column values plus ``subreddit_name`` and ``author_name`` (``None``
        when the post has no linked author), and ``total`` is the match
        count before pagination.
    """
    base = (
        select(Post, MonitoredSubreddit.name, Author.username)
        .join(MonitoredSubreddit)
        # Outer join so posts without a linked author are still listed.
        .outerjoin(Author)
    )

    filters = []
    if subreddit_id:
        filters.append(Post.subreddit_id == subreddit_id)
    if flair:
        filters.append(Post.flair == flair)
    if since:
        filters.append(Post.created_utc >= since)
    if until:
        filters.append(Post.created_utc <= until)
    if author:
        filters.append(Author.username == author)
    if filters:
        base = base.where(*filters)

    # Count before ordering and pagination are applied.
    count_stmt = select(func.count()).select_from(base.subquery())
    total = (await db.execute(count_stmt)).scalar() or 0

    # Only real table columns are sortable. A bare getattr() would also accept
    # names such as "metadata", which have no .asc()/.desc() and would crash.
    if sort_by in Post.__table__.columns.keys():
        sort_col = getattr(Post, sort_by)
    else:
        sort_col = Post.created_utc
    direction = sort_col.asc() if sort_order == "asc" else sort_col.desc()
    # Post.id is a unique tiebreaker, so rows with equal sort values cannot be
    # repeated or skipped across pages.
    base = base.order_by(direction, Post.id)
    base = base.offset((page - 1) * per_page).limit(per_page)

    result = await db.execute(base)
    posts = []
    for post, sub_name, author_name in result.all():
        data = {c.name: getattr(post, c.name) for c in post.__table__.columns}
        data["subreddit_name"] = sub_name
        data["author_name"] = author_name
        posts.append(data)
    return posts, total
async def get_post(db: AsyncSession, post_id: int) -> dict | None:
    """Fetch one post together with all of its comments.

    Args:
        db: Async session used to run the queries.
        post_id: Primary key of the post.

    Returns:
        ``None`` when the post does not exist. Otherwise a dict of the
        post's table column values plus ``subreddit_name``, ``author_name``
        and ``comments``: a list of comment dicts (column values plus
        ``author_name``) ordered by ``created_utc`` ascending.
    """
    post_query = (
        select(Post, MonitoredSubreddit.name, Author.username)
        .join(MonitoredSubreddit)
        .outerjoin(Author)
        .where(Post.id == post_id)
    )
    found = (await db.execute(post_query)).first()
    if not found:
        return None

    post, subreddit_name, post_author = found
    payload = {col.name: getattr(post, col.name) for col in post.__table__.columns}
    payload["subreddit_name"] = subreddit_name
    payload["author_name"] = post_author

    # Comments for the post, oldest first, each with its author's username.
    comments_query = (
        select(Comment, Author.username)
        .outerjoin(Author)
        .where(Comment.post_id == post_id)
        .order_by(Comment.created_utc.asc())
    )
    comment_rows = (await db.execute(comments_query)).all()

    serialized = []
    for comment, comment_author in comment_rows:
        entry = {
            col.name: getattr(comment, col.name)
            for col in comment.__table__.columns
        }
        entry["author_name"] = comment_author
        serialized.append(entry)

    payload["comments"] = serialized
    return payload

View File

@@ -0,0 +1,75 @@
from sqlalchemy import select, func
from sqlalchemy.ext.asyncio import AsyncSession
from backend.models.subreddit import MonitoredSubreddit
from backend.models.post import Post
async def list_subreddits(db: AsyncSession) -> list[dict]:
    """List every monitored subreddit with its post count.

    Args:
        db: Async session used to run the query.

    Returns:
        Dicts ordered by subreddit name, each holding the subreddit's table
        column values plus ``post_count`` (``0`` for a subreddit without
        posts, because of the outer join).
    """
    n_posts = func.count(Post.id).label("post_count")
    query = select(MonitoredSubreddit, n_posts)
    query = query.outerjoin(Post, Post.subreddit_id == MonitoredSubreddit.id)
    query = query.group_by(MonitoredSubreddit.id)
    query = query.order_by(MonitoredSubreddit.name)

    fetched = await db.execute(query)
    return [
        {
            **{
                col.name: getattr(subreddit, col.name)
                for col in subreddit.__table__.columns
            },
            "post_count": count,
        }
        for subreddit, count in fetched.all()
    ]
async def get_subreddit(db: AsyncSession, subreddit_id: int) -> dict | None:
    """Fetch one monitored subreddit with its post count.

    Args:
        db: Async session used to run the query.
        subreddit_id: Primary key of the subreddit.

    Returns:
        A dict of the subreddit's table column values plus ``post_count``,
        or ``None`` when no subreddit with that id exists.
    """
    n_posts = func.count(Post.id).label("post_count")
    query = (
        select(MonitoredSubreddit, n_posts)
        .outerjoin(Post, Post.subreddit_id == MonitoredSubreddit.id)
        .where(MonitoredSubreddit.id == subreddit_id)
        .group_by(MonitoredSubreddit.id)
    )
    found = (await db.execute(query)).first()
    if not found:
        return None

    subreddit, count = found
    payload = {}
    for col in subreddit.__table__.columns:
        payload[col.name] = getattr(subreddit, col.name)
    payload["post_count"] = count
    return payload
async def create_subreddit(db: AsyncSession, name: str) -> MonitoredSubreddit:
    """Create and persist a monitored subreddit.

    Args:
        db: Async session used to insert and commit the row.
        name: Subreddit name; stored lower-cased with surrounding
            whitespace removed.

    Returns:
        The newly created row, refreshed from the database after commit.
    """
    normalized_name = name.lower().strip()
    record = MonitoredSubreddit(name=normalized_name)
    db.add(record)
    await db.commit()
    # Reload so database-populated fields are present on the instance.
    await db.refresh(record)
    return record
async def update_subreddit(
    db: AsyncSession, subreddit_id: int, is_active: bool | None = None
) -> MonitoredSubreddit | None:
    """Update a monitored subreddit's active flag.

    Args:
        db: Async session used to load and commit the row.
        subreddit_id: Primary key of the subreddit to update.
        is_active: New value for the active flag; ``None`` leaves it as is.

    Returns:
        The refreshed row, or ``None`` when no subreddit with that id
        exists.
    """
    record = await db.get(MonitoredSubreddit, subreddit_id)
    if not record:
        return None

    flag_supplied = is_active is not None
    if flag_supplied:
        record.is_active = is_active

    # Commit and refresh happen even when nothing was changed.
    await db.commit()
    await db.refresh(record)
    return record
async def delete_subreddit(db: AsyncSession, subreddit_id: int) -> bool:
    """Soft-delete a monitored subreddit by marking it inactive.

    The row is kept; only its ``is_active`` flag is set to ``False``.

    Args:
        db: Async session used to load and commit the row.
        subreddit_id: Primary key of the subreddit to deactivate.

    Returns:
        ``True`` when the subreddit was found and deactivated, ``False``
        when no subreddit with that id exists.
    """
    record = await db.get(MonitoredSubreddit, subreddit_id)
    if record:
        record.is_active = False
        await db.commit()
        return True
    return False