Add Reddit monitoring bot — backend, frontend, and Docker config
Python/FastAPI backend with PostgreSQL for collecting Reddit data via public .json endpoints. React/Vite dashboard for analytics. Docker Compose setup with API and worker services connecting to shared PostgreSQL. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
0
backend/__init__.py
Normal file
0
backend/__init__.py
Normal file
18
backend/config.py
Normal file
18
backend/config.py
Normal file
@@ -0,0 +1,18 @@
|
||||
from pydantic_settings import BaseSettings
|
||||
|
||||
|
||||
class Settings(BaseSettings):
    """Application settings with defaults; overrides are read from a UTF-8 ``.env`` file."""

    # Tells pydantic-settings which env file to read and how to decode it.
    model_config = {"env_file": ".env", "env_file_encoding": "utf-8"}

    # SQLAlchemy URL that names the asyncpg driver.
    database_url: str = "postgresql+asyncpg://reddit:changeme@localhost:5432/reddit_monitor"
    reddit_user_agent: str = "reddit-monitor:v1.0"
    seed_subreddits: str = ""
    digest_hour_utc: int = 23
    ai_summary_enabled: bool = False

    @property
    def database_url_sync(self) -> str:
        """Return ``database_url`` with ``+asyncpg`` replaced by ``+psycopg2``."""
        async_driver, sync_driver = "+asyncpg", "+psycopg2"
        return self.database_url.replace(async_driver, sync_driver)
|
||||
|
||||
|
||||
# Shared settings instance, constructed once when this module is imported.
settings = Settings()
|
||||
18
backend/database.py
Normal file
18
backend/database.py
Normal file
@@ -0,0 +1,18 @@
|
||||
from sqlalchemy.ext.asyncio import async_sessionmaker, create_async_engine, AsyncSession
|
||||
from collections.abc import AsyncGenerator
|
||||
|
||||
from backend.config import settings
|
||||
|
||||
# Pool tuning for the async engine: base pool of 5, up to 10 overflow
# connections, and connections recycled after 3600 seconds.
_POOL_OPTIONS = {"pool_size": 5, "max_overflow": 10, "pool_recycle": 3600}

engine = create_async_engine(settings.database_url, **_POOL_OPTIONS)

# Session factory bound to the engine; expire_on_commit is disabled.
async_session = async_sessionmaker(engine, expire_on_commit=False)
|
||||
|
||||
|
||||
async def get_db() -> AsyncGenerator[AsyncSession, None]:
    """Yield a single session from ``async_session`` and close it on exit."""
    async with async_session() as db_session:
        yield db_session
|
||||
39
backend/main.py
Normal file
39
backend/main.py
Normal file
@@ -0,0 +1,39 @@
|
||||
from contextlib import asynccontextmanager
|
||||
|
||||
from fastapi import FastAPI
|
||||
from fastapi.staticfiles import StaticFiles
|
||||
from fastapi.responses import FileResponse
|
||||
|
||||
from backend.database import engine
|
||||
from backend.routers import health, subreddits, posts, comments, authors, analytics, digests, summaries
|
||||
|
||||
|
||||
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan hook: nothing to do on startup, dispose the engine on shutdown.

    Args:
        app: The FastAPI application this lifespan is attached to (unused).
    """
    try:
        yield
    finally:
        # Release pooled connections even if an exception is thrown into the
        # generator at the yield point.
        await engine.dispose()
|
||||
|
||||
|
||||
app = FastAPI(title="Reddit Monitor", lifespan=lifespan)

# API routes: each router module is registered under the same versioned
# prefix, in the order listed here.
for _api_module in (
    health,
    subreddits,
    posts,
    comments,
    authors,
    analytics,
    digests,
    summaries,
):
    app.include_router(_api_module.router, prefix="/api/v1")
|
||||
|
||||
|
||||
# SPA static file serving (only when frontend is built)
import os

from fastapi import HTTPException

static_dir = os.path.join(os.path.dirname(__file__), "..", "static")
if os.path.isdir(static_dir):
    assets_dir = os.path.join(static_dir, "assets")
    # StaticFiles raises at construction time when its directory is missing,
    # so only mount /assets when that directory actually exists.
    if os.path.isdir(assets_dir):
        app.mount("/assets", StaticFiles(directory=assets_dir), name="assets")

    @app.get("/{full_path:path}")
    async def serve_spa(full_path: str):
        """Serve the SPA entry point for any path not matched by an earlier route.

        Args:
            full_path: The request path without its leading slash.

        Raises:
            HTTPException: 404 for unmatched paths under ``api/``, so unknown
                API URLs are not answered with the HTML shell and a 200.
        """
        if full_path == "api" or full_path.startswith("api/"):
            raise HTTPException(status_code=404, detail="Not Found")
        return FileResponse(os.path.join(static_dir, "index.html"))
|
||||
19
backend/models/__init__.py
Normal file
19
backend/models/__init__.py
Normal file
@@ -0,0 +1,19 @@
|
||||
from backend.models.base import Base
|
||||
from backend.models.subreddit import MonitoredSubreddit
|
||||
from backend.models.author import Author
|
||||
from backend.models.post import Post
|
||||
from backend.models.comment import Comment
|
||||
from backend.models.metric_snapshot import MetricSnapshot
|
||||
from backend.models.daily_digest import DailyDigest
|
||||
from backend.models.summary import Summary
|
||||
|
||||
# Names re-exported by this package: the declarative base plus every model.
__all__ = [
    "Base", "MonitoredSubreddit", "Author", "Post",
    "Comment", "MetricSnapshot", "DailyDigest", "Summary",
]
|
||||
23
backend/models/author.py
Normal file
23
backend/models/author.py
Normal file
@@ -0,0 +1,23 @@
|
||||
from datetime import datetime, timezone
|
||||
from sqlalchemy import String, Integer, DateTime
|
||||
from sqlalchemy.orm import Mapped, mapped_column, relationship
|
||||
|
||||
from backend.models.base import Base
|
||||
|
||||
|
||||
def _utcnow() -> datetime:
    """Return the current time as a timezone-aware UTC datetime."""
    return datetime.now(timezone.utc)


class Author(Base):
    """ORM model mapped to the ``authors`` table."""

    __tablename__ = "authors"

    id: Mapped[int] = mapped_column(primary_key=True)
    username: Mapped[str] = mapped_column(String(255), unique=True, nullable=False)

    # Both timestamps default to the current UTC time.
    first_seen_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=_utcnow)
    last_seen_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=_utcnow)

    # Counters start at zero.
    total_posts: Mapped[int] = mapped_column(Integer, default=0)
    total_comments: Mapped[int] = mapped_column(Integer, default=0)

    # Reverse sides of Post.author and Comment.author.
    posts: Mapped[list["Post"]] = relationship(back_populates="author")  # noqa: F821
    comments: Mapped[list["Comment"]] = relationship(back_populates="author")  # noqa: F821
|
||||
5
backend/models/base.py
Normal file
5
backend/models/base.py
Normal file
@@ -0,0 +1,5 @@
|
||||
from sqlalchemy.orm import DeclarativeBase
|
||||
|
||||
|
||||
class Base(DeclarativeBase):
    """Declarative base class that all ORM models in this package inherit from."""
|
||||
34
backend/models/comment.py
Normal file
34
backend/models/comment.py
Normal file
@@ -0,0 +1,34 @@
|
||||
from datetime import datetime, timezone
|
||||
from sqlalchemy import String, Integer, DateTime, ForeignKey
|
||||
from sqlalchemy.orm import Mapped, mapped_column, relationship
|
||||
|
||||
from backend.models.base import Base
|
||||
|
||||
|
||||
def _utcnow() -> datetime:
    """Return the current time as a timezone-aware UTC datetime."""
    return datetime.now(timezone.utc)


class Comment(Base):
    """ORM model mapped to the ``comments`` table."""

    __tablename__ = "comments"

    id: Mapped[int] = mapped_column(primary_key=True)
    reddit_id: Mapped[str] = mapped_column(String(20), unique=True, nullable=False)

    # Foreign keys, each indexed.
    post_id: Mapped[int] = mapped_column(ForeignKey("posts.id"), nullable=False, index=True)
    parent_comment_id: Mapped[int | None] = mapped_column(ForeignKey("comments.id"), index=True)
    author_id: Mapped[int | None] = mapped_column(ForeignKey("authors.id"), index=True)

    body: Mapped[str] = mapped_column(nullable=False)
    score: Mapped[int] = mapped_column(Integer, default=0)
    created_utc: Mapped[datetime] = mapped_column(DateTime(timezone=True), nullable=False)

    # collected_at is set on insert; updated_at is also refreshed on update.
    collected_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=_utcnow)
    updated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), default=_utcnow, onupdate=_utcnow
    )

    post: Mapped["Post"] = relationship(back_populates="comments")  # noqa: F821
    author: Mapped["Author | None"] = relationship(back_populates="comments")  # noqa: F821
    # Self-referential link to the parent comment row.
    parent_comment: Mapped["Comment | None"] = relationship(
        remote_side="Comment.id", foreign_keys=[parent_comment_id]
    )
|
||||
22
backend/models/daily_digest.py
Normal file
22
backend/models/daily_digest.py
Normal file
@@ -0,0 +1,22 @@
|
||||
from datetime import date, datetime, timezone
|
||||
from sqlalchemy import Date, DateTime, ForeignKey, JSON
|
||||
from sqlalchemy.orm import Mapped, mapped_column, relationship
|
||||
|
||||
from backend.models.base import Base
|
||||
|
||||
|
||||
def _utcnow() -> datetime:
    """Return the current time as a timezone-aware UTC datetime."""
    return datetime.now(timezone.utc)


class DailyDigest(Base):
    """ORM model mapped to the ``daily_digests`` table."""

    __tablename__ = "daily_digests"

    id: Mapped[int] = mapped_column(primary_key=True)
    subreddit_id: Mapped[int] = mapped_column(ForeignKey("monitored_subreddits.id"), nullable=False)
    digest_date: Mapped[date] = mapped_column(Date, nullable=False)
    content: Mapped[str] = mapped_column(nullable=False)
    # The column is called "metadata"; the attribute carries a trailing
    # underscore.
    metadata_: Mapped[dict | None] = mapped_column("metadata", JSON)
    generated_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=_utcnow)

    subreddit: Mapped["MonitoredSubreddit"] = relationship(back_populates="daily_digests")  # noqa: F821
|
||||
23
backend/models/metric_snapshot.py
Normal file
23
backend/models/metric_snapshot.py
Normal file
@@ -0,0 +1,23 @@
|
||||
from datetime import datetime, timezone
|
||||
from sqlalchemy import Integer, Float, DateTime, ForeignKey, Index
|
||||
from sqlalchemy.orm import Mapped, mapped_column, relationship
|
||||
|
||||
from backend.models.base import Base
|
||||
|
||||
|
||||
def _utcnow() -> datetime:
    """Return the current time as a timezone-aware UTC datetime."""
    return datetime.now(timezone.utc)


class MetricSnapshot(Base):
    """ORM model mapped to the ``metric_snapshots`` table."""

    __tablename__ = "metric_snapshots"
    # Composite index over (post_id, snapshot_at).
    __table_args__ = (Index("ix_metric_snapshots_post_snapshot", "post_id", "snapshot_at"),)

    id: Mapped[int] = mapped_column(primary_key=True)
    post_id: Mapped[int] = mapped_column(ForeignKey("posts.id"), nullable=False)
    score: Mapped[int] = mapped_column(Integer, nullable=False)
    num_comments: Mapped[int] = mapped_column(Integer, nullable=False)
    upvote_ratio: Mapped[float | None] = mapped_column(Float)
    snapshot_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=_utcnow)

    post: Mapped["Post"] = relationship(back_populates="metric_snapshots")  # noqa: F821
|
||||
42
backend/models/post.py
Normal file
42
backend/models/post.py
Normal file
@@ -0,0 +1,42 @@
|
||||
from datetime import datetime, timezone
|
||||
from sqlalchemy import String, Boolean, Integer, Float, DateTime, ForeignKey, Index
|
||||
from sqlalchemy.orm import Mapped, mapped_column, relationship
|
||||
|
||||
from backend.models.base import Base
|
||||
|
||||
|
||||
def _utcnow() -> datetime:
    """Return the current time as a timezone-aware UTC datetime."""
    return datetime.now(timezone.utc)


class Post(Base):
    """ORM model mapped to the ``posts`` table."""

    __tablename__ = "posts"
    # Composite index over (subreddit_id, created_utc).
    __table_args__ = (Index("ix_posts_subreddit_created", "subreddit_id", "created_utc"),)

    id: Mapped[int] = mapped_column(primary_key=True)
    reddit_id: Mapped[str] = mapped_column(String(20), unique=True, nullable=False)
    subreddit_id: Mapped[int] = mapped_column(ForeignKey("monitored_subreddits.id"), index=True)
    author_id: Mapped[int | None] = mapped_column(ForeignKey("authors.id"), index=True)

    # Text content.
    title: Mapped[str] = mapped_column(nullable=False)
    selftext: Mapped[str | None] = mapped_column()
    url: Mapped[str | None] = mapped_column()
    permalink: Mapped[str | None] = mapped_column()
    flair: Mapped[str | None] = mapped_column(String(255))

    # Metrics and flags.
    score: Mapped[int] = mapped_column(Integer, default=0, index=True)
    upvote_ratio: Mapped[float | None] = mapped_column(Float)
    num_comments: Mapped[int] = mapped_column(Integer, default=0)
    is_self: Mapped[bool | None] = mapped_column(Boolean)
    over_18: Mapped[bool] = mapped_column(Boolean, default=False)
    hot_rank: Mapped[int | None] = mapped_column(Integer)

    # collected_at is set on insert; updated_at is also refreshed on update.
    created_utc: Mapped[datetime] = mapped_column(DateTime(timezone=True), nullable=False)
    collected_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=_utcnow)
    updated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), default=_utcnow, onupdate=_utcnow
    )

    subreddit: Mapped["MonitoredSubreddit"] = relationship(back_populates="posts")  # noqa: F821
    author: Mapped["Author | None"] = relationship(back_populates="posts")  # noqa: F821
    comments: Mapped[list["Comment"]] = relationship(back_populates="post")  # noqa: F821
    metric_snapshots: Mapped[list["MetricSnapshot"]] = relationship(back_populates="post")  # noqa: F821
|
||||
28
backend/models/subreddit.py
Normal file
28
backend/models/subreddit.py
Normal file
@@ -0,0 +1,28 @@
|
||||
from datetime import datetime, timezone
|
||||
from sqlalchemy import String, Boolean, Integer, DateTime
|
||||
from sqlalchemy.orm import Mapped, mapped_column, relationship
|
||||
|
||||
from backend.models.base import Base
|
||||
|
||||
|
||||
def _utcnow() -> datetime:
    """Return the current time as a timezone-aware UTC datetime."""
    return datetime.now(timezone.utc)


class MonitoredSubreddit(Base):
    """ORM model mapped to the ``monitored_subreddits`` table."""

    __tablename__ = "monitored_subreddits"

    id: Mapped[int] = mapped_column(primary_key=True)
    name: Mapped[str] = mapped_column(String(255), unique=True, nullable=False)
    display_name: Mapped[str | None] = mapped_column(String(255))
    description: Mapped[str | None] = mapped_column()
    subscribers: Mapped[int | None] = mapped_column(Integer)
    is_active: Mapped[bool] = mapped_column(Boolean, default=True)

    # created_at is set on insert; updated_at is also refreshed on update.
    created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=_utcnow)
    updated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), default=_utcnow, onupdate=_utcnow
    )

    posts: Mapped[list["Post"]] = relationship(back_populates="subreddit")  # noqa: F821
    daily_digests: Mapped[list["DailyDigest"]] = relationship(back_populates="subreddit")  # noqa: F821
    summaries: Mapped[list["Summary"]] = relationship(back_populates="subreddit")  # noqa: F821
|
||||
25
backend/models/summary.py
Normal file
25
backend/models/summary.py
Normal file
@@ -0,0 +1,25 @@
|
||||
from datetime import datetime, timezone
|
||||
from sqlalchemy import String, DateTime, ForeignKey, JSON
|
||||
from sqlalchemy.orm import Mapped, mapped_column, relationship
|
||||
|
||||
from backend.models.base import Base
|
||||
|
||||
|
||||
def _utcnow() -> datetime:
    """Return the current time as a timezone-aware UTC datetime."""
    return datetime.now(timezone.utc)


class Summary(Base):
    """ORM model mapped to the ``summaries`` table."""

    __tablename__ = "summaries"

    id: Mapped[int] = mapped_column(primary_key=True)
    subreddit_id: Mapped[int] = mapped_column(ForeignKey("monitored_subreddits.id"), nullable=False)
    summary_type: Mapped[str] = mapped_column(String(50), nullable=False)
    content: Mapped[str | None] = mapped_column()
    # The column is called "metadata"; the attribute carries a trailing
    # underscore.
    metadata_: Mapped[dict | None] = mapped_column("metadata", JSON)
    period_start: Mapped[datetime | None] = mapped_column(DateTime(timezone=True))
    period_end: Mapped[datetime | None] = mapped_column(DateTime(timezone=True))
    provider: Mapped[str | None] = mapped_column(String(100))
    generated_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=_utcnow)

    subreddit: Mapped["MonitoredSubreddit"] = relationship(back_populates="summaries")  # noqa: F821
|
||||
0
backend/routers/__init__.py
Normal file
0
backend/routers/__init__.py
Normal file
61
backend/routers/analytics.py
Normal file
61
backend/routers/analytics.py
Normal file
@@ -0,0 +1,61 @@
|
||||
from datetime import datetime
|
||||
from fastapi import APIRouter, Depends, Query
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from backend.database import get_db
|
||||
from backend.services import analytics_service
|
||||
|
||||
# Router for the analytics endpoints; all paths below are relative to /analytics.
router = APIRouter(prefix="/analytics", tags=["analytics"])
|
||||
|
||||
|
||||
@router.get("/engagement")
async def engagement(
    subreddit_id: int | None = None,
    granularity: str = Query("day", pattern="^(hour|day|week)$"),
    since: datetime | None = None,
    until: datetime | None = None,
    db: AsyncSession = Depends(get_db),
):
    """Return engagement data computed by the analytics service.

    ``granularity`` must be ``hour``, ``day`` or ``week``; the remaining
    filters are optional and forwarded unchanged.
    """
    series = await analytics_service.get_engagement(
        db,
        subreddit_id=subreddit_id,
        granularity=granularity,
        since=since,
        until=until,
    )
    return series
|
||||
|
||||
|
||||
@router.get("/top-posts")
async def top_posts(
    subreddit_id: int | None = None,
    metric: str = Query("score", pattern="^(score|num_comments)$"),
    since: datetime | None = None,
    until: datetime | None = None,
    limit: int = Query(10, ge=1, le=50),
    db: AsyncSession = Depends(get_db),
):
    """Return the top posts from the analytics service.

    ``metric`` must be ``score`` or ``num_comments``; ``limit`` is bounded
    to 1-50.
    """
    ranked = await analytics_service.get_top_posts(
        db, subreddit_id, metric, since, until, limit
    )
    return ranked
|
||||
|
||||
|
||||
@router.get("/top-authors")
async def top_authors(
    subreddit_id: int | None = None,
    since: datetime | None = None,
    until: datetime | None = None,
    limit: int = Query(10, ge=1, le=50),
    db: AsyncSession = Depends(get_db),
):
    """Return the top authors from the analytics service (``limit`` bounded to 1-50)."""
    ranked = await analytics_service.get_top_authors(db, subreddit_id, since, until, limit)
    return ranked
|
||||
|
||||
|
||||
@router.get("/subreddit-summary")
async def subreddit_summary(
    since: datetime | None = None,
    until: datetime | None = None,
    db: AsyncSession = Depends(get_db),
):
    """Return the subreddit summary from the analytics service for an optional time range."""
    summary = await analytics_service.get_subreddit_summary(db, since, until)
    return summary
|
||||
|
||||
|
||||
@router.get("/flair-distribution")
async def flair_distribution(
    subreddit_id: int = Query(...),
    since: datetime | None = None,
    until: datetime | None = None,
    db: AsyncSession = Depends(get_db),
):
    """Return the flair distribution for one subreddit; ``subreddit_id`` is required."""
    distribution = await analytics_service.get_flair_distribution(
        db, subreddit_id, since, until
    )
    return distribution
|
||||
39
backend/routers/authors.py
Normal file
39
backend/routers/authors.py
Normal file
@@ -0,0 +1,39 @@
|
||||
from datetime import datetime
|
||||
from fastapi import APIRouter, Depends, HTTPException, Query
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from backend.database import get_db
|
||||
from backend.services import author_service
|
||||
|
||||
# Router for the author endpoints; all paths below are relative to /authors.
router = APIRouter(prefix="/authors", tags=["authors"])
|
||||
|
||||
|
||||
@router.get("")
async def list_authors(
    subreddit_id: int | None = None,
    sort_by: str = Query("total_comments", pattern="^(total_posts|total_comments)$"),
    sort_order: str = Query("desc", pattern="^(asc|desc)$"),
    since: datetime | None = None,
    until: datetime | None = None,
    page: int = Query(1, ge=1),
    per_page: int = Query(25, ge=1, le=100),
    db: AsyncSession = Depends(get_db),
):
    """Return one page of authors together with pagination totals."""
    authors, total = await author_service.list_authors(
        db, subreddit_id, sort_by, sort_order, since, until, page, per_page
    )

    # Ceiling division: a partially filled last page still counts as a page.
    if per_page:
        full_pages, remainder = divmod(total, per_page)
        page_count = full_pages + (1 if remainder else 0)
    else:
        page_count = 0

    return {
        "data": authors,
        "total": total,
        "page": page,
        "per_page": per_page,
        "pages": page_count,
    }
|
||||
|
||||
|
||||
@router.get("/{author_id}")
async def get_author(author_id: int, db: AsyncSession = Depends(get_db)):
    """Return a single author, or respond with 404 when the service finds none."""
    found = await author_service.get_author(db, author_id)
    if found:
        return found
    raise HTTPException(status_code=404, detail="Author not found")
|
||||
33
backend/routers/comments.py
Normal file
33
backend/routers/comments.py
Normal file
@@ -0,0 +1,33 @@
|
||||
from datetime import datetime
|
||||
from fastapi import APIRouter, Depends, Query
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from backend.database import get_db
|
||||
from backend.services import comment_service
|
||||
|
||||
# Router for the comment endpoints; all paths below are relative to /comments.
router = APIRouter(prefix="/comments", tags=["comments"])
|
||||
|
||||
|
||||
@router.get("")
async def list_comments(
    post_id: int | None = None,
    subreddit_id: int | None = None,
    author: str | None = None,
    sort_by: str = Query("created_utc", pattern="^(created_utc|score)$"),
    sort_order: str = Query("desc", pattern="^(asc|desc)$"),
    since: datetime | None = None,
    until: datetime | None = None,
    page: int = Query(1, ge=1),
    per_page: int = Query(25, ge=1, le=100),
    db: AsyncSession = Depends(get_db),
):
    """Return one page of comments together with pagination totals."""
    comments, total = await comment_service.list_comments(
        db, post_id, subreddit_id, author, sort_by, sort_order, since, until, page, per_page
    )

    # Ceiling division: a partially filled last page still counts as a page.
    if per_page:
        full_pages, remainder = divmod(total, per_page)
        page_count = full_pages + (1 if remainder else 0)
    else:
        page_count = 0

    return {
        "data": comments,
        "total": total,
        "page": page,
        "per_page": per_page,
        "pages": page_count,
    }
|
||||
54
backend/routers/digests.py
Normal file
54
backend/routers/digests.py
Normal file
@@ -0,0 +1,54 @@
|
||||
from fastapi import APIRouter, Depends, HTTPException, Query
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from backend.database import get_db
|
||||
from backend.models.daily_digest import DailyDigest
|
||||
from backend.models.subreddit import MonitoredSubreddit
|
||||
|
||||
# Router for the digest endpoints; all paths below are relative to /digests.
router = APIRouter(prefix="/digests", tags=["digests"])
|
||||
|
||||
|
||||
@router.get("")
async def list_digests(
    subreddit_id: int | None = None,
    page: int = Query(1, ge=1),
    per_page: int = Query(25, ge=1, le=100),
    db: AsyncSession = Depends(get_db),
):
    """Return one page of daily digests, newest digest date first.

    Args:
        subreddit_id: When given, only digests of that subreddit are returned.
        page: 1-based page number.
        per_page: Page size (1-100).
        db: Database session.

    Returns:
        A dict with ``data`` (one dict per digest, keyed by column name plus
        ``subreddit_name``), ``page`` and ``per_page``.
    """
    stmt = (
        select(DailyDigest, MonitoredSubreddit.name)
        .join(MonitoredSubreddit)
        # Tie-break on the primary key so OFFSET/LIMIT pages stay stable when
        # several digests share the same date.
        .order_by(DailyDigest.digest_date.desc(), DailyDigest.id.desc())
    )
    # Compare against None explicitly so an id of 0 is still applied as a filter.
    if subreddit_id is not None:
        stmt = stmt.where(DailyDigest.subreddit_id == subreddit_id)

    stmt = stmt.offset((page - 1) * per_page).limit(per_page)
    result = await db.execute(stmt)

    mapper = DailyDigest.__mapper__
    digests = []
    for digest, sub_name in result.all():
        # The "metadata" column is mapped to the attribute ``metadata_``;
        # ``digest.metadata`` would be the declarative MetaData registry.
        # Resolve each column to its mapped attribute before reading it.
        data = {
            col.name: getattr(digest, mapper.get_property_by_column(col).key)
            for col in DailyDigest.__table__.columns
        }
        data["subreddit_name"] = sub_name
        digests.append(data)

    return {"data": digests, "page": page, "per_page": per_page}
|
||||
|
||||
|
||||
@router.get("/{digest_id}")
async def get_digest(digest_id: int, db: AsyncSession = Depends(get_db)):
    """Return a single daily digest with the name of its subreddit.

    Args:
        digest_id: Primary key of the digest.
        db: Database session.

    Returns:
        A dict keyed by column name plus ``subreddit_name``.

    Raises:
        HTTPException: 404 when no digest has the given id.
    """
    stmt = (
        select(DailyDigest, MonitoredSubreddit.name)
        .join(MonitoredSubreddit)
        .where(DailyDigest.id == digest_id)
    )
    result = await db.execute(stmt)
    row = result.first()
    if not row:
        raise HTTPException(status_code=404, detail="Digest not found")

    digest, sub_name = row
    mapper = DailyDigest.__mapper__
    # The "metadata" column is mapped to the attribute ``metadata_``;
    # ``digest.metadata`` would be the declarative MetaData registry.
    # Resolve each column to its mapped attribute before reading it.
    data = {
        col.name: getattr(digest, mapper.get_property_by_column(col).key)
        for col in DailyDigest.__table__.columns
    }
    data["subreddit_name"] = sub_name
    return data
|
||||
16
backend/routers/health.py
Normal file
16
backend/routers/health.py
Normal file
@@ -0,0 +1,16 @@
|
||||
from fastapi import APIRouter, Depends
|
||||
from sqlalchemy import text
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from backend.database import get_db
|
||||
|
||||
# Router for the health endpoint; it declares no prefix of its own.
router = APIRouter()
|
||||
|
||||
|
||||
@router.get("/health")
async def health_check(db: AsyncSession = Depends(get_db)):
    """Probe the database with ``SELECT 1`` and report the outcome.

    Returns:
        ``{"status": "ok", "db": "connected"}`` when the probe succeeds,
        otherwise ``{"status": "degraded", "db": "disconnected"}``.
    """
    import logging

    try:
        await db.execute(text("SELECT 1"))
    except Exception:
        # Deliberately broad: any failure means "degraded". Log it with the
        # traceback so the cause is not lost.
        logging.getLogger(__name__).exception("Health check database probe failed")
        return {"status": "degraded", "db": "disconnected"}
    return {"status": "ok", "db": "connected"}
|
||||
64
backend/routers/posts.py
Normal file
64
backend/routers/posts.py
Normal file
@@ -0,0 +1,64 @@
|
||||
from datetime import datetime
|
||||
from fastapi import APIRouter, Depends, HTTPException, Query
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from backend.database import get_db
|
||||
from backend.schemas.post import PostResponse, PostDetailResponse
|
||||
from backend.services import post_service
|
||||
from backend.models.metric_snapshot import MetricSnapshot
|
||||
from sqlalchemy import select
|
||||
|
||||
# Router for the post endpoints; all paths below are relative to /posts.
router = APIRouter(prefix="/posts", tags=["posts"])
|
||||
|
||||
|
||||
@router.get("")
async def list_posts(
    subreddit_id: int | None = None,
    author: str | None = None,
    flair: str | None = None,
    sort_by: str = Query("created_utc", pattern="^(created_utc|score|num_comments)$"),
    sort_order: str = Query("desc", pattern="^(asc|desc)$"),
    since: datetime | None = None,
    until: datetime | None = None,
    page: int = Query(1, ge=1),
    per_page: int = Query(25, ge=1, le=100),
    db: AsyncSession = Depends(get_db),
):
    """Return one page of posts together with pagination totals."""
    posts, total = await post_service.list_posts(
        db, subreddit_id, author, flair, sort_by, sort_order, since, until, page, per_page
    )

    # Ceiling division: a partially filled last page still counts as a page.
    if per_page:
        full_pages, remainder = divmod(total, per_page)
        page_count = full_pages + (1 if remainder else 0)
    else:
        page_count = 0

    return {
        "data": posts,
        "total": total,
        "page": page,
        "per_page": per_page,
        "pages": page_count,
    }
|
||||
|
||||
|
||||
@router.get("/{post_id}")
async def get_post(post_id: int, db: AsyncSession = Depends(get_db)):
    """Return a single post, or respond with 404 when the service finds none."""
    found = await post_service.get_post(db, post_id)
    if found:
        return found
    raise HTTPException(status_code=404, detail="Post not found")
|
||||
|
||||
|
||||
@router.get("/{post_id}/snapshots")
async def get_post_snapshots(post_id: int, db: AsyncSession = Depends(get_db)):
    """Return the metric snapshots of a post, oldest first.

    Each entry holds ``score``, ``num_comments``, ``upvote_ratio`` and
    ``snapshot_at``. An empty list is returned when no snapshots match.
    """
    query = select(MetricSnapshot).where(MetricSnapshot.post_id == post_id)
    query = query.order_by(MetricSnapshot.snapshot_at.asc())
    rows = (await db.execute(query)).scalars().all()

    history = []
    for snap in rows:
        history.append(
            {
                "score": snap.score,
                "num_comments": snap.num_comments,
                "upvote_ratio": snap.upvote_ratio,
                "snapshot_at": snap.snapshot_at,
            }
        )
    return history
|
||||
47
backend/routers/subreddits.py
Normal file
47
backend/routers/subreddits.py
Normal file
@@ -0,0 +1,47 @@
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from backend.database import get_db
|
||||
from backend.schemas.subreddit import SubredditCreate, SubredditUpdate, SubredditResponse
|
||||
from backend.services import subreddit_service
|
||||
|
||||
# Router for the subreddit endpoints; all paths below are relative to /subreddits.
router = APIRouter(prefix="/subreddits", tags=["subreddits"])
|
||||
|
||||
|
||||
@router.get("", response_model=list[SubredditResponse])
async def list_subreddits(db: AsyncSession = Depends(get_db)):
    """Return every subreddit the service lists."""
    subreddit_rows = await subreddit_service.list_subreddits(db)
    return subreddit_rows
|
||||
|
||||
|
||||
@router.post("", response_model=SubredditResponse, status_code=201)
async def create_subreddit(body: SubredditCreate, db: AsyncSession = Depends(get_db)):
    """Create a subreddit by name and return it with ``post_count`` set to 0."""
    created = await subreddit_service.create_subreddit(db, body.name)

    # Copy every table column into a plain dict, then add the post count.
    payload = {}
    for column in created.__table__.columns:
        payload[column.name] = getattr(created, column.name)
    payload["post_count"] = 0
    return payload
|
||||
|
||||
|
||||
@router.get("/{subreddit_id}", response_model=SubredditResponse)
async def get_subreddit(subreddit_id: int, db: AsyncSession = Depends(get_db)):
    """Return a single subreddit, or respond with 404 when the service finds none."""
    found = await subreddit_service.get_subreddit(db, subreddit_id)
    if found:
        return found
    raise HTTPException(status_code=404, detail="Subreddit not found")
|
||||
|
||||
|
||||
@router.patch("/{subreddit_id}", response_model=SubredditResponse)
async def update_subreddit(
    subreddit_id: int,
    body: SubredditUpdate,
    db: AsyncSession = Depends(get_db),
):
    """Apply ``is_active`` to a subreddit and return it, re-read through the service.

    Responds with 404 when the update reports no matching subreddit.
    """
    updated = await subreddit_service.update_subreddit(db, subreddit_id, body.is_active)
    if updated:
        # Fetch again through get_subreddit rather than returning the update result.
        return await subreddit_service.get_subreddit(db, subreddit_id)
    raise HTTPException(status_code=404, detail="Subreddit not found")
|
||||
|
||||
|
||||
@router.delete("/{subreddit_id}", status_code=204)
async def delete_subreddit(subreddit_id: int, db: AsyncSession = Depends(get_db)):
    """Delete a subreddit; responds with 404 when the service reports nothing deleted."""
    was_deleted = await subreddit_service.delete_subreddit(db, subreddit_id)
    if was_deleted:
        return
    raise HTTPException(status_code=404, detail="Subreddit not found")
|
||||
13
backend/routers/summaries.py
Normal file
13
backend/routers/summaries.py
Normal file
@@ -0,0 +1,13 @@
|
||||
from fastapi import APIRouter
|
||||
|
||||
# Router for the summary endpoints; all paths below are relative to /summaries.
router = APIRouter(prefix="/summaries", tags=["summaries"])
|
||||
|
||||
|
||||
@router.get("")
async def list_summaries():
    """Placeholder: always returns an empty list and a "not yet configured" message."""
    placeholder = {"data": [], "message": "AI summaries not yet configured"}
    return placeholder
|
||||
|
||||
|
||||
@router.get("/{summary_id}")
async def get_summary(summary_id: int):
    """Placeholder: returns a "not yet configured" detail for any ``summary_id``."""
    placeholder = {"detail": "AI summaries not yet configured"}
    return placeholder
|
||||
0
backend/schemas/__init__.py
Normal file
0
backend/schemas/__init__.py
Normal file
54
backend/schemas/analytics.py
Normal file
54
backend/schemas/analytics.py
Normal file
@@ -0,0 +1,54 @@
|
||||
from datetime import datetime, date
|
||||
from pydantic import BaseModel
|
||||
|
||||
|
||||
class EngagementPoint(BaseModel):
    """One engagement data point: a period label with its counts and average score."""

    period: str
    posts: int
    comments: int
    avg_score: float
|
||||
|
||||
|
||||
class TopPost(BaseModel):
    """A post entry in a top-posts ranking."""

    id: int
    title: str
    score: int
    num_comments: int
    author_name: str | None  # nullable, but has no default
    subreddit_name: str
    created_utc: datetime
    permalink: str | None  # nullable, but has no default
|
||||
|
||||
|
||||
class TopAuthor(BaseModel):
    """An author entry in a top-authors ranking, with post, comment and total activity counts."""

    id: int
    username: str
    post_count: int
    comment_count: int
    total_activity: int
|
||||
|
||||
|
||||
class SubredditSummary(BaseModel):
    """Aggregate figures for one subreddit."""

    subreddit_id: int
    subreddit_name: str
    total_posts: int
    total_comments: int
    avg_score: float
    top_flair: str | None  # nullable, but has no default
|
||||
|
||||
|
||||
class FlairCount(BaseModel):
    """A flair value (possibly ``None``) and the number of times it occurs."""

    flair: str | None
    count: int
|
||||
|
||||
|
||||
class DigestResponse(BaseModel):
    """Schema for a daily digest; can be populated from object attributes."""

    # Allow validation from attribute-bearing objects, not only dicts.
    model_config = {"from_attributes": True}

    id: int
    subreddit_id: int
    subreddit_name: str | None = None
    digest_date: date
    content: str
    metadata_: dict | None = None
    generated_at: datetime
|
||||
13
backend/schemas/author.py
Normal file
13
backend/schemas/author.py
Normal file
@@ -0,0 +1,13 @@
|
||||
from datetime import datetime
|
||||
from pydantic import BaseModel
|
||||
|
||||
|
||||
class AuthorResponse(BaseModel):
    """Schema for an author; can be populated from object attributes."""

    # Allow validation from attribute-bearing objects, not only dicts.
    model_config = {"from_attributes": True}

    id: int
    username: str
    first_seen_at: datetime
    last_seen_at: datetime
    total_posts: int
    total_comments: int
|
||||
19
backend/schemas/common.py
Normal file
19
backend/schemas/common.py
Normal file
@@ -0,0 +1,19 @@
|
||||
from datetime import datetime
|
||||
from pydantic import BaseModel
|
||||
|
||||
|
||||
class PaginationParams(BaseModel):
    """Pagination inputs: page number (default 1) and page size (default 25)."""

    page: int = 1
    per_page: int = 25
|
||||
|
||||
|
||||
class PaginatedResponse(BaseModel):
    """Pagination totals: item count, current page, page size and page count."""

    total: int
    page: int
    per_page: int
    pages: int
|
||||
|
||||
|
||||
class TimeRangeParams(BaseModel):
    """Optional time-range bounds; both default to ``None``."""

    since: datetime | None = None
    until: datetime | None = None
|
||||
45
backend/schemas/post.py
Normal file
45
backend/schemas/post.py
Normal file
@@ -0,0 +1,45 @@
|
||||
from datetime import datetime
|
||||
from pydantic import BaseModel
|
||||
|
||||
|
||||
class PostResponse(BaseModel):
    """Schema for a post; can be populated from object attributes."""

    # Allow validation from attribute-bearing objects, not only dicts.
    model_config = {"from_attributes": True}

    # Identity and ownership.
    id: int
    reddit_id: str
    subreddit_id: int
    subreddit_name: str | None = None
    author_id: int | None
    author_name: str | None = None

    # Content.
    title: str
    selftext: str | None
    url: str | None
    permalink: str | None
    flair: str | None

    # Metrics and flags.
    score: int
    upvote_ratio: float | None
    num_comments: int
    is_self: bool | None
    over_18: bool
    hot_rank: int | None

    # Timestamps.
    created_utc: datetime
    collected_at: datetime
    updated_at: datetime
|
||||
|
||||
|
||||
class PostDetailResponse(PostResponse):
    """``PostResponse`` extended with a list of comments (empty by default)."""

    # Forward reference by name to CommentResponse.
    comments: list["CommentResponse"] = []
|
||||
|
||||
|
||||
class CommentResponse(BaseModel):
    """Schema for a comment; can be populated from object attributes."""

    # Allow validation from attribute-bearing objects, not only dicts.
    model_config = {"from_attributes": True}

    id: int
    reddit_id: str
    post_id: int
    parent_comment_id: int | None
    author_id: int | None
    author_name: str | None = None
    body: str
    score: int
    created_utc: datetime
|
||||
24
backend/schemas/subreddit.py
Normal file
24
backend/schemas/subreddit.py
Normal file
@@ -0,0 +1,24 @@
|
||||
from datetime import datetime
|
||||
from pydantic import BaseModel
|
||||
|
||||
|
||||
class SubredditCreate(BaseModel):
    """Request body for creating a subreddit: just its name."""

    name: str
|
||||
|
||||
|
||||
class SubredditUpdate(BaseModel):
    """Request body for updating a subreddit; ``is_active`` defaults to ``None``."""

    is_active: bool | None = None
|
||||
|
||||
|
||||
class SubredditResponse(BaseModel):
    """Serialized view of a monitored subreddit.

    ``from_attributes`` is enabled, so instances can be built from objects
    exposing these fields as attributes.
    """

    # --- identity and descriptive fields ---
    id: int
    name: str
    display_name: str | None
    description: str | None
    subscribers: int | None

    # --- monitoring state and timestamps ---
    is_active: bool
    created_at: datetime
    updated_at: datetime

    # Number of posts for this subreddit; defaults to 0 when not supplied.
    post_count: int = 0

    model_config = {"from_attributes": True}
|
||||
0
backend/services/__init__.py
Normal file
0
backend/services/__init__.py
Normal file
231
backend/services/analytics_service.py
Normal file
231
backend/services/analytics_service.py
Normal file
@@ -0,0 +1,231 @@
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from sqlalchemy import select, func, case, text
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from backend.models.post import Post
|
||||
from backend.models.comment import Comment
|
||||
from backend.models.author import Author
|
||||
from backend.models.subreddit import MonitoredSubreddit
|
||||
|
||||
|
||||
async def get_engagement(
    db: AsyncSession,
    subreddit_id: int | None = None,
    granularity: str = "day",
    since: datetime | None = None,
    until: datetime | None = None,
) -> list[dict]:
    """Return per-period post/comment counts and average post score.

    Args:
        db: Async session used to run the two aggregate queries.
        subreddit_id: When truthy, restrict both queries to this subreddit.
        granularity: Field name handed to SQL ``date_trunc`` (e.g. "day").
        since: Inclusive lower bound; defaults to 30 days before now (UTC).
        until: Inclusive upper bound; defaults to now (UTC).

    Returns:
        One dict per period that contains at least one post, ordered by
        period, with keys ``period``, ``posts``, ``comments`` and
        ``avg_score``. Periods that only have comments are not emitted.
    """
    since = since or (datetime.now(timezone.utc) - timedelta(days=30))
    until = until or datetime.now(timezone.utc)

    # Posts bucketed by truncated creation time.
    post_period = func.date_trunc(granularity, Post.created_utc)
    post_query = select(
        post_period.label("period"),
        func.count(Post.id).label("posts"),
        func.coalesce(func.avg(Post.score), 0).label("avg_score"),
    ).where(Post.created_utc >= since, Post.created_utc <= until)
    if subreddit_id:
        post_query = post_query.where(Post.subreddit_id == subreddit_id)
    post_rows = await db.execute(post_query.group_by("period").order_by("period"))

    # Comments bucketed the same way; joined to Post for the subreddit filter.
    comment_period = func.date_trunc(granularity, Comment.created_utc)
    comment_query = (
        select(
            comment_period.label("period"),
            func.count(Comment.id).label("comments"),
        )
        .join(Post)
        .where(Comment.created_utc >= since, Comment.created_utc <= until)
    )
    if subreddit_id:
        comment_query = comment_query.where(Post.subreddit_id == subreddit_id)
    comment_rows = await db.execute(comment_query.group_by("period"))

    # Index comment counts by the stringified period for lookup below.
    comments_by_period = {}
    for row in comment_rows:
        comments_by_period[str(row.period)] = row.comments

    buckets = []
    for row in post_rows:
        key = str(row.period)
        buckets.append(
            {
                "period": key,
                "posts": row.posts,
                "comments": comments_by_period.get(key, 0),
                "avg_score": round(float(row.avg_score), 1),
            }
        )
    return buckets
|
||||
|
||||
|
||||
async def get_top_posts(
    db: AsyncSession,
    subreddit_id: int | None = None,
    metric: str = "score",
    since: datetime | None = None,
    until: datetime | None = None,
    limit: int = 10,
) -> list[dict]:
    """Return the top posts in a time window, ranked by a metric.

    Args:
        db: Async session used to run the query.
        subreddit_id: When truthy, restrict to this subreddit.
        metric: "score" ranks by score; any other value ranks by
            number of comments.
        since: Inclusive lower bound; defaults to 7 days before now (UTC).
        until: Optional inclusive upper bound.
        limit: Maximum number of posts returned.

    Returns:
        A list of dicts with the post's id, title, score, num_comments,
        author_name, subreddit_name, created_utc and permalink.
    """
    window_start = since or (datetime.now(timezone.utc) - timedelta(days=7))

    conditions = [Post.created_utc >= window_start]
    if until:
        conditions.append(Post.created_utc <= until)
    if subreddit_id:
        conditions.append(Post.subreddit_id == subreddit_id)

    ranking = Post.score if metric == "score" else Post.num_comments
    query = (
        select(Post, MonitoredSubreddit.name, Author.username)
        .join(MonitoredSubreddit)
        .outerjoin(Author)  # outer join keeps posts without an author row
        .where(*conditions)
        .order_by(ranking.desc())
        .limit(limit)
    )

    rows = (await db.execute(query)).all()
    top = []
    for post, sub_name, author_name in rows:
        top.append(
            {
                "id": post.id,
                "title": post.title,
                "score": post.score,
                "num_comments": post.num_comments,
                "author_name": author_name,
                "subreddit_name": sub_name,
                "created_utc": post.created_utc,
                "permalink": post.permalink,
            }
        )
    return top
|
||||
|
||||
|
||||
async def get_top_authors(
    db: AsyncSession,
    subreddit_id: int | None = None,
    since: datetime | None = None,
    until: datetime | None = None,
    limit: int = 10,
) -> list[dict]:
    """Return the authors with the most posts plus comments in a window.

    Args:
        db: Async session used to run the query.
        subreddit_id: When truthy, count only activity in this subreddit.
        since: Inclusive lower bound; defaults to 7 days before now (UTC).
        until: Optional inclusive upper bound.
        limit: Maximum number of authors returned.

    Returns:
        A list of dicts with ``id``, ``username``, ``post_count``,
        ``comment_count`` and ``total_activity``, ordered by total
        activity descending.
    """
    window_start = since or (datetime.now(timezone.utc) - timedelta(days=7))

    # Predicates for the two correlated per-author counts.
    post_filters = [Post.author_id == Author.id, Post.created_utc >= window_start]
    comment_filters = [
        Comment.author_id == Author.id,
        Comment.created_utc >= window_start,
    ]
    if until:
        post_filters.append(Post.created_utc <= until)
        comment_filters.append(Comment.created_utc <= until)
    if subreddit_id:
        post_filters.append(Post.subreddit_id == subreddit_id)
        comment_filters.append(Post.subreddit_id == subreddit_id)

    posts_q = select(func.count(Post.id)).where(*post_filters)
    comments_q = select(func.count(Comment.id)).where(*comment_filters)
    if subreddit_id:
        # Comments reach their subreddit through the parent post.
        comments_q = comments_q.join(Post)

    posts_col = posts_q.correlate(Author).scalar_subquery().label("post_count")
    comments_col = (
        comments_q.correlate(Author).scalar_subquery().label("comment_count")
    )

    query = (
        select(Author, posts_col, comments_col)
        .order_by((posts_col + comments_col).desc())
        .limit(limit)
    )
    rows = (await db.execute(query)).all()

    ranked = []
    for author, n_posts, n_comments in rows:
        ranked.append(
            {
                "id": author.id,
                "username": author.username,
                "post_count": n_posts or 0,
                "comment_count": n_comments or 0,
                "total_activity": (n_posts or 0) + (n_comments or 0),
            }
        )
    return ranked
|
||||
|
||||
|
||||
async def get_subreddit_summary(
    db: AsyncSession,
    since: datetime | None = None,
    until: datetime | None = None,
) -> list[dict]:
    """Summarise activity for every active monitored subreddit.

    Args:
        db: Async session used to run the queries.
        since: Inclusive lower bound; defaults to 7 days before now (UTC).
        until: Optional inclusive upper bound. It is applied to the post
            aggregate, the comment count and the top-flair lookup alike.

    Returns:
        One dict per active subreddit, ordered by name, with keys
        ``subreddit_id``, ``subreddit_name``, ``total_posts``,
        ``total_comments``, ``avg_score`` and ``top_flair``. Subreddits
        with no posts in the window are still listed (with zero counts).
    """
    if not since:
        since = datetime.now(timezone.utc) - timedelta(days=7)

    # Time-window predicates, shared by the aggregate and per-subreddit queries.
    post_window = [Post.created_utc >= since]
    comment_window = [Comment.created_utc >= since]
    if until:
        post_window.append(Post.created_utc <= until)
        comment_window.append(Comment.created_utc <= until)

    # Keep the whole window in the ON clause: a WHERE on the outer-joined Post
    # columns would discard the NULL rows and drop subreddits with no posts.
    join_on = Post.subreddit_id == MonitoredSubreddit.id
    for bound in post_window:
        join_on = join_on & bound

    stmt = (
        select(
            MonitoredSubreddit.id,
            MonitoredSubreddit.name,
            func.count(Post.id).label("total_posts"),
            func.coalesce(func.avg(Post.score), 0).label("avg_score"),
        )
        .outerjoin(Post, join_on)
        .where(MonitoredSubreddit.is_active == True)  # noqa: E712
        .group_by(MonitoredSubreddit.id)
        .order_by(MonitoredSubreddit.name)
    )
    result = await db.execute(stmt)

    summaries = []
    for sub_id, sub_name, total_posts, avg_score in result.all():
        # Comment count for this subreddit within the same window.
        cc = await db.execute(
            select(func.count(Comment.id))
            .join(Post)
            .where(Post.subreddit_id == sub_id, *comment_window)
        )
        comment_count = cc.scalar() or 0

        # Most frequent non-null flair within the same window.
        flair_stmt = (
            select(Post.flair, func.count(Post.id).label("cnt"))
            .where(
                Post.subreddit_id == sub_id,
                Post.flair.isnot(None),
                *post_window,
            )
            .group_by(Post.flair)
            .order_by(func.count(Post.id).desc())
            .limit(1)
        )
        top_flair_row = (await db.execute(flair_stmt)).first()

        summaries.append({
            "subreddit_id": sub_id,
            "subreddit_name": sub_name,
            "total_posts": total_posts,
            "total_comments": comment_count,
            "avg_score": round(float(avg_score), 1),
            "top_flair": top_flair_row[0] if top_flair_row else None,
        })

    return summaries
|
||||
|
||||
|
||||
async def get_flair_distribution(
    db: AsyncSession,
    subreddit_id: int,
    since: datetime | None = None,
    until: datetime | None = None,
) -> list[dict]:
    """Count posts per flair for one subreddit.

    Args:
        db: Async session used to run the query.
        subreddit_id: Subreddit whose posts are counted.
        since: Inclusive lower bound; defaults to 30 days before now (UTC).
        until: Optional inclusive upper bound.

    Returns:
        A list of ``{"flair", "count"}`` dicts, most frequent first. Posts
        without a flair are grouped under ``None``.
    """
    window_start = since or (datetime.now(timezone.utc) - timedelta(days=30))

    conditions = [
        Post.subreddit_id == subreddit_id,
        Post.created_utc >= window_start,
    ]
    if until:
        conditions.append(Post.created_utc <= until)

    flair_count = func.count(Post.id)
    query = (
        select(Post.flair, flair_count.label("count"))
        .where(*conditions)
        .group_by(Post.flair)
        .order_by(func.count(Post.id).desc())
    )

    rows = (await db.execute(query)).all()
    distribution = []
    for flair, count in rows:
        distribution.append({"flair": flair, "count": count})
    return distribution
|
||||
79
backend/services/author_service.py
Normal file
79
backend/services/author_service.py
Normal file
@@ -0,0 +1,79 @@
|
||||
from datetime import datetime
|
||||
from sqlalchemy import select, func
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from backend.models.author import Author
|
||||
from backend.models.post import Post
|
||||
from backend.models.comment import Comment
|
||||
|
||||
|
||||
async def list_authors(
    db: AsyncSession,
    subreddit_id: int | None = None,
    sort_by: str = "total_comments",
    sort_order: str = "desc",
    since: datetime | None = None,
    until: datetime | None = None,
    page: int = 1,
    per_page: int = 25,
) -> tuple[list[dict], int]:
    """Return one page of authors plus the total author count.

    Args:
        db: Async session used to run the queries.
        subreddit_id: When truthy, the per-author activity subqueries are
            restricted to this subreddit.
        sort_by: Name of an ``Author`` column to sort on; anything that is
            not a column name falls back to ``total_comments``.
        sort_order: "asc" for ascending; any other value sorts descending.
        since: Optional inclusive lower bound for the activity subqueries.
        until: Optional inclusive upper bound for the activity subqueries.
        page: 1-based page number.
        per_page: Page size.

    Returns:
        ``(authors, total)`` where each author is a dict of the Author
        table's columns and ``total`` is the unpaginated row count.

    NOTE(review): the filtered post/comment counts are selected when a
    filter is given but are neither returned nor used to restrict or sort
    the rows — confirm whether callers expect them.
    """
    if subreddit_id or since or until:
        # Correlated per-author activity counts honouring the filters.
        post_count = select(func.count(Post.id)).where(Post.author_id == Author.id)
        comment_count = select(func.count(Comment.id)).where(
            Comment.author_id == Author.id
        )

        if subreddit_id:
            post_count = post_count.where(Post.subreddit_id == subreddit_id)
            comment_count = comment_count.join(Post).where(
                Post.subreddit_id == subreddit_id
            )
        if since:
            post_count = post_count.where(Post.created_utc >= since)
            comment_count = comment_count.where(Comment.created_utc >= since)
        if until:
            post_count = post_count.where(Post.created_utc <= until)
            comment_count = comment_count.where(Comment.created_utc <= until)

        base = select(
            Author,
            post_count.correlate(Author).scalar_subquery().label("filtered_posts"),
            comment_count.correlate(Author).scalar_subquery().label("filtered_comments"),
        )
    else:
        base = select(Author)

    count_stmt = select(func.count()).select_from(base.subquery())
    total = (await db.execute(count_stmt)).scalar() or 0

    # Only real columns may be sorted on: a bare getattr() would also resolve
    # non-column class attributes that have no .asc()/.desc().
    sortable = {c.name for c in Author.__table__.columns}
    sort_col = getattr(Author, sort_by) if sort_by in sortable else Author.total_comments
    if sort_order == "asc":
        base = base.order_by(sort_col.asc())
    else:
        base = base.order_by(sort_col.desc())

    base = base.offset((page - 1) * per_page).limit(per_page)

    result = await db.execute(base)
    authors = []
    for row in result.all():
        # Every result row carries the Author entity in position 0, whether
        # or not the filtered-count columns were selected alongside it.
        author = row[0]
        authors.append(
            {c.name: getattr(author, c.name) for c in author.__table__.columns}
        )

    return authors, total
|
||||
|
||||
|
||||
async def get_author(db: AsyncSession, author_id: int) -> dict | None:
    """Load one author by primary key as a column-name -> value dict.

    Returns ``None`` when no matching author is found.
    """
    record = await db.get(Author, author_id)
    if record:
        return {col.name: getattr(record, col.name) for col in record.__table__.columns}
    return None
|
||||
57
backend/services/comment_service.py
Normal file
57
backend/services/comment_service.py
Normal file
@@ -0,0 +1,57 @@
|
||||
from datetime import datetime
|
||||
from sqlalchemy import select, func
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from backend.models.comment import Comment
|
||||
from backend.models.post import Post
|
||||
from backend.models.author import Author
|
||||
|
||||
|
||||
async def list_comments(
    db: AsyncSession,
    post_id: int | None = None,
    subreddit_id: int | None = None,
    author: str | None = None,
    sort_by: str = "created_utc",
    sort_order: str = "desc",
    since: datetime | None = None,
    until: datetime | None = None,
    page: int = 1,
    per_page: int = 25,
) -> tuple[list[dict], int]:
    """Return one page of comments matching the filters, plus the total.

    Args:
        db: Async session used to run the queries.
        post_id: When truthy, only comments on this post.
        subreddit_id: When truthy, only comments whose post is in this
            subreddit.
        author: When truthy, only comments by this exact username.
        sort_by: Name of a ``Comment`` column to sort on; anything that is
            not a column name falls back to ``created_utc``.
        sort_order: "asc" for ascending; any other value sorts descending.
        since: Optional inclusive lower bound on comment creation time.
        until: Optional inclusive upper bound on comment creation time.
        page: 1-based page number.
        per_page: Page size.

    Returns:
        ``(comments, total)`` where each comment is a dict of the Comment
        table's columns plus ``author_name``.
    """
    # Outer join keeps comments without an author row; Post is joined so the
    # subreddit filter can be applied.
    base = select(Comment, Author.username).outerjoin(Author).join(Post)

    filters = []
    if post_id:
        filters.append(Comment.post_id == post_id)
    if subreddit_id:
        filters.append(Post.subreddit_id == subreddit_id)
    if author:
        filters.append(Author.username == author)
    if since:
        filters.append(Comment.created_utc >= since)
    if until:
        filters.append(Comment.created_utc <= until)

    if filters:
        base = base.where(*filters)

    count_stmt = select(func.count()).select_from(base.subquery())
    total = (await db.execute(count_stmt)).scalar() or 0

    # Only real columns may be sorted on: a bare getattr() would also resolve
    # non-column class attributes that have no .asc()/.desc().
    sortable = {c.name for c in Comment.__table__.columns}
    sort_col = getattr(Comment, sort_by) if sort_by in sortable else Comment.created_utc
    if sort_order == "asc":
        base = base.order_by(sort_col.asc())
    else:
        base = base.order_by(sort_col.desc())

    base = base.offset((page - 1) * per_page).limit(per_page)

    result = await db.execute(base)
    comments = []
    for comment, author_name in result.all():
        data = {c.name: getattr(comment, c.name) for c in comment.__table__.columns}
        data["author_name"] = author_name
        comments.append(data)

    return comments, total
|
||||
102
backend/services/post_service.py
Normal file
102
backend/services/post_service.py
Normal file
@@ -0,0 +1,102 @@
|
||||
from datetime import datetime
|
||||
from sqlalchemy import select, func
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from sqlalchemy.orm import joinedload
|
||||
|
||||
from backend.models.post import Post
|
||||
from backend.models.subreddit import MonitoredSubreddit
|
||||
from backend.models.author import Author
|
||||
from backend.models.comment import Comment
|
||||
|
||||
|
||||
async def list_posts(
    db: AsyncSession,
    subreddit_id: int | None = None,
    author: str | None = None,
    flair: str | None = None,
    sort_by: str = "created_utc",
    sort_order: str = "desc",
    since: datetime | None = None,
    until: datetime | None = None,
    page: int = 1,
    per_page: int = 25,
) -> tuple[list[dict], int]:
    """Return one page of posts matching the filters, plus the total.

    Args:
        db: Async session used to run the queries.
        subreddit_id: When truthy, only posts in this subreddit.
        author: When truthy, only posts by this exact username.
        flair: When truthy, only posts with this exact flair.
        sort_by: Name of a ``Post`` column to sort on; anything that is not
            a column name falls back to ``created_utc``.
        sort_order: "asc" for ascending; any other value sorts descending.
        since: Optional inclusive lower bound on post creation time.
        until: Optional inclusive upper bound on post creation time.
        page: 1-based page number.
        per_page: Page size.

    Returns:
        ``(posts, total)`` where each post is a dict of the Post table's
        columns plus ``subreddit_name`` and ``author_name``.
    """
    # Outer join keeps posts without an author row.
    base = select(Post, MonitoredSubreddit.name, Author.username).join(
        MonitoredSubreddit
    ).outerjoin(Author)

    filters = []
    if subreddit_id:
        filters.append(Post.subreddit_id == subreddit_id)
    if flair:
        filters.append(Post.flair == flair)
    if since:
        filters.append(Post.created_utc >= since)
    if until:
        filters.append(Post.created_utc <= until)
    if author:
        filters.append(Author.username == author)

    if filters:
        base = base.where(*filters)

    # Count the filtered rows before ordering and pagination are applied.
    count_stmt = select(func.count()).select_from(base.subquery())
    total = (await db.execute(count_stmt)).scalar() or 0

    # Only real columns may be sorted on: a bare getattr() would also resolve
    # non-column class attributes that have no .asc()/.desc().
    sortable = {c.name for c in Post.__table__.columns}
    sort_col = getattr(Post, sort_by) if sort_by in sortable else Post.created_utc
    if sort_order == "asc":
        base = base.order_by(sort_col.asc())
    else:
        base = base.order_by(sort_col.desc())

    base = base.offset((page - 1) * per_page).limit(per_page)

    result = await db.execute(base)
    posts = []
    for post, sub_name, author_name in result.all():
        data = {c.name: getattr(post, c.name) for c in post.__table__.columns}
        data["subreddit_name"] = sub_name
        data["author_name"] = author_name
        posts.append(data)

    return posts, total
|
||||
|
||||
|
||||
async def get_post(db: AsyncSession, post_id: int) -> dict | None:
    """Load one post with its subreddit name, author name and comments.

    Returns:
        A dict of the Post table's columns plus ``subreddit_name``,
        ``author_name`` and ``comments`` (oldest first, each a dict of the
        Comment columns plus ``author_name``), or ``None`` when the post
        does not exist.
    """
    post_query = (
        select(Post, MonitoredSubreddit.name, Author.username)
        .join(MonitoredSubreddit)
        .outerjoin(Author)
        .where(Post.id == post_id)
    )
    found = (await db.execute(post_query)).first()
    if not found:
        return None

    post, sub_name, author_name = found
    payload = {col.name: getattr(post, col.name) for col in post.__table__.columns}
    payload["subreddit_name"] = sub_name
    payload["author_name"] = author_name

    # Comments for the post in chronological order; the outer join keeps
    # comments without an author row.
    comments_query = (
        select(Comment, Author.username)
        .outerjoin(Author)
        .where(Comment.post_id == post_id)
        .order_by(Comment.created_utc.asc())
    )
    comment_rows = (await db.execute(comments_query)).all()

    payload["comments"] = [
        {
            **{col.name: getattr(comment, col.name) for col in comment.__table__.columns},
            "author_name": commenter,
        }
        for comment, commenter in comment_rows
    ]
    return payload
|
||||
75
backend/services/subreddit_service.py
Normal file
75
backend/services/subreddit_service.py
Normal file
@@ -0,0 +1,75 @@
|
||||
from sqlalchemy import select, func
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from backend.models.subreddit import MonitoredSubreddit
|
||||
from backend.models.post import Post
|
||||
|
||||
|
||||
async def list_subreddits(db: AsyncSession) -> list[dict]:
    """Return every monitored subreddit with its post count, ordered by name.

    Each entry is a dict of the subreddit table's columns plus
    ``post_count``.
    """
    query = (
        select(MonitoredSubreddit, func.count(Post.id).label("post_count"))
        # Outer join so subreddits without posts are listed with a count of 0.
        .outerjoin(Post, Post.subreddit_id == MonitoredSubreddit.id)
        .group_by(MonitoredSubreddit.id)
        .order_by(MonitoredSubreddit.name)
    )
    rows = (await db.execute(query)).all()

    return [
        {
            **{col.name: getattr(sub, col.name) for col in sub.__table__.columns},
            "post_count": post_count,
        }
        for sub, post_count in rows
    ]
|
||||
|
||||
|
||||
async def get_subreddit(db: AsyncSession, subreddit_id: int) -> dict | None:
    """Load one monitored subreddit with its post count.

    Returns a dict of the subreddit table's columns plus ``post_count``,
    or ``None`` when the id is unknown.
    """
    query = (
        select(MonitoredSubreddit, func.count(Post.id).label("post_count"))
        # Outer join so a subreddit without posts still yields a row.
        .outerjoin(Post, Post.subreddit_id == MonitoredSubreddit.id)
        .where(MonitoredSubreddit.id == subreddit_id)
        .group_by(MonitoredSubreddit.id)
    )
    found = (await db.execute(query)).first()
    if not found:
        return None

    sub, post_count = found
    payload = {col.name: getattr(sub, col.name) for col in sub.__table__.columns}
    payload["post_count"] = post_count
    return payload
|
||||
|
||||
|
||||
async def create_subreddit(db: AsyncSession, name: str) -> MonitoredSubreddit:
    """Persist a new monitored subreddit and return the refreshed row.

    The name is lower-cased and stripped of surrounding whitespace before
    it is stored.
    """
    normalized = name.lower().strip()
    created = MonitoredSubreddit(name=normalized)
    db.add(created)
    await db.commit()
    # Reload so the returned instance reflects what was persisted.
    await db.refresh(created)
    return created
|
||||
|
||||
|
||||
async def update_subreddit(
    db: AsyncSession, subreddit_id: int, is_active: bool | None = None
) -> MonitoredSubreddit | None:
    """Update a monitored subreddit's active flag.

    ``is_active=None`` leaves the flag unchanged. Returns the refreshed
    row, or ``None`` when the id is unknown.
    """
    target = await db.get(MonitoredSubreddit, subreddit_id)
    if target:
        if is_active is not None:
            target.is_active = is_active
        await db.commit()
        await db.refresh(target)
        return target
    return None
|
||||
|
||||
|
||||
async def delete_subreddit(db: AsyncSession, subreddit_id: int) -> bool:
    """Soft-delete a monitored subreddit by marking it inactive.

    The row itself is kept. Returns ``False`` when the id is unknown,
    ``True`` once the change has been committed.
    """
    target = await db.get(MonitoredSubreddit, subreddit_id)
    if target:
        target.is_active = False
        await db.commit()
        return True
    return False
|
||||
0
backend/utils/__init__.py
Normal file
0
backend/utils/__init__.py
Normal file
0
backend/worker/__init__.py
Normal file
0
backend/worker/__init__.py
Normal file
140
backend/worker/digest_job.py
Normal file
140
backend/worker/digest_job.py
Normal file
@@ -0,0 +1,140 @@
|
||||
import logging
|
||||
from datetime import datetime, timezone, timedelta, date
|
||||
|
||||
from sqlalchemy import select, func, create_engine
|
||||
from sqlalchemy.orm import sessionmaker
|
||||
|
||||
from backend.config import settings
|
||||
from backend.models.subreddit import MonitoredSubreddit
|
||||
from backend.models.post import Post
|
||||
from backend.models.comment import Comment
|
||||
from backend.models.author import Author
|
||||
from backend.models.daily_digest import DailyDigest
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Synchronous engine and session factory used by the digest job.
_engine = create_engine(
    settings.database_url_sync,
    pool_size=2,
    pool_recycle=3600,
)
SyncSession = sessionmaker(_engine)
|
||||
|
||||
|
||||
def generate_daily_digests():
    """Generate yesterday's digest for each active subreddit.

    "Yesterday" is the previous calendar day in UTC. For every active
    subreddit that does not already have a digest for that date, the job
    gathers post/comment counts, the top five posts by score, the five
    most active commenters and the average post score, renders a markdown
    summary, and stores it as a ``DailyDigest`` row together with the raw
    numbers. Each digest is committed individually.
    """
    # Take the date from the UTC clock so it matches the UTC window bounds
    # below; date.today() would use the host's local timezone instead.
    yesterday = datetime.now(timezone.utc).date() - timedelta(days=1)
    day_start = datetime(yesterday.year, yesterday.month, yesterday.day, tzinfo=timezone.utc)
    day_end = day_start + timedelta(days=1)

    with SyncSession() as db:
        subs = db.execute(
            select(MonitoredSubreddit).where(MonitoredSubreddit.is_active == True)  # noqa: E712
        ).scalars().all()

        for sub in subs:
            # Skip subreddits that already have a digest for that date.
            existing = db.execute(
                select(DailyDigest).where(
                    DailyDigest.subreddit_id == sub.id,
                    DailyDigest.digest_date == yesterday,
                )
            ).scalar_one_or_none()
            if existing:
                continue

            # Half-open [day_start, day_end) windows for posts and comments.
            post_window = (
                Post.subreddit_id == sub.id,
                Post.created_utc >= day_start,
                Post.created_utc < day_end,
            )
            comment_window = (
                Post.subreddit_id == sub.id,
                Comment.created_utc >= day_start,
                Comment.created_utc < day_end,
            )

            post_count = db.execute(
                select(func.count(Post.id)).where(*post_window)
            ).scalar() or 0

            comment_count = db.execute(
                select(func.count(Comment.id)).join(Post).where(*comment_window)
            ).scalar() or 0

            # Top posts by score
            top_posts = db.execute(
                select(Post.title, Post.score, Post.num_comments, Post.permalink)
                .where(*post_window)
                .order_by(Post.score.desc())
                .limit(5)
            ).all()

            # Most active commenters
            top_authors = db.execute(
                select(Author.username, func.count(Comment.id).label("cnt"))
                .join(Comment, Comment.author_id == Author.id)
                .join(Post, Comment.post_id == Post.id)
                .where(*comment_window)
                .group_by(Author.username)
                .order_by(func.count(Comment.id).desc())
                .limit(5)
            ).all()

            avg_score = db.execute(
                select(func.avg(Post.score)).where(*post_window)
            ).scalar()

            # Build markdown digest. The average is None only when there were
            # no posts; a genuine average of 0 is still reported.
            header = f"**Posts:** {post_count} | **Comments:** {comment_count}"
            if avg_score is not None:
                header += f" | **Avg Score:** {avg_score:.1f}"
            lines = [
                f"# r/{sub.name} — Daily Digest for {yesterday}",
                "",
                header,
                "",
            ]

            if top_posts:
                lines.append("## Top Posts")
                for i, (title, score, num_comments, _permalink) in enumerate(top_posts, 1):
                    lines.append(f"{i}. **{title}** — {score} pts, {num_comments} comments")
                lines.append("")

            if top_authors:
                lines.append("## Most Active Users")
                for username, cnt in top_authors:
                    lines.append(f"- u/{username}: {cnt} comments")
                lines.append("")

            content = "\n".join(lines)

            metadata = {
                "post_count": post_count,
                "comment_count": comment_count,
                "avg_score": float(avg_score) if avg_score else 0,
                "top_posts": [
                    {"title": t, "score": s, "num_comments": n}
                    for t, s, n, _ in top_posts
                ],
                "top_authors": [
                    {"username": u, "comment_count": c}
                    for u, c in top_authors
                ],
            }

            digest = DailyDigest(
                subreddit_id=sub.id,
                digest_date=yesterday,
                content=content,
                metadata_=metadata,
            )
            db.add(digest)
            db.commit()
            logger.info(f"Generated daily digest for r/{sub.name} on {yesterday}")
|
||||
90
backend/worker/main.py
Normal file
90
backend/worker/main.py
Normal file
@@ -0,0 +1,90 @@
|
||||
import logging
|
||||
import signal
|
||||
import sys
|
||||
|
||||
from apscheduler.schedulers.blocking import BlockingScheduler
|
||||
from apscheduler.triggers.interval import IntervalTrigger
|
||||
from apscheduler.triggers.cron import CronTrigger
|
||||
|
||||
from backend.config import settings
|
||||
from backend.worker.monitor import poll_new_posts, poll_hot_posts, collect_comments, update_scores
|
||||
from backend.worker.snapshot import take_metric_snapshots
|
||||
from backend.worker.digest_job import generate_daily_digests
|
||||
from backend.worker.summary_job import generate_summaries
|
||||
|
||||
# Root logging configuration for the worker process: INFO level with
# timestamp, logger name and level in every record.
logging.basicConfig(
    format="%(asctime)s [%(name)s] %(levelname)s: %(message)s",
    level=logging.INFO,
)
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def seed_subreddits():
    """Add seed subreddits on first startup if configured.

    Reads the comma-separated ``settings.seed_subreddits`` value,
    normalises each name (stripped, lower-cased), and inserts a
    ``MonitoredSubreddit`` row for every name that is not already present.
    Does nothing when the setting is empty.
    """
    if not settings.seed_subreddits:
        return
    from sqlalchemy import select, create_engine
    from sqlalchemy.orm import sessionmaker
    from backend.models.subreddit import MonitoredSubreddit

    names = [s.strip().lower() for s in settings.seed_subreddits.split(",") if s.strip()]

    # Short-lived engine used only for seeding.
    engine = create_engine(settings.database_url_sync)
    try:
        Session = sessionmaker(engine)
        with Session() as db:
            for name in names:
                existing = db.execute(
                    select(MonitoredSubreddit).where(MonitoredSubreddit.name == name)
                ).scalar_one_or_none()
                if not existing:
                    db.add(MonitoredSubreddit(name=name))
                    logger.info(f"Seeded subreddit: r/{name}")
            db.commit()
    finally:
        # Release pooled connections even when a query or the commit fails.
        engine.dispose()
|
||||
|
||||
|
||||
def main():
    """Seed subreddits, register the scheduled jobs, and run the scheduler.

    Blocks in ``scheduler.start()`` until SIGTERM or SIGINT triggers the
    shutdown handler.
    """
    logger.info("Starting Reddit monitor worker")
    seed_subreddits()

    scheduler = BlockingScheduler()

    # Interval jobs as (callable, minutes between runs, job id): Reddit
    # polling followed by metric snapshots.
    interval_jobs = (
        (poll_new_posts, 2, "poll_new"),
        (poll_hot_posts, 2, "poll_hot"),
        (collect_comments, 5, "comments"),
        (update_scores, 15, "scores"),
        (take_metric_snapshots, 30, "snapshots"),
    )
    for job, minutes, job_id in interval_jobs:
        scheduler.add_job(job, IntervalTrigger(minutes=minutes), id=job_id, max_instances=1)

    # Daily digest. The hour setting is expressed in UTC, so pin the trigger
    # to UTC instead of letting it default to the host's local timezone.
    scheduler.add_job(
        generate_daily_digests,
        CronTrigger(hour=settings.digest_hour_utc, minute=0, timezone="UTC"),
        id="digest",
        max_instances=1,
    )

    # AI summary stub, 30 minutes after the digest (also in UTC).
    scheduler.add_job(
        generate_summaries,
        CronTrigger(hour=settings.digest_hour_utc, minute=30, timezone="UTC"),
        id="summary",
        max_instances=1,
    )

    def shutdown(signum, frame):
        """Stop the scheduler without waiting for running jobs, then exit."""
        logger.info("Shutting down worker...")
        scheduler.shutdown(wait=False)
        sys.exit(0)

    signal.signal(signal.SIGTERM, shutdown)
    signal.signal(signal.SIGINT, shutdown)

    logger.info("Worker started. Scheduled jobs are running.")
    scheduler.start()


if __name__ == "__main__":
    main()
|
||||
295
backend/worker/monitor.py
Normal file
295
backend/worker/monitor.py
Normal file
@@ -0,0 +1,295 @@
|
||||
import logging
|
||||
from datetime import datetime, timezone, timedelta
|
||||
|
||||
from sqlalchemy import select, create_engine
|
||||
from sqlalchemy.orm import Session, sessionmaker
|
||||
from sqlalchemy.dialects.postgresql import insert
|
||||
|
||||
from backend.config import settings
|
||||
from backend.models.subreddit import MonitoredSubreddit
|
||||
from backend.models.author import Author
|
||||
from backend.models.post import Post
|
||||
from backend.models.comment import Comment
|
||||
from backend.worker.reddit_client import create_client, fetch_json
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Synchronous engine for the worker: per the original note, the PRAW
# replacement client is async (httpx), but database writes stay synchronous
# for simplicity with APScheduler.
_engine = create_engine(
    settings.database_url_sync,
    pool_size=3,
    max_overflow=5,
    pool_recycle=3600,
)
SyncSession = sessionmaker(_engine)
|
||||
|
||||
|
||||
def _get_active_subreddits() -> list[dict]:
    """Return ``{"id", "name"}`` dicts for every active monitored subreddit.

    Plain dicts are returned so callers do not depend on the session that
    loaded the rows.
    """
    query = select(MonitoredSubreddit).where(MonitoredSubreddit.is_active == True)  # noqa: E712
    active = []
    with SyncSession() as db:
        for sub in db.execute(query).scalars():
            active.append({"id": sub.id, "name": sub.name})
    return active
|
||||
|
||||
|
||||
def _upsert_author(db: Session, username: str) -> int | None:
    """Insert the author or bump ``last_seen_at``, and return the author id.

    Returns ``None`` for a missing username or the "[deleted]" placeholder,
    without touching the database.
    """
    if username == "[deleted]" or not username:
        return None

    seen_at = datetime.now(timezone.utc)
    upsert = (
        insert(Author)
        .values(username=username, first_seen_at=seen_at, last_seen_at=seen_at)
        # On a username conflict only last_seen_at is refreshed.
        .on_conflict_do_update(
            index_elements=[Author.username],
            set_={"last_seen_at": seen_at},
        )
    )
    db.execute(upsert)

    # Look the id up afterwards; it exists for both the insert and the
    # update path.
    found = db.execute(select(Author.id).where(Author.username == username)).first()
    if found:
        return found[0]
    return None
|
||||
|
||||
|
||||
def _parse_post(post_data: dict, subreddit_id: int, db: Session, hot_rank: int | None = None) -> dict:
    """Convert one Reddit listing entry into a row dict for the Post table.

    Accepts either the listing wrapper (payload under "data") or the bare
    payload. The author is upserted as a side effect so ``author_id`` can
    be filled in.
    """
    payload = post_data.get("data", post_data)
    author_id = _upsert_author(db, payload.get("author"))
    # created_utc is an epoch timestamp; a missing value maps to the epoch.
    created_at = datetime.fromtimestamp(payload.get("created_utc", 0), tz=timezone.utc)

    row = {
        # Fall back to building the fullname from the bare id.
        "reddit_id": payload.get("name", f"t3_{payload.get('id', '')}"),
        "subreddit_id": subreddit_id,
        "author_id": author_id,
        "title": payload.get("title", ""),
    }
    # Fields copied verbatim, absent keys becoming None.
    row["selftext"] = payload.get("selftext")
    row["url"] = payload.get("url")
    row["permalink"] = payload.get("permalink")
    row["flair"] = payload.get("link_flair_text")
    row["score"] = payload.get("score", 0)
    row["upvote_ratio"] = payload.get("upvote_ratio")
    row["num_comments"] = payload.get("num_comments", 0)
    row["is_self"] = payload.get("is_self")
    row["over_18"] = payload.get("over_18", False)
    row["hot_rank"] = hot_rank
    row["created_utc"] = created_at
    row["collected_at"] = datetime.now(timezone.utc)
    row["updated_at"] = datetime.now(timezone.utc)
    return row
|
||||
|
||||
|
||||
def _upsert_posts(db: Session, posts: list[dict], update_hot_rank: bool = False):
    """Bulk insert posts, refreshing volatile columns on a reddit_id conflict.

    Existing rows get ``score``, ``upvote_ratio``, ``num_comments`` and
    ``updated_at`` overwritten with the incoming values; ``hot_rank`` is
    overwritten only when ``update_hot_rank`` is true. An empty ``posts``
    list is a no-op.
    """
    if not posts:
        return

    refreshed_columns = ["score", "upvote_ratio", "num_comments", "updated_at"]
    if update_hot_rank:
        refreshed_columns.append("hot_rank")

    insert_stmt = insert(Post).values(posts)
    # "excluded" refers to the row that was proposed for insertion.
    overwrite = {
        column: getattr(insert_stmt.excluded, column) for column in refreshed_columns
    }
    db.execute(
        insert_stmt.on_conflict_do_update(
            index_elements=[Post.reddit_id],
            set_=overwrite,
        )
    )
|
||||
|
||||
|
||||
def _parse_comment(comment_data: dict, post_id: int, db: Session, parent_map: dict) -> dict | None:
    """Convert one Reddit comment entry into a row dict for the Comment table.

    Returns ``None`` for entries without a body (or whose payload carries
    ``kind == "more"``). The author is upserted as a side effect.
    ``parent_map`` maps a parent's Reddit fullname to its local comment id;
    parents not in the map yield ``parent_comment_id=None``.
    """
    payload = comment_data.get("data", comment_data)
    # NOTE(review): "kind" is read from the inner payload here — confirm that
    # is where the listing puts it; body-less entries are skipped either way.
    if payload.get("kind") == "more" or not payload.get("body"):
        return None

    fullname = payload.get("name", f"t1_{payload.get('id', '')}")
    author_id = _upsert_author(db, payload.get("author"))
    created_at = datetime.fromtimestamp(payload.get("created_utc", 0), tz=timezone.utc)
    parent_id = parent_map.get(payload.get("parent_id", ""))

    row = {
        "reddit_id": fullname,
        "post_id": post_id,
        "parent_comment_id": parent_id,
        "author_id": author_id,
    }
    row["body"] = payload.get("body", "")
    row["score"] = payload.get("score", 0)
    row["created_utc"] = created_at
    row["collected_at"] = datetime.now(timezone.utc)
    row["updated_at"] = datetime.now(timezone.utc)
    return row
|
||||
|
||||
|
||||
import asyncio
|
||||
|
||||
|
||||
def poll_new_posts():
    """Synchronous entry point: fetch /new for each active subreddit and upsert posts."""
    job = _poll_new_posts_async()
    asyncio.run(job)
|
||||
|
||||
|
||||
async def _poll_new_posts_async():
    """Fetch ``/r/<name>/new`` (limit 100) for every active subreddit and upsert the posts.

    Subreddits whose request fails or returns no children are skipped.
    Each subreddit is written in its own session and committed separately.
    """
    subreddits = _get_active_subreddits()
    if not subreddits:
        return

    async with create_client() as http:
        for sub in subreddits:
            listing = await fetch_json(http, f"/r/{sub['name']}/new", {"limit": "100"})
            children = listing.get("data", {}).get("children", []) if listing else []
            if not children:
                continue

            with SyncSession() as db:
                rows = []
                for entry in children:
                    rows.append(_parse_post(entry, sub["id"], db))
                _upsert_posts(db, rows)
                db.commit()

            logger.info(f"r/{sub['name']}: upserted {len(children)} new posts")
|
||||
|
||||
|
||||
def poll_hot_posts():
    """Synchronous entry point: run the ``/hot`` polling coroutine to completion."""
    job = _poll_hot_posts_async()
    asyncio.run(job)
|
||||
|
||||
|
||||
async def _poll_hot_posts_async():
    """Fetch ``/r/<name>/hot`` (limit 100) for every active subreddit and store hot ranks.

    A post's ``hot_rank`` is its 1-based position in the returned listing.
    Posts are upserted with ``update_hot_rank=True`` so existing rows get the
    new rank.  Subreddits with a failed or empty response are skipped.
    """
    subreddits = _get_active_subreddits()
    if not subreddits:
        return

    async with create_client() as http:
        for sub in subreddits:
            listing = await fetch_json(http, f"/r/{sub['name']}/hot", {"limit": "100"})
            children = listing.get("data", {}).get("children", []) if listing else []
            if not children:
                continue

            with SyncSession() as db:
                ranked_rows = [
                    _parse_post(entry, sub["id"], db, hot_rank=position)
                    for position, entry in enumerate(children, start=1)
                ]
                _upsert_posts(db, ranked_rows, update_hot_rank=True)
                db.commit()

            logger.info(f"r/{sub['name']}: updated hot ranks for {len(children)} posts")
|
||||
|
||||
|
||||
def collect_comments():
    """Synchronous entry point: run the comment-collection coroutine to completion."""
    job = _collect_comments_async()
    asyncio.run(job)
|
||||
|
||||
|
||||
def _iter_comment_nodes(children):
    """Yield each comment's ``data`` dict in pre-order (a parent before its replies).

    Entries whose ``kind`` is ``"more"`` are skipped.  ``replies`` is only
    descended into when it is a dict; any other value is treated as
    "no replies".
    """
    for child in children:
        if child.get("kind") == "more":
            continue
        c_data = child.get("data", {})
        yield c_data
        replies = c_data.get("replies")
        if isinstance(replies, dict):
            yield from _iter_comment_nodes(replies.get("data", {}).get("children", []))


async def _collect_comments_async():
    """Fetch and upsert comment trees for recent posts of active subreddits.

    Selects up to 50 posts created in the last 48 hours (newest first), then
    requests ``/comments/<id>`` for each one.  Comments are upserted one at a
    time in pre-order, and every upserted row's database id is recorded in
    ``parent_map`` immediately, so a reply fetched in the same batch as its
    parent resolves ``parent_comment_id`` correctly.  Each post is written in
    its own session and committed separately.
    """
    cutoff = datetime.now(timezone.utc) - timedelta(hours=48)

    with SyncSession() as db:
        stmt = (
            select(Post.id, Post.reddit_id, Post.subreddit_id)
            .join(MonitoredSubreddit)
            .where(
                MonitoredSubreddit.is_active == True,  # noqa: E712
                Post.created_utc >= cutoff,
            )
            .order_by(Post.created_utc.desc())
            .limit(50)
        )
        result = db.execute(stmt)
        recent_posts = [{"id": r[0], "reddit_id": r[1], "subreddit_id": r[2]} for r in result]

    if not recent_posts:
        return

    client = create_client()
    async with client:
        for post in recent_posts:
            short_id = post["reddit_id"].replace("t3_", "")
            data = await fetch_json(client, f"/comments/{short_id}", {"limit": "500", "sort": "new"})
            # The payload is indexed positionally below, so anything other than
            # a list with at least two elements cannot be processed.
            if not isinstance(data, list) or len(data) < 2:
                continue

            comment_listing = data[1].get("data", {}).get("children", [])

            with SyncSession() as db:
                # Seed the fullname -> database id map with comments already stored.
                existing = db.execute(
                    select(Comment.id, Comment.reddit_id).where(Comment.post_id == post["id"])
                )
                parent_map = {r[1]: r[0] for r in existing}

                upserted = 0
                for c_data in _iter_comment_nodes(comment_listing):
                    # Parse lazily, right before the insert, so parent_map
                    # already contains parents upserted earlier in this loop.
                    parsed = _parse_comment(c_data, post["id"], db, parent_map)
                    if not parsed:
                        continue

                    stmt = insert(Comment).values(parsed)
                    update_set = {
                        "score": stmt.excluded.score,
                        "body": stmt.excluded.body,
                        "updated_at": stmt.excluded.updated_at,
                    }
                    # Backfill a parent link on existing rows, but never
                    # overwrite a stored link with NULL when the parent is unknown.
                    if parsed["parent_comment_id"] is not None:
                        update_set["parent_comment_id"] = stmt.excluded.parent_comment_id

                    stmt = stmt.on_conflict_do_update(
                        index_elements=[Comment.reddit_id],
                        set_=update_set,
                    ).returning(Comment.id)
                    # RETURNING yields the row id for both the insert and the update path.
                    parent_map[parsed["reddit_id"]] = db.execute(stmt).scalar_one()
                    upserted += 1

                if upserted:
                    db.commit()
                    logger.info(f"Post {short_id}: upserted {upserted} comments")
|
||||
|
||||
|
||||
def update_scores():
    """Synchronous entry point: run the score-refresh coroutine to completion."""
    job = _update_scores_async()
    asyncio.run(job)
|
||||
|
||||
|
||||
async def _update_scores_async():
    """Re-fetch ``/new`` for active subreddits that have posts from the last 7 days.

    The upsert refreshes ``score``, ``upvote_ratio`` and ``num_comments`` for
    every post present in the returned listing.  Subreddits with a failed or
    empty response are skipped.

    NOTE(review): only the 100 newest posts per subreddit are re-fetched, so
    posts that have dropped out of ``/new`` are NOT refreshed even though
    they fall inside the 7-day window.  Covering them would need a lookup by
    post id, which this job does not do.
    """
    cutoff = datetime.now(timezone.utc) - timedelta(days=7)

    with SyncSession() as db:
        # Only the distinct subreddits are needed, not one row per post.
        stmt = (
            select(MonitoredSubreddit.id, MonitoredSubreddit.name)
            .join(Post)
            .where(
                MonitoredSubreddit.is_active == True,  # noqa: E712
                Post.created_utc >= cutoff,
            )
            .distinct()
        )
        subs_with_recent_posts = {sub_id: sub_name for sub_id, sub_name in db.execute(stmt)}

    if not subs_with_recent_posts:
        return

    client = create_client()
    async with client:
        for sub_id, sub_name in subs_with_recent_posts.items():
            data = await fetch_json(client, f"/r/{sub_name}/new", {"limit": "100"})
            if not data:
                continue
            children = data.get("data", {}).get("children", [])
            if not children:
                continue

            with SyncSession() as db:
                # The subreddit id comes from the query above; no second lookup by name.
                posts = [_parse_post(child, sub_id, db) for child in children]
                _upsert_posts(db, posts)
                db.commit()

    logger.info(f"Score update complete for {len(subs_with_recent_posts)} subreddits")
|
||||
58
backend/worker/reddit_client.py
Normal file
58
backend/worker/reddit_client.py
Normal file
@@ -0,0 +1,58 @@
|
||||
import asyncio
|
||||
import logging
|
||||
import time
|
||||
|
||||
import httpx
|
||||
|
||||
from backend.config import settings
|
||||
|
||||
logger = logging.getLogger(__name__)

BASE_URL = "https://www.reddit.com"

# In-process sliding-window rate limiter.  ``_request_times`` holds the
# ``time.monotonic()`` stamp of each recent request, oldest first.
MAX_REQUESTS_PER_MINUTE = 9  # Stay under Reddit's ~10/min limit
_request_times: list[float] = []
|
||||
|
||||
|
||||
async def _wait_for_rate_limit():
    """Sleep, if necessary, until the 60-second request window has room, then record a request.

    Stamps older than 60 seconds are dropped from the front of
    ``_request_times``.  If ``MAX_REQUESTS_PER_MINUTE`` stamps remain, the
    coroutine sleeps until the oldest one leaves the window (plus 0.5s of
    slack).  The current time is appended before returning.
    """
    now = time.monotonic()

    # Count the leading stamps that fell out of the window and drop them at once.
    expired = 0
    for stamp in _request_times:
        if not stamp < now - 60:
            break
        expired += 1
    del _request_times[:expired]

    if len(_request_times) >= MAX_REQUESTS_PER_MINUTE:
        wait = 60 - (now - _request_times[0]) + 0.5
        logger.info(f"Rate limit: waiting {wait:.1f}s")
        await asyncio.sleep(wait)

    _request_times.append(time.monotonic())
|
||||
|
||||
|
||||
def _retry_after_seconds(header_value, default: float = 60.0) -> float:
    """Parse a ``Retry-After`` header as seconds, returning ``default`` if it is unusable.

    Missing values, non-numeric values (for example an HTTP date), negatives,
    NaN and infinity all fall back to ``default``.
    """
    try:
        seconds = float(header_value)
    except (TypeError, ValueError):
        return default
    # The chained comparison is False for NaN and for +/-inf.
    if not (0 <= seconds < float("inf")):
        return default
    return seconds


async def fetch_json(
    client: httpx.AsyncClient,
    path: str,
    params: dict | None = None,
    max_retries: int = 3,
) -> dict | list | None:
    """Fetch a Reddit ``.json`` endpoint with rate limiting and error handling.

    Args:
        client: HTTP client used for the request.
        path: Path below ``BASE_URL``; ``.json`` is appended to it.
        params: Optional query parameters.
        max_retries: How many times a 429 response is retried before giving
            up.  Retries are bounded so a persistently throttled endpoint
            cannot stall the job indefinitely.

    Returns:
        The decoded JSON payload (a dict or a list, depending on the
        endpoint), or ``None`` on a transport error, a 5xx/4xx response,
        an undecodable body, or exhausted 429 retries.
    """
    url = f"{BASE_URL}{path}.json"
    attempts = max(0, max_retries) + 1

    for attempt in range(attempts):
        await _wait_for_rate_limit()
        try:
            response = await client.get(url, params=params)
            if response.status_code == 429:
                if attempt + 1 >= attempts:
                    break
                retry_after = _retry_after_seconds(response.headers.get("Retry-After"))
                logger.warning(f"Rate limited, waiting {retry_after:g}s")
                await asyncio.sleep(retry_after)
                continue
            if response.status_code >= 500:
                logger.warning(f"Reddit returned {response.status_code} for {path}")
                return None
            response.raise_for_status()
            return response.json()
        except httpx.HTTPError as e:
            logger.error(f"HTTP error fetching {path}: {e}")
            return None
        except ValueError as e:
            # JSON decoding errors are ValueError subclasses, not httpx.HTTPError.
            logger.error(f"Invalid JSON fetching {path}: {e}")
            return None

    logger.error(f"Giving up on {path}: still rate limited after {attempts} attempts")
    return None
|
||||
|
||||
|
||||
def create_client() -> httpx.AsyncClient:
    """Build the async HTTP client used for all Reddit requests.

    The client sends the configured ``reddit_user_agent`` as its User-Agent,
    uses a 30-second timeout and follows redirects.
    """
    request_headers = {"User-Agent": settings.reddit_user_agent}
    reddit_client = httpx.AsyncClient(
        headers=request_headers,
        timeout=30.0,
        follow_redirects=True,
    )
    return reddit_client
|
||||
47
backend/worker/snapshot.py
Normal file
47
backend/worker/snapshot.py
Normal file
@@ -0,0 +1,47 @@
|
||||
import logging
|
||||
from datetime import datetime, timezone, timedelta
|
||||
|
||||
from sqlalchemy import select, create_engine
|
||||
from sqlalchemy.orm import sessionmaker
|
||||
|
||||
from backend.config import settings
|
||||
from backend.models.post import Post
|
||||
from backend.models.metric_snapshot import MetricSnapshot
|
||||
from backend.models.subreddit import MonitoredSubreddit
|
||||
|
||||
logger = logging.getLogger(__name__)

# Synchronous engine and session factory for this module, built from the
# sync variant of the configured database URL.
_engine = create_engine(
    settings.database_url_sync,
    pool_size=2,
    pool_recycle=3600,
)
SyncSession = sessionmaker(bind=_engine)
|
||||
|
||||
|
||||
def take_metric_snapshots():
    """Store one ``MetricSnapshot`` per recent post of an active subreddit.

    "Recent" means created within the last 48 hours.  Every snapshot from a
    run shares the same ``snapshot_at`` timestamp.  When no post qualifies,
    nothing is written or logged.
    """
    now = datetime.now(timezone.utc)
    cutoff_recent = now - timedelta(hours=48)

    # Posts < 48h old are snapshotted on every run.
    recent_metrics = (
        select(Post.id, Post.score, Post.num_comments, Post.upvote_ratio)
        .join(MonitoredSubreddit)
        .where(
            MonitoredSubreddit.is_active == True,  # noqa: E712
            Post.created_utc >= cutoff_recent,
        )
    )

    with SyncSession() as db:
        snapshots = [
            MetricSnapshot(
                post_id=row[0],
                score=row[1],
                num_comments=row[2],
                upvote_ratio=row[3],
                snapshot_at=now,
            )
            for row in db.execute(recent_metrics)
        ]
        if not snapshots:
            return

        db.add_all(snapshots)
        db.commit()
        logger.info(f"Took {len(snapshots)} metric snapshots")
|
||||
12
backend/worker/summary_job.py
Normal file
12
backend/worker/summary_job.py
Normal file
@@ -0,0 +1,12 @@
|
||||
import logging
|
||||
|
||||
from backend.config import settings
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def generate_summaries():
    """Placeholder for AI summary generation.

    Does nothing unless ``settings.ai_summary_enabled`` is set; when it is,
    the job only logs that no provider has been configured yet.
    """
    if settings.ai_summary_enabled:
        logger.info("AI summary generation not yet configured")
|
||||
Reference in New Issue
Block a user