"""Rate-limited helpers for fetching Reddit's public ``.json`` endpoints."""

import asyncio
import logging
import math
import time

import httpx

from backend.config import settings

logger = logging.getLogger(__name__)

BASE_URL = "https://www.reddit.com"

# Simple in-process rate limiter: monotonic timestamps of recent requests,
# oldest first.
_request_times: list[float] = []
MAX_REQUESTS_PER_MINUTE = 9  # Stay under Reddit's ~10/min limit
_RATE_WINDOW_SECONDS = 60.0
# Extra margin added to each wait so the oldest timestamp has definitely
# left the window by the time we wake up.
_RATE_WAIT_PADDING_SECONDS = 0.5

# Fallback back-off when a 429 response has no usable Retry-After header.
_DEFAULT_RETRY_AFTER_SECONDS = 60.0
# How many times a single fetch_json call re-sends a request after a 429.
_DEFAULT_MAX_RETRIES = 3


async def _wait_for_rate_limit() -> None:
    """Block until we have budget for another request, then record it.

    The window is re-checked after every sleep: if several coroutines were
    waiting on the same slot, only as many as fit in the budget proceed and
    the rest go back to sleep, instead of all of them firing at once.
    """
    while True:
        now = time.monotonic()
        # Remove timestamps older than the rate window.
        while _request_times and _request_times[0] < now - _RATE_WINDOW_SECONDS:
            _request_times.pop(0)
        if len(_request_times) < MAX_REQUESTS_PER_MINUTE:
            break
        wait = (
            _RATE_WINDOW_SECONDS
            - (now - _request_times[0])
            + _RATE_WAIT_PADDING_SECONDS
        )
        logger.info("Rate limit: waiting %.1fs", wait)
        await asyncio.sleep(wait)
    # No await between the budget check above and this append, so the
    # check-and-record step cannot be interleaved with another coroutine.
    _request_times.append(time.monotonic())


def _parse_retry_after(value: str | None) -> float:
    """Return the delay in seconds encoded in a ``Retry-After`` header value.

    Only the delta-seconds form is interpreted. A missing header, an
    HTTP-date, or any other unparsable / negative / non-finite value falls
    back to ``_DEFAULT_RETRY_AFTER_SECONDS``.
    """
    if value is None:
        return _DEFAULT_RETRY_AFTER_SECONDS
    try:
        seconds = float(value)
    except ValueError:
        return _DEFAULT_RETRY_AFTER_SECONDS
    if not math.isfinite(seconds) or seconds < 0:
        return _DEFAULT_RETRY_AFTER_SECONDS
    return seconds


async def fetch_json(
    client: httpx.AsyncClient,
    path: str,
    params: dict | None = None,
    *,
    max_retries: int = _DEFAULT_MAX_RETRIES,
) -> dict | None:
    """Fetch a Reddit .json endpoint with rate limiting and error handling.

    Args:
        client: The httpx client used to send the request.
        path: Path appended to ``BASE_URL``; ``.json`` is added automatically.
        params: Optional query parameters passed through to ``client.get``.
        max_retries: Maximum number of re-sends after an HTTP 429 response.
            Negative values are treated as 0.

    Returns:
        The decoded JSON body on success. ``None`` if the request failed at
        the HTTP level, the server answered with a 4xx/5xx status, the body
        was not valid JSON, or the request was still rate limited after
        ``max_retries`` re-sends. Failures are logged, never raised.
    """
    url = f"{BASE_URL}{path}.json"
    attempts = max(max_retries, 0) + 1

    for attempt in range(attempts):
        await _wait_for_rate_limit()
        try:
            response = await client.get(url, params=params)
        except httpx.HTTPError as e:
            logger.error("HTTP error fetching %s: %s", path, e)
            return None

        if response.status_code == 429:
            if attempt == attempts - 1:
                # Out of retries: don't sleep for a request we won't send.
                break
            retry_after = _parse_retry_after(response.headers.get("Retry-After"))
            logger.warning("Rate limited, waiting %gs", retry_after)
            await asyncio.sleep(retry_after)
            continue

        if response.status_code >= 500:
            logger.warning("Reddit returned %s for %s", response.status_code, path)
            return None

        try:
            response.raise_for_status()
            return response.json()
        except httpx.HTTPError as e:
            logger.error("HTTP error fetching %s: %s", path, e)
            return None
        except ValueError as e:
            # response.json() raises json.JSONDecodeError (a ValueError) when
            # the body is not JSON, e.g. an HTML error or block page.
            logger.error("Invalid JSON in response for %s: %s", path, e)
            return None

    logger.error(
        "Giving up on %s: still rate limited after %d attempt(s)", path, attempts
    )
    return None


def create_client() -> httpx.AsyncClient:
    """Create an httpx client configured for Reddit.

    The client sends ``settings.reddit_user_agent`` as its User-Agent, uses a
    30 second timeout, and follows redirects. The caller is responsible for
    closing it.
    """
    return httpx.AsyncClient(
        headers={"User-Agent": settings.reddit_user_agent},
        timeout=30.0,
        follow_redirects=True,
    )