Spaces:
Running
Running
File size: 4,416 Bytes
1b21566 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 |
from fastapi import FastAPI, HTTPException, Depends, Query
from fastapi.middleware.cors import CORSMiddleware
from typing import List, Optional
import uvicorn
from contextlib import asynccontextmanager
from data_loader import DataLoader
from models import ArticleResponse, ArticleDetail, FiltersResponse
# Initialize data loader
# Single module-level DataLoader instance shared by the route handlers in this
# file; its dataset is loaded by the `lifespan` handler via `load_dataset()`.
data_loader = DataLoader()
# Dependency functions for API parameters
def get_filter_params(
    document_type: Optional[List[str]] = Query(None, description="Filter by document types"),
    author_type: Optional[List[str]] = Query(None, description="Filter by author types"),
    min_relevance: Optional[float] = Query(None, ge=0, le=10, description="Minimum AI labor relevance score"),
    max_relevance: Optional[float] = Query(None, ge=0, le=10, description="Maximum AI labor relevance score"),
    start_date: Optional[str] = Query(None, description="Start date (YYYY-MM-DD)"),
    end_date: Optional[str] = Query(None, description="End date (YYYY-MM-DD)"),
    topic: Optional[List[str]] = Query(None, description="Filter by document topics"),
    search_query: Optional[str] = Query(None, description="Search query for text matching"),
    search_type: Optional[str] = Query("exact", description="Search type: 'exact' or 'dense'"),
) -> dict:
    """Gather the filter-related query parameters into a single dict.

    The singular query names ``document_type``, ``author_type`` and ``topic``
    are exposed under the plural keys ``document_types``, ``author_types`` and
    ``topics``; every other parameter keeps its own name as the key.

    Returns:
        A dict holding all nine filter values, in declaration order, that the
        route handlers in this file unpack as keyword arguments.
    """
    filter_kwargs = dict(
        document_types=document_type,
        author_types=author_type,
        min_relevance=min_relevance,
        max_relevance=max_relevance,
        start_date=start_date,
        end_date=end_date,
        topics=topic,
        search_query=search_query,
        search_type=search_type,
    )
    return filter_kwargs
def get_pagination_params(
    page: int = Query(1, ge=1, description="Page number"),
    limit: int = Query(20, ge=1, le=100, description="Items per page"),
    sort_by: Optional[str] = Query("date", description="Sort by 'date' or 'score'"),
) -> dict:
    """Gather the paging and ordering query parameters into a single dict.

    Returns:
        A dict with the keys ``page``, ``limit`` and ``sort_by``, each mapped
        to the query parameter of the same name.
    """
    return dict(page=page, limit=limit, sort_by=sort_by)
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Lifespan handler for the application: load the dataset, then yield.

    The code ahead of the ``yield`` is the startup phase; nothing follows the
    ``yield`` because no shutdown work is required.
    """
    print("Loading dataset from HuggingFace...")
    await data_loader.load_dataset()
    article_total = len(data_loader.articles)
    print(f"Dataset loaded: {article_total} articles")
    # Startup is complete; no cleanup is needed once the context exits.
    yield
# Application instance; dataset loading is wired in through the lifespan handler.
app = FastAPI(
    title="Archive Explorer API: AI, Labor and the Economy",
    version="1.0.0",
    lifespan=lifespan,
)

# Enable CORS for frontend
# Origins allowed to make cross-origin requests: two localhost ports plus the
# hf.space-hosted deployment.
_cors_origins = [
    "http://localhost:3000",
    "http://localhost:5173",
    "https://yjernite-labor-archive-backend.hf.space",
]
app.add_middleware(
    CORSMiddleware,
    allow_origins=_cors_origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
@app.get("/")
async def root():
    # Landing endpoint: the API title plus the number of articles held by the loader.
    article_total = len(data_loader.articles)
    return {
        "message": "Archive Explorer API: AI, Labor and the Economy",
        "articles_count": article_total,
    }
@app.get("/filters", response_model=FiltersResponse)
async def get_filters():
    """Get all available filter options"""
    # Pure delegation to the loader; FiltersResponse is the declared response model.
    filter_options = data_loader.get_filter_options()
    return filter_options
@app.get("/articles", response_model=List[ArticleResponse])
async def get_articles(
    pagination: dict = Depends(get_pagination_params),
    filters: dict = Depends(get_filter_params),
):
    """Get filtered and paginated articles"""
    # Both dependency dicts are unpacked as keyword arguments for the loader:
    # page/limit/sort_by from `pagination`, the filter values from `filters`.
    articles = data_loader.get_articles(**pagination, **filters)
    return articles
@app.get("/articles/count")
async def get_articles_count(filters: dict = Depends(get_filter_params)):
    """Get count of articles matching filters"""
    # The filter dict is unpacked as keyword arguments for the loader.
    matching_total = data_loader.get_articles_count(**filters)
    return {"count": matching_total}
@app.get("/filter-counts/{filter_type}")
async def get_filter_counts(
    filter_type: str,
    filters: dict = Depends(get_filter_params),
):
    """Get counts for each option in a specific filter type"""
    # Only these three path values are accepted; anything else is a client error.
    supported_types = ('document_types', 'author_types', 'topics')
    is_supported = filter_type in supported_types
    if not is_supported:
        raise HTTPException(status_code=400, detail="Invalid filter type")

    # The active filters are forwarded to the loader alongside the filter type.
    return data_loader.get_filter_counts(filter_type=filter_type, **filters)
@app.get("/articles/{article_id}", response_model=ArticleDetail)
async def get_article(article_id: int):
    """Get detailed article by ID"""
    # Look the article up through the shared loader.
    article = data_loader.get_article_detail(article_id)
    # NOTE(review): assumes the loader reports an unknown ID by returning
    # None -- confirm against DataLoader.get_article_detail. In that case
    # answer with an explicit 404 rather than handing None to the
    # ArticleDetail response model, which cannot validate it and would
    # surface as a server error. Anything the loader raises itself still
    # propagates unchanged.
    if article is None:
        raise HTTPException(status_code=404, detail="Article not found")
    return article
@app.get("/test-search")
async def test_search(q: str):
    """Test search functionality"""
    # NOTE(review): calls the loader's underscore-prefixed search method
    # directly, always with the 'exact' search type.
    matches = data_loader._search_articles(q, 'exact')
    return matches
if __name__ == "__main__":
    # Direct execution only: serve the app with uvicorn on 0.0.0.0, port 8000.
    uvicorn.run(
        app,
        host="0.0.0.0",
        port=8000,
    )
|