File size: 4,416 Bytes
1b21566
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
from fastapi import FastAPI, HTTPException, Depends, Query
from fastapi.middleware.cors import CORSMiddleware
from typing import List, Optional
import uvicorn
from contextlib import asynccontextmanager
from data_loader import DataLoader
from models import ArticleResponse, ArticleDetail, FiltersResponse

# Module-level DataLoader instance shared by the lifespan hook and every route
# handler in this file; the dataset itself is loaded later, in `lifespan`.
data_loader = DataLoader()

# Dependency functions for API parameters
def get_filter_params(
    document_type: Optional[List[str]] = Query(None, description="Filter by document types"),
    author_type: Optional[List[str]] = Query(None, description="Filter by author types"),
    min_relevance: Optional[float] = Query(None, ge=0, le=10, description="Minimum AI labor relevance score"),
    max_relevance: Optional[float] = Query(None, ge=0, le=10, description="Maximum AI labor relevance score"),
    start_date: Optional[str] = Query(None, description="Start date (YYYY-MM-DD)"),
    end_date: Optional[str] = Query(None, description="End date (YYYY-MM-DD)"),
    topic: Optional[List[str]] = Query(None, description="Filter by document topics"),
    search_query: Optional[str] = Query(None, description="Search query for text matching"),
    search_type: Optional[str] = Query("exact", description="Search type: 'exact' or 'dense'"),
) -> dict:
    """Collect the filter-related query parameters into one keyword dict.

    The singular query names (``document_type``, ``author_type``, ``topic``)
    are re-keyed to the plural names (``document_types``, ``author_types``,
    ``topics``); all other parameters keep their own name as the key.

    Returns:
        A dict that the route handlers splat into the ``data_loader`` calls.
    """
    # Built in one expression; keys are the keyword names handed to the loader.
    return dict(
        document_types=document_type,
        author_types=author_type,
        min_relevance=min_relevance,
        max_relevance=max_relevance,
        start_date=start_date,
        end_date=end_date,
        topics=topic,
        search_query=search_query,
        search_type=search_type,
    )

def get_pagination_params(
    page: int = Query(1, ge=1, description="Page number"),
    limit: int = Query(20, ge=1, le=100, description="Items per page"),
    sort_by: Optional[str] = Query("date", description="Sort by 'date' or 'score'"),
) -> dict:
    """Collect the paging and sorting query parameters into one keyword dict.

    Returns:
        A dict with the keys ``page``, ``limit`` and ``sort_by``, each holding
        the value of the query parameter with the same name.
    """
    return dict(page=page, limit=limit, sort_by=sort_by)

@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan hook: load the dataset, then hand control to the app.

    Everything before the ``yield`` runs at startup; nothing follows it, so
    there is no shutdown work.
    """
    # Startup: populate the shared data loader and report how much was loaded.
    print("Loading dataset from HuggingFace...")
    await data_loader.load_dataset()
    article_total = len(data_loader.articles)
    print(f"Dataset loaded: {article_total} articles")
    yield
    # Shutdown: nothing to release.

# The ASGI application; `lifespan` loads the dataset before requests are served.
app = FastAPI(title="Archive Explorer API: AI, Labor and the Economy", version="1.0.0", lifespan=lifespan)

# Enable CORS so browser frontends on the listed origins can call this API.
app.add_middleware(
    CORSMiddleware,
    allow_origins=[
        "http://localhost:3000", 
        "http://localhost:5173",
        "https://yjernite-labor-archive-backend.hf.space"  # presumably the deployed Hugging Face Space origin
    ],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

@app.get("/")
async def root():
    """Return the API title together with the number of loaded articles."""
    article_total = len(data_loader.articles)
    return {
        "message": "Archive Explorer API: AI, Labor and the Economy",
        "articles_count": article_total,
    }

@app.get("/filters", response_model=FiltersResponse)
async def get_filters():
    """Return the available filter options as reported by the data loader."""
    filter_options = data_loader.get_filter_options()
    return filter_options

@app.get("/articles", response_model=List[ArticleResponse])
async def get_articles(
    pagination: dict = Depends(get_pagination_params),
    filters: dict = Depends(get_filter_params),
):
    """Return the articles matching the filters for the requested page.

    Both dependency dicts are forwarded to ``data_loader.get_articles`` as
    keyword arguments.
    """
    matching_articles = data_loader.get_articles(**pagination, **filters)
    return matching_articles

@app.get("/articles/count")
async def get_articles_count(
    filters: dict = Depends(get_filter_params),
):
    """Return ``{"count": ...}`` with the loader's count for the given filters."""
    matching_total = data_loader.get_articles_count(**filters)
    return {"count": matching_total}

@app.get("/filter-counts/{filter_type}")
async def get_filter_counts(
    filter_type: str,
    filters: dict = Depends(get_filter_params),
):
    """Return the loader's counts for one filter type under the active filters.

    Raises:
        HTTPException: 400 when ``filter_type`` is not one of
            ``document_types``, ``author_types`` or ``topics``.
    """
    supported_types = ('document_types', 'author_types', 'topics')
    if filter_type in supported_types:
        return data_loader.get_filter_counts(filter_type=filter_type, **filters)

    # Anything else is a client error.
    raise HTTPException(status_code=400, detail="Invalid filter type")

@app.get("/articles/{article_id}", response_model=ArticleDetail)
async def get_article(article_id: int):
    """Get detailed article by ID.

    Args:
        article_id: Integer identifier taken from the URL path.

    Returns:
        The value returned by ``data_loader.get_article_detail`` for this ID,
        serialized through the ``ArticleDetail`` response model.

    Raises:
        HTTPException: 404 when the loader returns ``None`` for the ID.
    """
    article = data_loader.get_article_detail(article_id)
    # NOTE(review): the loader's not-found behavior is not visible from this
    # module. If it signals a miss with None, returning that value would fail
    # response-model validation and surface as a 500; answer with 404 instead.
    # If the loader raises on its own, that exception still propagates as before.
    if article is None:
        raise HTTPException(status_code=404, detail="Article not found")
    return article

@app.get("/test-search")
async def test_search(q: str):
    """Debug endpoint: run an 'exact' search for ``q`` and return the raw result."""
    # NOTE(review): this calls a private loader method directly; presumably
    # intended for manual testing only.
    search_result = data_loader._search_articles(q, 'exact')
    return search_result

# When executed as a script, serve the app with uvicorn on all interfaces, port 8000.
if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=8000)