"""
Hugging Face integration for dataset management and model deployment.
"""

import json
import os
from pathlib import Path
from typing import Any, Dict, Optional

import pandas as pd
from datasets import Dataset, DatasetDict
from huggingface_hub import HfApi, create_repo, upload_file


class HuggingFaceIntegration:
    """Handles Hugging Face dataset and model operations."""

    def __init__(self, token: Optional[str] = None, dataset_id: str = "HackathonCRA/2024"):
        """Store credentials and target dataset.

        Args:
            token: Hugging Face access token. When omitted, the ``HF_TOKEN``
                environment variable is used instead.
            dataset_id: Hub repository id the dataset is pushed to.
        """
        self.token = token or os.environ.get("HF_TOKEN")
        self.dataset_id = dataset_id
        # No API client is created without a token; methods that need it
        # check ``self.token`` first.
        self.api = HfApi(token=self.token) if self.token else None

    def prepare_dataset_from_local_files(self, data_path: str) -> Dataset:
        """Prepare dataset from local CSV/Excel files.

        Args:
            data_path: Location handed to ``AgriculturalDataLoader``.

        Returns:
            A ``datasets.Dataset`` built from whatever
            ``AgriculturalDataLoader.load_all_files()`` returns (presumably a
            single combined pandas DataFrame; verify against data_loader).
        """
        # Imported lazily so the module can be imported without data_loader.
        from data_loader import AgriculturalDataLoader

        # Load and combine all data files
        loader = AgriculturalDataLoader(data_path=data_path)
        df = loader.load_all_files()

        # Convert to Hugging Face Dataset
        dataset = Dataset.from_pandas(df)
        return dataset

    def upload_dataset(self, data_path: str, private: bool = False) -> str:
        """Upload agricultural data to Hugging Face Hub.

        Args:
            data_path: Location of the local data files.
            private: Whether the dataset repository should be private.

        Returns:
            A message containing the URL of the dataset repository.

        Raises:
            ValueError: If no token is configured.
        """
        if not self.token:
            raise ValueError("HF_TOKEN required for uploading")

        # Prepare dataset
        dataset = self.prepare_dataset_from_local_files(data_path)

        # Create repository if it doesn't exist. This step is deliberately
        # best-effort: a failure here is only reported, and push_to_hub below
        # will raise if the repository is genuinely unusable.
        try:
            create_repo(
                repo_id=self.dataset_id,
                token=self.token,
                repo_type="dataset",
                private=private,
                exist_ok=True,
            )
        except Exception as e:
            print(f"Repository might already exist: {e}")

        # Upload dataset
        dataset.push_to_hub(
            repo_id=self.dataset_id,
            token=self.token,
            private=private,
        )

        return f"Dataset uploaded to https://huggingface.co/datasets/{self.dataset_id}"

    def create_dataset_card(self) -> str:
        """Create a dataset card for the agricultural data.

        Returns:
            The dataset card text, starting with its YAML front matter.
        """
        # NOTE(review): the source this was restored from is truncated right
        # after "size_categories: - 1K". The value "1K<n<10K", the closing
        # "---" and the return statement below are reconstructed; any Markdown
        # body that followed the front matter is missing and must be restored
        # from version control.
        card_content = """
---
license: cc-by-4.0
task_categories:
- tabular-regression
- time-series-forecasting
language:
- fr
tags:
- agriculture
- herbicides
- weed-pressure
- crop-rotation
- france
- bretagne
size_categories:
- 1K<n<10K
---
"""
        return card_content

    # NOTE(review): the ``def`` line of this method was lost in the damaged
    # source. The parameter names ``local_app_path`` and ``space_name`` and the
    # ``-> str`` return annotation are taken from the surviving body; the
    # method name, parameter order and any default values are assumptions that
    # must be confirmed against version control.
    def upload_app_to_space(self, local_app_path: str, space_name: str) -> str:
        """Upload the Gradio app as a Hugging Face Space.

        Args:
            local_app_path: Directory containing the application files.
            space_name: Name of the Space; it is created under the account
                that owns the configured token.

        Returns:
            A message containing the URL of the Space.

        Raises:
            ValueError: If no token is configured.
        """
        if not self.token:
            raise ValueError("HF_TOKEN required for uploading")

        repo_id = f"{self.api.whoami()['name']}/{space_name}"

        # Create Space repository (best-effort, see upload loop below for the
        # calls that actually need the repository to exist).
        try:
            create_repo(
                repo_id=repo_id,
                token=self.token,
                repo_type="space",
                space_sdk="gradio",
                private=False,
                exist_ok=True,
            )
        except Exception as e:
            print(f"Space might already exist: {e}")

        # Upload files
        app_files = [
            "app.py",
            "requirements.txt",
            "gradio_app.py",
            "data_loader.py",
            "analysis_tools.py",
            "mcp_server.py",
            "README.md",
        ]

        for file_name in app_files:
            file_path = Path(local_app_path) / file_name
            if file_path.exists():
                upload_file(
                    path_or_fileobj=str(file_path),
                    path_in_repo=file_name,
                    repo_id=repo_id,
                    repo_type="space",
                    token=self.token,
                )
                print(f"Uploaded {file_name}")
            else:
                # Report instead of skipping silently so an incomplete Space
                # is visible in the output.
                print(f"Skipped {file_name} (not found in {local_app_path})")

        return f"Space created at https://huggingface.co/spaces/{repo_id}"

    def create_space_readme(self) -> str:
        """Create README for Hugging Face Space.

        Returns:
            The README text: YAML front matter followed by a French-language
            description of the application.
        """
        readme_content = """
---
title: Agricultural Analysis - Kerguéhennec
emoji: 🚜
colorFrom: green
colorTo: blue
sdk: gradio
sdk_version: 4.0.0
app_file: app.py
pinned: false
license: cc-by-4.0
---

# 🚜 Agricultural Analysis - Station de Kerguéhennec

Outil d'analyse des données agricoles pour l'optimisation des pratiques phytosanitaires et l'identification des parcelles adaptées aux cultures sensibles.

## Fonctionnalités

- 📊 Analyse des données d'interventions agricoles
- 🌿 Évaluation de la pression adventices (IFT)
- 🔮 Prédictions pour les 3 prochaines années
- 🔄 Analyse de l'impact des rotations culturales
- 💊 Étude des herbicides utilisés
- 🎯 Identification des parcelles pour cultures sensibles

## Utilisation

1. Sélectionnez l'onglet correspondant à votre analyse
2. Configurez les filtres selon vos besoins
3. Lancez l'analyse pour obtenir les résultats
4. Explorez les visualisations interactives

## Données

Basé sur les données de la Station Expérimentale de Kerguéhennec (2014-2024).
"""
        return readme_content

    def setup_environment_variables(self) -> Dict[str, str]:
        """Setup environment variables for Hugging Face deployment.

        Returns:
            A mapping of variable names to values. ``HF_TOKEN`` holds the real
            token when one is configured (a placeholder otherwise), so the
            result must not be logged or printed unmasked.
        """
        env_vars = {
            "HF_TOKEN": self.token or "your_hf_token_here",
            "DATASET_ID": self.dataset_id,
            "GRADIO_SERVER_NAME": "0.0.0.0",
            "GRADIO_SERVER_PORT": "7860",
        }
        return env_vars


# Usage example
if __name__ == "__main__":
    # Initialize HF integration
    hf = HuggingFaceIntegration()

    # The data location can be overridden without editing the file; the
    # default is the path the example originally hard-coded.
    data_path = os.environ.get(
        "AGRI_DATA_PATH", "/Users/tracyandre/Downloads/OneDrive_1_9-17-2025"
    )

    # Upload dataset (requires HF_TOKEN)
    if hf.token:
        try:
            result = hf.upload_dataset(data_path)
            print(result)
        except Exception as e:
            print(f"Dataset upload failed: {e}")

    # Create dataset card
    card = hf.create_dataset_card()
    print("Dataset card created")

    # Show environment setup. The real token is masked so it never reaches
    # stdout or captured logs.
    env_vars = hf.setup_environment_variables()
    printable_env = dict(env_vars)
    if hf.token:
        printable_env["HF_TOKEN"] = "***"
    print("Environment variables:", printable_env)