37-AN committed 403ced7 (1 parent: 207d24c)

Fix 403 error by using local models

Files changed:
- Dockerfile         +18 -7
- app/core/llm.py    +42 -43
- deploy_to_hf.py    +116 -49
- push_to_hf.py      +66 -0
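The core of the fix, as the app/core/llm.py diff below shows, is to stop calling the hosted Inference API (HuggingFaceHub / HuggingFaceEndpoint, which returned 403 without a valid token) and instead load a small model locally with a transformers pipeline. A minimal sketch of that pattern, assuming the langchain version pinned in requirements.txt still exposes the legacy langchain.llms.HuggingFacePipeline wrapper:

# Minimal sketch of the "local pipeline instead of hosted API" pattern.
# Model name and generation settings mirror the Dockerfile defaults below.
from transformers import pipeline
from langchain.llms import HuggingFacePipeline

pipe = pipeline(
    "text-generation",
    model="distilgpt2",   # small model, downloaded once into the local cache
    max_length=256,
    temperature=0.7,
)
llm = HuggingFacePipeline(pipeline=pipe)  # runs locally, so no 403 from the Inference API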
Dockerfile
CHANGED
@@ -14,14 +14,15 @@ COPY requirements.txt .
 # Install Python dependencies
 RUN pip install --no-cache-dir -r requirements.txt
 
-# Create cache directories with proper permissions
+# Create all cache directories with proper permissions
 RUN mkdir -p /.cache && chmod 777 /.cache
 RUN mkdir -p /root/.cache && chmod 777 /root/.cache
 RUN mkdir -p /app/.cache && chmod 777 /app/.cache
+RUN mkdir -p /tmp/.cache && chmod 777 /tmp/.cache
+RUN mkdir -p /home/.cache && chmod 777 /home/.cache
 
-# Create models directory
+# Create models directory with proper permissions
 RUN mkdir -p /app/models && chmod 777 /app/models
-ENV TRANSFORMERS_CACHE=/app/models
 
 # Copy the rest of the application
 COPY . .

@@ -30,18 +31,28 @@ COPY . .
 RUN mkdir -p data/documents data/vector_db && \
     chmod -R 777 data
 
-# Set environment variables
+# Set environment variables for cache locations
+ENV TRANSFORMERS_CACHE=/app/models
 ENV TOKENIZERS_PARALLELISM=false
 ENV HF_HOME=/app/.cache
 ENV XDG_CACHE_HOME=/app/.cache
 ENV HUGGINGFACEHUB_API_TOKEN=""
 ENV HF_API_KEY=""
-
+
+# Use small local models that don't require API access
+# distilgpt2 is a small model that works well locally
ENV LLM_MODEL="distilgpt2"
+# all-MiniLM-L6-v2 is small and efficient for embeddings
 ENV EMBEDDING_MODEL="sentence-transformers/all-MiniLM-L6-v2"
 
-#
+# Set moderate temperature and token limit
+ENV DEFAULT_TEMPERATURE=0.7
+ENV MAX_TOKENS=256
+ENV CHUNK_SIZE=512
+ENV CHUNK_OVERLAP=128
+
+# Expose port for Hugging Face Spaces
 EXPOSE 7860
 
-#
+# Run the Streamlit app on the correct port
 CMD ["streamlit", "run", "app/ui/streamlit_app.py", "--server.port=7860", "--server.address=0.0.0.0"]
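The new ENV defaults (LLM_MODEL, DEFAULT_TEMPERATURE, MAX_TOKENS, CHUNK_SIZE, CHUNK_OVERLAP) only take effect if the app reads them from the environment. The config module itself is not part of this commit; a hedged sketch of how such a module could expose the names used in app/core/llm.py:

# Hypothetical config module (not included in this commit); shows how the names
# referenced by app/core/llm.py could be sourced from the Dockerfile ENV defaults.
import os

LLM_MODEL = os.getenv("LLM_MODEL", "distilgpt2")
EMBEDDING_MODEL = os.getenv("EMBEDDING_MODEL", "sentence-transformers/all-MiniLM-L6-v2")
HF_API_KEY = os.getenv("HF_API_KEY", "") or os.getenv("HUGGINGFACEHUB_API_TOKEN", "")
DEFAULT_TEMPERATURE = float(os.getenv("DEFAULT_TEMPERATURE", "0.7"))
MAX_TOKENS = int(os.getenv("MAX_TOKENS", "256"))
CHUNK_SIZE = int(os.getenv("CHUNK_SIZE", "512"))
CHUNK_OVERLAP = int(os.getenv("CHUNK_OVERLAP", "128"))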
app/core/llm.py
CHANGED
@@ -5,6 +5,11 @@ from langchain.chains import LLMChain
 from langchain.prompts import PromptTemplate
 import sys
 import os
+import logging
+
+# Configure logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
 
 # Add project root to path for imports
 sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))

@@ -19,21 +24,39 @@ def get_llm():
         os.makedirs(cache_dir, exist_ok=True)
         os.chmod(cache_dir, 0o777)
     except Exception as e:
-
+        logger.warning(f"Could not create cache directory: {e}")
         cache_dir = None
 
-    #
-    os.
+    # Never rely on API key in Spaces environment
+    api_key = os.getenv("HUGGINGFACEHUB_API_TOKEN", "") or os.getenv("HF_API_KEY", "")
+    logger.info(f"Using model: {LLM_MODEL}")
 
-    #
+    # Always try local pipeline first (most reliable in Spaces)
     try:
-
+        from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
+
+        logger.info(f"Loading model {LLM_MODEL} as local pipeline")
 
-        # Try
+        # Try loading with more specific model classes for better compatibility
         try:
-
+            # Load tokenizer and model explicitly
+            tokenizer = AutoTokenizer.from_pretrained(LLM_MODEL)
+            model = AutoModelForCausalLM.from_pretrained(LLM_MODEL)
+
+            # Create pipeline with loaded components
+            pipe = pipeline(
+                "text-generation",
+                model=model,
+                tokenizer=tokenizer,
+                max_length=MAX_TOKENS,
+                temperature=DEFAULT_TEMPERATURE
+            )
+
+            return HuggingFacePipeline(pipeline=pipe)
+        except Exception as e:
+            logger.warning(f"Error loading with explicit model/tokenizer: {e}")
 
-            #
+            # Fallback to simpler pipeline instantiation
             pipe = pipeline(
                 "text-generation",
                 model=LLM_MODEL,

@@ -42,44 +65,18 @@ def get_llm():
             )
 
             return HuggingFacePipeline(pipeline=pipe)
-        except Exception as pipe_error:
-            print(f"Error loading pipeline: {pipe_error}")
 
-            # Try using the API if we have a token
-            if HF_API_KEY:
-                print("Falling back to API with auth token...")
-                return HuggingFaceHub(
-                    huggingfacehub_api_token=HF_API_KEY,
-                    repo_id=LLM_MODEL,
-                    model_kwargs={
-                        "temperature": DEFAULT_TEMPERATURE,
-                        "max_length": MAX_TOKENS
-                    }
-                )
-            else:
-                print("No API key, using endpoint without auth...")
-                # Try a simple endpoint without auth
-                return HuggingFaceEndpoint(
-                    endpoint_url=f"https://api-inference.huggingface.co/models/{LLM_MODEL}",
-                    task="text-generation",
-                    model_kwargs={
-                        "temperature": DEFAULT_TEMPERATURE,
-                        "max_length": MAX_TOKENS
-                    }
-                )
     except Exception as e:
-
-        print("Using a fallback mock LLM.")
+        logger.warning(f"Error creating local pipeline: {e}")
 
-        #
+        # Last resort - mock LLM for fallback
         from langchain.llms.fake import FakeListLLM
+        logger.warning("Using mock LLM as fallback")
         return FakeListLLM(
             responses=[
-                "I'm
-                "I'
-                "I'm
-                "I'm a basic AI assistant running in fallback mode. Let me try to help.",
-                "I'm operating with limited capabilities right now. Could you ask something simpler?"
+                "I'm running in fallback mode due to model loading issues. I have limited capabilities right now.",
+                "I can't access the language model currently. Please check the Space logs for more information.",
+                "I'm operating with a simplified model. For better performance, try running this app locally with proper models configured."
             ]
         )
 

@@ -92,20 +89,22 @@ def get_embeddings():
         os.makedirs(cache_dir, exist_ok=True)
         os.chmod(cache_dir, 0o777)
     except Exception as e:
-
+        logger.warning(f"Could not create cache directory: {e}")
         cache_dir = None
 
-    # Try to use local embeddings
+    # Try to use local embeddings
     try:
+        logger.info(f"Loading embeddings model: {EMBEDDING_MODEL}")
         return HuggingFaceEmbeddings(
             model_name=EMBEDDING_MODEL,
             cache_folder=cache_dir
         )
     except Exception as e:
-
+        logger.warning(f"Error initializing embeddings: {e}")
 
         # Create mock embeddings that return random vectors for fallback
         from langchain.embeddings.fake import FakeEmbeddings
+        logger.warning("Using mock embeddings as fallback")
         return FakeEmbeddings(size=384)  # Standard size for small embedding models
 
 def get_chat_model():
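With the API-based fallbacks removed, get_llm() now returns either a local HuggingFacePipeline or a FakeListLLM, and get_embeddings() behaves analogously. A hedged smoke test, assuming app/ is importable as a package from the repo root and that the installed langchain release still supports calling legacy LLM wrappers directly:

# Hedged smoke test for the new fallback chain (run from the repo root).
# Older langchain releases use llm("..."); newer ones also accept llm.invoke("...").
from app.core.llm import get_llm, get_embeddings

llm = get_llm()                        # local distilgpt2 pipeline, or FakeListLLM fallback
print(llm("What can you help me with?"))

emb = get_embeddings()                 # local MiniLM embeddings, or FakeEmbeddings fallback
print(len(emb.embed_query("hello")))   # 384 on both paths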
deploy_to_hf.py
CHANGED
@@ -6,6 +6,7 @@ This script will help you set environment variables and deploy your app.
 import os
 import sys
 import subprocess
+import time
 from getpass import getpass
 from huggingface_hub import HfApi, SpaceHardware, SpaceStage
 

@@ -15,29 +16,35 @@ def setup_deployment():
     print("Hugging Face Spaces Deployment Setup")
     print("="*50)
 
-    #
-    username =
-    token =
-    space_name =
+    # Check if running in an environment with saved credentials
+    username = os.environ.get("HF_USERNAME")
+    token = os.environ.get("HF_TOKEN")
+    space_name = os.environ.get("SPACE_NAME")
 
-    #
-
-
-
+    # If not, ask for credentials
+    if not (username and token and space_name):
+        username = input("Enter your Hugging Face username: ")
+        token = getpass("Enter your Hugging Face token (from https://huggingface.co/settings/tokens): ")
+        space_name = input("Enter your Space name (default: personal-rag-assistant): ") or "personal-rag-assistant"
+
+        # Set environment variables
+        os.environ["HF_USERNAME"] = username
+        os.environ["HF_TOKEN"] = token
+        os.environ["SPACE_NAME"] = space_name
 
     # Write credentials to .env file
     with open(".env", "w") as f:
         f.write(f"HF_API_KEY={token}\n")
         f.write(f"HF_USERNAME={username}\n")
         f.write(f"SPACE_NAME={space_name}\n")
-        f.write("LLM_MODEL=
+        f.write("LLM_MODEL=distilgpt2\n")  # Use smaller model to avoid 403 errors
         f.write("EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L6-v2\n")
         f.write("VECTOR_DB_PATH=./data/vector_db\n")
         f.write("COLLECTION_NAME=personal_assistant\n")
         f.write("DEFAULT_TEMPERATURE=0.7\n")
-        f.write("CHUNK_SIZE=
-        f.write("CHUNK_OVERLAP=
-        f.write("MAX_TOKENS=
+        f.write("CHUNK_SIZE=512\n")  # Smaller chunk size
+        f.write("CHUNK_OVERLAP=128\n")  # Smaller overlap
+        f.write("MAX_TOKENS=256\n")  # Smaller token limit
 
     # Set up git credential helper for Hugging Face
     try:

@@ -80,6 +87,19 @@ def create_space(username, token, space_name):
         exists = any(space.id == f"{username}/{space_name}" for space in spaces)
         if exists:
             print(f"Space {username}/{space_name} exists.")
+
+            # Check if we need to update space configuration
+            try:
+                print("Updating Space configuration to use Docker...")
+                api.update_space(
+                    repo_id=f"{username}/{space_name}",
+                    private=False,
+                    sdk="docker",
+                    hardware=SpaceHardware.CPU_BASIC
+                )
+                print("Space configuration updated.")
+            except Exception as e:
+                print(f"Note: Could not update space configuration: {e}")
         else:
             print(f"Space {username}/{space_name} does not exist. Creating...")
             # Create the space

@@ -175,7 +195,7 @@ def prepare_git_push(username, space_name):
     # Add and commit files
     subprocess.run(["git", "add", "."], check=True)
     try:
-        subprocess.run(["git", "commit", "-m", "
+        subprocess.run(["git", "commit", "-m", "Update for Hugging Face Space deployment"], check=True)
     except subprocess.CalledProcessError:
         # Check if there are changes to commit
         status = subprocess.run(["git", "status", "--porcelain"], capture_output=True, text=True, check=True).stdout.strip()

@@ -203,6 +223,9 @@ def push_to_space(username, token):
     env["GIT_USERNAME"] = username
     env["GIT_PASSWORD"] = token
 
+    # Make sure HUGGINGFACEHUB_API_TOKEN is set in the environment
+    env["HUGGINGFACEHUB_API_TOKEN"] = token
+
     # Determine current branch
     current_branch = subprocess.run(
         ["git", "branch", "--show-current"],

@@ -211,6 +234,8 @@ def push_to_space(username, token):
 
     if not current_branch:
         current_branch = "master"  # Default to master if no branch is returned
+        if not os.path.exists(".git/refs/heads/master"):
+            current_branch = "main"  # Try main as another default
 
     # Push code - force push to override any existing content
     print(f"Pushing from branch {current_branch} to main...")

@@ -219,25 +244,52 @@ def push_to_space(username, token):
     print("\nRunning git push command...")
     print(f"Pushing to Space as user: {username}")
 
-        # Try
-
-
-
-            print(f"Error during push: {e}")
+        # Try different push methods in sequence until one works
+        methods = [
+            # Method 1: Standard remote push
+            lambda: subprocess.run(cmd, check=True, env=env),
 
-        #
-
-
-
+            # Method 2: Direct URL push
+            lambda: subprocess.run(
+                ["git", "push", "-f", f"https://{username}:{token}@huggingface.co/spaces/{username}/{os.environ.get('SPACE_NAME')}", f"{current_branch}:main"],
+                check=True, env=env
+            ),
 
+            # Method 3: Push with credentials explicitly set
+            lambda: subprocess.run(
+                ["git", "push", "-f", "hf", f"{current_branch}:main"],
+                check=True, env={**env, "HUGGINGFACE_TOKEN": token, "HF_TOKEN": token}
+            )
+        ]
+
+        success = False
+        for i, method in enumerate(methods, 1):
             try:
-
+                print(f"\nTrying push method {i}...")
+                method()
+                print(f"Push method {i} succeeded!")
+                success = True
+                break
             except subprocess.CalledProcessError as e:
-                print(f"
-
+                print(f"Push method {i} failed: {e}")
+                if i < len(methods):
+                    print("Trying next method...")
+                    time.sleep(2)  # Give a small delay before trying the next method
 
-
-
+        if success:
+            print("\nCode pushed to Hugging Face Space successfully!")
+        else:
+            raise Exception("All push methods failed")
+
+        # Wait a moment to ensure the Space starts building
+        print("\nWaiting for Space to start building...")
+        time.sleep(5)
+
+        print(f"\nYour Space will be available at: https://huggingface.co/spaces/{username}/{os.environ.get('SPACE_NAME')}")
+        print("It may take a few minutes for the Space to build and start.")
+        return True
+
+    except Exception as e:
         print(f"Error pushing code: {e}")
         print("\nTroubleshooting git push issues:")
         print("1. Ensure your Hugging Face token has write access")

@@ -251,29 +303,44 @@ def push_to_space(username, token):
     return True
 
 def main():
-    """Main
-
-
-    # Create the Space
-    if not create_space(username, token, space_name):
-        print("Failed to create Space. Attempting to continue anyway.")
-
-    # Prepare git for pushing
-    if not prepare_git_push(username, space_name):
-        print("Failed to prepare git. Exiting.")
-        return
-
-    # Push code to Space
-    if not push_to_space(username, token):
-        print("Failed to push code. Exiting.")
-        return
-
-    print("\n" + "="*50)
-    print(f"Deployment completed! Your app should be available at:")
-    print(f"https://huggingface.co/spaces/{username}/{space_name}")
+    """Main entry point for the deployment script."""
+    print("Hugging Face Space Deployment Script")
     print("="*50)
-    print("
-
+    print("This script will help you deploy your app to Hugging Face Spaces.")
+
+    try:
+        # Set up deployment environment
+        username, token, space_name = setup_deployment()
+
+        # Create the Space
+        if not create_space(username, token, space_name):
+            print("Error creating Space. Please check your credentials and try again.")
+            sys.exit(1)
+
+        # Prepare git repository
+        if not prepare_git_push(username, space_name):
+            print("Error preparing git repository. Please check your git configuration and try again.")
+            sys.exit(1)
+
+        # Push to Space
+        if not push_to_space(username, token):
+            print("Error pushing to Space. Please check the logs and try again.")
+            sys.exit(1)
+
+        print("\nDeployment complete!")
+        print(f"Your app is now available at: https://huggingface.co/spaces/{username}/{space_name}")
+        print("\nNote: It may take a few minutes for the Space to build and start.")
+        print("If your app is not showing up properly, check the Space logs in the Hugging Face UI.")
+        print("Common issues:")
+        print("1. Permission errors - check that cache directories have proper permissions")
+        print("2. Model loading errors - try using a smaller model")
+        print("3. Port configuration - ensure app is running on port 7860")
+    except KeyboardInterrupt:
+        print("\nDeployment interrupted by user.")
+        sys.exit(1)
+    except Exception as e:
+        print(f"\nUnexpected error: {e}")
+        sys.exit(1)
 
 if __name__ == "__main__":
     main()
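After pushing, the script only sleeps for five seconds before printing the Space URL. If you want to actually wait for the build, huggingface_hub can poll the runtime stage; a hedged sketch, assuming a recent huggingface_hub release that provides HfApi.get_space_runtime() (the exact stage strings may differ between versions):

# Hedged sketch: poll the Space build status instead of a fixed sleep.
# Assumes a recent huggingface_hub with HfApi.get_space_runtime().
import time
from huggingface_hub import HfApi

def wait_for_space(username: str, space_name: str, token: str, timeout: int = 600) -> str:
    api = HfApi(token=token)
    deadline = time.time() + timeout
    while time.time() < deadline:
        stage = api.get_space_runtime(f"{username}/{space_name}").stage
        print(f"Space stage: {stage}")
        if stage in ("RUNNING", "RUNTIME_ERROR", "BUILD_ERROR"):
            return stage
        time.sleep(10)
    return "TIMEOUT"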
push_to_hf.py
ADDED
@@ -0,0 +1,66 @@
+#!/usr/bin/env python
+"""
+Simple script to push directly to Hugging Face Space.
+This is a streamlined approach when you already have a Space.
+"""
+import os
+import subprocess
+import sys
+from getpass import getpass
+
+def push_to_huggingface():
+    """Push the current directory to Hugging Face Space."""
+    print("=" * 50)
+    print("Simple Hugging Face Push Tool")
+    print("=" * 50)
+
+    # Get credentials
+    username = input("Enter your Hugging Face username: ")
+    token = getpass("Enter your Hugging Face token: ")
+    space_name = input("Enter your Space name: ")
+
+    # Set environment variables
+    os.environ["HUGGINGFACEHUB_API_TOKEN"] = token
+
+    # Add the direct remote URL
+    remote_url = f"https://{username}:{token}@huggingface.co/spaces/{username}/{space_name}"
+
+    try:
+        # Add remote if not exists
+        remotes = subprocess.run(["git", "remote"], capture_output=True, text=True).stdout.strip().split('\n')
+        if "hf" not in remotes:
+            subprocess.run(["git", "remote", "add", "hf", remote_url], check=True)
+        else:
+            subprocess.run(["git", "remote", "set-url", "hf", remote_url], check=True)
+
+        # Stage all files
+        subprocess.run(["git", "add", "."], check=True)
+
+        # Commit changes
+        try:
+            subprocess.run(["git", "commit", "-m", "Fix 403 error by using local models"], check=True)
+        except subprocess.CalledProcessError:
+            # Check if there are changes to commit
+            status = subprocess.run(["git", "status", "--porcelain"], capture_output=True, text=True).stdout.strip()
+            if not status:
+                print("No changes to commit.")
+            else:
+                print("Error making commit. Will try to push existing commits.")
+
+        # Force push to Space
+        print("Pushing to Hugging Face Space...")
+        subprocess.run(["git", "push", "-f", "hf", "HEAD:main"], check=True)
+
+        print("\nSuccess! Your code has been pushed to Hugging Face Space.")
+        print(f"View your Space at: https://huggingface.co/spaces/{username}/{space_name}")
+        print("Note: It may take a few minutes for changes to appear.")
+
+    except subprocess.CalledProcessError as e:
+        print(f"Error: {e}")
+        sys.exit(1)
+    except Exception as e:
+        print(f"Unexpected error: {e}")
+        sys.exit(1)
+
+if __name__ == "__main__":
+    push_to_huggingface()