Spaces:
Running
Running
Thewhey-Brian
committed on
Commit
·
87de695
1
Parent(s):
bd710e9
Deploy nanochat
Browse files- .gitignore +20 -0
- app.py +30 -1
.gitignore
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Binary tokenizer files (available at https://huggingface.co/BrianGuo/nanochat-d20-chat)
|
| 2 |
+
tokenizer/token_bytes.pt
|
| 3 |
+
tokenizer/tokenizer.pkl
|
| 4 |
+
|
| 5 |
+
# Python cache
|
| 6 |
+
__pycache__/
|
| 7 |
+
*.py[cod]
|
| 8 |
+
*$py.class
|
| 9 |
+
*.so
|
| 10 |
+
|
| 11 |
+
# Virtual environments
|
| 12 |
+
venv/
|
| 13 |
+
env/
|
| 14 |
+
ENV/
|
| 15 |
+
|
| 16 |
+
# IDE
|
| 17 |
+
.vscode/
|
| 18 |
+
.idea/
|
| 19 |
+
*.swp
|
| 20 |
+
*.swo
|
app.py
CHANGED
|
@@ -28,9 +28,38 @@ def download_model():
|
|
| 28 |
)
|
| 29 |
print("Model downloaded successfully!")
|
| 30 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
if __name__ == "__main__":
|
| 32 |
-
# Download model before starting
|
| 33 |
download_model()
|
|
|
|
| 34 |
|
| 35 |
# Override sys.argv to pass default arguments for HF Spaces
|
| 36 |
sys.argv = [
|
|
|
|
| 28 |
)
|
| 29 |
print("Model downloaded successfully!")
|
| 30 |
|
| 31 |
+
def download_tokenizer(data_dir="/data"):
    """Download the nanochat tokenizer files from Hugging Face.

    Fetches ``tokenizer/token_bytes.pt`` and ``tokenizer/tokenizer.pkl`` from
    the ``BrianGuo/nanochat-d20-chat`` repo into ``<data_dir>/tokenizer``.
    Files that are already present on disk are not downloaded again.

    Args:
        data_dir: Directory under which the ``tokenizer/`` folder lives.
            Defaults to ``/data``, the location this function has always used.
    """
    repo_id = "BrianGuo/nanochat-d20-chat"
    tokenizer_files = ("tokenizer/token_bytes.pt", "tokenizer/tokenizer.pkl")

    # Check every required file rather than "directory is non-empty": a
    # download interrupted after the first file would otherwise be treated as
    # complete on every later start and the second file would never arrive.
    missing = [
        name
        for name in tokenizer_files
        if not os.path.isfile(os.path.join(data_dir, name))
    ]
    if not missing:
        print("Tokenizer found, skipping download")
        return

    print("Downloading tokenizer from BrianGuo/nanochat-d20-chat...")
    # Imported lazily so the skip path above does not need huggingface_hub.
    from huggingface_hub import hf_hub_download

    os.makedirs(os.path.join(data_dir, "tokenizer"), exist_ok=True)

    # Download only the tokenizer files that are not on disk yet.
    for name in missing:
        hf_hub_download(
            repo_id=repo_id,
            filename=name,
            local_dir=data_dir,
            # NOTE(review): kept from the original call; newer huggingface_hub
            # releases appear to deprecate this argument — confirm against the
            # version installed in the Space before removing it.
            local_dir_use_symlinks=False,
        )
    print("Tokenizer downloaded successfully!")
|
| 58 |
+
|
| 59 |
if __name__ == "__main__":
|
| 60 |
+
# Download model and tokenizer before starting
|
| 61 |
download_model()
|
| 62 |
+
download_tokenizer()
|
| 63 |
|
| 64 |
# Override sys.argv to pass default arguments for HF Spaces
|
| 65 |
sys.argv = [
|