Spaces:
Runtime error
Runtime error
app update
Browse files
app.py
CHANGED
|
@@ -41,8 +41,9 @@ model.apply(_init_weights)
|
|
| 41 |
model.load_state_dict
|
| 42 |
|
| 43 |
|
| 44 |
-
checkpoint_dir = Path("out/redpajama/
|
| 45 |
strategy = "auto"
|
|
|
|
| 46 |
devices = 1
|
| 47 |
precision = None
|
| 48 |
|
|
@@ -61,14 +62,7 @@ model = fabric.setup_module(model)
|
|
| 61 |
load_checkpoint(fabric, model, checkpoint_dir)
|
| 62 |
|
| 63 |
tokenizer = Tokenizer(Path('tokenizer_config'))
|
| 64 |
-
encoded = tokenizer.encode(prompt, device=fabric.device)
|
| 65 |
-
prompt_length = encoded.size(0)
|
| 66 |
-
max_returned_tokens = prompt_length + max_new_tokens
|
| 67 |
|
| 68 |
-
with fabric.init_tensor():
|
| 69 |
-
# set the max_seq_length to limit the memory usage to what we need
|
| 70 |
-
model.max_seq_length = max_returned_tokens
|
| 71 |
-
|
| 72 |
@torch.inference_mode()
|
| 73 |
def generate(
|
| 74 |
model: GPT,
|
|
|
|
| 41 |
model.load_state_dict
|
| 42 |
|
| 43 |
|
| 44 |
+
checkpoint_dir = Path("out/redpajama/final-gpt-model-ckpt.pth")
|
| 45 |
strategy = "auto"
|
| 46 |
+
quantize = None
|
| 47 |
devices = 1
|
| 48 |
precision = None
|
| 49 |
|
|
|
|
| 62 |
load_checkpoint(fabric, model, checkpoint_dir)
|
| 63 |
|
| 64 |
tokenizer = Tokenizer(Path('tokenizer_config'))
|
|
|
|
|
|
|
|
|
|
| 65 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 66 |
@torch.inference_mode()
|
| 67 |
def generate(
|
| 68 |
model: GPT,
|