BounharAbdelaziz committed on
Commit
309169c
·
verified ·
1 Parent(s): 5756e92

Explicitly set the EOS and pad tokens

Browse files
Files changed (1) hide show
  1. app.py +12 -6
app.py CHANGED
@@ -9,11 +9,15 @@ device = "cuda:0" if torch.cuda.is_available() else "cpu"
9
  print(f'[INFO] Using device: {device}')
10
 
11
  # Load the pretrained model and tokenizer
12
- MODEL_NAME = "BounharAbdelaziz/Al-Atlas-LLM-0.5B"
13
 
14
- tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
15
  model = AutoModelForCausalLM.from_pretrained(MODEL_NAME).to(device)
16
 
 
 
 
 
17
  # Predefined examples
18
  examples = [
19
  ["الذكاء الاصطناعي هو فرع من علوم الكمبيوتر اللي كيركز"
@@ -39,6 +43,8 @@ def generate_text(prompt, max_length=256, temperature=0.7, top_p=0.9, top_k=150,
39
  num_beams=num_beams,
40
  top_k= top_k,
41
  early_stopping = True,
 
 
42
  )
43
  return tokenizer.decode(output[0], skip_special_tokens=True)
44
 
@@ -49,9 +55,9 @@ if __name__ == "__main__":
49
  fn=generate_text,
50
  inputs=[
51
  gr.Textbox(label="Prompt: دخل النص بالدارجة"),
52
- gr.Slider(50, 4096, value=256, label="Max Length"),
53
- gr.Slider(0.1, 1.5, value=0.7, label="Temperature"),
54
- gr.Slider(0.1, 1.0, value=0.9, label="Top-p"),
55
  gr.Slider(1, 10000, value=150, label="Top-k"),
56
  gr.Slider(1, 20, value=8, label="Number of Beams"),
57
  gr.Slider(0.0, 100.0, value=1.5, label="Repetition Penalty"),
@@ -62,4 +68,4 @@ if __name__ == "__main__":
62
  examples=examples,
63
  )
64
 
65
- app.launch()
 
9
  print(f'[INFO] Using device: {device}')
10
 
11
  # Load the pretrained model and tokenizer
12
+ MODEL_NAME = "BounharAbdelaziz/Al-Atlas-LLM-0.5B" # "atlasia/Al-Atlas-LLM-mid-training" # "BounharAbdelaziz/Al-Atlas-LLM-0.5B" #"atlasia/Al-Atlas-LLM"
13
 
14
+ tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME) # , token=token
15
  model = AutoModelForCausalLM.from_pretrained(MODEL_NAME).to(device)
16
 
17
+ # Fix tokenizer padding
18
+ if tokenizer.pad_token is None:
19
+ tokenizer.pad_token = tokenizer.eos_token # Set pad token
20
+
21
  # Predefined examples
22
  examples = [
23
  ["الذكاء الاصطناعي هو فرع من علوم الكمبيوتر اللي كيركز"
 
43
  num_beams=num_beams,
44
  top_k= top_k,
45
  early_stopping = True,
46
+ pad_token_id=tokenizer.pad_token_id, # Explicit pad token
47
+ eos_token_id=tokenizer.eos_token_id, # Explicit eos token
48
  )
49
  return tokenizer.decode(output[0], skip_special_tokens=True)
50
 
 
55
  fn=generate_text,
56
  inputs=[
57
  gr.Textbox(label="Prompt: دخل النص بالدارجة"),
58
+ gr.Slider(8, 4096, value=256, label="Max Length"),
59
+ gr.Slider(0.0, 2, value=0.7, label="Temperature"),
60
+ gr.Slider(0.0, 1.0, value=0.9, label="Top-p"),
61
  gr.Slider(1, 10000, value=150, label="Top-k"),
62
  gr.Slider(1, 20, value=8, label="Number of Beams"),
63
  gr.Slider(0.0, 100.0, value=1.5, label="Repetition Penalty"),
 
68
  examples=examples,
69
  )
70
 
71
+ app.launch()