Update app.py
app.py CHANGED

@@ -106,6 +106,29 @@ hf_hub_download(
 
 retriever_tool = RetrieverTool(docs_processed)
 
+# Define the prompt markers for Gemma 3
+gemma_3_prompt_markers = {
+    Roles.system: PromptMarkers("", "\n"),  # System prompt should be included within user message
+    Roles.user: PromptMarkers("<start_of_turn>user\n", "<end_of_turn>\n"),
+    Roles.assistant: PromptMarkers("<start_of_turn>model\n", "<end_of_turn>\n"),
+    Roles.tool: PromptMarkers("", ""),  # If you need tool support
+}
+
+
+
+
+
+# Create the formatter
+gemma_3_formatter = MessagesFormatter(
+    pre_prompt="",  # No pre-prompt
+    prompt_markers=gemma_3_prompt_markers,
+    include_sys_prompt_in_first_user_message=True,  # Include system prompt in first user message
+    default_stop_sequences=["<end_of_turn>", "<start_of_turn>"],
+    strip_prompt=False,  # Don't strip whitespace from the prompt
+    bos_token="<bos>",  # Beginning of sequence token for Gemma 3
+    eos_token="<eos>",  # End of sequence token for Gemma 3
+)
+
 # based on https://github.com/huggingface/smolagents/pull/450
 # almost overwritten with https://huggingface.co/spaces/sitammeur/Gemma-llamacpp
 class LlamaCppModel(Model):
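
The markers above reproduce Gemma 3's chat template. As a reference for what the formatter is expected to emit, here is a minimal, self-contained sketch (plain Python, independent of llama-cpp-agent) of how such role markers render a conversation; the library's actual MessagesFormatter logic may differ in the details:

# Minimal sketch of how Gemma-3-style prompt markers render a conversation.
# This re-implements the wrapping by hand for illustration; it is NOT the
# actual llama-cpp-agent MessagesFormatter implementation.
MARKERS = {
    "system": ("", "\n"),  # Gemma has no system role; folded into the first user turn
    "user": ("<start_of_turn>user\n", "<end_of_turn>\n"),
    "assistant": ("<start_of_turn>model\n", "<end_of_turn>\n"),
}

def render(messages, bos_token="<bos>"):
    """Wrap each (role, content) pair in its markers and cue the model's reply."""
    prompt = bos_token
    pending_system = ""
    for role, content in messages:
        if role == "system":
            # mirrors include_sys_prompt_in_first_user_message=True
            pending_system = content + "\n"
            continue
        start, end = MARKERS[role]
        prompt += start + pending_system + content + end
        pending_system = ""
    return prompt + "<start_of_turn>model\n"

print(render([
    ("system", "Answer using the retrieved documents."),
    ("user", "What is smolagents?"),
]))
# <bos><start_of_turn>user
# Answer using the retrieved documents.
# What is smolagents?<end_of_turn>
# <start_of_turn>model

This is also why Roles.system gets empty markers and include_sys_prompt_in_first_user_message=True: Gemma's template defines no dedicated system turn, so the system prompt has to ride along in the first user turn.
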
@@ -207,13 +230,14 @@ class LlamaCppModel(Model):
     agent = LlamaCppAgent(
         provider,
         system_prompt=f"{system_message}",
-        predefined_messages_formatter_type=MessagesFormatterType.GEMMA_2,
+        custom_messages_formatter=gemma_3_formatter,
+        #predefined_messages_formatter_type=MessagesFormatterType.GEMMA_2,
         debug_output=True,
     )
-    temperature = 0.
+    temperature = 0.5
     top_k=40
     top_p=0.95
-    max_tokens=
+    max_tokens=2048
     repeat_penalty=1.1
     settings = provider.get_provider_default_settings()
     settings.temperature = temperature
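
The hunk ends at settings.temperature; the remaining sampler values are presumably applied the same way just below the visible context. A sketch under that assumption, with a stand-in for the provider's settings object (the real one comes from provider.get_provider_default_settings(); attribute names other than temperature are assumptions extrapolated from the visible pattern):

from dataclasses import dataclass

# Stand-in for the object returned by provider.get_provider_default_settings()
# (a llama-cpp-agent provider in the real app). Field names other than
# `temperature` are assumptions based on the settings.<name> pattern above.
@dataclass
class SamplingSettings:
    temperature: float = 0.8
    top_k: int = 40
    top_p: float = 0.95
    max_tokens: int = -1
    repeat_penalty: float = 1.1

settings = SamplingSettings()

# Values set by this commit:
settings.temperature = 0.5       # shown in the diff
settings.top_k = 40              # assumed to follow the same pattern
settings.top_p = 0.95
settings.max_tokens = 2048       # cap on tokens generated per reply
settings.repeat_penalty = 1.1

The substantive change in this hunk is swapping the predefined GEMMA_2 formatter for the custom Gemma 3 formatter defined above; the commented-out line preserves the old choice for easy rollback.
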