openai
/

gpt-oss-20b

@@ -288,30 +288,37 @@
             {%- endif %}
             {%- if message.content and message.thinking %}
                 {{- raise_exception("Cannot pass both content and thinking in an assistant message with tool calls! Put the analysis message in one or the other, but not both.") }}
-            {%- elif message.content and not future_final_message.found %}
-                {{- "<|start|>assistant<|channel|>analysis<|message|>" + message.content + "<|end|>" }}
-            {%- elif message.thinking and not future_final_message.found %}
-                {{- "<|start|>assistant<|channel|>analysis<|message|>" + message.thinking + "<|end|>" }}
             {%- endif %}
-            {{- "<|start|>assistant to=" }}
-            {{- "functions." + tool_call.name + "<|channel|>commentary " }}
-            {{- (tool_call.content_type if tool_call.content_type is defined else "json") + "<|message|>" }}
-            {{- tool_call.arguments|tojson }}
-            {{- "<|call|>" }}
             {%- set last_tool_call.name = tool_call.name %}
         {%- elif loop.last and not add_generation_prompt %}
             {#- Only render the CoT if the final turn is an assistant turn and add_generation_prompt is false #}
             {#- This is a situation that should only occur in training, never in inference. #}
-            {%- if "thinking" in message %}
-                {{- "<|start|>assistant<|channel|>analysis<|message|>" + message.thinking + "<|end|>" }}
-            {%- endif %}
-            {#- <|return|> indicates the end of generation, but <|end|> does not #}
-            {#- <|return|> should never be an input to the model, but we include it as the final token #}
-            {#- when training, so the model learns to emit it. #}
-            {{- "<|start|>assistant<|channel|>final<|message|>" + message.content + "<|return|>" }}
         {%- else %}
             {#- CoT is dropped during all previous turns, so we never render it for inference #}
-            {{- "<|start|>assistant<|channel|>final<|message|>" + message.content + "<|end|>" }}
             {%- set last_tool_call.name = none %}
         {%- endif %}
     {%- elif message.role == 'tool' -%}

             {%- endif %}
             {%- if message.content and message.thinking %}
                 {{- raise_exception("Cannot pass both content and thinking in an assistant message with tool calls! Put the analysis message in one or the other, but not both.") }}
             {%- endif %}
+            {#- Assistant tool-call turn, wrapped in a generation block so these tokens can be identified as assistant output. #}
+            {#- Both generation tags are left-trimmed: block trimming options do not remove the newline that follows an #}
+            {#- expression, so an untrimmed closing tag would leak a newline after the call token into the prompt. #}
+            {%- generation %}
+                {#- Analysis text is rendered only when future_final_message.found is false. #}
+                {#- It is taken from message.content if set, otherwise from message.thinking. #}
+                {%- if message.content and not future_final_message.found %}
+                    {{- "<|start|>assistant<|channel|>analysis<|message|>" + message.content + "<|end|>" }}
+                {%- elif message.thinking and not future_final_message.found %}
+                    {{- "<|start|>assistant<|channel|>analysis<|message|>" + message.thinking + "<|end|>" }}
+                {%- endif %}
+                {#- Tool call header: recipient, commentary channel, and content type (defaults to json). #}
+                {{- "<|start|>assistant to=" }}
+                {{- "functions." + tool_call.name + "<|channel|>commentary " }}
+                {{- (tool_call.content_type if tool_call.content_type is defined else "json") + "<|message|>" }}
+                {{- tool_call.arguments|tojson }}
+                {{- "<|call|>" }}
+            {%- endgeneration %}
             {%- set last_tool_call.name = tool_call.name %}
         {%- elif loop.last and not add_generation_prompt %}
             {#- Only render the CoT if the final turn is an assistant turn and add_generation_prompt is false #}
             {#- This is a situation that should only occur in training, never in inference. #}
+            {#- Final assistant turn rendered without a generation prompt, wrapped in a generation block. #}
+            {#- Both generation tags are left-trimmed so that no newline or indentation surrounding them is rendered; #}
+            {#- in particular nothing may follow the return token emitted below. #}
+            {%- generation %}
+                {#- The analysis channel is rendered only when the message carries a "thinking" key. #}
+                {%- if "thinking" in message %}
+                    {{- "<|start|>assistant<|channel|>analysis<|message|>" + message.thinking + "<|end|>" }}
+                {%- endif %}
+                {#- <|return|> indicates the end of generation, but <|end|> does not #}
+                {#- <|return|> should never be an input to the model, but we include it as the final token #}
+                {#- when training, so the model learns to emit it. #}
+                {{- "<|start|>assistant<|channel|>final<|message|>" + message.content + "<|return|>" }}
+            {%- endgeneration %}
         {%- else %}
             {#- CoT is dropped during all previous turns, so we never render it for inference #}
+            {#- Earlier assistant turn: only the final channel is rendered, closed with the end token. #}
+            {#- Both generation tags are left-trimmed so the newline after the expression below is not rendered #}
+            {#- between this turn and the next message. #}
+            {%- generation %}
+                {{- "<|start|>assistant<|channel|>final<|message|>" + message.content + "<|end|>" }}
+            {%- endgeneration %}
             {%- set last_tool_call.name = none %}
         {%- endif %}
     {%- elif message.role == 'tool' -%}