fix: handle no beginToken for token based reasoning models (#1713)
* refactor: remove debug console log in updateLocalEnv script
* fix: handle no beginToken for token-based reasoning models
chart/env/prod.yaml

@@ -139,7 +139,7 @@ envVars:
           "description": "The first reasoning model from DeepSeek, distilled into a 32B dense model. Outperforms o1-mini on multiple benchmarks.",
           "reasoning": {
             "type": "tokens",
-            "beginToken": "<think>",
+            "beginToken": "",
             "endToken": "</think>"
           },
           "promptExamples": [
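For context, this is the reasoning block of a model entry in the MODELS configuration. A minimal TypeScript sketch of such an entry is below; the name field value is illustrative, only the reasoning fields mirror the config above.

// Sketch of a token-based reasoning config with no begin token.
// An empty beginToken means the model's output starts already inside the
// reasoning section, and only the end token marks where the final answer begins.
const exampleModelConfig = {
	name: "DeepSeek-R1-Distill-Qwen-32B", // illustrative
	description:
		"The first reasoning model from DeepSeek, distilled into a 32B dense model. Outperforms o1-mini on multiple benchmarks.",
	reasoning: {
		type: "tokens" as const,
		beginToken: "", // nothing to search for: reasoning starts at the first streamed character
		endToken: "</think>", // everything before this token is treated as reasoning
	},
};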
scripts/updateLocalEnv.ts

@@ -30,7 +30,5 @@ full_config = full_config.replaceAll(
 	"https://api-inference.huggingface.co"
 );
 
-console.log(full_config);
-
 // Write full_config to .env.local
 fs.writeFileSync(".env.local", full_config);
src/lib/server/models.ts

@@ -24,7 +24,7 @@ const reasoningSchema = z.union([
 	}),
 	z.object({
 		type: z.literal("tokens"), // use beginning and end tokens that define the reasoning portion of the answer
-		beginToken: z.string(),
+		beginToken: z.string(), // empty string means the model starts in reasoning mode
 		endToken: z.string(),
 	}),
 	z.object({
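Because beginToken stays a plain z.string(), an empty string validates without any special casing. A minimal sketch of just the "tokens" member of the union (the other members are omitted):

import { z } from "zod";

// Sketch of the "tokens" variant of the reasoning schema.
const tokensReasoningSchema = z.object({
	type: z.literal("tokens"),
	beginToken: z.string(), // empty string means the model starts in reasoning mode
	endToken: z.string(),
});

// An empty beginToken is still a valid z.string(), so this config parses cleanly.
const parsed = tokensReasoningSchema.parse({
	type: "tokens",
	beginToken: "",
	endToken: "</think>",
});

console.log(parsed.beginToken === ""); // true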
src/lib/server/textGeneration/generate.ts

@@ -27,7 +27,10 @@ export async function* generate(
 	const startTime = new Date();
 	if (
 		model.reasoning &&
-		(model.reasoning.type === "regex" || model.reasoning.type === "summarize")
+		// if the beginToken is an empty string, the model starts in reasoning mode
+		(model.reasoning.type === "regex" ||
+			model.reasoning.type === "summarize" ||
+			(model.reasoning.type === "tokens" && model.reasoning.beginToken === ""))
 	) {
 		// if the model has reasoning in regex or summarize mode, it starts in reasoning mode
 		// and we extract the answer from the reasoning

@@ -104,7 +107,11 @@ Do not use prefixes such as Response: or Answer: when answering to the user.`,
 	} else if (model.reasoning && model.reasoning.type === "tokens") {
 		// make sure to remove the content of the reasoning buffer from
 		// the final answer to avoid duplication
-		const beginIndex = reasoningBuffer.indexOf(model.reasoning.beginToken);
+
+		// if the beginToken is an empty string, we don't need to remove anything
+		const beginIndex = model.reasoning.beginToken
+			? reasoningBuffer.indexOf(model.reasoning.beginToken)
+			: 0;
 		const endIndex = reasoningBuffer.lastIndexOf(model.reasoning.endToken);
 
 		if (beginIndex !== -1 && endIndex !== -1) {
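The slicing that consumes these indices sits outside the hunk, so the standalone sketch below only illustrates the intent of the change; the stripReasoning helper and its trimming behavior are assumptions for illustration, not code from this PR.

// Sketch: strip the reasoning section from a streamed buffer using begin/end tokens.
// When beginToken is "", the buffer is assumed to start inside the reasoning section,
// so extraction begins at index 0, mirroring the change in generate.ts.
function stripReasoning(
	buffer: string,
	reasoning: { beginToken: string; endToken: string }
): string {
	const beginIndex = reasoning.beginToken ? buffer.indexOf(reasoning.beginToken) : 0;
	const endIndex = buffer.lastIndexOf(reasoning.endToken);

	if (beginIndex === -1 || endIndex === -1) {
		return buffer; // tokens not found: leave the buffer untouched
	}

	// Drop everything from the begin token up to and including the end token.
	return (
		buffer.slice(0, beginIndex) + buffer.slice(endIndex + reasoning.endToken.length)
	).trim();
}

// With an empty beginToken, the whole prefix up to "</think>" is treated as reasoning.
console.log(
	stripReasoning("thinking about it...</think>The answer is 42.", {
		beginToken: "",
		endToken: "</think>",
	})
); // "The answer is 42."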