fix: handle no beginToken for token based reasoning models (#1713)
* refactor: remove debug console log in updateLocalEnv script
* fix: handle no beginToken for token-based reasoning models
chart/env/prod.yaml

@@ -139,7 +139,7 @@ envVars:
           "description": "The first reasoning model from DeepSeek, distilled into a 32B dense model. Outperforms o1-mini on multiple benchmarks.",
           "reasoning": {
             "type": "tokens",
-            "beginToken": "<think>",
+            "beginToken": "",
             "endToken": "</think>"
           },
           "promptExamples": [
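For context, this is the reasoning block of a model entry in the MODELS configuration. A minimal TypeScript sketch of such an entry is below; the name field value is illustrative, only the reasoning fields mirror the config above.

// Sketch of a token-based reasoning config with no begin token.
// An empty beginToken means the model's output starts already inside the
// reasoning section, and only the end token marks where the final answer begins.
const exampleModelConfig = {
	name: "DeepSeek-R1-Distill-Qwen-32B", // illustrative
	description:
		"The first reasoning model from DeepSeek, distilled into a 32B dense model. Outperforms o1-mini on multiple benchmarks.",
	reasoning: {
		type: "tokens" as const,
		beginToken: "", // nothing to search for: reasoning starts at the first streamed character
		endToken: "</think>", // everything before this token is treated as reasoning
	},
};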
scripts/updateLocalEnv.ts

@@ -30,7 +30,5 @@ full_config = full_config.replaceAll(
 	"https://api-inference.huggingface.co"
 );
 
-console.log(full_config);
-
 // Write full_config to .env.local
 fs.writeFileSync(".env.local", full_config);
src/lib/server/models.ts

@@ -24,7 +24,7 @@ const reasoningSchema = z.union([
 	}),
 	z.object({
 		type: z.literal("tokens"), // use beginning and end tokens that define the reasoning portion of the answer
-		beginToken: z.string(),
+		beginToken: z.string(), // empty string means the model starts in reasoning mode
 		endToken: z.string(),
 	}),
 	z.object({
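Because beginToken stays a plain z.string(), an empty string validates without any special casing. A minimal sketch of just the "tokens" member of the union (the other members are omitted):

import { z } from "zod";

// Sketch of the "tokens" variant of the reasoning schema.
const tokensReasoningSchema = z.object({
	type: z.literal("tokens"),
	beginToken: z.string(), // empty string means the model starts in reasoning mode
	endToken: z.string(),
});

// An empty beginToken is still a valid z.string(), so this config parses cleanly.
const parsed = tokensReasoningSchema.parse({
	type: "tokens",
	beginToken: "",
	endToken: "</think>",
});

console.log(parsed.beginToken === ""); // true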
src/lib/server/textGeneration/generate.ts

@@ -27,7 +27,10 @@ export async function* generate(
 	const startTime = new Date();
 	if (
 		model.reasoning &&
-		(model.reasoning.type === "regex" || model.reasoning.type === "summarize")
+		// if the beginToken is an empty string, the model starts in reasoning mode
+		(model.reasoning.type === "regex" ||
+			model.reasoning.type === "summarize" ||
+			(model.reasoning.type === "tokens" && model.reasoning.beginToken === ""))
 	) {
 		// if the model has reasoning in regex or summarize mode, it starts in reasoning mode
 		// and we extract the answer from the reasoning

@@ -104,7 +107,11 @@ Do not use prefixes such as Response: or Answer: when answering to the user.`,
 	} else if (model.reasoning && model.reasoning.type === "tokens") {
 		// make sure to remove the content of the reasoning buffer from
 		// the final answer to avoid duplication
-		const beginIndex = reasoningBuffer.indexOf(model.reasoning.beginToken);
+
+		// if the beginToken is an empty string, we don't need to remove anything
+		const beginIndex = model.reasoning.beginToken
+			? reasoningBuffer.indexOf(model.reasoning.beginToken)
+			: 0;
 		const endIndex = reasoningBuffer.lastIndexOf(model.reasoning.endToken);
 
 		if (beginIndex !== -1 && endIndex !== -1) {
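The slicing that consumes these indices sits outside the hunk, so the standalone sketch below only illustrates the intent of the change; the stripReasoning helper and its trimming behavior are assumptions for illustration, not code from this PR.

// Sketch: strip the reasoning section from a streamed buffer using begin/end tokens.
// When beginToken is "", the buffer is assumed to start inside the reasoning section,
// so extraction begins at index 0, mirroring the change in generate.ts.
function stripReasoning(
	buffer: string,
	reasoning: { beginToken: string; endToken: string }
): string {
	const beginIndex = reasoning.beginToken ? buffer.indexOf(reasoning.beginToken) : 0;
	const endIndex = buffer.lastIndexOf(reasoning.endToken);

	if (beginIndex === -1 || endIndex === -1) {
		return buffer; // tokens not found: leave the buffer untouched
	}

	// Drop everything from the begin token up to and including the end token.
	return (
		buffer.slice(0, beginIndex) + buffer.slice(endIndex + reasoning.endToken.length)
	).trim();
}

// With an empty beginToken, the whole prefix up to "</think>" is treated as reasoning.
console.log(
	stripReasoning("thinking about it...</think>The answer is 42.", {
		beginToken: "",
		endToken: "</think>",
	})
); // "The answer is 42."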