fix: only import node-llama-cpp if needed and skip for huggingchat image
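This commit makes node-llama-cpp a lazily loaded dependency: the package is now pulled in with a runtime `import()` only when a local GGUF model is actually used, the shared `utilsLocal.ts` helper is removed, and the production HuggingChat image sets `SKIP_LLAMA_CPP_BUILD=true` so the native llama.cpp build can be skipped. A minimal sketch of the lazy-loading pattern (simplified, not the exact chat-ui code; the helper name is illustrative):

```ts
// Minimal sketch of the lazy-loading pattern in this commit (simplified; not the exact chat-ui code).
// node-llama-cpp is only resolved when a local model is requested, so images built without the
// native llama.cpp addon never evaluate the package at startup.
import { logger } from "$lib/server/logger";

export async function loadLocalLlama(modelRef: string, modelFolder: string) {
	// Dynamic import: the package (and its native bindings) is loaded at call time, not at module load.
	const { getLlama, resolveModelFile } = await import("node-llama-cpp");

	const modelPath = await resolveModelFile(modelRef, modelFolder);
	const llama = await getLlama();

	if (!llama) {
		throw new Error("Failed to initialize llama.cpp build.");
	}

	logger.info(`Loaded llama.cpp with model file at ${modelPath}`);
	return { llama, modelPath };
}
```

The same pattern appears in both `endpointLocal.ts` and `models.ts` below.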
.github/workflows/deploy-prod.yml
CHANGED
@@ -49,6 +49,7 @@ jobs:
             APP_BASE=/chat
             PUBLIC_APP_COLOR=yellow
             PUBLIC_COMMIT_SHA=${{ env.GITHUB_SHA_SHORT }}
+            SKIP_LLAMA_CPP_BUILD=true
   deploy:
     name: Deploy on prod
     runs-on: ubuntu-latest
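The prod workflow now passes `SKIP_LLAMA_CPP_BUILD=true` alongside the other build arguments, so the HuggingChat image (which does not use the local endpoint) can skip compiling llama.cpp. Where the flag is consumed is not part of this diff; a hypothetical guard checking it might look like this:

```ts
// Hypothetical illustration only — the diff does not show where SKIP_LLAMA_CPP_BUILD is read.
// A postinstall-style guard could bail out before triggering the native llama.cpp build.
if (process.env.SKIP_LLAMA_CPP_BUILD === "true") {
	console.log("SKIP_LLAMA_CPP_BUILD=true — skipping the native llama.cpp build step.");
	process.exit(0);
}

// ...otherwise the node-llama-cpp source build would run here.
```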
README.md
CHANGED
@@ -1104,9 +1104,9 @@ docker build -t huggingchat:latest --build-arg INCLUDE_DB=false --build-arg APP_
 If you want to run the images with your local .env.local you have two options
 
 ```bash
-DOTENV_LOCAL=$(<.env.local) docker run --
+DOTENV_LOCAL=$(<.env.local) docker run --network=host -e DOTENV_LOCAL chat-ui-db
 ```
 
 ```bash
-docker run --
+docker run --network=host --mount type=bind,source="$(pwd)/.env.local",target=/app/.env.local chat-ui-db
 ```
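Both README examples now show complete commands against the `chat-ui-db` image: the first passes the contents of `.env.local` through the `DOTENV_LOCAL` environment variable, the second bind-mounts the file into the container at `/app/.env.local`; both use `--network=host` so the container can reach services on the host. As an illustration of the first option's mechanism (hypothetical helper, not chat-ui's actual entrypoint), a start script could materialize the variable into a file like this:

```ts
// Illustration only (hypothetical entrypoint helper, not chat-ui's actual code):
// write DOTENV_LOCAL into /app/.env.local before the server boots.
import { existsSync, writeFileSync } from "fs";

const target = "/app/.env.local";

if (process.env.DOTENV_LOCAL && !existsSync(target)) {
	writeFileSync(target, process.env.DOTENV_LOCAL);
}
```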
src/lib/server/endpoints/local/endpointLocal.ts
CHANGED
@@ -10,14 +10,11 @@ import {
 	makeImageProcessor,
 	type ImageProcessor,
 } from "../images";
-
-import { LlamaChatSession, LlamaContextSequence, resolveModelFile } from "node-llama-cpp";
 import { findRepoRoot } from "$lib/server/findRepoRoot";
 import { fileURLToPath } from "url";
 import { dirname, join } from "path";
 import { logger } from "$lib/server/logger";
-import { llama } from "./utilsLocal";
-
+import type { LlamaContextSequence } from "node-llama-cpp";
 export const endpointLocalParametersSchema = z.object({
 	weight: z.number().int().positive().default(1),
 	model: z.any(),
@@ -55,7 +52,37 @@
 
 	// Initialize Llama model
 
+	const { getLlama, LlamaChatSession, resolveModelFile } = await import("node-llama-cpp");
+
 	const modelPath = await resolveModelFile(path, modelFolder);
+
+	const llama = await getLlama({
+		logger: (level, message) => {
+			switch (level) {
+				case "fatal":
+					logger.fatal(message);
+					break;
+				case "error":
+					logger.error(message);
+					break;
+				case "warn":
+					logger.warn(message);
+					break;
+				case "info":
+					logger.info(message);
+					break;
+				case "log":
+					logger.info(message); // Map 'log' to 'info' since pino doesn't have a 'log' level
+					break;
+				case "debug":
+					logger.debug(message);
+					break;
+				default:
+					break;
+			}
+		},
+	});
+
 	if (!llama) {
 		throw new Error("Failed to initialize llama.cpp build.");
 	}
src/lib/server/endpoints/local/utilsLocal.ts
DELETED
@@ -1,37 +0,0 @@
-import { getLlama } from "node-llama-cpp";
-import { logger } from "$lib/server/logger";
-
-export const llama = await getLlama({
-	logger: (level, message) => {
-		// Log messages based on their level
-		switch (level) {
-			case "fatal":
-				logger.fatal(message);
-				break;
-			case "error":
-				logger.error(message);
-				break;
-			case "warn":
-				logger.warn(message);
-				break;
-			case "info":
-				logger.info(message);
-				break;
-			case "log":
-				logger.info(message); // Map 'log' to 'info' since pino doesn't have a 'log' level
-				break;
-			case "debug":
-				logger.debug(message);
-				break;
-			default:
-				// For 'disabled' or any other unexpected levels
-				break;
-		}
-	},
-}).catch((e) => {
-	logger.warn(
-		e,
-		"Failed to initialize llama.cpp. This won't break anything if you're not using the \"local\" endpoint."
-	);
-	return undefined;
-});
src/lib/server/models.ts
CHANGED
@@ -14,7 +14,6 @@ import { getTokenizer } from "$lib/utils/getTokenizer";
 import { logger } from "$lib/server/logger";
 import { type ToolInput } from "$lib/types/Tool";
 import { join, dirname } from "path";
-import { resolveModelFile, readGgufFileInfo } from "node-llama-cpp";
 import { fileURLToPath } from "url";
 import { findRepoRoot } from "./findRepoRoot";
 import { Template } from "@huggingface/jinja";
@@ -151,6 +150,8 @@ async function getChatPromptRender(
 	const endpoint = m.endpoints?.find((e) => e.type === "local");
 	const path = endpoint?.modelPath ?? `hf:${m.id ?? m.name}`;
 
+	const { resolveModelFile, readGgufFileInfo } = await import("node-llama-cpp");
+
 	const modelPath = await resolveModelFile(path, MODELS_FOLDER);
 
 	const info = await readGgufFileInfo(modelPath, {