Spaces:
Runtime error
Runtime error
goupilew
committed on
[vertex] Add PDF/plain text support (#1520)
Browse files
* [vertex] Add PDF support
* [vertex] Fix lint
* [vertex] Add support for text/plain
src/lib/components/chat/ChatWindow.svelte
CHANGED
|
@@ -213,7 +213,7 @@
|
|
| 213 |
...(!$page.data?.assistant && currentModel.tools
|
| 214 |
? activeTools.flatMap((tool: ToolFront) => tool.mimeTypes ?? [])
|
| 215 |
: []),
|
| 216 |
-
...(currentModel.multimodal ? ["image/*"] : []),
|
| 217 |
];
|
| 218 |
|
| 219 |
$: isFileUploadEnabled = activeMimeTypes.length > 0;
|
|
|
|
| 213 |
...(!$page.data?.assistant && currentModel.tools
|
| 214 |
? activeTools.flatMap((tool: ToolFront) => tool.mimeTypes ?? [])
|
| 215 |
: []),
|
| 216 |
+
...(currentModel.multimodal ? currentModel.multimodalAcceptedMimetypes ?? ["image/*"] : []),
|
| 217 |
];
|
| 218 |
|
| 219 |
$: isFileUploadEnabled = activeMimeTypes.length > 0;
|
src/lib/server/endpoints/document.ts
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import type { MessageFile } from "$lib/types/Message";
|
| 2 |
+
import { z } from "zod";
|
| 3 |
+
|
| 4 |
+
/**
 * Configuration for a file processor: the MIME types it accepts and a size cap.
 *
 * @typeParam TMimeType - String type of the accepted MIME types.
 */
export interface FileProcessorOptions<TMimeType extends string = string> {
	/** MIME types the processor accepts. */
	supportedMimeTypes: TMimeType[];

	/** Maximum accepted file size, expressed in megabytes. */
	maxSizeInMB: number;
}
|
| 8 |
+
|
| 9 |
+
/**
 * Asynchronous processor signature: takes a message file and resolves to its
 * binary content together with the MIME type of that content.
 *
 * NOTE(review): this alias is named `ImageProcessor` although it is declared in
 * the document module — confirm whether it is still needed here.
 *
 * @typeParam TMimeType - String type of the MIME type reported in the result.
 */
export type ImageProcessor<TMimeType extends string = string> = (
	file: MessageFile
) => Promise<{ file: Buffer; mime: TMimeType }>;
|
| 13 |
+
|
| 14 |
+
/**
 * Builds a zod schema for document processor options.
 *
 * `supportedMimeTypes` is restricted to the values listed in
 * `defaults.supportedMimeTypes`. Each field falls back to its counterpart in
 * `defaults`, and the whole object falls back to `defaults`.
 *
 * @param defaults - Allowed MIME types and size limit used as fallback values.
 * @returns The zod schema with defaults applied.
 */
export const createDocumentProcessorOptionsValidator = <TMimeType extends string = string>(
	defaults: FileProcessorOptions<TMimeType>
) => {
	// z.enum expects a non-empty tuple, so the list is split into head and tail.
	const [firstMimeType, ...remainingMimeTypes] = defaults.supportedMimeTypes;

	const mimeTypeSchema = z.enum<string, [TMimeType, ...TMimeType[]]>([
		firstMimeType,
		...remainingMimeTypes,
	]);

	const optionsSchema = z.object({
		supportedMimeTypes: z.array(mimeTypeSchema).default(defaults.supportedMimeTypes),
		maxSizeInMB: z.number().positive().default(defaults.maxSizeInMB),
	});

	return optionsSchema.default(defaults);
};
|
| 31 |
+
|
| 32 |
+
/**
 * Synchronous processor signature: takes a message file and returns its binary
 * content together with the MIME type of that content.
 *
 * @typeParam TMimeType - String type of the MIME type reported in the result.
 */
export type DocumentProcessor<TMimeType extends string = string> = (
	file: MessageFile
) => { file: Buffer; mime: TMimeType };
|
| 36 |
+
|
| 37 |
+
/**
 * Creates a document processor bound to the given options.
 *
 * The returned function checks the file's MIME type against
 * `options.supportedMimeTypes`, decodes `file.value` as base64, and rejects
 * payloads larger than `options.maxSizeInMB` (1 MB = 1000 * 1000 bytes).
 *
 * @param options - Accepted MIME types and maximum size in megabytes.
 * @returns A processor returning the decoded bytes and the validated MIME type.
 * @throws Error if the MIME type is not supported (raised by `validateMimeType`).
 * @throws Error with message "Document is too large" if the decoded payload
 *   exceeds the size limit.
 */
export function makeDocumentProcessor<TMimeType extends string = string>(
	options: FileProcessorOptions<TMimeType>
): DocumentProcessor<TMimeType> {
	return (file) => {
		const { supportedMimeTypes, maxSizeInMB } = options;
		const { mime, value } = file;

		// Run the cheap MIME check first so unsupported uploads are rejected
		// without decoding the whole payload into memory.
		const outputMime = validateMimeType(supportedMimeTypes, mime);

		const buffer = Buffer.from(value, "base64");

		const maxSizeInBytes = maxSizeInMB * 1000 * 1000;

		if (buffer.byteLength > maxSizeInBytes) {
			throw new Error("Document is too large");
		}

		return { file: buffer, mime: outputMime };
	};
}
|
| 57 |
+
|
| 58 |
+
/**
 * Checks that `mime` is one of `supportedMimes` and returns the matching entry.
 *
 * The comparison ignores letter case and surrounding whitespace, because media
 * type and subtype names are case-insensitive. The value returned is the entry
 * from `supportedMimes`, not the raw input.
 *
 * @param supportedMimes - MIME types that are accepted.
 * @param mime - MIME type to validate.
 * @returns The entry of `supportedMimes` that matches `mime`.
 * @throws Error if no entry of `supportedMimes` matches `mime`.
 */
const validateMimeType = <T extends readonly string[]>(
	supportedMimes: T,
	mime: string
): T[number] => {
	const normalizedMime = mime.trim().toLowerCase();
	const matchedMime = supportedMimes.find(
		(candidate) => candidate.toLowerCase() === normalizedMime
	);

	if (matchedMime === undefined) {
		const supportedMimesStr = supportedMimes.join(", ");

		throw new Error(`Mimetype "${mime}" not found in supported mimes: ${supportedMimesStr}`);
	}

	return matchedMime;
};
|
src/lib/server/endpoints/google/endpointVertex.ts
CHANGED
|
@@ -10,6 +10,7 @@ import { z } from "zod";
|
|
| 10 |
import type { Message } from "$lib/types/Message";
|
| 11 |
import type { TextGenerationStreamOutput } from "@huggingface/inference";
|
| 12 |
import { createImageProcessorOptionsValidator, makeImageProcessor } from "../images";
|
|
|
|
| 13 |
|
| 14 |
export const endpointVertexParametersSchema = z.object({
|
| 15 |
weight: z.number().int().positive().default(1),
|
|
@@ -39,12 +40,17 @@ export const endpointVertexParametersSchema = z.object({
|
|
| 39 |
"image/avif",
|
| 40 |
"image/tiff",
|
| 41 |
"image/gif",
|
|
|
|
| 42 |
],
|
| 43 |
preferredMimeType: "image/webp",
|
| 44 |
-
maxSizeInMB:
|
| 45 |
maxWidth: 4096,
|
| 46 |
maxHeight: 4096,
|
| 47 |
}),
|
|
|
|
|
|
|
|
|
|
|
|
|
| 48 |
})
|
| 49 |
.default({}),
|
| 50 |
});
|
|
@@ -109,17 +115,33 @@ export function endpointVertex(input: z.input<typeof endpointVertexParametersSch
|
|
| 109 |
const vertexMessages = await Promise.all(
|
| 110 |
messages.map(async ({ from, content, files }: Omit<Message, "id">): Promise<Content> => {
|
| 111 |
const imageProcessor = makeImageProcessor(multimodal.image);
|
| 112 |
-
const
|
|
|
|
|
|
|
| 113 |
files && files.length > 0
|
| 114 |
-
? await Promise.all(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 115 |
: [];
|
| 116 |
|
|
|
|
|
|
|
| 117 |
return {
|
| 118 |
role: from === "user" ? "user" : "model",
|
| 119 |
parts: [
|
| 120 |
...processedFiles.map((processedFile) => ({
|
| 121 |
inlineData: {
|
| 122 |
-
data: processedFile.
|
| 123 |
mimeType: processedFile.mime,
|
| 124 |
},
|
| 125 |
})),
|
|
|
|
| 10 |
import type { Message } from "$lib/types/Message";
|
| 11 |
import type { TextGenerationStreamOutput } from "@huggingface/inference";
|
| 12 |
import { createImageProcessorOptionsValidator, makeImageProcessor } from "../images";
|
| 13 |
+
import { createDocumentProcessorOptionsValidator, makeDocumentProcessor } from "../document";
|
| 14 |
|
| 15 |
export const endpointVertexParametersSchema = z.object({
|
| 16 |
weight: z.number().int().positive().default(1),
|
|
|
|
| 40 |
"image/avif",
|
| 41 |
"image/tiff",
|
| 42 |
"image/gif",
|
| 43 |
+
"application/pdf",
|
| 44 |
],
|
| 45 |
preferredMimeType: "image/webp",
|
| 46 |
+
maxSizeInMB: 20,
|
| 47 |
maxWidth: 4096,
|
| 48 |
maxHeight: 4096,
|
| 49 |
}),
|
| 50 |
+
document: createDocumentProcessorOptionsValidator({
|
| 51 |
+
supportedMimeTypes: ["application/pdf", "text/plain"],
|
| 52 |
+
maxSizeInMB: 20,
|
| 53 |
+
}),
|
| 54 |
})
|
| 55 |
.default({}),
|
| 56 |
});
|
|
|
|
| 115 |
const vertexMessages = await Promise.all(
|
| 116 |
messages.map(async ({ from, content, files }: Omit<Message, "id">): Promise<Content> => {
|
| 117 |
const imageProcessor = makeImageProcessor(multimodal.image);
|
| 118 |
+
const documentProcessor = makeDocumentProcessor(multimodal.document);
|
| 119 |
+
|
| 120 |
+
const processedFilesWithNull =
|
| 121 |
files && files.length > 0
|
| 122 |
+
? await Promise.all(
|
| 123 |
+
files.map(async (file) => {
|
| 124 |
+
if (file.mime.includes("image")) {
|
| 125 |
+
const { image, mime } = await imageProcessor(file);
|
| 126 |
+
|
| 127 |
+
return { file: image, mime };
|
| 128 |
+
} else if (file.mime === "application/pdf" || file.mime === "text/plain") {
|
| 129 |
+
return documentProcessor(file);
|
| 130 |
+
}
|
| 131 |
+
|
| 132 |
+
return null;
|
| 133 |
+
})
|
| 134 |
+
)
|
| 135 |
: [];
|
| 136 |
|
| 137 |
+
const processedFiles = processedFilesWithNull.filter((file) => file !== null);
|
| 138 |
+
|
| 139 |
return {
|
| 140 |
role: from === "user" ? "user" : "model",
|
| 141 |
parts: [
|
| 142 |
...processedFiles.map((processedFile) => ({
|
| 143 |
inlineData: {
|
| 144 |
+
data: processedFile.file.toString("base64"),
|
| 145 |
mimeType: processedFile.mime,
|
| 146 |
},
|
| 147 |
})),
|
src/lib/server/models.ts
CHANGED
|
@@ -63,6 +63,7 @@ const modelConfig = z.object({
|
|
| 63 |
.passthrough()
|
| 64 |
.optional(),
|
| 65 |
multimodal: z.boolean().default(false),
|
|
|
|
| 66 |
tools: z.boolean().default(false),
|
| 67 |
unlisted: z.boolean().default(false),
|
| 68 |
embeddingModel: validateEmbeddingModelByName(embeddingModels).optional(),
|
|
|
|
| 63 |
.passthrough()
|
| 64 |
.optional(),
|
| 65 |
multimodal: z.boolean().default(false),
|
| 66 |
+
multimodalAcceptedMimetypes: z.array(z.string()).optional(),
|
| 67 |
tools: z.boolean().default(false),
|
| 68 |
unlisted: z.boolean().default(false),
|
| 69 |
embeddingModel: validateEmbeddingModelByName(embeddingModels).optional(),
|
src/lib/types/Model.ts
CHANGED
|
@@ -16,6 +16,7 @@ export type Model = Pick<
|
|
| 16 |
| "datasetUrl"
|
| 17 |
| "preprompt"
|
| 18 |
| "multimodal"
|
|
|
|
| 19 |
| "unlisted"
|
| 20 |
| "tools"
|
| 21 |
| "hasInferenceAPI"
|
|
|
|
| 16 |
| "datasetUrl"
|
| 17 |
| "preprompt"
|
| 18 |
| "multimodal"
|
| 19 |
+
| "multimodalAcceptedMimetypes"
|
| 20 |
| "unlisted"
|
| 21 |
| "tools"
|
| 22 |
| "hasInferenceAPI"
|
src/routes/+layout.server.ts
CHANGED
|
@@ -190,6 +190,7 @@ export const load: LayoutServerLoad = async ({ locals, depends, request }) => {
|
|
| 190 |
parameters: model.parameters,
|
| 191 |
preprompt: model.preprompt,
|
| 192 |
multimodal: model.multimodal,
|
|
|
|
| 193 |
tools:
|
| 194 |
model.tools &&
|
| 195 |
// disable tools on huggingchat android app
|
|
|
|
| 190 |
parameters: model.parameters,
|
| 191 |
preprompt: model.preprompt,
|
| 192 |
multimodal: model.multimodal,
|
| 193 |
+
multimodalAcceptedMimetypes: model.multimodalAcceptedMimetypes,
|
| 194 |
tools:
|
| 195 |
model.tools &&
|
| 196 |
// disable tools on huggingchat android app
|