Spaces:
Sleeping
Sleeping
Commit
·
0ad97e0
1
Parent(s):
60df644
Refine initial setup for ingestion_js #3 (update module version)
Browse files
- ingestion_js/lib/parser.ts +2 -6
- ingestion_js/next.config.js +11 -4
ingestion_js/lib/parser.ts
CHANGED
|
@@ -1,10 +1,6 @@
|
|
| 1 |
-
import * as pdfjs from 'pdfjs-dist'
|
| 2 |
import mammoth from 'mammoth'
|
| 3 |
|
| 4 |
-
// Configure pdfjs in Node
|
| 5 |
-
// @ts-ignore
|
| 6 |
-
pdfjs.GlobalWorkerOptions.workerSrc = 'pdfjs-dist/build/pdf.worker.js'
|
| 7 |
-
|
| 8 |
export type Page = { page_num: number; text: string; images: Buffer[] }
|
| 9 |
|
| 10 |
export async function parsePdfBytes(buf: Buffer): Promise<Page[]> {
|
|
@@ -15,7 +11,7 @@ export async function parsePdfBytes(buf: Buffer): Promise<Page[]> {
|
|
| 15 |
for (let i = 1; i <= num; i++) {
|
| 16 |
const page = await pdf.getPage(i)
|
| 17 |
const content = await page.getTextContent()
|
| 18 |
-
const text = content.items.map((it: any) => (it.str || '')).join(' ')
|
| 19 |
out.push({ page_num: i, text, images: [] })
|
| 20 |
}
|
| 21 |
return out
|
|
|
|
| 1 |
+
import * as pdfjs from 'pdfjs-dist/legacy/build/pdf'
|
| 2 |
import mammoth from 'mammoth'
|
| 3 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
export type Page = { page_num: number; text: string; images: Buffer[] }
|
| 5 |
|
| 6 |
export async function parsePdfBytes(buf: Buffer): Promise<Page[]> {
|
|
|
|
| 11 |
for (let i = 1; i <= num; i++) {
|
| 12 |
const page = await pdf.getPage(i)
|
| 13 |
const content = await page.getTextContent()
|
| 14 |
+
const text = (content.items as any[]).map((it: any) => (it.str || '')).join(' ')
|
| 15 |
out.push({ page_num: i, text, images: [] })
|
| 16 |
}
|
| 17 |
return out
|
ingestion_js/next.config.js
CHANGED
|
@@ -1,10 +1,17 @@
|
|
| 1 |
/** @type {import('next').NextConfig} */
|
| 2 |
const nextConfig = {
|
| 3 |
experimental: {
|
| 4 |
-
serverActions: {
|
| 5 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
}
|
|
|
|
| 7 |
}
|
| 8 |
-
}
|
| 9 |
|
| 10 |
-
module.exports = nextConfig
|
|
|
|
| 1 |
/** @type {import('next').NextConfig} */
|
| 2 |
const nextConfig = {
|
| 3 |
experimental: {
|
| 4 |
+
serverActions: { bodySizeLimit: '50mb' },
|
| 5 |
+
serverComponentsExternalPackages: ['pdfjs-dist']
|
| 6 |
+
},
|
| 7 |
+
webpack: (config) => {
|
| 8 |
+
config.resolve = config.resolve || {}
|
| 9 |
+
config.resolve.alias = {
|
| 10 |
+
...(config.resolve.alias || {}),
|
| 11 |
+
canvas: false
|
| 12 |
}
|
| 13 |
+
return config
|
| 14 |
}
|
| 15 |
+
}
|
| 16 |
|
| 17 |
+
module.exports = nextConfig
|