LiamKhoaLe committed on
Commit
0ad97e0
·
1 Parent(s): 60df644

Refine initial setup for ingestion_js #3 (update module version)

Browse files
ingestion_js/lib/parser.ts CHANGED
@@ -1,10 +1,6 @@
1
- import * as pdfjs from 'pdfjs-dist'
2
  import mammoth from 'mammoth'
3
 
4
- // Configure pdfjs in Node
5
- // @ts-ignore
6
- pdfjs.GlobalWorkerOptions.workerSrc = 'pdfjs-dist/build/pdf.worker.js'
7
-
8
  export type Page = { page_num: number; text: string; images: Buffer[] }
9
 
10
  export async function parsePdfBytes(buf: Buffer): Promise<Page[]> {
@@ -15,7 +11,7 @@ export async function parsePdfBytes(buf: Buffer): Promise<Page[]> {
15
  for (let i = 1; i <= num; i++) {
16
  const page = await pdf.getPage(i)
17
  const content = await page.getTextContent()
18
- const text = content.items.map((it: any) => (it.str || '')).join(' ')
19
  out.push({ page_num: i, text, images: [] })
20
  }
21
  return out
 
1
+ import * as pdfjs from 'pdfjs-dist/legacy/build/pdf'
2
  import mammoth from 'mammoth'
3
 
 
 
 
 
4
  export type Page = { page_num: number; text: string; images: Buffer[] }
5
 
6
  export async function parsePdfBytes(buf: Buffer): Promise<Page[]> {
 
11
  for (let i = 1; i <= num; i++) {
12
  const page = await pdf.getPage(i)
13
  const content = await page.getTextContent()
14
+ const text = (content.items as any[]).map((it: any) => (it.str || '')).join(' ')
15
  out.push({ page_num: i, text, images: [] })
16
  }
17
  return out
ingestion_js/next.config.js CHANGED
@@ -1,10 +1,17 @@
1
  /** @type {import('next').NextConfig} */
2
  const nextConfig = {
3
  experimental: {
4
- serverActions: {
5
- bodySizeLimit: '50mb'
 
 
 
 
 
 
6
  }
 
7
  }
8
- };
9
 
10
- module.exports = nextConfig;
 
1
  /** @type {import('next').NextConfig} */
2
  const nextConfig = {
3
  experimental: {
4
+ serverActions: { bodySizeLimit: '50mb' },
5
+ serverComponentsExternalPackages: ['pdfjs-dist']
6
+ },
7
+ webpack: (config) => {
8
+ config.resolve = config.resolve || {}
9
+ config.resolve.alias = {
10
+ ...(config.resolve.alias || {}),
11
+ canvas: false
12
  }
13
+ return config
14
  }
15
+ }
16
 
17
+ module.exports = nextConfig