baohuynhbk14 committed
Commit cdfbdf6 · verified · 1 Parent(s): 728997f

Update app.py

Files changed (1): app.py +98 -98
app.py CHANGED
@@ -270,6 +270,73 @@ def generate_video(model_name: str, text: str, video_path: str,
         yield buffer, buffer
 
 
+# @spaces.GPU(duration=120)
+# def generate_pdf(model_name: str, text: str, state: Dict[str, Any],
+#                  max_new_tokens: int = 2048,
+#                  temperature: float = 0.6,
+#                  top_p: float = 0.9,
+#                  top_k: int = 50,
+#                  repetition_penalty: float = 1.2):
+
+#     # if model_name == "Qwen2.5-VL-7B-Instruct":
+#     #     processor, model = processor_m, model_m
+#     # elif model_name == "Qwen2.5-VL-3B-Instruct":
+#     #     processor, model = processor_x, model_x
+#     if model_name == "Qwen3-VL-4B-Instruct":
+#         processor, model = processor_q, model_q
+#     elif model_name == "Qwen3-VL-8B-Instruct":
+#         processor, model = processor_y, model_y
+#     # elif model_name == "Qwen3-VL-8B-Thinking":
+#     #     processor, model = processor_z, model_z
+#     elif model_name == "Qwen3-VL-4B-Thinking":
+#         processor, model = processor_t, model_t
+#     elif model_name == "Qwen3-VL-2B-Instruct":
+#         processor, model = processor_l, model_l
+#     elif model_name == "Qwen3-VL-2B-Thinking":
+#         processor, model = processor_j, model_j
+#     else:
+#         yield "Invalid model selected.", "Invalid model selected."
+#         return
+
+#     if not state or not state["pages"]:
+#         yield "Please upload a PDF file first.", "Please upload a PDF file first."
+#         return
+
+#     page_images = state["pages"]
+#     full_response = ""
+#     for i, image in enumerate(page_images):
+#         page_header = f"--- Page {i+1}/{len(page_images)} ---\n"
+#         yield full_response + page_header, full_response + page_header
+
+#         messages = [{"role": "user", "content": [{"type": "image"}, {"type": "text", "text": text}]}]
+#         # Use the selected processor
+#         prompt_full = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+#         inputs = processor(text=[prompt_full], images=[image], return_tensors="pt", padding=True).to(device)
+#         streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=True)
+
+#         generation_kwargs = {
+#             **inputs,
+#             "streamer": streamer,
+#             "max_new_tokens": max_new_tokens,
+#             "do_sample": True,
+#             "temperature": temperature,
+#             "top_p": top_p,
+#             "top_k": top_k,
+#             "repetition_penalty": repetition_penalty
+#         }
+
+#         # Use the selected model
+#         thread = Thread(target=model.generate, kwargs=generation_kwargs)
+#         thread.start()
+
+#         page_buffer = ""
+#         for new_text in streamer:
+#             page_buffer += new_text
+#             yield full_response + page_header + page_buffer, full_response + page_header + page_buffer
+#             time.sleep(0.01)
+
+#         full_response += page_header + page_buffer + "\n\n"
+
 @spaces.GPU(duration=120)
 def generate_pdf(model_name: str, text: str, state: Dict[str, Any],
                  max_new_tokens: int = 2048,
@@ -278,16 +345,10 @@ def generate_pdf(model_name: str, text: str, state: Dict[str, Any],
                  top_k: int = 50,
                  repetition_penalty: float = 1.2):
 
-    # if model_name == "Qwen2.5-VL-7B-Instruct":
-    #     processor, model = processor_m, model_m
-    # elif model_name == "Qwen2.5-VL-3B-Instruct":
-    #     processor, model = processor_x, model_x
     if model_name == "Qwen3-VL-4B-Instruct":
         processor, model = processor_q, model_q
     elif model_name == "Qwen3-VL-8B-Instruct":
         processor, model = processor_y, model_y
-    # elif model_name == "Qwen3-VL-8B-Thinking":
-    #     processor, model = processor_z, model_z
     elif model_name == "Qwen3-VL-4B-Thinking":
         processor, model = processor_t, model_t
     elif model_name == "Qwen3-VL-2B-Instruct":
@@ -303,105 +364,44 @@ def generate_pdf(model_name: str, text: str, state: Dict[str, Any],
         return
 
     page_images = state["pages"]
-    full_response = ""
-    for i, image in enumerate(page_images):
-        page_header = f"--- Page {i+1}/{len(page_images)} ---\n"
-        yield full_response + page_header, full_response + page_header
-
-        messages = [{"role": "user", "content": [{"type": "image"}, {"type": "text", "text": text}]}]
-        # Use the selected processor
-        prompt_full = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
-        inputs = processor(text=[prompt_full], images=[image], return_tensors="pt", padding=True).to(device)
-        streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=True)
-
-        generation_kwargs = {
-            **inputs,
-            "streamer": streamer,
-            "max_new_tokens": max_new_tokens,
-            "do_sample": True,
-            "temperature": temperature,
-            "top_p": top_p,
-            "top_k": top_k,
-            "repetition_penalty": repetition_penalty
-        }
-
-        # Use the selected model
-        thread = Thread(target=model.generate, kwargs=generation_kwargs)
-        thread.start()
-
-        page_buffer = ""
-        for new_text in streamer:
-            page_buffer += new_text
-            yield full_response + page_header + page_buffer, full_response + page_header + page_buffer
-            time.sleep(0.01)
-
-        full_response += page_header + page_buffer + "\n\n"
 
-# @spaces.GPU
-# def generate_pdf(model_name: str, text: str, state: Dict[str, Any],
-#                  max_new_tokens: int = 2048,
-#                  temperature: float = 0.6,
-#                  top_p: float = 0.9,
-#                  top_k: int = 50,
-#                  repetition_penalty: float = 1.2):
-
-#     if model_name == "Qwen3-VL-4B-Instruct":
-#         processor, model = processor_q, model_q
-#     elif model_name == "Qwen3-VL-8B-Instruct":
-#         processor, model = processor_y, model_y
-#     elif model_name == "Qwen3-VL-4B-Thinking":
-#         processor, model = processor_t, model_t
-#     elif model_name == "Qwen3-VL-2B-Instruct":
-#         processor, model = processor_l, model_l
-#     elif model_name == "Qwen3-VL-2B-Thinking":
-#         processor, model = processor_j, model_j
-#     else:
-#         yield "Invalid model selected.", "Invalid model selected."
-#         return
-
-#     if not state or not state["pages"]:
-#         yield "Please upload a PDF file first.", "Please upload a PDF file first."
-#         return
-
-#     page_images = state["pages"]
-
-#     messages = [{"role": "user", "content": [{"type": "text", "text": text}]}]
-#     images_for_processor = []
-#     for frame in page_images:
-#         messages[0]["content"].append({"type": "image"})
-#         images_for_processor.append(frame)
-
-#     prompt_full = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
-
-#     inputs = processor(
-#         text=[prompt_full],
-#         images=images_for_processor,  # Pass the whole list of images
-#         return_tensors="pt",
-#         padding=True
-#     ).to(device)
-
-#     streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=True)
-
-#     generation_kwargs = {
-#         **inputs,
-#         "streamer": streamer,
-#         "max_new_tokens": max_new_tokens,
-#         "do_sample": True,
-#         "temperature": temperature,
-#         "top_p": top_p,
-#         "top_k": top_k,
-#         "repetition_penalty": repetition_penalty
-#     }
-
-#     thread = Thread(target=model.generate, kwargs=generation_kwargs)
-#     thread.start()
-
-#     buffer = ""
-#     for new_text in streamer:
-#         buffer += new_text
-#         buffer = buffer.replace("<|im_end|>", "")  # Add this line, same as the video handler
-#         yield buffer, buffer
-#         time.sleep(0.01)
+    messages = [{"role": "user", "content": [{"type": "text", "text": text}]}]
+    images_for_processor = []
+    for frame in page_images:
+        messages[0]["content"].append({"type": "image"})
+        images_for_processor.append(frame)
+
+    prompt_full = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+
+    inputs = processor(
+        text=[prompt_full],
+        images=images_for_processor,  # Pass the whole list of images
+        return_tensors="pt",
+        padding=True
+    ).to(device)
+
+    streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=True)
+
+    generation_kwargs = {
+        **inputs,
+        "streamer": streamer,
+        "max_new_tokens": max_new_tokens,
+        "do_sample": True,
+        "temperature": temperature,
+        "top_p": top_p,
+        "top_k": top_k,
+        "repetition_penalty": repetition_penalty
+    }
+
+    thread = Thread(target=model.generate, kwargs=generation_kwargs)
+    thread.start()
+
+    buffer = ""
+    for new_text in streamer:
+        buffer += new_text
+        buffer = buffer.replace("<|im_end|>", "")  # Add this line, same as the video handler
+        yield buffer, buffer
+        time.sleep(0.01)
+
 image_examples = [
     ["Explain the content in detail.", "images/force.jpg"],
 