Spaces:
Runtime error
Runtime error
import base64
import os
import tempfile
import time

import streamlit as st
import torch
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from PIL import Image
from transformers import T5Tokenizer, T5ForConditionalGeneration
from transformers import pipeline
st.image("https://huggingface.co/spaces/wiwaaw/summary/resolve/main/banner.png")

# MODEL AND TOKENIZER
model_checkpoint = "MBZUAI/LaMini-Flan-T5-783M"


@st.cache_resource
def _load_model_and_tokenizer(checkpoint):
    """Load the T5 tokenizer and model for *checkpoint*, cached by Streamlit.

    Streamlit re-executes this script from the top on every widget
    interaction. Without caching, the checkpoint would be re-read and
    re-instantiated on each rerun; ``st.cache_resource`` keeps a single
    shared instance for the lifetime of the server process.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    tokenizer = T5Tokenizer.from_pretrained(checkpoint)
    seq2seq_model = T5ForConditionalGeneration.from_pretrained(checkpoint)
    return tokenizer, seq2seq_model


# Module-level names are kept so the rest of the script can keep using them.
model_tokenizer, model = _load_model_and_tokenizer(model_checkpoint)
# FILE LOADER AND PREPROCESSING
def preprocess_pdf(file, *, chunk_size=170, chunk_overlap=70):
    """Extract the text of a PDF as one string.

    The PDF is loaded with ``PyPDFLoader``, split into chunks with a
    ``RecursiveCharacterTextSplitter``, and the chunk contents are
    concatenated with no separator.

    Args:
        file: Value passed straight to ``PyPDFLoader`` (the caller in this
            file passes a filesystem path).
        chunk_size: ``chunk_size`` given to the text splitter.
        chunk_overlap: ``chunk_overlap`` given to the text splitter.

    Returns:
        The concatenated ``page_content`` of every chunk; an empty string
        when the splitter yields no chunks.
    """
    loader = PyPDFLoader(file)
    pages = loader.load_and_split()
    # NOTE(review): with a non-zero overlap, neighbouring chunks presumably
    # share text, so the joined result may repeat passages - confirm whether
    # that is intended before changing the defaults.
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    texts = text_splitter.split_documents(pages)
    # str.join builds the result in one pass instead of repeated `+` copies.
    return "".join(text.page_content for text in texts)
# LLM PIPELINE
def language_model_pipeline(filepath):
    """Summarize the PDF at *filepath* and return the summary text.

    A ``'summarization'`` pipeline is built around the module-level ``model``
    and ``model_tokenizer`` with ``max_length=500`` and ``min_length=32``.
    The text produced by ``preprocess_pdf(filepath)`` is fed to it, and the
    ``'summary_text'`` field of the first result is returned.
    """
    length_bounds = {"max_length": 500, "min_length": 32}
    summarizer = pipeline(
        "summarization",
        model=model,
        tokenizer=model_tokenizer,
        **length_bounds,
    )
    document_text = preprocess_pdf(filepath)
    outputs = summarizer(document_text)
    return outputs[0]["summary_text"]
# UI: upload a PDF, summarize it on demand, and show the result.
title = st.title("PDF Summarization using LaMini")
uploaded_file = st.file_uploader('Upload your PDF file', type=['pdf'])

if uploaded_file is not None:
    st.success("File Uploaded")
    if st.button("Summarize"):
        # Write the upload to a private temporary file instead of the working
        # directory under the client-supplied name: that name is untrusted,
        # can collide between users, and the file was never cleaned up.
        # getvalue() returns the whole upload regardless of the current
        # stream position, unlike read().
        with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as temp_file:
            temp_file.write(uploaded_file.getvalue())
            filepath = temp_file.name
        try:
            # Show progress while the real work runs (replaces a fixed
            # 10-second sleep that only delayed the result).
            with st.spinner("Summarizing..."):
                summarized_result = language_model_pipeline(filepath)
        finally:
            # Always remove the temporary copy, even if summarization fails.
            os.remove(filepath)
        st.info("Summarization Complete")
        st.success(summarized_result)