Spaces:
Sleeping
Sleeping
| import os | |
| import time | |
| import asyncio | |
| from llama_index.core.query_engine import CitationQueryEngine | |
| from llama_index.core import VectorStoreIndex | |
| from llama_index.core import Settings | |
| from llama_index.embeddings.huggingface import HuggingFaceEmbedding | |
| from llama_index.llms.gemini import Gemini | |
| from llama_index.core.postprocessor import SimilarityPostprocessor | |
| from llama_index.core.storage.docstore import SimpleDocumentStore | |
| from llama_index.core import StorageContext, load_index_from_storage | |
| from llama_index.core.data_structs import Node | |
| from llama_index.core.schema import NodeWithScore | |
| import re | |
| import pandas as pd | |
| import gradio as gr | |
| import logging | |
| #Enable logging to see what's happening behind the scenes | |
| logging.basicConfig(level=logging.INFO) | |
| token_w = os.environ['token_w'] | |
| HF_TOKEN=os.environ['token_r'] | |
| API_KEY=os.environ["GOOGLE_API_KEY"] | |
| generation_config = { | |
| "temperature": 0, | |
| # "top_p": 1, | |
| # "top_k": 1, | |
| "max_output_tokens":8192, | |
| } | |
| safety_settings = [ | |
| { | |
| "category": "HARM_CATEGORY_HARASSMENT", | |
| "threshold": "BLOCK_NONE" | |
| }, | |
| { | |
| "category": "HARM_CATEGORY_HATE_SPEECH", | |
| "threshold": "BLOCK_NONE" | |
| }, | |
| { | |
| "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", | |
| "threshold": "BLOCK_NONE" | |
| }, | |
| { | |
| "category": "HARM_CATEGORY_DANGEROUS_CONTENT", | |
| "threshold": "BLOCK_NONE" | |
| }, | |
| ] | |
| llm = Gemini( | |
| model="models/gemini-1.5-flash-002", | |
| generation_config=generation_config, | |
| safety_settings=safety_settings, | |
| ) | |
| # Setup embedder | |
| embed_model_name = "malteos/scincl" | |
| embed_model = HuggingFaceEmbedding(model_name=embed_model_name) | |
| Settings.llm = llm | |
| Settings.embed_model = embed_model | |
| # rebuild storage context | |
| storage_context = StorageContext.from_defaults(persist_dir="malteos_scincl__CAR_T_cell__PersistVectorStore_v2") | |
| # load index | |
| index_persisted = load_index_from_storage(storage_context, index_id="vector_index") | |
| async def clean_trial_text(text): | |
| """ | |
| Cleans text by removing everything starting from the word 'Reference Papers' | |
| and any special characters like '*'. | |
| """ | |
| # Remove special characters like '*' | |
| text = re.sub(r'\*+', '', text).strip() | |
| # Find the position of 'Reference Papers' and truncate the text | |
| reference_start = re.search(r'\bReference Papers\b', text, re.IGNORECASE) | |
| if reference_start: | |
| text = text[:reference_start.start()].strip() | |
| return text | |
| async def process_criteria(text): | |
| """ | |
| Processes the query response text, removing special characters and cleaning it | |
| up to the word 'Reference Papers'. | |
| """ | |
| text = re.sub(r'#+\s*', '', text) # Remove headings like '###' | |
| text = re.sub(r'(Criteria)\n\s*\n(\d+\.)', r'\1\n\2', text) # Fix spacing issues | |
| text = await clean_trial_text(text) # Clean up text until 'Reference Papers' | |
| return text | |
| async def extract_criteria(text): | |
| """Extracts inclusion and exclusion criteria from text.""" | |
| patterns = { | |
| "inclusion": r'Inclusion Criteria:?(.*?)(?=Exclusion Criteria)', | |
| "exclusion": r'Exclusion Criteria:?(.*?)(?=Reference Papers|\n\n\n)' | |
| } | |
| inclusion = re.search(patterns["inclusion"], text, re.DOTALL | re.IGNORECASE) | |
| exclusion = re.search(patterns["exclusion"], text, re.DOTALL | re.IGNORECASE) | |
| return ( | |
| "Inclusion Criteria:\n" + (inclusion.group(1).strip() if inclusion else "Not found") + "\n\n" + | |
| "Exclusion Criteria:\n" + (exclusion.group(1).strip() if exclusion else "Not found") | |
| ) | |
| async def run_function_on_text(top_k, study_obj, study_type, phase, purpose, allocation, intervention_model, Masking, conditions, interventions, location_countries, removed_location_countries): | |
| """Runs the main function to process study information and generate formatted output.""" | |
| # Set up query engine | |
| query_engine_get_study = CitationQueryEngine.from_args( | |
| index_persisted, | |
| similarity_top_k=top_k, | |
| citation_chunk_size=2048, | |
| verbose=True, | |
| node_postprocessors=[SimilarityPostprocessor(similarity_cutoff=0.7)], | |
| use_async=True | |
| ) | |
| # Build prompt | |
| study_information = f""" | |
| # Study Objectives/Description | |
| {study_obj} | |
| # Intervention | |
| {interventions} | |
| # Location | |
| - Location_Countries: {location_countries} | |
| - Removed Location: {removed_location_countries} | |
| # Conditions | |
| Cancer {conditions} | |
| # Study Design | |
| - Study Type: {study_type} | |
| - Phase: {phase} | |
| - Primary Purpose: {purpose} | |
| - Allocation: {allocation} | |
| - Interventional Model: {intervention_model} | |
| - Masking: None {Masking} | |
| """ | |
| # Query | |
| query_response = await query_engine_get_study.aquery(f""" | |
| Based on the provided instructions and clinical trial information, generate the new eligibility criteria by analyzing the related studies and clinical trial information. | |
| ### Instruction: | |
| Find suitable papers that have relevant or similar to the clinical trial information(### Clinical Trial Information). | |
| Prioritize the following topics when finding related studies: | |
| 1. Study Objectives | |
| 2. Study Design and Phases | |
| 3. Conditions | |
| 4. Intervention/Treatment | |
| 5. Location | |
| Criteria generation: | |
| As a clinical researcher, generate new eligibility criteria for given clinical trial information. | |
| Analyze the information from related studies for more precise new eligibility criteria generation. | |
| Ensure the criteria are clear, specific, and reasonable for a clinical research information. | |
| Reference Papers generation: | |
| Please give us NCT IDs and study names for {top_k} used papers. | |
| Please follows the pattern of the output(### Pattern of the output). | |
| -------------------------------------------------- | |
| ### Clinical Trial Information | |
| {study_information} | |
| -------------------------------------------------- | |
| ### Pattern of the output | |
| Inclusion Criteria | |
| 1. | |
| 2. | |
| . | |
| . | |
| . | |
| Exclusion Criteria | |
| 1. | |
| 2. | |
| . | |
| . | |
| . | |
| Reference Papers | |
| 1.NCT ID: | |
| Study Name: | |
| Condition: | |
| Intervention/Treatment: | |
| 2.NCT ID: | |
| Study Name: | |
| Condition: | |
| Intervention/Treatment: | |
| . | |
| . | |
| . | |
| """ | |
| ) | |
| response = query_response.response | |
| if response != "Empty Response": | |
| final_response = await process_criteria(response) | |
| # Extract and format references | |
| pattern = r'Reference Papers\s*(.+)$' | |
| match = re.search(pattern, response, re.DOTALL | re.IGNORECASE) | |
| ext_ref = match.group(1) if match else "" | |
| split_ref = re.split(r'\n*\d+\.\s+', ext_ref)[1:] | |
| formatted_ref = [] | |
| for i, ref in enumerate(split_ref, 1): | |
| nct_id = re.search(r'NCT[_ ]ID: (NCT\d+)', ref) | |
| if not nct_id: | |
| nct_id = re.search(r'(NCT\d+)', ref) | |
| if not nct_id: | |
| continue | |
| study_name = re.search(r'Study[_ ]Name:?\s*(.*?)(?=\n|;|Condition:|Intervention/Treatment:|$)', ref, re.DOTALL) | |
| condition = re.search(r'Condition:?\s*(.*?)(?=\n|;|Intervention/Treatment:|$)', ref, re.DOTALL) | |
| intervention = re.search(r'Intervention/Treatment:?\s*(.*?)(?=\n|$)', ref, re.DOTALL) | |
| formatted_ref.append([ | |
| i, | |
| f'<a href="https://clinicaltrials.gov/study/{nct_id.group(1)}"><u>{nct_id.group(1)}</u></a>', | |
| study_name.group(1).strip() if study_name else "", | |
| condition.group(1).strip() if condition else "", | |
| intervention.group(1).strip() if intervention else "" | |
| ]) | |
| else: | |
| final_response, formatted_ref = "Empty Response", [] | |
| return final_response, formatted_ref | |
| # Place holder | |
| place_holder = f"""Study Objectives | |
| The purpose of this study is to evaluate the safety, tolerance and efficacy of Liposomal Paclitaxel With Nedaplatin as First-line in patients with Advanced or Recurrent Esophageal Carcinoma | |
| Conditions: Esophageal Carcinoma | |
| Intervention / Treatment: | |
| DRUG: Liposomal Paclitaxel, | |
| DRUG: Nedaplatin | |
| Location: China | |
| Study Design and Phases | |
| Study Type: INTERVENTIONAL | |
| Phase: PHASE2 Primary Purpose: | |
| TREATMENT Allocation: NA | |
| Interventional Model: SINGLE_GROUP Masking: NONE | |
| """ | |
| objective_place_holder = f"""Example: The purpose of this study is to evaluate the safety, tolerance and efficacy of Liposomal Paclitaxel With Nedaplatin as First-line in patients with Advanced or Recurrent Esophageal Carcinoma | |
| """ | |
| conditions_place_holder = f"""Example: Esophageal Carcinoma | |
| """ | |
| interventions_place_holder = f"""Example: | |
| - Drug: irinotecan hydrochloride | |
| - Given IV | |
| - Other Names: | |
| - Campto | |
| - Camptosar | |
| - CPT-11 | |
| - irinotecan | |
| - U-101440E | |
| - Drug: Amoxicillin hydrate | |
| - Amoxicillin hydrate (potency) | |
| - Biological: Pneumococcal Vaccine | |
| - Subcutaneously on Day 0 | |
| - Other Names: | |
| - Prevnar | |
| - Drug: Doxorubicin, Cotrimoxazole, Carboplatin, Ifosfamide | |
| - Drug: Irinotecan | |
| - Irinotecan will be administered at a dose of 180mg/m2 IV over 90 minutes on day 21 every 42 days. | |
| - Other Names: | |
| - CAMPTOSAR™ | |
| - Drug: Placeblo | |
| - Placebo tablet | |
| """ | |
| with gr.Blocks() as demo: | |
| # Study description | |
| with gr.Row(): | |
| gr.Markdown("# Research Information"), | |
| with gr.Row(): | |
| study_obj_box = gr.Textbox( | |
| label="Study Objective / Study Description", | |
| placeholder=objective_place_holder, | |
| lines=10) | |
| # Conditions | |
| with gr.Row(): | |
| gr.Markdown("# Conditions"), | |
| with gr.Row(): | |
| conditions_box = gr.Textbox( | |
| label="Conditions / Disease", | |
| info="Primary condition or cancer type being studied in the trial", | |
| placeholder=conditions_place_holder, | |
| ) | |
| #Interventions | |
| with gr.Row(): | |
| gr.Markdown("# Interventions / Drugs"), | |
| with gr.Row(): | |
| intervention_box = gr.Textbox( | |
| label="Intervention Type", | |
| info="A process or action studied in a clinical trial, including drugs, devices, procedures, vaccines, or noninvasive approaches.", | |
| placeholder=interventions_place_holder, | |
| # lines=5, | |
| ) | |
| # Study Design | |
| with gr.Row(): | |
| gr.Markdown("# Study Design"), | |
| with gr.Column(): | |
| study_type_box = gr.Radio( | |
| ["Expanded Access", "Interventional", "Observational"], | |
| label="Study Type", | |
| ) | |
| phase_box= gr.CheckboxGroup( | |
| ["Not Applicable", "Early Phase 1", "Phase 1", "Phase 2", "Phase 3", "Phase 4"], | |
| label="Phase" | |
| ) | |
| purpose_box = gr.Radio( | |
| ["Treatment", "Prevention", "Diagnostic", "Educational/Counseling/Training", "Supportive Care", "Screening", "Health Services Research", "Basic Science", "Device Feasibility", "Other"], | |
| label="Primary Purpose" | |
| ) | |
| allocation_box = gr.Radio( | |
| ["Randomized", "Non-Randomized", "N/A"], | |
| label="Allocation" | |
| ) | |
| intervention_model_box = gr.Radio( | |
| ["Parallel", "Single-Group", "Crossover", "Factorial", "Sequential"], | |
| label="Interventional Model" | |
| ) | |
| masking_box = gr.Radio( | |
| ["None (Open Label)", "Single", "Double", "Triple", "Quadruple"], | |
| label="Masking" | |
| ) | |
| #Location | |
| with gr.Row(): | |
| gr.Markdown("# Location"), | |
| with gr.Column(): | |
| location_box = gr.Textbox( | |
| label="Location (Countries)", | |
| ) | |
| removed_location_box = gr.Textbox( | |
| label="Removed Location (Countries)", | |
| ) | |
| # Reference paper amount | |
| with gr.Row(): | |
| gr.Markdown("# Reference paper"), | |
| with gr.Row(): | |
| top_k_box = gr.Slider( | |
| label="Number of Reference Papers", | |
| info="Note: The number of reference papers may vary based on relevance. Only references that meet the similarity threshold will be included, so the final number may be less than specified.", | |
| value=10, | |
| minimum=10, | |
| maximum=30, | |
| step=1, | |
| ) | |
| # Submit & Clear | |
| with gr.Row(): | |
| submit_button = gr.Button("Submit") | |
| clear_button = gr.Button("Clear") | |
| # Output | |
| with gr.Row(): | |
| gr.Markdown("# Eligibility Criteria Generation"), | |
| with gr.Row(): | |
| with gr.Column(): | |
| base_box = gr.Textbox( | |
| label="Response", | |
| lines=15, | |
| interactive=False) | |
| with gr.Row(): | |
| ref_table = gr.Dataframe( | |
| label="Reference", | |
| headers=["No.",'Study Link', 'Study Title', 'Interventions', 'Conditions'], | |
| datatype=["markdown","html","markdown", "markdown","markdown"], | |
| wrap=True, | |
| interactive=False) | |
| # with gr.Column(): | |
| # rag_box = gr.Textbox( | |
| # label="Response 2", | |
| # lines=15, | |
| # interactive=False) | |
| # with gr.Column(): | |
| # combine_box = gr.Textbox( | |
| # label="Response 3", | |
| # lines=15, | |
| # interactive=False) | |
| with gr.Row(): | |
| regenerate_button = gr.Button("Regenerate Criteria") | |
| inputs_information = [top_k_box, study_obj_box, study_type_box, phase_box, purpose_box, allocation_box, intervention_model_box, masking_box, conditions_box, intervention_box, location_box, removed_location_box] | |
| outputs_information = [base_box,ref_table] | |
| # outputs_information = [base_box, rag_box,combine_box] | |
| submit_button.click( | |
| run_function_on_text, | |
| inputs=inputs_information, | |
| outputs=outputs_information | |
| ) | |
| regenerate_button.click( | |
| run_function_on_text, | |
| inputs=inputs_information, | |
| outputs=outputs_information | |
| ) | |
| clear_button.click(lambda : [None] * len(inputs_information), outputs=inputs_information) | |
| #Clear all | |
| with gr.Row(): | |
| clear_all_button = gr.Button("Clear All") | |
| # flag_response = [selected_response] | |
| all_information = inputs_information + outputs_information #+ flag_response | |
| clear_all_button.click(lambda : [None] * len(all_information), outputs=all_information) | |
| if __name__ == "__main__": | |
| demo.launch(debug=True) | |
| # demo.queue(max_size=20,default_concurrency_limit=5 ).launch(server_name="0.0.0.0", server_port=7860,debug=True, share=True) |