Spaces:
Sleeping
Sleeping
| import os | |
| import time | |
| import asyncio | |
| from llama_index.core.query_engine import CitationQueryEngine | |
| from llama_index.core import VectorStoreIndex | |
| from llama_index.core import Settings | |
| from llama_index.embeddings.huggingface import HuggingFaceEmbedding | |
| from llama_index.llms.gemini import Gemini | |
| from llama_index.core.postprocessor import SimilarityPostprocessor | |
| from llama_index.core.storage.docstore import SimpleDocumentStore | |
| from llama_index.core import StorageContext, load_index_from_storage | |
| import re | |
| import pandas as pd | |
| import gradio as gr | |
| import logging | |
# Emit INFO-level log records so the libraries' activity is visible.
logging.basicConfig(level=logging.INFO)

# Secrets come from the environment; indexing os.environ directly means a
# missing variable raises KeyError at import time rather than later.
# NOTE(review): none of these three names is referenced by active code in the
# visible part of this file -- confirm whether they are still needed.
token_w = os.environ["token_w"]
HF_TOKEN = os.environ["token_r"]
API_KEY = os.environ["GOOGLE_API_KEY"]

# Generation settings passed to the Gemini client.  top_p / top_k are
# deliberately not set here.
generation_config = dict(temperature=0, max_output_tokens=8192)

# One entry per harm category, each with the "BLOCK_NONE" threshold.
safety_settings = [
    {"category": harm_category, "threshold": "BLOCK_NONE"}
    for harm_category in (
        "HARM_CATEGORY_HARASSMENT",
        "HARM_CATEGORY_HATE_SPEECH",
        "HARM_CATEGORY_SEXUALLY_EXPLICIT",
        "HARM_CATEGORY_DANGEROUS_CONTENT",
    )
]

llm = Gemini(
    model="models/gemini-1.5-flash-002",
    generation_config=generation_config,
    safety_settings=safety_settings,
)

# Embedding model used for the vector index.
embed_model_name = "BAAI/bge-small-en-v1.5"
embed_model = HuggingFaceEmbedding(model_name=embed_model_name)

# Register both models as the llama-index global defaults.
Settings.llm = llm
Settings.embed_model = embed_model

# Re-open the persisted storage in ./VectorStore and load the index saved
# under the id "vector_index".
storage_context = StorageContext.from_defaults(persist_dir="VectorStore")
index_persisted = load_index_from_storage(
    storage_context,
    index_id="vector_index",
)
async def remove_ref(text):
    """Return *text* without its trailing "Reference Papers" section.

    Everything from the first whole-word, case-insensitive occurrence of
    ``Reference Papers`` onwards is discarded and the remainder is stripped
    of surrounding whitespace.  If the heading does not occur, the whole
    text is returned stripped.

    The coroutine never awaits anything itself; it stays ``async`` so that
    existing ``await remove_ref(...)`` call sites keep working.

    Args:
        text: The generated answer.  ``None`` or ``""`` is treated as
            "no text" instead of raising.

    Returns:
        The stripped text that precedes the heading, or ``""`` for empty
        input.
    """
    if not text:
        return ""
    # Only the part before the first heading is used, so one split suffices.
    before_heading = re.split(
        r'\bReference Papers\b', text, maxsplit=1, flags=re.IGNORECASE
    )[0]
    return before_heading.strip()
# Splits the reference section into one chunk per numbered entry.  The first
# alternative accepts a line-leading "N." with no space after it ("1.NCT ID:",
# the layout the prompt asks for) without touching decimals such as "1.5";
# the second alternative is the original "N. " rule.
_REFERENCE_ITEM_SPLIT = re.compile(
    r'^[ \t]*\d+\.(?!\d)\s*|\d+\.\s+', re.MULTILINE
)


def _build_study_information(study_obj, study_type, phase, purpose, allocation,
                             intervention_model, Masking, conditions,
                             interventions, location_countries,
                             removed_location_countries):
    """Render the form values as the "Clinical Trial Information" prompt section."""
    # NOTE(review): the literal "Cancer" in front of the conditions is kept as
    # found -- confirm it is intended.  The stray literal "None" that used to
    # precede {Masking} was removed: it produced contradictory lines such as
    # "Masking: None Double".
    return f"""
#Study Objectives/Study Description
{study_obj}
#Intervention
{interventions}
#Location
- Location_Countries: {location_countries}
- Removed Location: {removed_location_countries}
#Conditions
Cancer {conditions}
#Study Design
- Study Type: {study_type}
- Phase: {phase}
- Primary Purpose: {purpose}
- Allocation: {allocation}
- Interventional Model: {intervention_model}
- Masking: {Masking}
"""


def _build_query(top_k, study_information):
    """Return the full instruction prompt sent to the citation query engine."""
    return f"""
Based on the provided instructions and clinical trial information, generate the new eligibility criteria by analyzing the related studies and clinical trial information.
Find suitable papers that have relevant or similar to the clinical trial information(### Clinical Trial Information).
Prioritize the following topics when finding related studies:
1. Study Objectives
2. Study Design and Phases
3. Conditions
4. Intervention/Treatment
Criteria generation:
As a clinical researcher, generate new eligibility criteria for given clinical trial information.
Analyze the information from related studies for more precise new eligibility criteria generation.
Ensure the criteria are clear, specific, and reasonable for a clinical research information.
Reference Papers generation:
Please give us NCT IDs and study names for {top_k} used papers.
Please follows the pattern of the output(### Pattern of the output).
--------------------------------------------------
### Clinical Trial Information
{study_information}
--------------------------------------------------
### Pattern of the output
Inclusion Criteria
1.
2.
.
.
.
Exclusion Criteria
1.
2.
.
.
.
Reference Papers
1.NCT ID:
Study Name:
Condition:
Intervention/Treatment:
2.NCT ID:
Study Name:
Condition:
Intervention/Treatment:
.
.
.
"""


def _reference_field(pattern, ref):
    """Return group 1 of *pattern* in *ref* without ``*`` markup, or '' when absent."""
    found = re.search(pattern, ref, re.DOTALL)
    if found is None:
        # The model sometimes omits a label; an empty cell beats a crash.
        return ''
    return re.sub(r'\*+', '', found.group(1).strip()).strip()


def _parse_references(response_text):
    """Convert the "Reference Papers" section of *response_text* into table rows."""
    section = re.search(
        r'Reference Papers:?\s*(.*?)(?:\n\n.*$|$)',
        response_text,
        re.DOTALL | re.IGNORECASE,
    )
    ext_ref = section.group(1) if section and section.group(1) else ''

    rows = []
    # The chunk before the first number is never a reference, hence [1:].
    for ref in _REFERENCE_ITEM_SPLIT.split(ext_ref)[1:]:
        # Prefer the labelled id; fall back to any NCT number in the chunk.
        nct_match = (
            re.search(r'NCT[_ ]ID: (NCT\d+)', ref)
            or re.search(r'(NCT\d+)', ref)
        )
        if nct_match is None:
            continue  # entries without an NCT id are not listed or numbered
        nct_id = nct_match.group(1)
        rows.append([
            len(rows) + 1,
            f'<a href="https://clinicaltrials.gov/study/{nct_id}"><u>{nct_id}</u></a>',
            _reference_field(
                r'Study Name:?\s*(.*?)(?=\n|Condition:|Intervention/Treatment:|$)', ref
            ),
            _reference_field(
                r'Condition:?\s*(.*?)(?=\n|Intervention/Treatment:|$)', ref
            ),
            _reference_field(
                r'Intervention/Treatment:?\s*(.*?)(?=\n|$)', ref
            ),
        ])
    return rows


def _clean_criteria(text):
    """Strip Markdown heading/bold markers and close the gap after "Criteria" titles."""
    cleaned = re.sub(r'##\s*', '', text).strip()
    cleaned = re.sub(r'#\s*', '', cleaned).strip()
    cleaned = re.sub(r'\*\*', '', cleaned).strip()
    return re.sub(r'(Criteria)\n\s*\n(\d+\.)', r'\1\n\2', cleaned).strip()


async def run_function_on_text(top_k,study_obj,study_type,phase,purpose,allocation,intervention_model,Masking,conditions,interventions,location_countries,removed_location_countries):
    """Generate eligibility criteria and a reference table for one study.

    Builds a citation query engine over the persisted index, sends a prompt
    assembled from the form values, and post-processes the answer.

    Args:
        top_k: Passed as ``similarity_top_k`` to the query engine and quoted
            in the prompt as the number of reference papers requested.
        study_obj, study_type, phase, purpose, allocation,
        intervention_model, Masking, conditions, interventions,
        location_countries, removed_location_countries: Form values that
            are interpolated verbatim into the prompt.

    Returns:
        A ``(criteria, references)`` pair.  ``criteria`` is the cleaned
        criteria text and ``references`` is a list of rows
        ``[number, link_html, study_name, condition, intervention]``.
        When the engine answers ``"Empty Response"`` the first element is
        the response object itself and the list is empty.
    """
    # Set up query engine
    query_engine_get_study = CitationQueryEngine.from_args(
        index_persisted,
        similarity_top_k=top_k,
        citation_chunk_size=2048,
        verbose=True,
        node_postprocessors=[SimilarityPostprocessor(similarity_cutoff=0.8)],
        use_async=True,
    )

    study_information = _build_study_information(
        study_obj, study_type, phase, purpose, allocation, intervention_model,
        Masking, conditions, interventions, location_countries,
        removed_location_countries,
    )
    query_response = await query_engine_get_study.aquery(
        _build_query(top_k, study_information)
    )

    response_text = query_response.response
    if response_text == "Empty Response":
        # NOTE(review): this branch hands back the response object rather
        # than its text, exactly as before.
        return query_response, []

    formatted_ref = _parse_references(response_text)
    combine_criteria = _clean_criteria(await remove_ref(response_text))
    return combine_criteria, formatted_ref
| # # LLM.complete | |
| # complete_response = await llm.acomplete(f""" | |
| # Based on the provided instructions and clinical trial information, generate the new eligibility criteria by analyzing clinical trial information(### Clinical Trial Information). | |
| # ### Instruction: | |
| # Criteria generation: | |
| # As a clinical researcher, generate new eligibility criteria for given clinical trial information. | |
| # Ensure the criteria are clear, specific, and reasonable for a clinical research information. | |
| # Prioritize the following topics in clinical trial information.: | |
| # 1. Study Objectives | |
| # 2. Study Design and Phases | |
| # 3. Conditions | |
| # 4. Intervention/Treatment | |
| # Please follow the pattern of the output(### Pattern of the output). | |
| # -------------------------------------------------- | |
| # ### Clinical Trial Information | |
| # {study_information} | |
| # -------------------------------------------------- | |
| # ### Pattern of the output | |
| # Inclusion Criteria | |
| # 1. | |
| # 2. | |
| # . | |
| # . | |
| # . | |
| # Exclusion Criteria | |
| # 1. | |
| # 2. | |
| # . | |
| # . | |
| # . | |
| # """ | |
| # ) | |
| # combine_response = await llm.acomplete(f""" | |
| # Based on the provided instructions clinical, clinical trial information, and criteria information, generate the appropriate eligibility criteria for ### Clinical Trial Information by analyze clinical trial information(### Clinical Trial Information), criteria 1 (### Criteria 1) and criteria 2 (### Criteria 2). | |
| # ### Instruction: | |
| # Criteria generation: | |
| # As a clinical researcher, generate appropriate eligibility criteria by analyzing given information. | |
| # Ensure the criteria are clear, specific, and reasonable for a clinical research information(### Clinical Trial Information). | |
| # Prioritize the following topics in clinical trial information.: | |
| # 1. Study Objectives | |
| # 2. Study Design and Phases | |
| # 3. Conditions | |
| # 4. Intervention/Treatment | |
| # Do not generate redundant inclusion and exclusion criteria. For example, if a criterion is included in one set of inclusion or exclusion criteria, do not include it again. | |
| # Reference Papers generation: | |
| # Please give us NCT IDs and study names from the references list in ### Criteria 1. | |
| # Please follow the pattern of the output(### Pattern of the output). | |
| # -------------------------------------------------- | |
| # ### Clinical Trial Information | |
| # {study_information} | |
| # -------------------------------------------------- | |
| # ### Criteria 1 | |
| # {query_response} | |
| # -------------------------------------------------- | |
| # ### Criteria 2 | |
| # {complete_response} | |
| # -------------------------------------------------- | |
| # ### Pattern of the output | |
| # Inclusion Criteria | |
| # 1. | |
| # 2. | |
| # . | |
| # . | |
| # . | |
| # Exclusion Criteria | |
| # 1. | |
| # 2. | |
| # . | |
| # . | |
| # . | |
| # Reference Papers | |
| # 1.NCT ID: | |
| # Study Name: | |
| # Condition: | |
| # Intervention/Treatment: | |
| # 2.NCT ID: | |
| # Study Name: | |
| # Condition: | |
| # Intervention/Treatment: | |
| # . | |
| # . | |
| # . | |
| # """ | |
| # ) | |
| # return query_response | |
| # return query_response,complete_response,combine_response | |
# Example texts for the input form.  None of them contains a replacement
# field, so they are plain string literals rather than f-strings.

# Complete worked example of a study description.
# NOTE(review): not referenced by the UI code visible in this file.
place_holder = """Study Objectives
The purpose of this study is to evaluate the safety, tolerance and efficacy of Liposomal Paclitaxel With Nedaplatin as First-line in patients with Advanced or Recurrent Esophageal Carcinoma
Conditions: Esophageal Carcinoma
Intervention / Treatment:
DRUG: Liposomal Paclitaxel,
DRUG: Nedaplatin
Location: China
Study Design and Phases
Study Type: INTERVENTIONAL
Phase: PHASE2 Primary Purpose:
TREATMENT Allocation: NA
Interventional Model: SINGLE_GROUP Masking: NONE
"""

# Greyed-out hint for the "Study Objective / Study Description" box.
objective_place_holder = """Example: The purpose of this study is to evaluate the safety, tolerance and efficacy of Liposomal Paclitaxel With Nedaplatin as First-line in patients with Advanced or Recurrent Esophageal Carcinoma
"""

# Greyed-out hint for the "Conditions / Disease" box.
conditions_place_holder = """Example: Esophageal Carcinoma
"""

# Greyed-out hint for the "Intervention type" box ("Placeblo" typo fixed).
interventions_place_holder = """Example:
- Drug: irinotecan hydrochloride
- Given IV
- Other Names:
- Campto
- Camptosar
- CPT-11
- irinotecan
- U-101440E
- Drug: Amoxicillin hydrate
- Amoxicillin hydrate (potency)
- Procedure: Stem cell transplant
- See Detailed Description section for details of treatment interventions.
- Biological: Pneumococcal Vaccine
- Subcutaneously on Day 0
- Other Names:
- Prevnar
- Drug: Doxorubicin, Cotrimoxazole, Carboplatin, Ifosfamide
- Drug: Irinotecan
- Irinotecan will be administered at a dose of 180mg/m2 IV over 90 minutes on day 21 every 42 days.
- Other Names:
- CAMPTOSAR™
- Drug: Placebo
- Placebo tablet
"""

# Markdown link to the trial registry (the trailing space is kept).
# NOTE(review): only referenced from a commented-out placeholder argument.
prefilled_value = "[Clinicaltrials.gov](https://clinicaltrials.gov/) "
# Initial value of the reference-count slider; the Clear buttons restore it.
DEFAULT_TOP_K = 5

# NOTE(review): the original indentation was lost, so container nesting below
# is reconstructed.  The two least certain spots are marked individually.
with gr.Blocks() as demo:
    # ---- Number of reference papers to retrieve ----
    with gr.Row():
        gr.Markdown("# Reference paper")
    with gr.Row():
        top_k_box = gr.Slider(
            label="Amount of reference paper",
            value=DEFAULT_TOP_K,
            minimum=0,
            maximum=30,
            step=1,
        )

    # ---- Study description ----
    with gr.Row():
        gr.Markdown("# Research Information")
    with gr.Row():
        study_obj_box = gr.Textbox(
            label="Study Objective / Study Description",
            placeholder=objective_place_holder,
            lines=10,
        )

    # ---- Conditions ----
    with gr.Row():
        gr.Markdown("# Conditions")
    with gr.Row():
        conditions_box = gr.Textbox(
            label="Conditions / Disease",
            info="Primary Disease or Condition of Cancer Being Studied in the Trial, or the Focus of the Study",
            placeholder=conditions_place_holder,
        )

    # ---- Interventions ----
    with gr.Row():
        gr.Markdown("# Interventions / Drugs")
    with gr.Row():
        intervention_box = gr.Textbox(
            label="Intervention type",
            info="A process or action studied in a clinical trial, including drugs, devices, procedures, vaccines, or noninvasive approaches.",
            placeholder=interventions_place_holder,
        )

    # ---- Study design ----
    with gr.Row():
        gr.Markdown("# Study Design")
    # NOTE(review): assumed to be a sibling of the heading row, not nested in it.
    with gr.Column():
        study_type_box = gr.Radio(
            ["Expanded Access", "Interventional", "Observational"],
            label="Study Type",
        )
        phase_box = gr.CheckboxGroup(
            ["Not Applicable", "Early Phase 1", "Phase 1", "Phase 2", "Phase 3", "Phase 4"],
            label="Phase",
        )
        purpose_box = gr.Radio(
            ["Treatment", "Prevention", "Diagnostic", "Educational/Counseling/Training", "Supportive Care", "Screening", "Health Services Research", "Basic Science", "Device Feasibility", "Other"],
            label="Primary Purpose",
        )
        allocation_box = gr.Radio(
            ["Randomized", "Non-Randomized", "N/A"],
            label="Allocation",
        )
        intervention_model_box = gr.Radio(
            ["Parallel", "Single-Group", "Crossover", "Factorial", "Sequential"],
            label="Interventional Model",
        )
        masking_box = gr.Radio(
            ["None (Open Label)", "Single", "Double", "Triple", "Quadruple"],
            label="Masking",
        )

    # ---- Location ----
    with gr.Row():
        gr.Markdown("# Location")
    with gr.Column():
        location_box = gr.Textbox(
            label="Location (Countries)",
        )
        removed_location_box = gr.Textbox(
            label="Removed Location (Countries)",
        )

    # ---- Submit & Clear ----
    with gr.Row():
        submit_button = gr.Button("Submit")
        clear_button = gr.Button("Clear")

    # ---- Output ----
    with gr.Row():
        gr.Markdown("# Eligibility Criteria Generation")
    with gr.Row():
        with gr.Column():
            base_box = gr.Textbox(
                label="Response",
                lines=15,
                interactive=False,
            )
            # NOTE(review): the reference table is assumed to sit under the
            # response box inside the same column.
            with gr.Row():
                ref_table = gr.Dataframe(
                    label="Reference",
                    headers=["No.", 'Link', 'Study name', 'Condition', 'Intervention'],
                    datatype=["markdown", "html", "markdown", "markdown", "markdown"],
                    wrap=True,
                    interactive=False,
                )
    with gr.Row():
        regenerate_button = gr.Button("Regenerate")

    # The order of inputs_information must match the parameter order of
    # run_function_on_text.
    inputs_information = [top_k_box, study_obj_box, study_type_box, phase_box, purpose_box, allocation_box, intervention_model_box, masking_box, conditions_box, intervention_box, location_box, removed_location_box]
    outputs_information = [base_box, ref_table]

    # Submit and Regenerate run exactly the same generation.
    for trigger_button in (submit_button, regenerate_button):
        trigger_button.click(
            run_function_on_text,
            inputs=inputs_information,
            outputs=outputs_information,
        )

    def _reset_values(components):
        """Return one reset value per component: the slider default, else None."""
        # Sending None to the slider would discard its default of 5.
        # NOTE(review): Gradio appears to fall back to the slider minimum (0)
        # for a None value, which would request zero references -- confirm.
        return [
            DEFAULT_TOP_K if component is top_k_box else None
            for component in components
        ]

    clear_button.click(
        lambda: _reset_values(inputs_information),
        outputs=inputs_information,
    )

    # ---- Clear all (inputs and outputs) ----
    with gr.Row():
        clear_all_button = gr.Button("Clear All")
    all_information = inputs_information + outputs_information
    clear_all_button.click(
        lambda: _reset_values(all_information),
        outputs=all_information,
    )

if __name__ == "__main__":
    demo.launch(debug=True)
| # custom_css = """ | |
| # .gradio-container { | |
| # font-family: 'Roboto', sans-serif; | |
| # } | |
| # .main-header { | |
| # text-align: center; | |
| # color: #4a4a4a; | |
| # margin-bottom: 2rem; | |
| # } | |
| # .tab-header { | |
| # font-size: 1.2rem; | |
| # font-weight: bold; | |
| # margin-bottom: 1rem; | |
| # } | |
| # .custom-chatbot { | |
| # border-radius: 10px; | |
| # box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1); | |
| # } | |
| # .custom-button { | |
| # background-color: #3498db; | |
| # color: white; | |
| # border: none; | |
| # padding: 10px 20px; | |
| # border-radius: 5px; | |
| # cursor: pointer; | |
| # transition: background-color 0.3s ease; | |
| # } | |
| # .custom-button:hover { | |
| # background-color: #2980b9; | |
| # } | |
| # """ | |
| # # Define Gradio theme | |
| # theme = gr.themes.Default( | |
| # primary_hue="zinc", | |
| # secondary_hue="red", | |
| # neutral_hue="neutral", | |
| # font=[gr.themes.GoogleFont('Roboto'), "sans-serif"] | |
| # ) |