Update app.py
Browse files
app.py
CHANGED
|
@@ -1,101 +1,126 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
-
from transformers import pipeline
|
| 3 |
from rdkit import Chem
|
| 4 |
from rdkit.Chem import AllChem
|
| 5 |
from rdkit.Chem.Draw import rdMolDraw2D
|
| 6 |
import base64
|
| 7 |
import re
|
| 8 |
import py3Dmol
|
| 9 |
-
import
|
| 10 |
|
| 11 |
-
# Load
|
| 12 |
-
bio_gpt = pipeline("text-generation", model="microsoft/BioGPT-Large")
|
|
|
|
| 13 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
def drug_discovery(disease, symptoms):
|
| 15 |
-
#
|
| 16 |
prompt = (
|
| 17 |
-
f"
|
| 18 |
-
f"
|
| 19 |
-
f"
|
| 20 |
-
f"
|
| 21 |
-
f"SMILES: C1=CC=CC=C1 C(C(=O)O)N ..."
|
| 22 |
)
|
| 23 |
|
| 24 |
try:
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
return f"BioGPT error: {e}", "", "", ""
|
| 29 |
-
|
| 30 |
-
# Extract drug names and SMILES
|
| 31 |
-
drugs_match = re.search(r"Drugs:\s*(.+)", result)
|
| 32 |
-
smiles_match = re.search(r"SMILES:\s*(.+)", result)
|
| 33 |
|
| 34 |
-
|
| 35 |
-
|
|
|
|
|
|
|
|
|
|
| 36 |
|
| 37 |
-
|
| 38 |
-
|
|
|
|
|
|
|
| 39 |
|
| 40 |
-
|
| 41 |
-
|
| 42 |
|
| 43 |
-
|
| 44 |
-
for smiles in smiles_list:
|
| 45 |
mol = Chem.MolFromSmiles(smiles)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
AllChem.Compute2DCoords(mol)
|
| 47 |
-
drawer = rdMolDraw2D.MolDraw2DCairo(
|
| 48 |
drawer.DrawMolecule(mol)
|
| 49 |
drawer.FinishDrawing()
|
| 50 |
img_data = drawer.GetDrawingText()
|
| 51 |
img_base64 = base64.b64encode(img_data).decode("utf-8")
|
| 52 |
-
img_html
|
| 53 |
-
<div style="display:inline-block; margin:10px;">
|
| 54 |
-
<img src="data:image/png;base64,{img_base64}" width="120" height="120">
|
| 55 |
-
<p style="color:white; font-size:12px;">{smiles}</p>
|
| 56 |
-
</div>'''
|
| 57 |
|
| 58 |
-
# 3D
|
| 59 |
mol3d = Chem.AddHs(mol)
|
| 60 |
-
AllChem.EmbedMolecule(mol3d
|
| 61 |
AllChem.UFFOptimizeMolecule(mol3d)
|
| 62 |
mb = Chem.MolToMolBlock(mol3d)
|
| 63 |
-
|
|
|
|
| 64 |
viewer.addModel(mb, "mol")
|
| 65 |
viewer.setStyle({"stick": {"colorscheme": "cyanCarbon"}})
|
| 66 |
viewer.setBackgroundColor("black")
|
| 67 |
viewer.zoomTo()
|
| 68 |
viewer_html_raw = viewer._make_html()
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
|
|
|
| 73 |
|
| 74 |
-
|
| 75 |
-
|
|
|
|
| 76 |
|
| 77 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 78 |
|
| 79 |
-
|
| 80 |
|
|
|
|
| 81 |
iface = gr.Interface(
|
| 82 |
fn=drug_discovery,
|
| 83 |
inputs=[
|
| 84 |
-
gr.Textbox(label="
|
| 85 |
-
gr.Textbox(label="
|
| 86 |
],
|
| 87 |
outputs=[
|
| 88 |
-
gr.Textbox(label="
|
| 89 |
-
gr.Textbox(label="π§ͺ SMILES
|
| 90 |
gr.HTML(label="πΌοΈ 2D Molecules"),
|
| 91 |
-
gr.HTML(label="π¬ 3D Molecules")
|
| 92 |
],
|
| 93 |
-
title="
|
| 94 |
-
description="BioGPT +
|
| 95 |
css="""
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
|
|
|
|
|
|
| 99 |
)
|
| 100 |
|
| 101 |
iface.launch(share=True)
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
+
from transformers import pipeline, AutoTokenizer, AutoModelForMaskedLM, AutoModelForCausalLM
|
| 3 |
from rdkit import Chem
|
| 4 |
from rdkit.Chem import AllChem
|
| 5 |
from rdkit.Chem.Draw import rdMolDraw2D
|
| 6 |
import base64
|
| 7 |
import re
|
| 8 |
import py3Dmol
|
| 9 |
+
import random
|
| 10 |
|
| 11 |
+
# Load multiple models
import torch  # imported here because it is only needed to choose the inference device

# BioGPT text-generation pipeline. A fixed device=0 targets the first CUDA GPU
# and fails at start-up on CPU-only hosts, so fall back to -1 (CPU) when CUDA
# is not available.
bio_gpt = pipeline(
    "text-generation",
    model="microsoft/BioGPT-Large",
    device=0 if torch.cuda.is_available() else -1,
)

# PubMedBERT fill-mask pipeline (no device given, so the pipeline default is used).
# NOTE(review): `pubmed_bert` is not referenced anywhere else in the visible
# file - confirm it is still needed before paying its load time and memory.
pubmed_bert = pipeline("fill-mask", model="microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext")
|
| 14 |
|
| 15 |
+
# Helper function to clean and parse generated SMILES
|
| 16 |
+
def extract_valid_smiles(generated_text, count=3):
    """Return up to ``count`` SMILES-looking tokens that RDKit can parse.

    Candidate tokens are runs of at least five characters drawn from the
    SMILES alphabet used by the pattern below, not directly preceded or
    followed by an ASCII letter or digit. Each candidate is kept only if
    ``Chem.MolFromSmiles`` returns a molecule for it.

    Args:
        generated_text: Text to scan. ``None`` or an empty string yields ``[]``.
        count: Maximum number of SMILES strings to return. Values ``<= 0``
            yield ``[]``.

    Returns:
        A list of the matching substrings, in order of appearance.
    """
    # Guard the degenerate inputs up front: re.findall would raise on None, and
    # a limit that is only checked after appending would let count <= 0 return
    # one item.
    if not generated_text or count <= 0:
        return []

    candidates = re.findall(
        r"(?<![A-Za-z0-9])[A-Za-z0-9@+\-\[\]\(\)=#$]{5,}(?![A-Za-z0-9])",
        generated_text,
    )

    valid_smiles = []
    for candidate in candidates:
        # MolFromSmiles signals an unparseable string by returning None.
        if Chem.MolFromSmiles(candidate) is None:
            continue
        valid_smiles.append(candidate)
        if len(valid_smiles) >= count:
            break
    return valid_smiles
|
| 26 |
+
|
| 27 |
+
# Drug discovery function
|
| 28 |
def _render_2d_html(mol):
    """Return an ``<img>`` tag embedding a 300x300 PNG depiction of ``mol`` as base64."""
    AllChem.Compute2DCoords(mol)
    drawer = rdMolDraw2D.MolDraw2DCairo(300, 300)
    drawer.DrawMolecule(mol)
    drawer.FinishDrawing()
    img_base64 = base64.b64encode(drawer.GetDrawingText()).decode("utf-8")
    return (
        f'<img src="data:image/png;base64,{img_base64}" width="200" '
        f'style="margin:10px; border:1px solid #ccc; border-radius:12px;">'
    )


def _render_3d_html(mol):
    """Return an ``<iframe>`` holding a py3Dmol stick view of ``mol``, or ``""`` if no 3D geometry could be built."""
    import html  # stdlib; imported locally so this block stays self-contained

    mol3d = Chem.AddHs(mol)
    # EmbedMolecule reports failure with a negative conformer id. Optimising or
    # exporting a molecule without a conformer would raise and abort the whole
    # request, so skip the 3D view for this molecule instead.
    if AllChem.EmbedMolecule(mol3d) < 0:
        return ""
    AllChem.UFFOptimizeMolecule(mol3d)
    mol_block = Chem.MolToMolBlock(mol3d)

    viewer = py3Dmol.view(width=300, height=300)
    viewer.addModel(mol_block, "mol")
    viewer.setStyle({"stick": {"colorscheme": "cyanCarbon"}})
    viewer.setBackgroundColor("black")
    viewer.zoomTo()

    # The viewer markup is placed inside a double-quoted srcdoc attribute, so
    # quotes and ampersands must be entity-escaped or the first '"' in the
    # markup would terminate the attribute.
    # `_make_html` is a private py3Dmol method (leading underscore).
    srcdoc = html.escape(viewer._make_html(), quote=True)
    return (
        f'<iframe srcdoc="{srcdoc}" '
        f'width="320" height="320" frameborder="0" '
        f'style="border-radius: 12px; box-shadow: 0 6px 20px rgba(0,255,255,0.35);"></iframe>'
    )


def drug_discovery(disease, symptoms):
    """Generate a research summary and up to three candidate molecules for a disease.

    BioGPT is prompted twice: once for a free-text research summary and once
    for ``<SMILES> - <DrugName>`` pairs. Pairs whose SMILES RDKit can parse are
    rendered as a 2D PNG and an interactive 3D view.

    Args:
        disease: Disease name, interpolated into both prompts.
        symptoms: Symptom description, interpolated into both prompts.

    Returns:
        A 4-tuple ``(literature, molecules_text, images_html, viewers_html)``:
        the generated summary (or an error message), one ``name: SMILES`` line
        per molecule, the concatenated ``<img>`` tags, and the concatenated
        ``<iframe>`` viewers.
    """
    import logging  # stdlib; imported locally so this block stays self-contained

    log = logging.getLogger(__name__)

    # Prompt to BioGPT for biomedical insights
    prompt = (
        f"Imagine a novel disease '{disease}' with symptoms: {symptoms}.\n"
        f"Write a short research summary covering:\n"
        f"- Hypothetical causes\n- Suggested diagnostic methods\n- Possible treatments\n"
        f"- Names of potential experimental drugs\n"
    )

    # NOTE(review): `temperature` normally only influences generation when
    # sampling is enabled (do_sample=True) - confirm whether sampling was intended.
    try:
        literature = bio_gpt(prompt, max_length=512, temperature=0.7)[0]["generated_text"]
    except Exception:
        # Best effort: show a message in the UI, but keep the real cause in the log.
        # NOTE(review): the message prefix looks like a mis-decoded emoji; kept as found.
        log.exception("BioGPT literature generation failed")
        literature = "β οΈ Error: Could not retrieve literature using BioGPT."

    # Prompt for SMILES + Drug Names using BioGPT
    molecule_prompt = (
        f"Give 3 unique experimental drug-like SMILES strings with hypothetical drug names for treating '{disease}' "
        f"with symptoms: {symptoms}. Format: <SMILES> - <DrugName>"
    )

    try:
        smiles_response = bio_gpt(molecule_prompt, max_length=100)[0]["generated_text"]
    except Exception:
        log.exception("BioGPT molecule generation failed; using the demo molecule")
        smiles_response = "C1=CC=CC=C1 - DemoDrug"

    entries = re.findall(r"([A-Za-z0-9@+\-\[\]\(\)=#$]{5,})\s*-\s*(\w+)", smiles_response)

    # Keep the first three entries that actually parse. Slicing the raw matches
    # first would let unparseable candidates use up the quota.
    results = []
    for smiles, name in entries:
        if len(results) >= 3:
            break
        mol = Chem.MolFromSmiles(smiles)
        if mol is None:
            continue
        # 2D first, then 3D: same order of operations on `mol` as before.
        img_html = _render_2d_html(mol)
        viewer_html = _render_3d_html(mol)
        results.append((name, smiles, img_html, viewer_html))

    # If no valid molecule found, fall back to a text-only demo entry.
    if not results:
        results.append(("DemoDrug", "C1=CC=CC=C1", "", ""))

    # Combine outputs
    # NOTE(review): the line prefix looks like a mis-decoded emoji; kept as found.
    combined_molecules = "\n".join(
        f"π {drug_name}: {smiles}" for drug_name, smiles, _img, _view in results
    )
    combined_imgs = "".join(img for _name, _smiles, img, _view in results)
    combined_3d = "".join(view for _name, _smiles, _img, view in results)

    return literature.strip(), combined_molecules.strip(), combined_imgs, combined_3d
|
| 102 |
|
| 103 |
+
# Gradio UI
|
| 104 |
# NOTE(review): several label/title prefixes below look like mis-decoded emoji.
# They are runtime strings and are kept exactly as found - confirm against the
# original file's encoding.

# Two free-text inputs, pre-filled with an example case.
_input_components = [
    gr.Textbox(label="π¦ Enter Novel Disease Name", value="Neurospike Fever"),
    gr.Textbox(label="π©Ί Enter Symptoms", value="fever, neural tremors, fatigue"),
]

# One output per element of the tuple returned by drug_discovery, in order.
_output_components = [
    gr.Textbox(label="π Biomedical Insights"),
    gr.Textbox(label="π§ͺ Molecule Names + SMILES"),
    gr.HTML(label="πΌοΈ 2D Molecules"),
    gr.HTML(label="π¬ 3D Molecules"),
]

# Dark page theme plus spacing around the embedded images and viewers.
_page_css = """
body { background-color: #0d1117; color: #ffffff; font-family: Segoe UI, sans-serif; }
.gradio-container { padding: 20px; }
iframe, img { margin: 8px; }
"""

iface = gr.Interface(
    fn=drug_discovery,
    inputs=_input_components,
    outputs=_output_components,
    title="𧬠Experimental Drug Discovery for Unknown Diseases",
    description="Enter an unknown disease and symptoms. The AI (BioGPT + PubMedBERT) will generate biomedical research, name possible experimental drugs, and visualize 2D + 3D molecular structures.",
    css=_page_css,
    allow_flagging="never",
)

# Start the app with Gradio's share option enabled.
iface.launch(share=True)
|