Spaces:
Runtime error
Runtime error
Upload 4 files
Browse files- README.txt +13 -0
- app.py +81 -0
- requirements.txt +71 -0
- test.json +12 -0
README.txt
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: ⚕️MedNER - Biomed Entity Recognizer
|
| 3 |
+
emoji: 👩⚕️🩺⚕️🙋
|
| 4 |
+
colorFrom: purple
|
| 5 |
+
colorTo: red
|
| 6 |
+
sdk: gradio
|
| 7 |
+
sdk_version: 3.8
|
| 8 |
+
app_file: app.py
|
| 9 |
+
pinned: false
|
| 10 |
+
license: apache-2.0
|
| 11 |
+
---
|
| 12 |
+
|
| 13 |
+
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
app.py
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import json
|
| 4 |
+
from collections import defaultdict
|
| 5 |
+
|
| 6 |
+
# Create tokenizer for biomed model
|
| 7 |
+
from transformers import pipeline, AutoTokenizer, AutoModelForTokenClassification
|
| 8 |
+
# The tokenizer and the model weights are loaded from the same checkpoint.
MODEL_ID = "d4data/biomedical-ner-all"
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForTokenClassification.from_pretrained(MODEL_ID)

# NER pipeline shared by every request handled by `ner`.
pipe = pipeline(
    task="ner",
    model=model,
    tokenizer=tokenizer,
    aggregation_strategy="simple",
)
|
| 11 |
+
|
| 12 |
+
# Matplotlib for entity graph
|
| 13 |
+
import matplotlib.pyplot as plt
|
| 14 |
+
# Select the non-interactive Agg backend so figures render without a display.
plt.switch_backend("Agg")
|
| 15 |
+
|
| 16 |
+
# Load examples from JSON
EXAMPLES_PATH = "examples.json"


def load_examples(path):
    """Read the example notes offered in the demo UI.

    Args:
        path: Location of a JSON file holding a list of objects, each with a
            "text" and a "label" key.

    Returns:
        A dict mapping each example's text to its label. An empty dict is
        returned when the file does not exist, so a missing examples file
        disables the examples instead of crashing the app at import time.
    """
    import warnings

    try:
        # JSON is UTF-8 by specification; do not depend on the locale default.
        with open(path, "r", encoding="utf-8") as f:
            example_json = json.load(f)
    except FileNotFoundError:
        warnings.warn(f"{path!r} not found; starting without examples")
        return {}
    return {x["text"]: x["label"] for x in example_json}


EXAMPLES = load_examples(EXAMPLES_PATH)
|
| 21 |
+
|
| 22 |
+
def group_by_entity(raw):
    """Tally how many recognized spans fall under each entity group.

    Args:
        raw: Iterable of mappings, each carrying an "entity_group" key.

    Returns:
        A ``defaultdict(int)`` mapping entity group -> number of occurrences.
    """
    counts = defaultdict(int)
    for group in (span["entity_group"] for span in raw):
        counts[group] = counts[group] + 1
    return counts
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
def plot_to_figure(grouped):
    """Draw a bar chart of entity counts.

    Args:
        grouped: Mapping of entity group name -> count; keys become the bar
            labels and values the bar heights.

    Returns:
        A matplotlib ``Figure`` holding the chart.
    """
    # Build the figure directly rather than through pyplot: ``plt.figure()``
    # registers every figure in pyplot's global state until it is explicitly
    # closed, so a long-running app would accumulate one open figure per call.
    from matplotlib.figure import Figure

    fig = Figure()
    ax = fig.subplots()
    ax.bar(x=list(grouped.keys()), height=list(grouped.values()))
    ax.margins(0.2)
    # Leave room under the axes for the vertical tick labels.
    fig.subplots_adjust(bottom=0.4)
    ax.tick_params(axis="x", labelrotation=90)
    return fig
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
def ner(text):
    """Run the NER pipeline on a note and assemble the four UI outputs.

    Args:
        text: The note text to analyze.

    Returns:
        A 4-tuple ``(ner_content, meta, label, figure)``: the text with its
        entity spans, summary counts, the label stored in ``EXAMPLES`` for
        this exact text ("Unknown" if absent), and the bar chart produced by
        ``plot_to_figure``.
    """
    predictions = pipe(text)

    # Re-key each prediction: "entity_group" is exposed as "entity".
    spans = []
    for pred in predictions:
        spans.append(
            {
                "entity": pred["entity_group"],
                "word": pred["word"],
                "score": pred["score"],
                "start": pred["start"],
                "end": pred["end"],
            }
        )
    highlighted = {"text": text, "entities": spans}

    counts = group_by_entity(predictions)
    summary = {
        "entity_counts": counts,
        "entities": len(counts),  # number of distinct entity groups
        "counts": sum(counts.values()),  # total number of spans
    }
    label = EXAMPLES.get(text, "Unknown")
    chart = plot_to_figure(counts)

    return (highlighted, summary, label, chart)
|
| 66 |
+
|
| 67 |
+
|
| 68 |
+
# Widgets are listed in the same order as the values returned by `ner`.
note_input = gr.Textbox(label="Note text", value="")
result_views = [
    gr.HighlightedText(label="NER", combine_adjacent=True),
    gr.JSON(label="Entity Counts"),
    gr.Label(label="Rating"),
    gr.Plot(label="Bar"),
]

interface = gr.Interface(
    fn=ner,
    inputs=note_input,
    outputs=result_views,
    examples=list(EXAMPLES.keys()),
    allow_flagging="never",
)

interface.launch()
|
requirements.txt
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
aiohttp==3.8.3
|
| 2 |
+
aiosignal==1.2.0
|
| 3 |
+
anyio==3.6.1
|
| 4 |
+
async-timeout==4.0.2
|
| 5 |
+
attrs==22.1.0
|
| 6 |
+
bcrypt==4.0.1
|
| 7 |
+
black==22.10.0
|
| 8 |
+
certifi==2022.9.24
|
| 9 |
+
cffi==1.15.1
|
| 10 |
+
charset-normalizer==2.1.1
|
| 11 |
+
click==8.1.3
|
| 12 |
+
contourpy==1.0.5
|
| 13 |
+
cryptography==38.0.1
|
| 14 |
+
cycler==0.11.0
|
| 15 |
+
fastapi==0.85.0
|
| 16 |
+
ffmpy==0.3.0
|
| 17 |
+
filelock==3.8.0
|
| 18 |
+
fonttools==4.37.4
|
| 19 |
+
frozenlist==1.3.1
|
| 20 |
+
fsspec==2022.8.2
|
| 21 |
+
gradio==3.4.1
|
| 22 |
+
h11==0.12.0
|
| 23 |
+
httpcore==0.15.0
|
| 24 |
+
httpx==0.23.0
|
| 25 |
+
huggingface-hub==0.10.0
|
| 26 |
+
idna==3.4
|
| 27 |
+
Jinja2==3.1.2
|
| 28 |
+
kiwisolver==1.4.4
|
| 29 |
+
linkify-it-py==1.0.3
|
| 30 |
+
markdown-it-py==2.1.0
|
| 31 |
+
MarkupSafe==2.1.1
|
| 32 |
+
matplotlib==3.6.1
|
| 33 |
+
mdit-py-plugins==0.3.1
|
| 34 |
+
mdurl==0.1.2
|
| 35 |
+
multidict==6.0.2
|
| 36 |
+
mypy-extensions==0.4.3
|
| 37 |
+
numpy==1.23.3
|
| 38 |
+
orjson==3.8.0
|
| 39 |
+
packaging==21.3
|
| 40 |
+
pandas==1.5.0
|
| 41 |
+
paramiko==2.11.0
|
| 42 |
+
pathspec==0.10.1
|
| 43 |
+
Pillow==9.2.0
|
| 44 |
+
platformdirs==2.5.2
|
| 45 |
+
pycparser==2.21
|
| 46 |
+
pycryptodome==3.15.0
|
| 47 |
+
pydantic==1.10.2
|
| 48 |
+
pydub==0.25.1
|
| 49 |
+
PyNaCl==1.5.0
|
| 50 |
+
pyparsing==3.0.9
|
| 51 |
+
python-dateutil==2.8.2
|
| 52 |
+
python-multipart==0.0.5
|
| 53 |
+
pytz==2022.4
|
| 54 |
+
PyYAML==6.0
|
| 55 |
+
regex==2022.9.13
|
| 56 |
+
requests==2.28.1
|
| 57 |
+
rfc3986==1.5.0
|
| 58 |
+
six==1.16.0
|
| 59 |
+
sniffio==1.3.0
|
| 60 |
+
starlette==0.20.4
|
| 61 |
+
tokenizers==0.12.1
|
| 62 |
+
tomli==2.0.1
|
| 63 |
+
torch==1.12.1
|
| 64 |
+
tqdm==4.64.1
|
| 65 |
+
transformers==4.22.2
|
| 66 |
+
typing_extensions==4.4.0
|
| 67 |
+
uc-micro-py==1.0.1
|
| 68 |
+
urllib3==1.26.12
|
| 69 |
+
uvicorn==0.18.3
|
| 70 |
+
websockets==10.3
|
| 71 |
+
yarl==1.8.1
|
test.json
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"type": "service_account",
|
| 3 |
+
"project_id": "clinical-nlp-b9117",
|
| 4 |
+
"private_key_id": "6972d02311e8ee0c5b582551fbcf9c99b9169b58",
|
| 5 |
+
"private_key": "-----BEGIN PRIVATE KEY-----\nMIIEvQIBADANBgkqhkiG9w0BAQEFAASCBKcwggSjAgEAAoIBAQCmrSoB92G/ihxL\nzIk7Y8RUNc6Iezr6pZ+eSz2RGxEz2qPMfWjNeOJEAlACYJp4aUwyX5IHGb8Eh/oj\nkr7nVsgvuDyrTWpCAv16AuRycKgxvqj0+uDaVrF0vLgTumy62x5QM7i+n2YTDXoP\nXHMHX7yXZ6zc9Ibmm065f2kgWyjmIZDt+flTBYeBS203ZIzMBHhN1e1jdtzR36z/\n1MBmLjpRKvmuHF2SnraVjoRh7Xe6R99K8DxRQ61TJt9xLukvLBYelnqf2/cK8bZM\n5p2pErR4FE7ki3MX7HWdMJQSe+Uj10hurjNBdHcCaNUou5EL5+NRgqLow0tfatWC\n+Jpiw3K9AgMBAAECggEAGpT7YhzmBfos0RnpuQMMSLHcIoAkw9yuPDybsQy0DaUN\nAovtrvdcfqQvxnFJsXJ5qH79dwxwHnThO9MnhxWcD6A+bMOH8scvTcowTOASsvxJ\nTejE+41f99IxOVQ+Cv7vMrNM/3nEeb1ofhKsdbybAzqRoxuMeDLEt2jOh06Ck1D8\n/YV8kavGYR/VNxO2l7C5DZJYXgcm18ZrTFEXZes8bydZesoHl+JRVO1utjR2IhAj\nnYqqNaf5RXruEzXWxP0+jjEgg4NLFfqVnQTZFrLwokwc8NEMXf3dZJ0k0cHHmxOB\n6BHuPZhMOZ56U74PyWgCmbPp9g/SLt3iInpZ4ahmAQKBgQDhQwdbUEQ1q+KSMsMm\ndJl+ghX/Ff3uaZ7LjdBiOgtmTaIVbuf/bw0V9x8GbRGdJJyp546R5vhUE0zKzkMt\nTNdDNrWk3Zh4tCRHvPEHiqmDn91pWFeDDQf/OjKz+SFV31mQ050BOatZ8dBEy+md\nvHG8yLTB7oJvSpviim4ty15wIQKBgQC9a5jsBFB0fltHNJ0lZp7I2hF+aOqOngJM\nqEipPjJABJ4izGTOK/KW8CyWEP82nb6p7u9v0f4sV8CFWXG178DMv1NlRYzom3CQ\nkXdx+nRgO4oX4eEfYuoP2PxF0hCOwbh55NgFdwTt/dExX6bau4d9yQMV7o0TXpRW\nZzygOOTfHQKBgQC7ayhwyfymZydwmjmSAks/XX5tqN+IgGo1U/1/7GlVqdvkV01B\nUiUiFGTE1PRluXN7TYRqUjBky1YGGsz7oMYtTxScYh6ctszEvygPLUhSki0GnBDb\noXj42nQbF3mr19POUrJ7tX6irDWrN7lcmtBK0PbLr+ToMbw3JRP8mAsv4QKBgEac\nC18/pHYofAIpHMNKY7pff9HtbjJHuHe2648bPkQa9I/oPVOVklKtqREvuNM1LlPO\nW7cFQohpFb0fwIGfo/EvCPlhWcuD1gwuDaaRRDxzNWD9tJusla/epPup+L4efJQD\nuHshCNdmnEqZa2tyKGm9Osc8K56izQ0AYtsfGkIJAoGAMtaXTA96OXUvpEm4waQX\nOTbuEZQEdntnYWHacNrGlvwnNmvNC9hXwB38ijxXHEn0j1QUcV3w5QXFupwzjpZ2\nlIp9vTq1mOTVhHzmQmOb9DKKAE/2pi2HnekItncoQCBtgJ7k6tIk1KEfvXuQS/oM\nh8qPMwuMcQ/vKGhl3xLYo9M=\n-----END PRIVATE KEY-----\n",
|
| 6 |
+
"client_email": "firebase-adminsdk-qaxaj@clinical-nlp-b9117.iam.gserviceaccount.com",
|
| 7 |
+
"client_id": "117623958723912081118",
|
| 8 |
+
"auth_uri": "https://accounts.google.com/o/oauth2/auth",
|
| 9 |
+
"token_uri": "https://oauth2.googleapis.com/token",
|
| 10 |
+
"auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
|
| 11 |
+
"client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/firebase-adminsdk-qaxaj%40clinical-nlp-b9117.iam.gserviceaccount.com"
|
| 12 |
+
}
|