alexandrlukashov committed on
Commit
1d354d1
·
verified ·
1 Parent(s): f1d838a

fixed demo

Browse files
Files changed (2) hide show
  1. app.py +1 -1
  2. interfaces/relation_e.py +84 -73
app.py CHANGED
@@ -1,5 +1,5 @@
1
  import subprocess
2
- subprocess.run(["pip", "install", "utca>=0.1.0"])
3
 
4
  import gradio as gr
5
  from interfaces import ner_interface, qa_interface, open_ie_interface, summarization_interface, landing_interface, relation_e_interface, universal_interface, classification_interface
 
1
  import subprocess
2
+ #subprocess.run(["pip", "install", "utca>=0.1.0"])
3
 
4
  import gradio as gr
5
  from interfaces import ner_interface, qa_interface, open_ie_interface, summarization_interface, landing_interface, relation_e_interface, universal_interface, classification_interface
interfaces/relation_e.py CHANGED
@@ -1,99 +1,112 @@
1
- from utca.core import RenameAttribute
2
- from utca.implementation.predictors import (
3
- GLiNERPredictor,
4
- GLiNERPredictorConfig
5
- )
6
- from utca.implementation.tasks import (
7
- GLiNER,
8
- GLiNERPreprocessor,
9
- GLiNERRelationExtraction,
10
- GLiNERRelationExtractionPreprocessor,
11
- )
12
- from typing import Dict, Union
13
  import gradio as gr
14
 
 
 
15
  text = """
16
- Dr. Paul Hammond, a renowned neurologist at Johns Hopkins University, has recently published a paper in the prestigious journal \"Nature Neuroscience\".
17
  His research focuses on a rare genetic mutation, found in less than 0.01% of the population, that appears to prevent the development of Alzheimer's disease.
18
  Collaborating with researchers at the University of California, San Francisco, the team is now working to understand the mechanism by which this mutation confers its protective effect.
19
  Funded by the National Institutes of Health, their research could potentially open new avenues for Alzheimer's treatment.
20
  """
21
 
22
- predictor = GLiNERPredictor( # Predictor manages the model that will be used by tasks
23
- GLiNERPredictorConfig(
24
- model_name = "knowledgator/gliner-multitask-large-v0.5", # Model to use
25
- device = "cpu", # Device to use
26
- )
27
- )
28
 
29
- pipe = (
30
- GLiNER( # GLiNER task produces classified entities that will be at the "output" key.
31
- predictor=predictor,
32
- preprocess=GLiNERPreprocessor(threshold=0.7) # Entities threshold
33
- )
34
- | RenameAttribute("output", "entities") # Rename output entities from GLiNER task to use them as inputs in GLiNERRelationExtraction
35
- | GLiNERRelationExtraction( # GLiNERRelationExtraction is used for relation extraction.
36
- predictor=predictor,
37
- preprocess=(
38
- GLiNERPreprocessor(threshold=0.7) # Relations threshold
39
- | GLiNERRelationExtractionPreprocessor()
40
- )
41
- )
42
- )
43
 
 
 
 
 
 
 
 
 
 
44
 
45
- def process(
46
- relation: str, text, distance_threshold: str, pairs_filter: str, labels: str
47
- ) -> Dict[str, Union[str, int, float]]:
48
- pairs_filter = [tuple(pair.strip() for pair in pair.split("->")) for pair in pairs_filter.split(",")]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
 
50
- if len(distance_threshold) < 1 or not distance_threshold or not distance_threshold.strip().isdigit():
51
- r = pipe.run({
52
- "text": text,
53
- "labels": [label.strip() for label in labels.split(",")],
54
- "relations": [{
55
- "relation": relation,
56
- "pairs_filter": pairs_filter
57
- }]
58
- })
59
- elif int(distance_threshold.strip()):
60
- r = pipe.run({
61
- "text": text,
62
- "labels": [label.strip() for label in labels.split(",")],
63
- "relations": [{
64
- "relation": relation,
65
- "pairs_filter": pairs_filter,
66
- "distance_threshold": int(distance_threshold.replace(" ", ""))
67
- }]
68
- })
69
-
70
- return r["output"]
71
 
72
  relation_e_examples = [
73
  [
74
- "worked at",
75
- text,
76
- "None",
77
- "scientist -> university, scientist -> other",
78
- "scientist, university, city, research, journal"]
79
  ]
 
80
 
81
  with gr.Blocks(title="Open Information Extracting") as relation_e_interface:
82
  relation = gr.Textbox(label="Relation", placeholder="Enter relation you want to extract here")
83
  input_text = gr.Textbox(label="Text input", placeholder="Enter your text here")
84
  labels = gr.Textbox(label="Labels", placeholder="Enter your labels here (comma separated)", scale=2)
85
  pairs_filter = gr.Textbox(label="Pairs Filter", placeholder="It specifies possible members of relations by their entity labels. Write as: source -> target,..")
86
- distance_threshold = gr.Textbox(label="Distance Threshold", placeholder="It specifies the max distance in characters between spans in the text")
87
  output = gr.Textbox(label="Predicted Relation")
88
  submit_btn = gr.Button("Submit")
89
  examples = gr.Examples(
90
- relation_e_examples,
91
- fn=process,
92
- inputs=[relation, input_text, distance_threshold, pairs_filter, labels],
93
- outputs=output,
94
- cache_examples=True
95
  )
96
- theme=gr.themes.Base()
97
 
98
  input_text.submit(fn=process, inputs=[relation, input_text, distance_threshold, pairs_filter, labels], outputs=output)
99
  labels.submit(fn=process, inputs=[relation, input_text, distance_threshold, pairs_filter, labels], outputs=output)
@@ -101,7 +114,5 @@ with gr.Blocks(title="Open Information Extracting") as relation_e_interface:
101
  submit_btn.click(fn=process, inputs=[relation, input_text, distance_threshold, pairs_filter, labels], outputs=output)
102
  distance_threshold.submit(fn=process, inputs=[relation, input_text, distance_threshold, pairs_filter, labels], outputs=output)
103
 
104
-
105
  if __name__ == "__main__":
106
-
107
- relation_e_interface.launch()
 
1
+ from typing import List, Tuple, Dict
2
+ from gliner import GLiNER
 
 
 
 
 
 
 
 
 
 
3
  import gradio as gr
4
 
5
+ model = GLiNER.from_pretrained("knowledgator/gliner-multitask-large-v0.5").to("cpu")
6
+
7
  text = """
8
+ Dr. Paul Hammond, a renowned neurologist at Johns Hopkins University, has recently published a paper in the prestigious journal "Nature Neuroscience".
9
  His research focuses on a rare genetic mutation, found in less than 0.01% of the population, that appears to prevent the development of Alzheimer's disease.
10
  Collaborating with researchers at the University of California, San Francisco, the team is now working to understand the mechanism by which this mutation confers its protective effect.
11
  Funded by the National Institutes of Health, their research could potentially open new avenues for Alzheimer's treatment.
12
  """
13
 
 
 
 
 
 
 
14
 
15
+ def process(relation: str, text: str, distance_threshold: str, pairs_filter: str, labels: str) -> str:
16
+ entity_labels: List[str] = [x.strip() for x in labels.split(",") if x.strip()]
17
+ if not entity_labels:
18
+ return "Error: provide Labels (comma-separated)."
19
+ pairs: List[Tuple[str, str]] = []
20
+ for p in pairs_filter.split(","):
21
+ if "->" in p:
22
+ a, b = p.split("->", 1)
23
+ a, b = a.strip(), b.strip()
24
+ if a and b:
25
+ pairs.append((a, b))
26
+ dist = None
27
+ if distance_threshold and distance_threshold.strip().isdigit():
28
+ dist = int(distance_threshold.strip())
29
 
30
+ ents_raw = model.predict_entities(text, entity_labels, threshold=0.5)
31
+ entities: List[Dict] = [{
32
+ "label": e["label"],
33
+ "text": e["text"],
34
+ "start": int(e["start"]),
35
+ "end": int(e["end"]),
36
+ "score": float(e.get("score", 0.0)),
37
+ } for e in ents_raw]
38
+ entities.sort(key=lambda x: (x["start"], x["end"], x["label"]))
39
 
40
+ by_label: Dict[str, List[Dict]] = {}
41
+ for e in entities:
42
+ by_label.setdefault(e["label"], []).append(e)
43
+
44
+ rels: List[Dict] = []
45
+ for s_lbl, t_lbl in pairs:
46
+ sources = by_label.get(s_lbl, [])
47
+ targets = by_label.get(t_lbl, [])
48
+ for s in sources:
49
+ for t in targets:
50
+ if s["end"] <= t["start"]:
51
+ d = t["start"] - s["end"]
52
+ elif t["end"] <= s["start"]:
53
+ d = s["start"] - t["end"]
54
+ else:
55
+ d = 0
56
+ if dist is not None and d > dist:
57
+ continue
58
+ cs, ce = min(s["start"], t["start"]), max(s["end"], t["end"])
59
+ chunk = text[cs:ce]
60
+ rel_label = f"{s_lbl} <> {relation}"
61
+ try:
62
+ hit = model.predict_entities(chunk, [rel_label], threshold=0.5)
63
+ except Exception:
64
+ hit = []
65
+ if hit:
66
+ rels.append({
67
+ "relation": relation,
68
+ "source": {"text": s["text"], "label": s["label"], "start": s["start"], "end": s["end"]},
69
+ "target": {"text": t["text"], "label": t["label"], "start": t["start"], "end": t["end"]},
70
+ "score": float(hit[0].get("score", 0.0)),
71
+ "distance": int(d),
72
+ })
73
+
74
+ if not rels:
75
+ return "No relations found"
76
+ rels.sort(key=lambda r: (r["relation"], r["source"]["start"], r["target"]["start"]))
77
+ lines = [
78
+ f"{r['source']['text']} ({r['source']['label']}) -> {r['relation']} -> {r['target']['text']} ({r['target']['label']})"
79
+ for r in rels
80
+ ]
81
+ return "\n".join(lines)
82
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
83
 
84
  relation_e_examples = [
85
  [
86
+ "worked at",
87
+ text,
88
+ "",
89
+ "scientist -> university, scientist -> other",
90
+ "scientist, university, city, research, journal"
91
  ]
92
+ ]
93
 
94
  with gr.Blocks(title="Open Information Extracting") as relation_e_interface:
95
  relation = gr.Textbox(label="Relation", placeholder="Enter relation you want to extract here")
96
  input_text = gr.Textbox(label="Text input", placeholder="Enter your text here")
97
  labels = gr.Textbox(label="Labels", placeholder="Enter your labels here (comma separated)", scale=2)
98
  pairs_filter = gr.Textbox(label="Pairs Filter", placeholder="It specifies possible members of relations by their entity labels. Write as: source -> target,..")
99
+ distance_threshold = gr.Textbox(label="Distance Threshold", placeholder="It specifies the max distance in characters between spans in the text")
100
  output = gr.Textbox(label="Predicted Relation")
101
  submit_btn = gr.Button("Submit")
102
  examples = gr.Examples(
103
+ relation_e_examples,
104
+ fn=process,
105
+ inputs=[relation, input_text, distance_threshold, pairs_filter, labels],
106
+ outputs=output,
107
+ cache_examples=True
108
  )
109
+ theme = gr.themes.Base()
110
 
111
  input_text.submit(fn=process, inputs=[relation, input_text, distance_threshold, pairs_filter, labels], outputs=output)
112
  labels.submit(fn=process, inputs=[relation, input_text, distance_threshold, pairs_filter, labels], outputs=output)
 
114
  submit_btn.click(fn=process, inputs=[relation, input_text, distance_threshold, pairs_filter, labels], outputs=output)
115
  distance_threshold.submit(fn=process, inputs=[relation, input_text, distance_threshold, pairs_filter, labels], outputs=output)
116
 
 
117
  if __name__ == "__main__":
118
+ relation_e_interface.launch()