Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -105,7 +105,29 @@ def create_mask_dict(entities):
|
|
| 105 |
entity_counters[entity['entity_group']] += 1
|
| 106 |
mask_dict[entity['word']] = f"{entity['entity_group']}_{entity_counters[entity['entity_group']]}"
|
| 107 |
return mask_dict
|
| 108 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 109 |
Run_Button = st.button("Run")
|
| 110 |
|
| 111 |
if Run_Button and input_text:
|
|
@@ -147,6 +169,8 @@ if Run_Button and input_text:
|
|
| 147 |
|
| 148 |
st.subheader("Recognized Entities")
|
| 149 |
st.dataframe(df_final)
|
|
|
|
|
|
|
| 150 |
|
| 151 |
# Spacy display logic with entity numbering
|
| 152 |
spacy_display = {"ents": [], "text": input_text, "title": None}
|
|
@@ -160,5 +184,16 @@ if Run_Button and input_text:
|
|
| 160 |
html = spacy.displacy.render(spacy_display, style="ent", minify=True, manual=True)
|
| 161 |
st.write(html, unsafe_allow_html=True)
|
| 162 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 163 |
st.subheader("Masking Dictionary")
|
| 164 |
st.json(mask_dict)
|
|
|
|
| 105 |
entity_counters[entity['entity_group']] += 1
|
| 106 |
mask_dict[entity['word']] = f"{entity['entity_group']}_{entity_counters[entity['entity_group']]}"
|
| 107 |
return mask_dict
|
| 108 |
+
def export_masked_text(masked_text, file_type):
    """Serialize masked text into the bytes of a downloadable file.

    Args:
        masked_text: The already-masked text to export.
        file_type: Target format. Accepts "txt", "pdf" or "docx". The MIME
            subtypes "plain" and
            "vnd.openxmlformats-officedocument.wordprocessingml.document" are
            accepted as aliases for "txt" and "docx", because the caller
            derives this value from the last segment of a MIME type.
            Matching ignores case and surrounding whitespace.

    Returns:
        The file content as bytes, or None (after reporting the problem with
        st.error) when the file type is not supported.
    """
    mime_subtype_aliases = {
        "plain": "txt",
        "vnd.openxmlformats-officedocument.wordprocessingml.document": "docx",
    }
    normalized = str(file_type).strip().lower()
    normalized = mime_subtype_aliases.get(normalized, normalized)

    if normalized == "txt":
        return masked_text.encode("utf-8")

    if normalized == "pdf":
        from fpdf import FPDF

        pdf = FPDF()
        pdf.add_page()
        pdf.set_font("Arial", size=12)
        # The built-in (core) PDF fonts only cover latin-1. Replace anything
        # outside that range with "?" instead of letting the export crash.
        printable_text = masked_text.encode("latin-1", "replace").decode("latin-1")
        pdf.multi_cell(0, 10, printable_text)
        # dest="S" returns the document instead of writing it: a latin-1 str
        # on legacy PyFPDF, a bytearray on fpdf2. Passing a BytesIO as the
        # output name only works on fpdf2.
        rendered = pdf.output(dest="S")
        if isinstance(rendered, str):
            return rendered.encode("latin-1")
        return bytes(rendered)

    if normalized == "docx":
        doc = docx.Document()
        doc.add_paragraph(masked_text)
        buffer = io.BytesIO()
        doc.save(buffer)
        return buffer.getvalue()

    st.error("Unsupported file type for export")
    return None
|
| 131 |
Run_Button = st.button("Run")
|
| 132 |
|
| 133 |
if Run_Button and input_text:
|
|
|
|
| 169 |
|
| 170 |
st.subheader("Recognized Entities")
|
| 171 |
st.dataframe(df_final)
|
| 172 |
+
|
| 173 |
+
|
| 174 |
|
| 175 |
# Spacy display logic with entity numbering
|
| 176 |
spacy_display = {"ents": [], "text": input_text, "title": None}
|
|
|
|
| 184 |
html = spacy.displacy.render(spacy_display, style="ent", minify=True, manual=True)
|
| 185 |
st.write(html, unsafe_allow_html=True)
|
| 186 |
|
| 187 |
+
# Offer the masked text for download in the same format as the upload.
# UploadedFile.type is a MIME type ("text/plain", or the long
# "application/vnd.openxmlformats-..." string for .docx), so its last segment
# is not a usable extension. Take the extension from the file name instead.
if uploaded_file is not None and "." in uploaded_file.name:
    export_file_type = uploaded_file.name.rsplit(".", 1)[-1].lower()
else:
    export_file_type = "txt"
export_mime_types = {
    "txt": "text/plain",
    "pdf": "application/pdf",
    "docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
}
masked_file_content = export_masked_text(masked_text, export_file_type)
# Render the download button directly. Gating it behind a second st.button
# cannot work here: clicking that button reruns the script with Run_Button
# False, so the enclosing `if Run_Button and input_text:` body is skipped and
# the download button is never drawn.
if masked_file_content:
    st.download_button(
        label="Download Masked Text",
        data=masked_file_content,
        file_name=f"masked_output.{export_file_type}",
        mime=export_mime_types.get(export_file_type, "application/octet-stream"),
    )
|
| 197 |
+
|
| 198 |
st.subheader("Masking Dictionary")
|
| 199 |
st.json(mask_dict)
|