Spaces:
Running
Running
theo
committed on
Commit
·
1cc3978
1
Parent(s):
326ad7e
add a validator input
Browse files- requirements.txt +1 -0
- tagging_app.py +29 -19
requirements.txt
CHANGED
|
@@ -1,3 +1,4 @@
|
|
| 1 |
pyyaml
|
| 2 |
datasets
|
| 3 |
streamlit
|
|
|
|
|
|
| 1 |
pyyaml
|
| 2 |
datasets
|
| 3 |
streamlit
|
| 4 |
+
langcodes[data]
|
tagging_app.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
import json
|
| 2 |
from pathlib import Path
|
| 3 |
-
from typing import Callable, List, Tuple
|
| 4 |
|
| 5 |
import streamlit as st
|
| 6 |
import yaml
|
|
@@ -85,6 +85,21 @@ def multiselect(
|
|
| 85 |
return w.multiselect(markdown, valid_set, default=valid_values, format_func=format_func)
|
| 86 |
|
| 87 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 88 |
def new_state():
|
| 89 |
return {
|
| 90 |
"task_categories": [],
|
|
@@ -155,17 +170,7 @@ if rightbtn.button("flush state"):
|
|
| 155 |
st.experimental_set_query_params()
|
| 156 |
|
| 157 |
if preloaded_id is not None and initial_state is not None:
|
| 158 |
-
|
| 159 |
-
DatasetMetadata(**initial_state)
|
| 160 |
-
valid = "✔️ This is a valid tagset!"
|
| 161 |
-
except Exception as e:
|
| 162 |
-
valid = f"""
|
| 163 |
-
π This is an invalid tagset, here are the errors in it:
|
| 164 |
-
```
|
| 165 |
-
{e}
|
| 166 |
-
```
|
| 167 |
-
You're _very_ welcome to fix these issues and submit a new PR on [`datasets`](https://github.com/huggingface/datasets/)
|
| 168 |
-
"""
|
| 169 |
st.sidebar.markdown(
|
| 170 |
f"""
|
| 171 |
---
|
|
@@ -323,13 +328,8 @@ state["size_categories"] = [
|
|
| 323 |
########################
|
| 324 |
## Show results
|
| 325 |
########################
|
| 326 |
-
|
| 327 |
-
|
| 328 |
-
valid = "✅ Validated! Copy it into your dataset's `README.md` header! 🤗 "
|
| 329 |
-
except Exception as e:
|
| 330 |
-
valid = f"""π Could not validate:
|
| 331 |
-
```{e}```
|
| 332 |
-
"""
|
| 333 |
rightcol.markdown(
|
| 334 |
f"""
|
| 335 |
### Finalized tag set
|
|
@@ -339,5 +339,15 @@ rightcol.markdown(
|
|
| 339 |
```yaml
|
| 340 |
{yaml.dump(state)}
|
| 341 |
```
|
|
|
|
|
|
|
|
|
|
|
|
|
| 342 |
""",
|
| 343 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import json
|
| 2 |
from pathlib import Path
|
| 3 |
+
from typing import Callable, Dict, List, Tuple
|
| 4 |
|
| 5 |
import streamlit as st
|
| 6 |
import yaml
|
|
|
|
| 85 |
return w.multiselect(markdown, valid_set, default=valid_values, format_func=format_func)
|
| 86 |
|
| 87 |
|
| 88 |
+
def validate_dict(state_dict: Dict) -> str:
|
| 89 |
+
try:
|
| 90 |
+
DatasetMetadata(**state_dict)
|
| 91 |
+
valid = "✔️ This is a valid tagset! 🤗"
|
| 92 |
+
except Exception as e:
|
| 93 |
+
valid = f"""
|
| 94 |
+
π This is an invalid tagset, here are the errors in it:
|
| 95 |
+
```
|
| 96 |
+
{e}
|
| 97 |
+
```
|
| 98 |
+
You're _very_ welcome to fix these issues and submit a new PR on [`datasets`](https://github.com/huggingface/datasets/)
|
| 99 |
+
"""
|
| 100 |
+
return valid
|
| 101 |
+
|
| 102 |
+
|
| 103 |
def new_state():
|
| 104 |
return {
|
| 105 |
"task_categories": [],
|
|
|
|
| 170 |
st.experimental_set_query_params()
|
| 171 |
|
| 172 |
if preloaded_id is not None and initial_state is not None:
|
| 173 |
+
valid = validate_dict(initial_state)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 174 |
st.sidebar.markdown(
|
| 175 |
f"""
|
| 176 |
---
|
|
|
|
| 328 |
########################
|
| 329 |
## Show results
|
| 330 |
########################
|
| 331 |
+
|
| 332 |
+
valid = validate_dict(state)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 333 |
rightcol.markdown(
|
| 334 |
f"""
|
| 335 |
### Finalized tag set
|
|
|
|
| 339 |
```yaml
|
| 340 |
{yaml.dump(state)}
|
| 341 |
```
|
| 342 |
+
---
|
| 343 |
+
#### Arbitrary yaml validator
|
| 344 |
+
|
| 345 |
+
This is a standalone tool, it is useful to check for errors on an existing tagset or modifying directly the text rather than the UI on the left.
|
| 346 |
""",
|
| 347 |
)
|
| 348 |
+
|
| 349 |
+
yamlblock = rightcol.text_area("Input your yaml here")
|
| 350 |
+
if yamlblock.strip() != "":
|
| 351 |
+
inputdict = yaml.safe_load(yamlblock)
|
| 352 |
+
valid = validate_dict(inputdict)
|
| 353 |
+
rightcol.markdown(valid)
|