Spaces:
Running
Running
Align more metadata with other repo types (models, spaces)
#2
by
julien-c
HF Staff
- opened
- .github/workflows/deploy.yml +0 -37
- apputils.py +2 -2
- tagging_app.py +7 -14
.github/workflows/deploy.yml
DELETED
|
@@ -1,37 +0,0 @@
|
|
| 1 |
-
name: Deployment
|
| 2 |
-
|
| 3 |
-
on:
|
| 4 |
-
push:
|
| 5 |
-
branches:
|
| 6 |
-
- main
|
| 7 |
-
workflow_dispatch:
|
| 8 |
-
|
| 9 |
-
jobs:
|
| 10 |
-
deployment:
|
| 11 |
-
runs-on: ubuntu-latest
|
| 12 |
-
steps:
|
| 13 |
-
- name: Check out
|
| 14 |
-
uses: actions/checkout@v2
|
| 15 |
-
with:
|
| 16 |
-
fetch-depth: 0
|
| 17 |
-
- name: Set up Python
|
| 18 |
-
uses: actions/setup-python@v2
|
| 19 |
-
with:
|
| 20 |
-
python-version: "3.6"
|
| 21 |
-
- name: Install dependencies
|
| 22 |
-
run: |
|
| 23 |
-
python -m pip install --upgrade pip
|
| 24 |
-
pip install -r requirements.txt
|
| 25 |
-
- name: Build metadata file
|
| 26 |
-
run: |
|
| 27 |
-
python build_metadata_file.py
|
| 28 |
-
- name: Commit metadata file
|
| 29 |
-
run: |
|
| 30 |
-
git config user.name github-actions
|
| 31 |
-
git config user.email github-actions@github.com
|
| 32 |
-
git add -f metadata_*
|
| 33 |
-
git commit -m "Add metadata file"
|
| 34 |
-
- name: Push to Hub
|
| 35 |
-
env:
|
| 36 |
-
HF_TOKEN: ${{ secrets.HF_TOKEN }}
|
| 37 |
-
run: git push --force https://albertvillanova:$HF_TOKEN@huggingface.co/spaces/huggingface/datasets-tagging main
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
apputils.py
CHANGED
|
@@ -6,11 +6,11 @@ def new_state() -> Dict[str, List]:
|
|
| 6 |
"task_categories": [],
|
| 7 |
"task_ids": [],
|
| 8 |
"multilinguality": [],
|
| 9 |
-
"languages": [],
|
| 10 |
"language_creators": [],
|
| 11 |
"annotations_creators": [],
|
| 12 |
"source_datasets": [],
|
| 13 |
"size_categories": [],
|
| 14 |
-
"licenses": [],
|
| 15 |
"pretty_name": None,
|
| 16 |
}
|
|
|
|
| 6 |
"task_categories": [],
|
| 7 |
"task_ids": [],
|
| 8 |
"multilinguality": [],
|
| 9 |
+
"language": [],
|
| 10 |
"language_creators": [],
|
| 11 |
"annotations_creators": [],
|
| 12 |
"source_datasets": [],
|
| 13 |
"size_categories": [],
|
| 14 |
+
"license": [],
|
| 15 |
"pretty_name": None,
|
| 16 |
}
|
tagging_app.py
CHANGED
|
@@ -258,7 +258,7 @@ if "other" in state["multilinguality"]:
|
|
| 258 |
state["multilinguality"][state["multilinguality"].index("other")] = f"other-{other_multilinguality}"
|
| 259 |
|
| 260 |
valid_values, invalid_values = list(), list()
|
| 261 |
-
for langtag in state["languages"]:
|
| 262 |
try:
|
| 263 |
lc.get(langtag)
|
| 264 |
valid_values.append(langtag)
|
|
@@ -273,7 +273,7 @@ langtags = leftcol.text_area(
|
|
| 273 |
"What languages are represented in the dataset? expected format is BCP47 tags separated for ';' e.g. 'en;fr'",
|
| 274 |
value=";".join(valid_values),
|
| 275 |
)
|
| 276 |
-
state["languages"] = langtags.strip().split(";") if langtags.strip() != "" else []
|
| 277 |
|
| 278 |
|
| 279 |
#
|
|
@@ -297,23 +297,16 @@ state["annotations_creators"] = multiselect(
|
|
| 297 |
|
| 298 |
|
| 299 |
#
|
| 300 |
-
#
|
| 301 |
#
|
| 302 |
-
state["licenses"] = multiselect(
|
| 303 |
leftcol,
|
| 304 |
-
"
|
| 305 |
-
"What
|
| 306 |
valid_set=list(known_licenses.keys()),
|
| 307 |
-
values=state["licenses"],
|
| 308 |
format_func=lambda l: f"{l} : {known_licenses[l]}",
|
| 309 |
)
|
| 310 |
-
if "other" in state["licenses"]:
|
| 311 |
-
other_license = st.text_input(
|
| 312 |
-
"You selected 'other' type of license. Please enter a short hyphen-separated description:",
|
| 313 |
-
value="my-license",
|
| 314 |
-
)
|
| 315 |
-
st.write(f"Registering other-{other_license} license")
|
| 316 |
-
state["licenses"][state["licenses"].index("other")] = f"other-{other_license}"
|
| 317 |
|
| 318 |
|
| 319 |
#
|
|
|
|
| 258 |
state["multilinguality"][state["multilinguality"].index("other")] = f"other-{other_multilinguality}"
|
| 259 |
|
| 260 |
valid_values, invalid_values = list(), list()
|
| 261 |
+
for langtag in state["language"]:
|
| 262 |
try:
|
| 263 |
lc.get(langtag)
|
| 264 |
valid_values.append(langtag)
|
|
|
|
| 273 |
"What languages are represented in the dataset? expected format is BCP47 tags separated for ';' e.g. 'en;fr'",
|
| 274 |
value=";".join(valid_values),
|
| 275 |
)
|
| 276 |
+
state["language"] = langtags.strip().split(";") if langtags.strip() != "" else []
|
| 277 |
|
| 278 |
|
| 279 |
#
|
|
|
|
| 297 |
|
| 298 |
|
| 299 |
#
|
| 300 |
+
# LICENSE
|
| 301 |
#
|
| 302 |
+
state["license"] = multiselect(
|
| 303 |
leftcol,
|
| 304 |
+
"License",
|
| 305 |
+
"What license(s) is the dataset under?",
|
| 306 |
valid_set=list(known_licenses.keys()),
|
| 307 |
+
values=state["license"],
|
| 308 |
format_func=lambda l: f"{l} : {known_licenses[l]}",
|
| 309 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 310 |
|
| 311 |
|
| 312 |
#
|