Spaces:
Running
Running
Update curated.py
Browse files- curated.py +36 -26
curated.py
CHANGED
|
@@ -438,6 +438,36 @@ phil_filter = pd.DataFrame(
|
|
| 438 |
table_html_phil = phil_filter.to_html(index=False, border=0)
|
| 439 |
table_div_phil = Div(NotStr(table_html_phil), style="margin: 40px;")
|
| 440 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 441 |
filtering_process = Div(
|
| 442 |
Section(
|
| 443 |
H3("This section contains the specific filtering steps taken for all 14 curated datasets."),
|
|
@@ -622,6 +652,11 @@ filtering_process = Div(
|
|
| 622 |
Li("Local dedup was done within freelaw itself which removed 90%+ duplicates"),
|
| 623 |
),
|
| 624 |
table_div_freelaw,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 625 |
),
|
| 626 |
),
|
| 627 |
Section(
|
|
@@ -920,27 +955,7 @@ def get_data(data_source: str = "Freelaw", doc_id: int = 3, target: str = "foo")
|
|
| 920 |
|
| 921 |
|
| 922 |
|
| 923 |
-
def get_freelaw_data(data_source: str = "Freelaw", doc_id: int = 3, target: str = "foo"):
|
| 924 |
-
doc_id = max(0, min(int(doc_id), 9))
|
| 925 |
|
| 926 |
-
if data_source == "Freelaw":
|
| 927 |
-
raw_sample_doc = json.load(open("data/curated_samples/freelaw_raw.json"))
|
| 928 |
-
extracted_sample_doc = json.load(
|
| 929 |
-
open("data/curated_samples/freelaw_extract.json")
|
| 930 |
-
)
|
| 931 |
-
else:
|
| 932 |
-
raw_sample_doc = extracted_sample_doc = [{} for _ in range(10)]
|
| 933 |
-
|
| 934 |
-
raw_json = raw_sample_doc[doc_id]
|
| 935 |
-
extracted_json = extracted_sample_doc[doc_id]
|
| 936 |
-
return view_data(
|
| 937 |
-
raw_json,
|
| 938 |
-
extracted_json,
|
| 939 |
-
doc_id=doc_id,
|
| 940 |
-
data_source=data_source,
|
| 941 |
-
data_sources=data_sources,
|
| 942 |
-
target=target,
|
| 943 |
-
)
|
| 944 |
|
| 945 |
|
| 946 |
def update(target: str, request):
|
|
@@ -1000,12 +1015,7 @@ fig.update_layout(
|
|
| 1000 |
# Show the plot
|
| 1001 |
diff2_stacked_bar = fig
|
| 1002 |
|
| 1003 |
-
|
| 1004 |
-
Div(
|
| 1005 |
-
get_freelaw_data(target=gen_random_id()),
|
| 1006 |
-
style="border: 1px solid #ccc; padding: 20px;",
|
| 1007 |
-
),
|
| 1008 |
-
)
|
| 1009 |
|
| 1010 |
def curated(request):
|
| 1011 |
|
|
|
|
| 438 |
table_html_phil = phil_filter.to_html(index=False, border=0)
|
| 439 |
table_div_phil = Div(NotStr(table_html_phil), style="margin: 40px;")
|
| 440 |
|
| 441 |
+
|
| 442 |
+
def get_freelaw_data(data_source: str = "Freelaw", doc_id: int = 3, target: str = "foo"):
    """Build the raw-vs-extracted viewer for one FreeLaw sample document.

    Args:
        data_source: Name of the requested source. Only the exact value
            "Freelaw" loads the sample files; any other value falls back to
            ten empty placeholder documents.
        doc_id: Index of the sample to display. It is coerced with ``int()``
            and clamped to the range 0..9.
        target: Passed through to ``view_data`` unchanged.

    Returns:
        The result of ``view_data`` for the selected raw/extracted pair.
    """
    # Clamp so an out-of-range id never indexes past the ten-entry fallback.
    doc_id = max(0, min(int(doc_id), 9))

    if data_source == "Freelaw":
        # Read each sample file inside a context manager so the handle is
        # closed as soon as parsing finishes instead of being left open.
        with open("data/curated_samples/freelaw_raw.json") as raw_file:
            raw_sample_doc = json.load(raw_file)
        with open("data/curated_samples/freelaw_extract.json") as extracted_file:
            extracted_sample_doc = json.load(extracted_file)
    else:
        raw_sample_doc = extracted_sample_doc = [{} for _ in range(10)]

    raw_json = raw_sample_doc[doc_id]
    extracted_json = extracted_sample_doc[doc_id]
    # The viewer is always given the literal "Freelaw" as its data source,
    # independent of the data_source argument received here.
    return view_data(
        raw_json,
        extracted_json,
        doc_id=doc_id,
        data_source="Freelaw",
        data_sources=data_sources,
        target=target,
    )
|
| 463 |
+
|
| 464 |
+
freelaw_examples = Div(
|
| 465 |
+
Div(
|
| 466 |
+
get_freelaw_data(target=gen_random_id()),
|
| 467 |
+
style="border: 1px solid #ccc; padding: 20px;",
|
| 468 |
+
),
|
| 469 |
+
)
|
| 470 |
+
|
| 471 |
filtering_process = Div(
|
| 472 |
Section(
|
| 473 |
H3("This section contains the specific filtering steps taken for all 14 curated datasets."),
|
|
|
|
| 652 |
Li("Local dedup was done within freelaw itself which removed 90%+ duplicates"),
|
| 653 |
),
|
| 654 |
table_div_freelaw,
|
| 655 |
+
Details(
|
| 656 |
+
Summary("FreeLaw Filtering Examples"),
|
| 657 |
+
freelaw_examples,
|
| 658 |
+
)
|
| 659 |
+
|
| 660 |
),
|
| 661 |
),
|
| 662 |
Section(
|
|
|
|
| 955 |
|
| 956 |
|
| 957 |
|
|
|
|
|
|
|
| 958 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 959 |
|
| 960 |
|
| 961 |
def update(target: str, request):
|
|
|
|
| 1015 |
# Show the plot
|
| 1016 |
diff2_stacked_bar = fig
|
| 1017 |
|
| 1018 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1019 |
|
| 1020 |
def curated(request):
|
| 1021 |
|