Spaces:
Running
Running
Update curated.py
Browse files- curated.py +10 -60
curated.py
CHANGED
|
@@ -511,12 +511,7 @@ def get_freelaw_data(data_source: str = "Freelaw", doc_id: int = 3, target: str
|
|
| 511 |
target=target,
|
| 512 |
)
|
| 513 |
|
| 514 |
-
freelaw_examples =
|
| 515 |
-
Div(
|
| 516 |
-
get_freelaw_data(target=gen_random_id()),
|
| 517 |
-
style="border: 1px solid #ccc; padding: 20px;",
|
| 518 |
-
),
|
| 519 |
-
)
|
| 520 |
|
| 521 |
def get_se_data(data_source: str = "StackExchange", doc_id: int = 3, target: str = "foo"):
|
| 522 |
doc_id = max(0, min(int(doc_id), 9))
|
|
@@ -540,12 +535,7 @@ def get_se_data(data_source: str = "StackExchange", doc_id: int = 3, target: str
|
|
| 540 |
target=target,
|
| 541 |
)
|
| 542 |
|
| 543 |
-
se_examples =
|
| 544 |
-
Div(
|
| 545 |
-
get_se_data(target=gen_random_id()),
|
| 546 |
-
style="border: 1px solid #ccc; padding: 20px;",
|
| 547 |
-
),
|
| 548 |
-
)
|
| 549 |
|
| 550 |
def get_phil_data(data_source: str = "PhilPapers", doc_id: int = 3, target: str = "foo"):
|
| 551 |
doc_id = max(0, min(int(doc_id), 9))
|
|
@@ -568,12 +558,7 @@ def get_phil_data(data_source: str = "PhilPapers", doc_id: int = 3, target: str
|
|
| 568 |
target=target,
|
| 569 |
)
|
| 570 |
|
| 571 |
-
phil_examples =
|
| 572 |
-
Div(
|
| 573 |
-
get_phil_data(target=gen_random_id()),
|
| 574 |
-
style="border: 1px solid #ccc; padding: 20px;",
|
| 575 |
-
),
|
| 576 |
-
)
|
| 577 |
|
| 578 |
def get_arx_data(data_source: str = "Arxiv", doc_id: int = 3, target: str = "foo"):
|
| 579 |
doc_id = max(0, min(int(doc_id), 9))
|
|
@@ -597,12 +582,7 @@ def get_arx_data(data_source: str = "Arxiv", doc_id: int = 3, target: str = "foo
|
|
| 597 |
target=target,
|
| 598 |
)
|
| 599 |
|
| 600 |
-
arx_examples =
|
| 601 |
-
Div(
|
| 602 |
-
get_arx_data(target=gen_random_id()),
|
| 603 |
-
style="border: 1px solid #ccc; padding: 20px;",
|
| 604 |
-
),
|
| 605 |
-
)
|
| 606 |
|
| 607 |
def get_S2ORC_data(data_source: str = "S2ORC", doc_id: int = 3, target: str = "foo"):
|
| 608 |
doc_id = max(0, min(int(doc_id), 9))
|
|
@@ -625,12 +605,7 @@ def get_S2ORC_data(data_source: str = "S2ORC", doc_id: int = 3, target: str = "f
|
|
| 625 |
target=target,
|
| 626 |
)
|
| 627 |
|
| 628 |
-
s2o_examples =
|
| 629 |
-
Div(
|
| 630 |
-
get_S2ORC_data(target=gen_random_id()),
|
| 631 |
-
style="border: 1px solid #ccc; padding: 20px;",
|
| 632 |
-
),
|
| 633 |
-
)
|
| 634 |
|
| 635 |
def get_S2ORCA_data(data_source: str = "S2ORC Abstract", doc_id: int = 3, target: str = "foo"):
|
| 636 |
doc_id = max(0, min(int(doc_id), 9))
|
|
@@ -653,12 +628,7 @@ def get_S2ORCA_data(data_source: str = "S2ORC Abstract", doc_id: int = 3, target
|
|
| 653 |
target=target,
|
| 654 |
)
|
| 655 |
|
| 656 |
-
s2oa_examples =
|
| 657 |
-
Div(
|
| 658 |
-
get_S2ORCA_data(target=gen_random_id()),
|
| 659 |
-
style="border: 1px solid #ccc; padding: 20px;",
|
| 660 |
-
),
|
| 661 |
-
)
|
| 662 |
|
| 663 |
def get_pubmed_data(data_source: str = "Pubmed", doc_id: int = 3, target: str = "foo"):
|
| 664 |
doc_id = max(0, min(int(doc_id), 9))
|
|
@@ -682,12 +652,7 @@ def get_pubmed_data(data_source: str = "Pubmed", doc_id: int = 3, target: str =
|
|
| 682 |
target=target,
|
| 683 |
)
|
| 684 |
|
| 685 |
-
pubmed_examples =
|
| 686 |
-
Div(
|
| 687 |
-
get_pubmed_data(target=gen_random_id()),
|
| 688 |
-
style="border: 1px solid #ccc; padding: 20px;",
|
| 689 |
-
),
|
| 690 |
-
)
|
| 691 |
|
| 692 |
def get_dmm_data(data_source: str = "DM Math", doc_id: int = 3, target: str = "foo"):
|
| 693 |
doc_id = max(0, min(int(doc_id), 9))
|
|
@@ -711,12 +676,7 @@ def get_dmm_data(data_source: str = "DM Math", doc_id: int = 3, target: str = "f
|
|
| 711 |
target=target,
|
| 712 |
)
|
| 713 |
|
| 714 |
-
dmm_examples =
|
| 715 |
-
Div(
|
| 716 |
-
get_dmm_data(target=gen_random_id()),
|
| 717 |
-
style="border: 1px solid #ccc; padding: 20px;",
|
| 718 |
-
),
|
| 719 |
-
)
|
| 720 |
|
| 721 |
def get_pg19_data(data_source: str = "PG19", doc_id: int = 3, target: str = "foo"):
|
| 722 |
doc_id = max(0, min(int(doc_id), 9))
|
|
@@ -739,12 +699,7 @@ def get_pg19_data(data_source: str = "PG19", doc_id: int = 3, target: str = "foo
|
|
| 739 |
target=target,
|
| 740 |
)
|
| 741 |
|
| 742 |
-
pg19_examples =
|
| 743 |
-
Div(
|
| 744 |
-
get_pg19_data(target=gen_random_id()),
|
| 745 |
-
style="border: 1px solid #ccc; padding: 20px;",
|
| 746 |
-
),
|
| 747 |
-
)
|
| 748 |
|
| 749 |
def get_eu_data(data_source: str = "Europarl", doc_id: int = 3, target: str = "foo"):
|
| 750 |
doc_id = max(0, min(int(doc_id), 9))
|
|
@@ -767,12 +722,7 @@ def get_eu_data(data_source: str = "Europarl", doc_id: int = 3, target: str = "f
|
|
| 767 |
target=target,
|
| 768 |
)
|
| 769 |
|
| 770 |
-
eu_examples =
|
| 771 |
-
Div(
|
| 772 |
-
get_eu_data(target=gen_random_id()),
|
| 773 |
-
style="border: 1px solid #ccc; padding: 20px;",
|
| 774 |
-
),
|
| 775 |
-
)
|
| 776 |
|
| 777 |
filtering_process = Div(
|
| 778 |
Section(
|
|
|
|
| 511 |
target=target,
|
| 512 |
)
|
| 513 |
|
| 514 |
+
freelaw_examples = DV("data/curated_samples/freelaw_extract.json", 0, "Freelaw")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 515 |
|
| 516 |
def get_se_data(data_source: str = "StackExchange", doc_id: int = 3, target: str = "foo"):
|
| 517 |
doc_id = max(0, min(int(doc_id), 9))
|
|
|
|
| 535 |
target=target,
|
| 536 |
)
|
| 537 |
|
| 538 |
+
se_examples = DV("data/curated_samples/stackexchange_extract.json", 0, "StackExchange")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 539 |
|
| 540 |
def get_phil_data(data_source: str = "PhilPapers", doc_id: int = 3, target: str = "foo"):
|
| 541 |
doc_id = max(0, min(int(doc_id), 9))
|
|
|
|
| 558 |
target=target,
|
| 559 |
)
|
| 560 |
|
| 561 |
+
phil_examples = DV("data/curated_samples/philpapers_raw.json", 0, "PhilPapers")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 562 |
|
| 563 |
def get_arx_data(data_source: str = "Arxiv", doc_id: int = 3, target: str = "foo"):
|
| 564 |
doc_id = max(0, min(int(doc_id), 9))
|
|
|
|
| 582 |
target=target,
|
| 583 |
)
|
| 584 |
|
| 585 |
+
arx_examples = DV("data/curated_samples/arxiv_extract.json", 0, "Arxiv")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 586 |
|
| 587 |
def get_S2ORC_data(data_source: str = "S2ORC", doc_id: int = 3, target: str = "foo"):
|
| 588 |
doc_id = max(0, min(int(doc_id), 9))
|
|
|
|
| 605 |
target=target,
|
| 606 |
)
|
| 607 |
|
| 608 |
+
s2o_examples = DV("data/curated_samples/s2orc_raw.json", 0, "S2ORC")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 609 |
|
| 610 |
def get_S2ORCA_data(data_source: str = "S2ORC Abstract", doc_id: int = 3, target: str = "foo"):
|
| 611 |
doc_id = max(0, min(int(doc_id), 9))
|
|
|
|
| 628 |
target=target,
|
| 629 |
)
|
| 630 |
|
| 631 |
+
s2oa_examples = DV("data/curated_samples/s2orc_abstract_raw.json", 0, "S2ORC Abstract")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 632 |
|
| 633 |
def get_pubmed_data(data_source: str = "Pubmed", doc_id: int = 3, target: str = "foo"):
|
| 634 |
doc_id = max(0, min(int(doc_id), 9))
|
|
|
|
| 652 |
target=target,
|
| 653 |
)
|
| 654 |
|
| 655 |
+
pubmed_examples = DV("data/curated_samples/pubmed_extract.json", 0, "PubMed")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 656 |
|
| 657 |
def get_dmm_data(data_source: str = "DM Math", doc_id: int = 3, target: str = "foo"):
|
| 658 |
doc_id = max(0, min(int(doc_id), 9))
|
|
|
|
| 676 |
target=target,
|
| 677 |
)
|
| 678 |
|
| 679 |
+
dmm_examples = DV("data/curated_samples/dm_maths_extract.json", 0, "DM Math")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 680 |
|
| 681 |
def get_pg19_data(data_source: str = "PG19", doc_id: int = 3, target: str = "foo"):
|
| 682 |
doc_id = max(0, min(int(doc_id), 9))
|
|
|
|
| 699 |
target=target,
|
| 700 |
)
|
| 701 |
|
| 702 |
+
pg19_examples = DV("data/curated_samples/pg19_raw.json", 0, "PG19")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 703 |
|
| 704 |
def get_eu_data(data_source: str = "Europarl", doc_id: int = 3, target: str = "foo"):
|
| 705 |
doc_id = max(0, min(int(doc_id), 9))
|
|
|
|
| 722 |
target=target,
|
| 723 |
)
|
| 724 |
|
| 725 |
+
eu_examples = DV("data/curated_samples/europarl_raw.json", 0, "Europarl")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 726 |
|
| 727 |
filtering_process = Div(
|
| 728 |
Section(
|