Spaces:

LLM360
/

TxT360

Running

victormiller committed on Oct 2, 2024

Commit

e723f50

verified ·

1 Parent(s): 7cc1892

Update curated.py

Files changed (1) hide show

curated.py CHANGED Viewed

@@ -919,6 +919,30 @@ def get_data(data_source: str = "Freelaw", doc_id: int = 3, target: str = "foo")
     )
 def update(target: str, request):
     params = request.query_params
     if data_source := params.get(f"data_source_{target}"):
@@ -1043,6 +1067,13 @@ def curated(request):
     preprocessing_steps = pd.DataFrame(
         {
             "Step": [
@@ -1127,6 +1158,7 @@ def curated(request):
             plotly2fasthtml(diff2_stacked_bar),
             P("The figure above provides a global view of the document filtering results. ~8% of documents were removed during these three steps."),
             filtering_process,
             data_preparation_div,
             #H2("Local Deduplication"), are these numbers even right?
             #local_dedup_text,

     )
+def get_freelaw_data(data_source: str = "Freelaw", doc_id: int = 3, target: str = "foo"):
+    doc_id = max(0, min(int(doc_id), 9))
+    if data_source == "Freelaw":
+        raw_sample_doc = json.load(open("data/curated_samples/freelaw_raw.json"))
+        extracted_sample_doc = json.load(
+            open("data/curated_samples/freelaw_extract.json")
+        )
+    else:
+        raw_sample_doc = extracted_sample_doc = [{} for _ in range(10)]
+    raw_json = raw_sample_doc[doc_id]
+    extracted_json = extracted_sample_doc[doc_id]
+    return view_data(
+        raw_json,
+        extracted_json,
+        doc_id=doc_id,
+        data_source=data_source,
+        data_sources=data_sources,
+        target=target,
+    )
 def update(target: str, request):
     params = request.query_params
     if data_source := params.get(f"data_source_{target}"):
+# Example viewer block: the output of get_freelaw_data (called with its default
+# data_source and doc_id) wrapped in a bordered, padded Div. The value returned
+# by gen_random_id() is passed as the viewer's target.
+freelaw_examples = Div(
+    Div(
+        get_freelaw_data(target=gen_random_id()),
+        style="border: 1px solid #ccc; padding: 20px;",
+    ),
+)
     preprocessing_steps = pd.DataFrame(
         {
             "Step": [
             plotly2fasthtml(diff2_stacked_bar),
             P("The figure above provides a global view of the document filtering results. ~8% of documents were removed during these three steps."),
             filtering_process,
+            freelaw_examples,
             data_preparation_div,
             #H2("Local Deduplication"), are these numbers even right?
             #local_dedup_text,