Spaces:
Runtime error
Runtime error
Commit
·
9ac5ea2
1
Parent(s):
ffd3765
Decimate
Browse files
app.py
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
|
|
| 1 |
import os
|
| 2 |
from datetime import datetime, timedelta
|
| 3 |
import json
|
|
@@ -16,7 +17,7 @@ HfFolder.save_token(HF_TOKEN)
|
|
| 16 |
datasets = {
|
| 17 |
# "stars": load_dataset("open-source-metrics/stars"),
|
| 18 |
"issues": load_dataset("open-source-metrics/issues"),
|
| 19 |
-
"pip": load_dataset("open-source-metrics/pip")
|
| 20 |
}
|
| 21 |
|
| 22 |
|
|
@@ -49,6 +50,8 @@ class RequestHandler(SimpleHTTPRequestHandler):
|
|
| 49 |
self.end_headers()
|
| 50 |
|
| 51 |
# TODO: Send and display warnings
|
|
|
|
|
|
|
| 52 |
self.wfile.write(json.dumps(list(dataset_with_most_splits)).encode("utf-8"))
|
| 53 |
|
| 54 |
return SimpleHTTPRequestHandler
|
|
@@ -61,8 +64,7 @@ class RequestHandler(SimpleHTTPRequestHandler):
|
|
| 61 |
|
| 62 |
returned_values = {}
|
| 63 |
for library_name in library_names:
|
| 64 |
-
|
| 65 |
-
for i in dataset:
|
| 66 |
if i['day'] in returned_values:
|
| 67 |
returned_values[i['day']][library_name] = i['num_downloads']
|
| 68 |
else:
|
|
@@ -71,10 +73,11 @@ class RequestHandler(SimpleHTTPRequestHandler):
|
|
| 71 |
for library_name in library_names:
|
| 72 |
for i in returned_values.keys():
|
| 73 |
if library_name not in returned_values[i]:
|
| 74 |
-
returned_values[i][library_name] =
|
| 75 |
|
| 76 |
-
|
| 77 |
-
output[
|
|
|
|
| 78 |
|
| 79 |
self.send_response(200)
|
| 80 |
self.send_header("Content-Type", "application/json")
|
|
@@ -91,12 +94,17 @@ class RequestHandler(SimpleHTTPRequestHandler):
|
|
| 91 |
library_names = library_names.split(',')
|
| 92 |
|
| 93 |
returned_values = {}
|
| 94 |
-
dataset_dict = load_dataset(f"open-source-metrics/stars", use_auth_token=True)
|
| 95 |
|
| 96 |
for library_name in library_names:
|
| 97 |
dataset = dataset_dict[library_name]
|
|
|
|
| 98 |
n = 0
|
| 99 |
-
for i in dataset:
|
|
|
|
|
|
|
|
|
|
|
|
|
| 100 |
n += 1
|
| 101 |
if i['dates'] in returned_values:
|
| 102 |
returned_values[i['dates']][library_name] = n
|
|
@@ -108,6 +116,7 @@ class RequestHandler(SimpleHTTPRequestHandler):
|
|
| 108 |
if library_name not in returned_values[i]:
|
| 109 |
returned_values[i][library_name] = None
|
| 110 |
|
|
|
|
| 111 |
output = {l: [k[l] for k in returned_values.values()][::-1] for l in library_names}
|
| 112 |
output['day'] = list(returned_values.keys())[::-1]
|
| 113 |
|
|
|
|
| 1 |
+
import collections
|
| 2 |
import os
|
| 3 |
from datetime import datetime, timedelta
|
| 4 |
import json
|
|
|
|
| 17 |
datasets = {
|
| 18 |
# "stars": load_dataset("open-source-metrics/stars"),
|
| 19 |
"issues": load_dataset("open-source-metrics/issues"),
|
| 20 |
+
"pip": load_dataset("open-source-metrics/pip").sort('day')
|
| 21 |
}
|
| 22 |
|
| 23 |
|
|
|
|
| 50 |
self.end_headers()
|
| 51 |
|
| 52 |
# TODO: Send and display warnings
|
| 53 |
+
dataset_with_most_splits = list(dataset_with_most_splits)
|
| 54 |
+
dataset_with_most_splits.sort()
|
| 55 |
self.wfile.write(json.dumps(list(dataset_with_most_splits)).encode("utf-8"))
|
| 56 |
|
| 57 |
return SimpleHTTPRequestHandler
|
|
|
|
| 64 |
|
| 65 |
returned_values = {}
|
| 66 |
for library_name in library_names:
|
| 67 |
+
for i in datasets['pip'][library_name]:
|
|
|
|
| 68 |
if i['day'] in returned_values:
|
| 69 |
returned_values[i['day']][library_name] = i['num_downloads']
|
| 70 |
else:
|
|
|
|
| 73 |
for library_name in library_names:
|
| 74 |
for i in returned_values.keys():
|
| 75 |
if library_name not in returned_values[i]:
|
| 76 |
+
returned_values[i][library_name] = None
|
| 77 |
|
| 78 |
+
returned_values = collections.OrderedDict(sorted(returned_values.items()))
|
| 79 |
+
output = {l: [k[l] for k in returned_values.values()] for l in library_names}
|
| 80 |
+
output['day'] = list(returned_values.keys())
|
| 81 |
|
| 82 |
self.send_response(200)
|
| 83 |
self.send_header("Content-Type", "application/json")
|
|
|
|
| 94 |
library_names = library_names.split(',')
|
| 95 |
|
| 96 |
returned_values = {}
|
| 97 |
+
dataset_dict = load_dataset(f"open-source-metrics/stars", use_auth_token=True).sort('dates')
|
| 98 |
|
| 99 |
for library_name in library_names:
|
| 100 |
dataset = dataset_dict[library_name]
|
| 101 |
+
|
| 102 |
n = 0
|
| 103 |
+
for k, i in enumerate(dataset):
|
| 104 |
+
# Decimate values if there are too many
|
| 105 |
+
if len(dataset) > 1000 and k % int(len(dataset) / 1000) != 0:
|
| 106 |
+
continue
|
| 107 |
+
|
| 108 |
n += 1
|
| 109 |
if i['dates'] in returned_values:
|
| 110 |
returned_values[i['dates']][library_name] = n
|
|
|
|
| 116 |
if library_name not in returned_values[i]:
|
| 117 |
returned_values[i][library_name] = None
|
| 118 |
|
| 119 |
+
returned_values = collections.OrderedDict(sorted(returned_values.items()))
|
| 120 |
output = {l: [k[l] for k in returned_values.values()][::-1] for l in library_names}
|
| 121 |
output['day'] = list(returned_values.keys())[::-1]
|
| 122 |
|