Spaces: Preprocessing
app.py CHANGED

@@ -15,91 +15,21 @@ HfFolder.save_token(HF_TOKEN)
 
 
 datasets = {
-    "stars": load_dataset("open-source-metrics/stars").sort('dates'),
-    "issues": load_dataset("open-source-metrics/issues").sort('dates'),
+    "stars": load_dataset("open-source-metrics/preprocessed_stars"),
+    "issues": load_dataset("open-source-metrics/preprocessed_issues"),
     "pip": load_dataset("open-source-metrics/pip").sort('day'),
 }
 
 external_datasets = {
-    "stars": load_dataset("open-source-metrics/stars-external").sort('dates'),
-    "issues": load_dataset("open-source-metrics/issues-external").sort('dates'),
     "pip": load_dataset("open-source-metrics/pip-external").sort('day')
 }
 
-val = 0
 
-
-def _range(e):
-    global val
-    e['range'] = val
-    val += 1
-
-    current_date = datetime.strptime(e['dates'], "%Y-%m-%dT%H:%M:%SZ")
-    first_date = datetime.fromtimestamp(1)
-    week = abs(current_date - first_date).days // 7
-    e['week'] = week
-
-    return e
-
-
-def _ignore_org_members(e):
-    global val
-    e['range_non_org'] = val
-
-    if e['type']['authorAssociation'] != 'MEMBER':
-        val += 1
-
-    return e
-
-
-stars = {}
-for k, v in datasets['stars'].items():
-    stars[k] = v.map(_range)
-    val = 0
-
-stars_external = {}
-for k, v in external_datasets['stars'].items():
-    stars_external[k] = v.map(_range)
-    val = 0
-
-
-issues = {}
-for k, v in datasets['issues'].items():
-    issues[k] = v.map(_range)
-    val = 0
-    issues[k] = issues[k].map(_ignore_org_members)
-    val = 0
-
-
-issues_external = {}
-for k, v in external_datasets['issues'].items():
-    issues_external[k] = v.map(_range)
-    val = 0
-    issues_external[k] = issues_external[k].map(_ignore_org_members)
-    val = 0
-
-datasets['stars'] = DatasetDict(**stars)
-datasets['issues'] = DatasetDict(**issues)
-external_datasets['stars'] = DatasetDict(**stars_external)
-external_datasets['issues'] = DatasetDict(**issues_external)
-
-
-def link_values(library_names, returned_values):
-    previous_values = {library_name: None for library_name in library_names}
-    for library_name in library_names:
-        for i in returned_values.keys():
-            if library_name not in returned_values[i]:
-                returned_values[i][library_name] = previous_values[library_name]
-            else:
-                previous_values[library_name] = returned_values[i][library_name]
-
-    return returned_values
-
-
-def running_mean(x, N, total_length=-1):
-    cumsum = np.cumsum(np.insert(x, 0, 0))
-    to_pad = max(total_length - len(cumsum), 0)
-    return np.pad(cumsum[N:] - cumsum[:-N], (to_pad, 0)) / float(N)
+def cut_output(full_output: Dataset, library_names: list):
+    output = full_output.to_dict().items()
+    output = {k: v + [None] for k, v in output if k in library_names + ['day']}
+    last_value = max(output[k].index(None) for k in output.keys() if k != 'day')
+    return {k: v[:last_value] for k, v in output.items()}
 
 
 def parse_name_and_options(path):
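The trimming done by the new cut_output helper is easiest to see on toy data. Below is a standalone sketch of the same logic, using a plain dict in place of a datasets.Dataset; the column names and values are invented for illustration.

# Standalone sketch of cut_output's trimming, assuming each split stores one
# column per library plus a shared 'day' column (toy values, not real metrics).
full_output = {
    "day": ["2022-01-01", "2022-01-08", "2022-01-15"],
    "transformers": [100, 120, None],  # shorter series are padded with None
    "datasets": [10, 15, 18],
}

def cut_output_dict(full_output: dict, library_names: list):
    # Keep only the requested libraries plus the 'day' axis; the appended
    # None sentinel guarantees .index(None) succeeds for every column.
    output = {k: v + [None] for k, v in full_output.items() if k in library_names + ['day']}
    # Cut at the column that runs out of data last, so no selected library
    # loses real trailing values.
    last_value = max(output[k].index(None) for k in output.keys() if k != 'day')
    return {k: v[:last_value] for k, v in output.items()}

print(cut_output_dict(full_output, ["transformers", "datasets"]))
# {'day': ['2022-01-01', '2022-01-08', '2022-01-15'],
#  'transformers': [100, 120, None], 'datasets': [10, 15, 18]}
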
@@ -152,10 +82,12 @@ class RequestHandler(SimpleHTTPRequestHandler):
         external_dataset_with_most_splits = list(external_dataset_with_most_splits)
         external_dataset_with_most_splits.sort()
 
+        warnings.append("Selecting PyTorch and/or TensorFlow will take a while to compute, and may timeout for issues/PRs..")
+
         res = {
             'internal': dataset_with_most_splits,
             'external': external_dataset_with_most_splits,
-            'warnings':
+            'warnings': []
         }
 
         print(f"Returning: {res}")
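For context, this res payload is what index.js parses as inferJson further down. A hypothetical instance of its shape, where the library names are illustrative and only the warning string is taken from the diff:

# Hypothetical shape of the payload index.js reads as inferJson.
res = {
    'internal': ['accelerate', 'datasets', 'transformers'],  # sorted internal repos
    'external': ['pytorch', 'tensorflow'],                   # sorted external repos
    'warnings': ['Selecting PyTorch and/or TensorFlow will take a while to compute, and may timeout for issues/PRs..'],
}
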
@@ -215,90 +147,29 @@ class RequestHandler(SimpleHTTPRequestHandler):
             return self.response(output)
 
         if self.path.startswith("/retrieveStars"):
-            errors = []
             library_names, options = parse_name_and_options(self.path)
-            returned_values = {}
-            dataset_dict = datasets['stars']
-            external_dataset_dict = external_datasets['stars']
             week_over_week = '1' in options
 
-            for library_name in library_names:
-                if library_name in dataset_dict:
-                    dataset = dataset_dict[library_name]
-                elif library_name in external_dataset_dict:
-                    dataset = external_dataset_dict[library_name]
-                else:
-                    errors.append(f"No {library_name} found in internal or external datasets for stars.")
-                    return {'errors': errors}
-
-            last_value = 0
-            last_week = dataset[0]['week']
-            for i in dataset:
-                if week_over_week and last_week == i['week']:
-                    continue
-                if i['dates'] in returned_values:
-                    returned_values[i['dates']][library_name] = i['range'] - last_value
-                else:
-                    returned_values[i['dates']] = {library_name: i['range'] - last_value}
-
-                last_value = i['range'] if week_over_week else 0
-                last_week = i['week']
-
-            returned_values = collections.OrderedDict(sorted(returned_values.items()))
-            returned_values = link_values(library_names, returned_values)
-            output = {l: [k[l] for k in returned_values.values()][::-1] for l in library_names}
-            output['day'] = list(returned_values.keys())[::-1]
-
-            # Trim down to a smaller number of points.
-            output = {k: [v for i, v in enumerate(value) if i % max(1, int(len(value) / 100)) == 0] for k, value in output.items()}
-
-            return self.response(output)
-
+            if week_over_week:
+                return self.response({k: v for k, v in datasets['stars']['wow'].to_dict().items() if k in library_names + ['day']})
+            else:
+                return self.response({k: v for k, v in datasets['stars']['wow'].to_dict().items() if k in library_names + ['day']})
 
         if self.path.startswith("/retrieveIssues"):
-            errors = []
             library_names, options = parse_name_and_options(self.path)
-
             exclude_org_members = '1' in options
             week_over_week = '2' in options
 
-            returned_values = {}
-            dataset_dict = datasets['issues']
-            external_dataset_dict = external_datasets['issues']
-            range_id = 'range' if not exclude_org_members else 'range_non_org'
-
-            for library_name in library_names:
-                if library_name in dataset_dict:
-                    dataset = dataset_dict[library_name]
-                elif library_name in external_dataset_dict:
-                    dataset = external_dataset_dict[library_name]
+            if week_over_week:
+                if exclude_org_members:
+                    return self.response(cut_output(datasets['issues']['eom_wow'], library_names))
                 else:
-                    errors.append(f"No {library_name} found in internal or external datasets for issues.")
-                    return {'errors': errors}
-
-            last_value = 0
-            last_week = dataset[0]['week']
-            for i in dataset:
-                if week_over_week and last_week == i['week']:
-                    continue
-
-                if i['dates'] in returned_values:
-                    returned_values[i['dates']][library_name] = i[range_id] - last_value
-                else:
-                    returned_values[i['dates']] = {library_name: i[range_id] - last_value}
-
-                last_value = i[range_id] if week_over_week else 0
-                last_week = i['week']
-
-            returned_values = collections.OrderedDict(sorted(returned_values.items()))
-            returned_values = link_values(library_names, returned_values)
-            output = {l: [k[l] for k in returned_values.values()][::-1] for l in library_names}
-            output['day'] = list(returned_values.keys())[::-1]
-
-            # Trim down to a smaller number of points.
-            output = {k: [v for i, v in enumerate(value) if i % max(1, int(len(value) / 100)) == 0] for k, value in output.items()}
-
-            return self.response(output)
+                    return self.response({k: v for k, v in datasets['issues']['wow'].to_dict().items() if k in library_names + ['day']})
+            else:
+                if exclude_org_members:
+                    return self.response({k: v for k, v in datasets['issues']['eom'].to_dict().items() if k in library_names + ['day']})
+                else:
+                    return self.response({k: v for k, v in datasets['issues']['raw'].to_dict().items() if k in library_names + ['day']})
 
         return SimpleHTTPRequestHandler.do_GET(self)
 
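The rewritten handlers assume the preprocessed datasets already ship one split per view (split names 'raw', 'wow', 'eom', 'eom_wow' appear in the diff; 'wow' plausibly week-over-week, 'eom' excluding org members), each holding a 'day' column plus one column per library. A minimal sketch of how a request is then served under that assumption, with invented values:

# Toy stand-in for datasets['issues']['wow'].to_dict(): one column per
# library plus 'day' (split layout inferred from the diff; numbers invented).
wow = {
    'day': ['2022-01-01', '2022-01-08'],
    'transformers': [120, 131],
    'datasets': [40, 44],
    'tokenizers': [12, 13],
}

library_names = ['transformers', 'datasets']  # parsed from the request path

# The handler now just projects the precomputed table onto the requested
# libraries instead of re-aggregating raw events on every request.
response = {k: v for k, v in wow.items() if k in library_names + ['day']}
print(response)
# {'day': ['2022-01-01', '2022-01-08'], 'transformers': [120, 131], 'datasets': [40, 44]}
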
index.js CHANGED

@@ -122,16 +122,16 @@ const initialize = async () => {
     graphSelector.appendChild(graphSpan);
 
     if (inferJson.warnings.length > 0) {
-        const div = document.createElement('div');
-        div.classList.add('warning-div')
-
         for (const warning of inferJson.warnings) {
+            const div = document.createElement('div');
+            div.classList.add('warning-div')
+
             const labelSpan = document.createElement('span');
             labelSpan.textContent = `Warning: ${warning}`;
 
             div.appendChild(labelSpan);
+            warnings.appendChild(div);
         }
-        warnings.appendChild(div);
     }
 
     for (const element of inferJson.internal) {
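As read from the diff, the index.js change moves the div creation and the warnings.appendChild(div) call inside the loop: previously all warning spans shared a single 'warning-div' appended once after the loop, whereas now each warning message gets its own 'warning-div' element in the warnings container.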