Spaces:
Running
on
Zero
Running
on
Zero
Upload TMIDIX.py
Browse files
TMIDIX.py
CHANGED
|
@@ -1,6 +1,5 @@
|
|
| 1 |
#! /usr/bin/python3
|
| 2 |
|
| 3 |
-
|
| 4 |
r'''###############################################################################
|
| 5 |
###################################################################################
|
| 6 |
#
|
|
@@ -8,7 +7,7 @@ r'''############################################################################
|
|
| 8 |
# Tegridy MIDI X Module (TMIDI X / tee-midi eks)
|
| 9 |
# Version 1.0
|
| 10 |
#
|
| 11 |
-
# NOTE: TMIDI X Module starts after the partial MIDI.py module @ line
|
| 12 |
#
|
| 13 |
# Based upon MIDI.py module v.6.7. by Peter Billam / pjb.com.au
|
| 14 |
#
|
|
@@ -1458,8 +1457,6 @@ import os
|
|
| 1458 |
|
| 1459 |
import datetime
|
| 1460 |
|
| 1461 |
-
import copy
|
| 1462 |
-
|
| 1463 |
from datetime import datetime
|
| 1464 |
|
| 1465 |
import secrets
|
|
@@ -1476,12 +1473,12 @@ import multiprocessing
|
|
| 1476 |
|
| 1477 |
from itertools import zip_longest
|
| 1478 |
from itertools import groupby
|
|
|
|
| 1479 |
from collections import Counter
|
|
|
|
| 1480 |
|
| 1481 |
from operator import itemgetter
|
| 1482 |
|
| 1483 |
-
import sys
|
| 1484 |
-
|
| 1485 |
from abc import ABC, abstractmethod
|
| 1486 |
|
| 1487 |
from difflib import SequenceMatcher as SM
|
|
@@ -1493,7 +1490,7 @@ import matplotlib.pyplot as plt
|
|
| 1493 |
|
| 1494 |
import psutil
|
| 1495 |
|
| 1496 |
-
|
| 1497 |
|
| 1498 |
###################################################################################
|
| 1499 |
#
|
|
@@ -4184,6 +4181,17 @@ def advanced_score_processor(raw_score,
|
|
| 4184 |
basic_single_track_score.append(ev)
|
| 4185 |
num_tracks += 1
|
| 4186 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4187 |
basic_single_track_score.sort(key=lambda x: x[4] if x[0] == 'note' else 128, reverse=True)
|
| 4188 |
basic_single_track_score.sort(key=lambda x: x[1])
|
| 4189 |
|
|
@@ -4198,7 +4206,7 @@ def advanced_score_processor(raw_score,
|
|
| 4198 |
enhanced_single_track_score.append(event)
|
| 4199 |
num_patch_changes += 1
|
| 4200 |
|
| 4201 |
-
if event[0] == 'note':
|
| 4202 |
if event[3] != 9:
|
| 4203 |
event.extend([patches[event[3]]])
|
| 4204 |
all_score_patches.extend([patches[event[3]]])
|
|
@@ -11300,7 +11308,7 @@ def create_files_list(datasets_paths=['./'],
|
|
| 11300 |
|
| 11301 |
files_exts = tuple(files_exts)
|
| 11302 |
|
| 11303 |
-
for dataset_addr in tqdm.tqdm(datasets_paths):
|
| 11304 |
for dirpath, dirnames, filenames in os.walk(dataset_addr):
|
| 11305 |
for file in filenames:
|
| 11306 |
if file not in filez_set and file.endswith(files_exts):
|
|
@@ -11363,6 +11371,861 @@ def has_consecutive_trend(nums, count):
|
|
| 11363 |
|
| 11364 |
return False
|
| 11365 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11366 |
###################################################################################
|
| 11367 |
# This is the end of the TMIDI X Python module
|
| 11368 |
###################################################################################
|
|
|
|
| 1 |
#! /usr/bin/python3
|
| 2 |
|
|
|
|
| 3 |
r'''###############################################################################
|
| 4 |
###################################################################################
|
| 5 |
#
|
|
|
|
| 7 |
# Tegridy MIDI X Module (TMIDI X / tee-midi eks)
|
| 8 |
# Version 1.0
|
| 9 |
#
|
| 10 |
+
# NOTE: TMIDI X Module starts after the partial MIDI.py module @ line 1437
|
| 11 |
#
|
| 12 |
# Based upon MIDI.py module v.6.7. by Peter Billam / pjb.com.au
|
| 13 |
#
|
|
|
|
| 1457 |
|
| 1458 |
import datetime
|
| 1459 |
|
|
|
|
|
|
|
| 1460 |
from datetime import datetime
|
| 1461 |
|
| 1462 |
import secrets
|
|
|
|
| 1473 |
|
| 1474 |
from itertools import zip_longest
|
| 1475 |
from itertools import groupby
|
| 1476 |
+
|
| 1477 |
from collections import Counter
|
| 1478 |
+
from collections import defaultdict
|
| 1479 |
|
| 1480 |
from operator import itemgetter
|
| 1481 |
|
|
|
|
|
|
|
| 1482 |
from abc import ABC, abstractmethod
|
| 1483 |
|
| 1484 |
from difflib import SequenceMatcher as SM
|
|
|
|
| 1490 |
|
| 1491 |
import psutil
|
| 1492 |
|
| 1493 |
+
import json
|
| 1494 |
|
| 1495 |
###################################################################################
|
| 1496 |
#
|
|
|
|
| 4181 |
basic_single_track_score.append(ev)
|
| 4182 |
num_tracks += 1
|
| 4183 |
|
| 4184 |
+
for e in basic_single_track_score:
|
| 4185 |
+
|
| 4186 |
+
if e[0] == 'note':
|
| 4187 |
+
e[3] = e[3] % 16
|
| 4188 |
+
e[4] = e[4] % 128
|
| 4189 |
+
e[5] = e[5] % 128
|
| 4190 |
+
|
| 4191 |
+
if e[0] == 'patch_change':
|
| 4192 |
+
e[2] = e[2] % 16
|
| 4193 |
+
e[3] = e[3] % 128
|
| 4194 |
+
|
| 4195 |
basic_single_track_score.sort(key=lambda x: x[4] if x[0] == 'note' else 128, reverse=True)
|
| 4196 |
basic_single_track_score.sort(key=lambda x: x[1])
|
| 4197 |
|
|
|
|
| 4206 |
enhanced_single_track_score.append(event)
|
| 4207 |
num_patch_changes += 1
|
| 4208 |
|
| 4209 |
+
if event[0] == 'note':
|
| 4210 |
if event[3] != 9:
|
| 4211 |
event.extend([patches[event[3]]])
|
| 4212 |
all_score_patches.extend([patches[event[3]]])
|
|
|
|
| 11308 |
|
| 11309 |
files_exts = tuple(files_exts)
|
| 11310 |
|
| 11311 |
+
for dataset_addr in tqdm.tqdm(datasets_paths, disable=not verbose):
|
| 11312 |
for dirpath, dirnames, filenames in os.walk(dataset_addr):
|
| 11313 |
for file in filenames:
|
| 11314 |
if file not in filez_set and file.endswith(files_exts):
|
|
|
|
| 11371 |
|
| 11372 |
return False
|
| 11373 |
|
| 11374 |
+
###################################################################################
|
| 11375 |
+
|
| 11376 |
+
def escore_notes_primary_features(escore_notes):
|
| 11377 |
+
|
| 11378 |
+
#=================================================================
|
| 11379 |
+
|
| 11380 |
+
def mean(values):
|
| 11381 |
+
return sum(values) / len(values) if values else None
|
| 11382 |
+
|
| 11383 |
+
def std(values):
|
| 11384 |
+
if not values:
|
| 11385 |
+
return None
|
| 11386 |
+
m = mean(values)
|
| 11387 |
+
return math.sqrt(sum((x - m) ** 2 for x in values) / len(values)) if m is not None else None
|
| 11388 |
+
|
| 11389 |
+
def skew(values):
|
| 11390 |
+
if not values:
|
| 11391 |
+
return None
|
| 11392 |
+
m = mean(values)
|
| 11393 |
+
s = std(values)
|
| 11394 |
+
if s is None or s == 0:
|
| 11395 |
+
return None
|
| 11396 |
+
return sum(((x - m) / s) ** 3 for x in values) / len(values)
|
| 11397 |
+
|
| 11398 |
+
def kurtosis(values):
|
| 11399 |
+
if not values:
|
| 11400 |
+
return None
|
| 11401 |
+
m = mean(values)
|
| 11402 |
+
s = std(values)
|
| 11403 |
+
if s is None or s == 0:
|
| 11404 |
+
return None
|
| 11405 |
+
return sum(((x - m) / s) ** 4 for x in values) / len(values) - 3
|
| 11406 |
+
|
| 11407 |
+
def median(values):
|
| 11408 |
+
if not values:
|
| 11409 |
+
return None
|
| 11410 |
+
srt = sorted(values)
|
| 11411 |
+
n = len(srt)
|
| 11412 |
+
mid = n // 2
|
| 11413 |
+
if n % 2 == 0:
|
| 11414 |
+
return (srt[mid - 1] + srt[mid]) / 2.0
|
| 11415 |
+
return srt[mid]
|
| 11416 |
+
|
| 11417 |
+
def percentile(values, p):
|
| 11418 |
+
if not values:
|
| 11419 |
+
return None
|
| 11420 |
+
srt = sorted(values)
|
| 11421 |
+
n = len(srt)
|
| 11422 |
+
k = (n - 1) * p / 100.0
|
| 11423 |
+
f = int(k)
|
| 11424 |
+
c = k - f
|
| 11425 |
+
if f + 1 < n:
|
| 11426 |
+
return srt[f] * (1 - c) + srt[f + 1] * c
|
| 11427 |
+
return srt[f]
|
| 11428 |
+
|
| 11429 |
+
def diff(values):
|
| 11430 |
+
if not values or len(values) < 2:
|
| 11431 |
+
return []
|
| 11432 |
+
return [values[i + 1] - values[i] for i in range(len(values) - 1)]
|
| 11433 |
+
|
| 11434 |
+
def mad(values):
|
| 11435 |
+
if not values:
|
| 11436 |
+
return None
|
| 11437 |
+
m = median(values)
|
| 11438 |
+
return median([abs(x - m) for x in values])
|
| 11439 |
+
|
| 11440 |
+
def entropy(values):
|
| 11441 |
+
if not values:
|
| 11442 |
+
return None
|
| 11443 |
+
freq = {}
|
| 11444 |
+
for v in values:
|
| 11445 |
+
freq[v] = freq.get(v, 0) + 1
|
| 11446 |
+
total = len(values)
|
| 11447 |
+
ent = 0.0
|
| 11448 |
+
for count in freq.values():
|
| 11449 |
+
p_val = count / total
|
| 11450 |
+
ent -= p_val * math.log2(p_val)
|
| 11451 |
+
return ent
|
| 11452 |
+
|
| 11453 |
+
def mode(values):
|
| 11454 |
+
if not values:
|
| 11455 |
+
return None
|
| 11456 |
+
freq = {}
|
| 11457 |
+
for v in values:
|
| 11458 |
+
freq[v] = freq.get(v, 0) + 1
|
| 11459 |
+
max_count = max(freq.values())
|
| 11460 |
+
modes = [k for k, count in freq.items() if count == max_count]
|
| 11461 |
+
return min(modes)
|
| 11462 |
+
|
| 11463 |
+
|
| 11464 |
+
#=================================================================
|
| 11465 |
+
|
| 11466 |
+
sp_score = solo_piano_escore_notes(escore_notes)
|
| 11467 |
+
|
| 11468 |
+
dscore = delta_score_notes(sp_score)
|
| 11469 |
+
|
| 11470 |
+
seq = []
|
| 11471 |
+
|
| 11472 |
+
for d in dscore:
|
| 11473 |
+
seq.extend([d[1], d[2], d[4]])
|
| 11474 |
+
|
| 11475 |
+
#=================================================================
|
| 11476 |
+
|
| 11477 |
+
n = len(seq)
|
| 11478 |
+
if n % 3 != 0:
|
| 11479 |
+
seq = seq[: n - (n % 3)]
|
| 11480 |
+
arr = [seq[i:i + 3] for i in range(0, len(seq), 3)]
|
| 11481 |
+
|
| 11482 |
+
#=================================================================
|
| 11483 |
+
|
| 11484 |
+
features = {}
|
| 11485 |
+
|
| 11486 |
+
delta_times = [row[0] for row in arr]
|
| 11487 |
+
if delta_times:
|
| 11488 |
+
features['delta_times_mean'] = mean(delta_times)
|
| 11489 |
+
features['delta_times_std'] = std(delta_times)
|
| 11490 |
+
features['delta_times_min'] = min(delta_times)
|
| 11491 |
+
features['delta_times_max'] = max(delta_times)
|
| 11492 |
+
features['delta_times_skew'] = skew(delta_times)
|
| 11493 |
+
features['delta_times_kurtosis'] = kurtosis(delta_times)
|
| 11494 |
+
delta_zero_count = sum(1 for x in delta_times if x == 0)
|
| 11495 |
+
features['delta_times_zero_ratio'] = delta_zero_count / len(delta_times)
|
| 11496 |
+
nonzero_dt = [x for x in delta_times if x != 0]
|
| 11497 |
+
if nonzero_dt:
|
| 11498 |
+
features['delta_times_nonzero_mean'] = mean(nonzero_dt)
|
| 11499 |
+
features['delta_times_nonzero_std'] = std(nonzero_dt)
|
| 11500 |
+
else:
|
| 11501 |
+
features['delta_times_nonzero_mean'] = None
|
| 11502 |
+
features['delta_times_nonzero_std'] = None
|
| 11503 |
+
features['delta_times_mad'] = mad(delta_times)
|
| 11504 |
+
features['delta_times_cv'] = (features['delta_times_std'] / features['delta_times_mean']
|
| 11505 |
+
if features['delta_times_mean'] and features['delta_times_mean'] != 0 else None)
|
| 11506 |
+
features['delta_times_entropy'] = entropy(delta_times)
|
| 11507 |
+
features['delta_times_range'] = max(delta_times) - min(delta_times)
|
| 11508 |
+
features['delta_times_median'] = median(delta_times)
|
| 11509 |
+
features['delta_times_quantile_25'] = percentile(delta_times, 25)
|
| 11510 |
+
features['delta_times_quantile_75'] = percentile(delta_times, 75)
|
| 11511 |
+
if (features['delta_times_quantile_25'] is not None and features['delta_times_quantile_75'] is not None):
|
| 11512 |
+
features['delta_times_iqr'] = features['delta_times_quantile_75'] - features['delta_times_quantile_25']
|
| 11513 |
+
else:
|
| 11514 |
+
features['delta_times_iqr'] = None
|
| 11515 |
+
else:
|
| 11516 |
+
for key in ['delta_times_mean', 'delta_times_std', 'delta_times_min', 'delta_times_max',
|
| 11517 |
+
'delta_times_skew', 'delta_times_kurtosis', 'delta_times_zero_ratio',
|
| 11518 |
+
'delta_times_nonzero_mean', 'delta_times_nonzero_std', 'delta_times_mad',
|
| 11519 |
+
'delta_times_cv', 'delta_times_entropy', 'delta_times_range', 'delta_times_median',
|
| 11520 |
+
'delta_times_quantile_25', 'delta_times_quantile_75', 'delta_times_iqr']:
|
| 11521 |
+
features[key] = None
|
| 11522 |
+
|
| 11523 |
+
#=================================================================
|
| 11524 |
+
|
| 11525 |
+
durations = [row[1] for row in arr]
|
| 11526 |
+
if durations:
|
| 11527 |
+
features['durations_mean'] = mean(durations)
|
| 11528 |
+
features['durations_std'] = std(durations)
|
| 11529 |
+
features['durations_min'] = min(durations)
|
| 11530 |
+
features['durations_max'] = max(durations)
|
| 11531 |
+
features['durations_skew'] = skew(durations)
|
| 11532 |
+
features['durations_kurtosis'] = kurtosis(durations)
|
| 11533 |
+
features['durations_mad'] = mad(durations)
|
| 11534 |
+
features['durations_cv'] = (features['durations_std'] / features['durations_mean']
|
| 11535 |
+
if features['durations_mean'] and features['durations_mean'] != 0 else None)
|
| 11536 |
+
features['durations_entropy'] = entropy(durations)
|
| 11537 |
+
features['durations_range'] = max(durations) - min(durations)
|
| 11538 |
+
features['durations_median'] = median(durations)
|
| 11539 |
+
features['durations_quantile_25'] = percentile(durations, 25)
|
| 11540 |
+
features['durations_quantile_75'] = percentile(durations, 75)
|
| 11541 |
+
if features['durations_quantile_25'] is not None and features['durations_quantile_75'] is not None:
|
| 11542 |
+
features['durations_iqr'] = features['durations_quantile_75'] - features['durations_quantile_25']
|
| 11543 |
+
else:
|
| 11544 |
+
features['durations_iqr'] = None
|
| 11545 |
+
else:
|
| 11546 |
+
for key in ['durations_mean', 'durations_std', 'durations_min', 'durations_max',
|
| 11547 |
+
'durations_skew', 'durations_kurtosis', 'durations_mad', 'durations_cv',
|
| 11548 |
+
'durations_entropy', 'durations_range', 'durations_median', 'durations_quantile_25',
|
| 11549 |
+
'durations_quantile_75', 'durations_iqr']:
|
| 11550 |
+
features[key] = None
|
| 11551 |
+
|
| 11552 |
+
#=================================================================
|
| 11553 |
+
|
| 11554 |
+
pitches = [row[2] for row in arr]
|
| 11555 |
+
if pitches:
|
| 11556 |
+
features['pitches_mean'] = mean(pitches)
|
| 11557 |
+
features['pitches_std'] = std(pitches)
|
| 11558 |
+
features['pitches_min'] = min(pitches)
|
| 11559 |
+
features['pitches_max'] = max(pitches)
|
| 11560 |
+
features['pitches_skew'] = skew(pitches)
|
| 11561 |
+
features['pitches_kurtosis'] = kurtosis(pitches)
|
| 11562 |
+
features['pitches_range'] = max(pitches) - min(pitches)
|
| 11563 |
+
features['pitches_median'] = median(pitches)
|
| 11564 |
+
features['pitches_quantile_25'] = percentile(pitches, 25)
|
| 11565 |
+
features['pitches_quantile_75'] = percentile(pitches, 75)
|
| 11566 |
+
if len(pitches) > 1:
|
| 11567 |
+
dps = diff(pitches)
|
| 11568 |
+
features['pitches_diff_mean'] = mean(dps)
|
| 11569 |
+
features['pitches_diff_std'] = std(dps)
|
| 11570 |
+
else:
|
| 11571 |
+
features['pitches_diff_mean'] = None
|
| 11572 |
+
features['pitches_diff_std'] = None
|
| 11573 |
+
features['pitches_mad'] = mad(pitches)
|
| 11574 |
+
if len(pitches) > 2:
|
| 11575 |
+
peaks = sum(1 for i in range(1, len(pitches)-1)
|
| 11576 |
+
if pitches[i] > pitches[i-1] and pitches[i] > pitches[i+1])
|
| 11577 |
+
valleys = sum(1 for i in range(1, len(pitches)-1)
|
| 11578 |
+
if pitches[i] < pitches[i-1] and pitches[i] < pitches[i+1])
|
| 11579 |
+
else:
|
| 11580 |
+
peaks, valleys = None, None
|
| 11581 |
+
features['pitches_peak_count'] = peaks
|
| 11582 |
+
features['pitches_valley_count'] = valleys
|
| 11583 |
+
if len(pitches) > 1:
|
| 11584 |
+
x = list(range(len(pitches)))
|
| 11585 |
+
denominator = (len(x) * sum(xi ** 2 for xi in x) - sum(x) ** 2)
|
| 11586 |
+
if denominator != 0:
|
| 11587 |
+
slope = (len(x) * sum(x[i] * pitches[i] for i in range(len(x))) -
|
| 11588 |
+
sum(x) * sum(pitches)) / denominator
|
| 11589 |
+
else:
|
| 11590 |
+
slope = None
|
| 11591 |
+
features['pitches_trend_slope'] = slope
|
| 11592 |
+
else:
|
| 11593 |
+
features['pitches_trend_slope'] = None
|
| 11594 |
+
|
| 11595 |
+
features['pitches_unique_count'] = len(set(pitches))
|
| 11596 |
+
pitch_class_hist = {i: 0 for i in range(12)}
|
| 11597 |
+
for p in pitches:
|
| 11598 |
+
pitch_class_hist[p % 12] += 1
|
| 11599 |
+
total_pitch = len(pitches)
|
| 11600 |
+
for i in range(12):
|
| 11601 |
+
features[f'pitches_pc_{i}'] = (pitch_class_hist[i] / total_pitch) if total_pitch > 0 else None
|
| 11602 |
+
|
| 11603 |
+
max_asc = 0
|
| 11604 |
+
cur_asc = 0
|
| 11605 |
+
max_desc = 0
|
| 11606 |
+
cur_desc = 0
|
| 11607 |
+
for i in range(1, len(pitches)):
|
| 11608 |
+
if pitches[i] > pitches[i-1]:
|
| 11609 |
+
cur_asc += 1
|
| 11610 |
+
max_asc = max(max_asc, cur_asc)
|
| 11611 |
+
cur_desc = 0
|
| 11612 |
+
elif pitches[i] < pitches[i-1]:
|
| 11613 |
+
cur_desc += 1
|
| 11614 |
+
max_desc = max(max_desc, cur_desc)
|
| 11615 |
+
cur_asc = 0
|
| 11616 |
+
else:
|
| 11617 |
+
cur_asc = 0
|
| 11618 |
+
cur_desc = 0
|
| 11619 |
+
features['pitches_max_consecutive_ascending'] = max_asc if pitches else None
|
| 11620 |
+
features['pitches_max_consecutive_descending'] = max_desc if pitches else None
|
| 11621 |
+
p_intervals = diff(pitches)
|
| 11622 |
+
features['pitches_median_diff'] = median(p_intervals) if p_intervals else None
|
| 11623 |
+
if p_intervals:
|
| 11624 |
+
dc = sum(1 for i in range(1, len(p_intervals))
|
| 11625 |
+
if (p_intervals[i] > 0 and p_intervals[i-1] < 0) or (p_intervals[i] < 0 and p_intervals[i-1] > 0))
|
| 11626 |
+
features['pitches_direction_changes'] = dc
|
| 11627 |
+
else:
|
| 11628 |
+
features['pitches_direction_changes'] = None
|
| 11629 |
+
else:
|
| 11630 |
+
for key in (['pitches_mean', 'pitches_std', 'pitches_min', 'pitches_max', 'pitches_skew',
|
| 11631 |
+
'pitches_kurtosis', 'pitches_range', 'pitches_median', 'pitches_quantile_25',
|
| 11632 |
+
'pitches_quantile_75', 'pitches_diff_mean', 'pitches_diff_std', 'pitches_mad',
|
| 11633 |
+
'pitches_peak_count', 'pitches_valley_count', 'pitches_trend_slope',
|
| 11634 |
+
'pitches_unique_count', 'pitches_max_consecutive_ascending', 'pitches_max_consecutive_descending',
|
| 11635 |
+
'pitches_median_diff', 'pitches_direction_changes'] +
|
| 11636 |
+
[f'pitches_pc_{i}' for i in range(12)]):
|
| 11637 |
+
features[key] = None
|
| 11638 |
+
|
| 11639 |
+
#=================================================================
|
| 11640 |
+
|
| 11641 |
+
overall = [x for row in arr for x in row]
|
| 11642 |
+
if overall:
|
| 11643 |
+
features['overall_mean'] = mean(overall)
|
| 11644 |
+
features['overall_std'] = std(overall)
|
| 11645 |
+
features['overall_min'] = min(overall)
|
| 11646 |
+
features['overall_max'] = max(overall)
|
| 11647 |
+
features['overall_cv'] = (features['overall_std'] / features['overall_mean']
|
| 11648 |
+
if features['overall_mean'] and features['overall_mean'] != 0 else None)
|
| 11649 |
+
else:
|
| 11650 |
+
for key in ['overall_mean', 'overall_std', 'overall_min', 'overall_max', 'overall_cv']:
|
| 11651 |
+
features[key] = None
|
| 11652 |
+
|
| 11653 |
+
#=================================================================
|
| 11654 |
+
|
| 11655 |
+
onsets = []
|
| 11656 |
+
cumulative = 0
|
| 11657 |
+
for dt in delta_times:
|
| 11658 |
+
onsets.append(cumulative)
|
| 11659 |
+
cumulative += dt
|
| 11660 |
+
if onsets and durations:
|
| 11661 |
+
overall_piece_duration = onsets[-1] + durations[-1]
|
| 11662 |
+
else:
|
| 11663 |
+
overall_piece_duration = None
|
| 11664 |
+
features['overall_piece_duration'] = overall_piece_duration
|
| 11665 |
+
features['overall_notes_density'] = (len(arr) / overall_piece_duration
|
| 11666 |
+
if overall_piece_duration and overall_piece_duration > 0 else None)
|
| 11667 |
+
features['rhythm_ratio'] = (features['durations_mean'] / features['delta_times_mean']
|
| 11668 |
+
if features['delta_times_mean'] and features['delta_times_mean'] != 0 else None)
|
| 11669 |
+
features['overall_sum_delta_times'] = (sum(delta_times) if delta_times else None)
|
| 11670 |
+
features['overall_sum_durations'] = (sum(durations) if durations else None)
|
| 11671 |
+
features['overall_voicing_ratio'] = (sum(durations) / overall_piece_duration
|
| 11672 |
+
if overall_piece_duration and durations else None)
|
| 11673 |
+
features['overall_onset_std'] = std(onsets) if onsets else None
|
| 11674 |
+
|
| 11675 |
+
#=================================================================
|
| 11676 |
+
|
| 11677 |
+
chords_raw = []
|
| 11678 |
+
chords_pc = []
|
| 11679 |
+
current_group = []
|
| 11680 |
+
for i, note in enumerate(arr):
|
| 11681 |
+
dt = note[0]
|
| 11682 |
+
if i == 0:
|
| 11683 |
+
current_group = [i]
|
| 11684 |
+
else:
|
| 11685 |
+
if dt == 0:
|
| 11686 |
+
current_group.append(i)
|
| 11687 |
+
else:
|
| 11688 |
+
if len(current_group) >= 2:
|
| 11689 |
+
chord_notes = [arr[j][2] for j in current_group]
|
| 11690 |
+
chords_raw.append(tuple(sorted(chord_notes)))
|
| 11691 |
+
chords_pc.append(tuple(sorted(set(p % 12 for p in chord_notes))))
|
| 11692 |
+
|
| 11693 |
+
current_group = [i]
|
| 11694 |
+
|
| 11695 |
+
if current_group and len(current_group) >= 2:
|
| 11696 |
+
chord_notes = [arr[j][2] for j in current_group]
|
| 11697 |
+
chords_raw.append(tuple(sorted(chord_notes)))
|
| 11698 |
+
chords_pc.append(tuple(sorted(set(p % 12 for p in chord_notes))))
|
| 11699 |
+
|
| 11700 |
+
if chords_raw:
|
| 11701 |
+
chord_count = len(chords_raw)
|
| 11702 |
+
features['chords_count'] = chord_count
|
| 11703 |
+
features['chords_density'] = (chord_count / overall_piece_duration
|
| 11704 |
+
if overall_piece_duration and chord_count is not None else None)
|
| 11705 |
+
chord_sizes = [len(ch) for ch in chords_raw]
|
| 11706 |
+
features['chords_size_mean'] = mean(chord_sizes)
|
| 11707 |
+
features['chords_size_std'] = std(chord_sizes)
|
| 11708 |
+
features['chords_size_min'] = min(chord_sizes) if chord_sizes else None
|
| 11709 |
+
features['chords_size_max'] = max(chord_sizes) if chord_sizes else None
|
| 11710 |
+
features['chords_unique_raw_count'] = len(set(chords_raw))
|
| 11711 |
+
features['chords_unique_pc_count'] = len(set(chords_pc))
|
| 11712 |
+
features['chords_entropy_raw'] = entropy(chords_raw)
|
| 11713 |
+
features['chords_entropy_pc'] = entropy(chords_pc)
|
| 11714 |
+
if len(chords_raw) > 1:
|
| 11715 |
+
rep_raw = sum(1 for i in range(1, len(chords_raw)) if chords_raw[i] == chords_raw[i - 1])
|
| 11716 |
+
features['chords_repeat_ratio_raw'] = rep_raw / (len(chords_raw) - 1)
|
| 11717 |
+
else:
|
| 11718 |
+
features['chords_repeat_ratio_raw'] = None
|
| 11719 |
+
if len(chords_pc) > 1:
|
| 11720 |
+
rep_pc = sum(1 for i in range(1, len(chords_pc)) if chords_pc[i] == chords_pc[i - 1])
|
| 11721 |
+
features['chords_repeat_ratio_pc'] = rep_pc / (len(chords_pc) - 1)
|
| 11722 |
+
else:
|
| 11723 |
+
features['chords_repeat_ratio_pc'] = None
|
| 11724 |
+
if len(chords_raw) > 1:
|
| 11725 |
+
bigrams_raw = [(chords_raw[i], chords_raw[i + 1]) for i in range(len(chords_raw) - 1)]
|
| 11726 |
+
features['chords_bigram_entropy_raw'] = entropy(bigrams_raw)
|
| 11727 |
+
else:
|
| 11728 |
+
features['chords_bigram_entropy_raw'] = None
|
| 11729 |
+
if len(chords_pc) > 1:
|
| 11730 |
+
bigrams_pc = [(chords_pc[i], chords_pc[i + 1]) for i in range(len(chords_pc) - 1)]
|
| 11731 |
+
features['chords_bigram_entropy_pc'] = entropy(bigrams_pc)
|
| 11732 |
+
else:
|
| 11733 |
+
features['chords_bigram_entropy_pc'] = None
|
| 11734 |
+
features['chords_mode_raw'] = mode(chords_raw)
|
| 11735 |
+
features['chords_mode_pc'] = mode(chords_pc)
|
| 11736 |
+
if chords_pc:
|
| 11737 |
+
pc_sizes = [len(ch) for ch in chords_pc]
|
| 11738 |
+
features['chords_pc_size_mean'] = mean(pc_sizes)
|
| 11739 |
+
else:
|
| 11740 |
+
features['chords_pc_size_mean'] = None
|
| 11741 |
+
else:
|
| 11742 |
+
for key in ['chords_count', 'chords_density', 'chords_size_mean', 'chords_size_std',
|
| 11743 |
+
'chords_size_min', 'chords_size_max', 'chords_unique_raw_count', 'chords_unique_pc_count',
|
| 11744 |
+
'chords_entropy_raw', 'chords_entropy_pc', 'chords_repeat_ratio_raw', 'chords_repeat_ratio_pc',
|
| 11745 |
+
'chords_bigram_entropy_raw', 'chords_bigram_entropy_pc', 'chords_mode_raw', 'chords_mode_pc',
|
| 11746 |
+
'chords_pc_size_mean']:
|
| 11747 |
+
features[key] = None
|
| 11748 |
+
|
| 11749 |
+
#=================================================================
|
| 11750 |
+
|
| 11751 |
+
if delta_times:
|
| 11752 |
+
med_dt = features['delta_times_median']
|
| 11753 |
+
iqr_dt = features['delta_times_iqr']
|
| 11754 |
+
threshold_a = med_dt + 1.5 * iqr_dt if med_dt is not None and iqr_dt is not None else None
|
| 11755 |
+
threshold_b = percentile(delta_times, 90)
|
| 11756 |
+
if threshold_a is not None and threshold_b is not None:
|
| 11757 |
+
phrase_threshold = max(threshold_a, threshold_b)
|
| 11758 |
+
elif threshold_a is not None:
|
| 11759 |
+
phrase_threshold = threshold_a
|
| 11760 |
+
elif threshold_b is not None:
|
| 11761 |
+
phrase_threshold = threshold_b
|
| 11762 |
+
else:
|
| 11763 |
+
phrase_threshold = None
|
| 11764 |
+
else:
|
| 11765 |
+
phrase_threshold = None
|
| 11766 |
+
|
| 11767 |
+
phrases = []
|
| 11768 |
+
current_phrase = []
|
| 11769 |
+
if onsets:
|
| 11770 |
+
current_phrase.append(0)
|
| 11771 |
+
for i in range(len(onsets) - 1):
|
| 11772 |
+
gap = onsets[i + 1] - onsets[i]
|
| 11773 |
+
if phrase_threshold is not None and gap > phrase_threshold:
|
| 11774 |
+
phrases.append(current_phrase)
|
| 11775 |
+
current_phrase = []
|
| 11776 |
+
current_phrase.append(i + 1)
|
| 11777 |
+
if current_phrase:
|
| 11778 |
+
phrases.append(current_phrase)
|
| 11779 |
+
if phrases:
|
| 11780 |
+
phrase_note_counts = []
|
| 11781 |
+
phrase_durations = []
|
| 11782 |
+
phrase_densities = []
|
| 11783 |
+
phrase_mean_pitches = []
|
| 11784 |
+
phrase_pitch_ranges = []
|
| 11785 |
+
phrase_start_times = []
|
| 11786 |
+
phrase_end_times = []
|
| 11787 |
+
for phrase in phrases:
|
| 11788 |
+
note_count = len(phrase)
|
| 11789 |
+
phrase_note_counts.append(note_count)
|
| 11790 |
+
ph_start = onsets[phrase[0]]
|
| 11791 |
+
ph_end = onsets[phrase[-1]] + durations[phrase[-1]]
|
| 11792 |
+
phrase_start_times.append(ph_start)
|
| 11793 |
+
phrase_end_times.append(ph_end)
|
| 11794 |
+
ph_duration = ph_end - ph_start
|
| 11795 |
+
phrase_durations.append(ph_duration)
|
| 11796 |
+
density = note_count / ph_duration if ph_duration > 0 else None
|
| 11797 |
+
phrase_densities.append(density)
|
| 11798 |
+
ph_pitches = [pitches[i] for i in phrase if i < len(pitches)]
|
| 11799 |
+
phrase_mean_pitches.append(mean(ph_pitches) if ph_pitches else None)
|
| 11800 |
+
phrase_pitch_ranges.append((max(ph_pitches) - min(ph_pitches)) if ph_pitches else None)
|
| 11801 |
+
if len(phrases) > 1:
|
| 11802 |
+
phrase_gaps = []
|
| 11803 |
+
for i in range(len(phrases) - 1):
|
| 11804 |
+
gap = phrase_start_times[i + 1] - phrase_end_times[i]
|
| 11805 |
+
phrase_gaps.append(gap if gap > 0 else 0)
|
| 11806 |
+
else:
|
| 11807 |
+
phrase_gaps = []
|
| 11808 |
+
features['phrases_count'] = len(phrases)
|
| 11809 |
+
features['phrases_avg_note_count'] = mean(phrase_note_counts) if phrase_note_counts else None
|
| 11810 |
+
features['phrases_std_note_count'] = std(phrase_note_counts) if phrase_note_counts else None
|
| 11811 |
+
features['phrases_min_note_count'] = min(phrase_note_counts) if phrase_note_counts else None
|
| 11812 |
+
features['phrases_max_note_count'] = max(phrase_note_counts) if phrase_note_counts else None
|
| 11813 |
+
features['phrases_avg_duration'] = mean(phrase_durations) if phrase_durations else None
|
| 11814 |
+
features['phrases_std_duration'] = std(phrase_durations) if phrase_durations else None
|
| 11815 |
+
features['phrases_min_duration'] = min(phrase_durations) if phrase_durations else None
|
| 11816 |
+
features['phrases_max_duration'] = max(phrase_durations) if phrase_durations else None
|
| 11817 |
+
features['phrases_avg_density'] = mean(phrase_densities) if phrase_densities else None
|
| 11818 |
+
features['phrases_std_density'] = std(phrase_densities) if phrase_densities else None
|
| 11819 |
+
features['phrases_avg_mean_pitch'] = mean(phrase_mean_pitches) if phrase_mean_pitches else None
|
| 11820 |
+
features['phrases_avg_pitch_range'] = mean(phrase_pitch_ranges) if phrase_pitch_ranges else None
|
| 11821 |
+
if phrase_gaps:
|
| 11822 |
+
features['phrases_avg_gap'] = mean(phrase_gaps)
|
| 11823 |
+
features['phrases_std_gap'] = std(phrase_gaps)
|
| 11824 |
+
features['phrases_min_gap'] = min(phrase_gaps)
|
| 11825 |
+
features['phrases_max_gap'] = max(phrase_gaps)
|
| 11826 |
+
else:
|
| 11827 |
+
features['phrases_avg_gap'] = None
|
| 11828 |
+
features['phrases_std_gap'] = None
|
| 11829 |
+
features['phrases_min_gap'] = None
|
| 11830 |
+
features['phrases_max_gap'] = None
|
| 11831 |
+
features['phrases_threshold'] = phrase_threshold
|
| 11832 |
+
else:
|
| 11833 |
+
for key in ['phrases_count', 'phrases_avg_note_count', 'phrases_std_note_count',
|
| 11834 |
+
'phrases_min_note_count', 'phrases_max_note_count', 'phrases_avg_duration',
|
| 11835 |
+
'phrases_std_duration', 'phrases_min_duration', 'phrases_max_duration',
|
| 11836 |
+
'phrases_avg_density', 'phrases_std_density', 'phrases_avg_mean_pitch',
|
| 11837 |
+
'phrases_avg_pitch_range', 'phrases_avg_gap', 'phrases_std_gap',
|
| 11838 |
+
'phrases_min_gap', 'phrases_max_gap', 'phrases_threshold']:
|
| 11839 |
+
features[key] = None
|
| 11840 |
+
|
| 11841 |
+
#=================================================================
|
| 11842 |
+
|
| 11843 |
+
return features
|
| 11844 |
+
|
| 11845 |
+
###################################################################################
|
| 11846 |
+
|
| 11847 |
+
def winsorized_normalize(data, new_range=(0, 255), clip=1.5):

    """Winsorize *data* at clip*IQR beyond the quartiles, then rescale to new_range.

    Values outside [q1 - clip*iqr, q3 + clip*iqr] are clamped before the
    min-max rescale, which makes the normalization robust to outliers.

    Parameters:
        data      -- sequence of numbers (an empty sequence returns []).
        new_range -- (new_min, new_max) target interval for the output ints.
        clip      -- IQR multiplier that sets the winsorization bounds.

    Returns a list of ints, one per input value, mapped into new_range.
    """

    # Robustness: min()/max() below would raise on an empty sequence.
    if not data:
        return []

    new_min, new_max = new_range

    #=================================================================

    def percentile(values, p):
        """Linearly interpolated percentile p (0-100) of *values*."""

        srt = sorted(values)
        n = len(srt)
        if n == 1:
            return srt[0]
        k = (n - 1) * p / 100.0
        f = int(k)
        c = k - f
        if f + 1 < n:
            return srt[f] * (1 - c) + srt[f + 1] * c

        return srt[f]

    #=================================================================

    q1 = percentile(data, 25)
    q3 = percentile(data, 75)
    iqr = q3 - q1

    lower_bound_w = q1 - clip * iqr
    upper_bound_w = q3 + clip * iqr

    data_min = min(data)
    data_max = max(data)
    # Never clip beyond the actual data extremes.
    effective_low = max(lower_bound_w, data_min)
    effective_high = min(upper_bound_w, data_max)

    #=================================================================

    if effective_high == effective_low:

        # Degenerate winsorized range: fall back to a plain min-max rescale.
        if data_max == data_min:
            # All values identical -> map everything to the low end.
            return [int(new_min)] * len(data)

        normalized = [(x - data_min) / (data_max - data_min) for x in data]

        return [int(round(new_min + norm * (new_max - new_min))) for norm in normalized]

    #=================================================================

    clipped = [min(max(x, effective_low), effective_high) for x in data]

    normalized = [(x - effective_low) / (effective_high - effective_low) for x in clipped]

    #=================================================================

    return [int(round(new_min + norm * (new_max - new_min))) for norm in normalized]
|
| 11904 |
+
|
| 11905 |
+
###################################################################################
|
| 11906 |
+
|
| 11907 |
+
def tokenize_features_to_ints_winsorized(features, new_range=(0, 255), clip=1.5, none_token=-1):

    """Convert a features dict into raw values, int tokens and winsorized-normalized tokens.

    Keys are processed in sorted order so the outputs are deterministically
    ordered. Numeric values become their rounded absolute value, sequences
    become the rounded absolute mean, and anything else is hashed to a
    stable 8-digit integer. None values map to *none_token*.

    Returns (values, tokens, norm_tokens) -- three parallel lists.
    """

    import hashlib

    values = []
    tokens = []

    #=================================================================

    def process_value(val):
        """Map an arbitrary feature value to a non-negative int token."""

        if isinstance(val, (int, float)):
            return int(round(abs(val)))

        elif isinstance(val, (list, tuple)):
            return int(round(abs(sum(val) / len(val))))

        else:
            # Stable across interpreter runs, unlike built-in hash(),
            # which is randomized for strings by PYTHONHASHSEED.
            digest = hashlib.md5(repr(val).encode('utf-8')).hexdigest()
            return int(digest, 16) % (10 ** 8)

    #=================================================================

    for key in sorted(features.keys()):

        value = features[key]

        if value is None:
            tokens.append(none_token)
            values.append(none_token)

        else:
            tokens.append(process_value(value))

            if isinstance(value, (list, tuple)):
                values.append(sum(value) / len(value))

            else:
                values.append(value)

    #=================================================================

    norm_tokens = winsorized_normalize(tokens, new_range, clip)

    #=================================================================

    return values, tokens, norm_tokens
|
| 11951 |
+
|
| 11952 |
+
###################################################################################
|
| 11953 |
+
|
| 11954 |
+
def write_jsonl(records_dicts_list,
|
| 11955 |
+
file_name='data',
|
| 11956 |
+
file_ext='.jsonl',
|
| 11957 |
+
file_mode='w',
|
| 11958 |
+
line_sep='\n',
|
| 11959 |
+
verbose=True
|
| 11960 |
+
):
|
| 11961 |
+
|
| 11962 |
+
if verbose:
|
| 11963 |
+
print('=' * 70)
|
| 11964 |
+
print('Writing', len(records_dicts_list), 'records to jsonl file...')
|
| 11965 |
+
print('=' * 70)
|
| 11966 |
+
|
| 11967 |
+
if not os.path.splitext(file_name)[1]:
|
| 11968 |
+
file_name += file_ext
|
| 11969 |
+
|
| 11970 |
+
l_count = 0
|
| 11971 |
+
|
| 11972 |
+
with open(file_name, mode=file_mode) as f:
|
| 11973 |
+
for record in tqdm.tqdm(records_dicts_list, disable=not verbose):
|
| 11974 |
+
f.write(json.dumps(record) + line_sep)
|
| 11975 |
+
l_count += 1
|
| 11976 |
+
|
| 11977 |
+
f.close()
|
| 11978 |
+
|
| 11979 |
+
if verbose:
|
| 11980 |
+
print('=' * 70)
|
| 11981 |
+
print('Written total of', l_count, 'jsonl records.')
|
| 11982 |
+
print('=' * 70)
|
| 11983 |
+
print('Done!')
|
| 11984 |
+
print('=' * 70)
|
| 11985 |
+
|
| 11986 |
+
###################################################################################
|
| 11987 |
+
|
| 11988 |
+
def read_jsonl(file_name='data',
               file_ext='.jsonl',
               verbose=True
               ):

    """Read all records from a jsonl (JSON Lines) file.

    Corrupted lines are skipped with a warning. A KeyboardInterrupt stops
    reading early and returns whatever was loaded so far.

    Parameters:
        file_name -- input file name; *file_ext* is appended when it has no extension.
        verbose   -- print progress and a summary when True.

    Returns a list of decoded records.
    """

    if verbose:
        print('=' * 70)
        print('Reading jsonl file...')
        print('=' * 70)

    if not os.path.splitext(file_name)[1]:
        file_name += file_ext

    records = []
    gl_count = 0

    # The with-statement closes the file on every exit path;
    # no explicit close() calls are needed.
    with open(file_name, 'r') as f:

        for i, line in tqdm.tqdm(enumerate(f), disable=not verbose):

            try:
                record = json.loads(line)
                records.append(record)
                gl_count += 1

            except KeyboardInterrupt:
                if verbose:
                    print('=' * 70)
                    print('Stopping...')
                    print('=' * 70)

                return records

            except json.JSONDecodeError:
                if verbose:
                    print('=' * 70)
                    print('[ERROR] Line', i, 'is corrupted! Skipping it...')
                    print('=' * 70)

                continue

    if verbose:
        print('=' * 70)
        print('Loaded total of', gl_count, 'jsonl records.')
        print('=' * 70)
        print('Done!')
        print('=' * 70)

    return records
|
| 12041 |
+
|
| 12042 |
+
###################################################################################
|
| 12043 |
+
|
| 12044 |
+
def read_jsonl_lines(lines_indexes_list,
                     file_name='data',
                     file_ext='.jsonl',
                     verbose=True
                     ):

    """Read only the records at the given line indexes from a jsonl file.

    Parameters:
        lines_indexes_list -- 0-based line numbers to load (left unmodified).
        file_name -- input file name; *file_ext* is appended when it has no extension.
        verbose   -- print progress and a summary when True.

    Records are returned in file order. Reading stops as soon as every
    requested line has been seen. Corrupted lines are skipped with a
    warning; a KeyboardInterrupt returns the records loaded so far.
    """

    if verbose:
        print('=' * 70)
        print('Reading jsonl file...')
        print('=' * 70)

    if not os.path.splitext(file_name)[1]:
        file_name += file_ext

    records = []
    l_count = 0

    # Local set: O(1) membership tests and no mutation of the caller's list
    # (the original implementation sorted the argument in place).
    wanted = set(lines_indexes_list)

    with open(file_name, 'r') as f:
        for current_line_number, line in tqdm.tqdm(enumerate(f)):

            try:
                if current_line_number in wanted:
                    record = json.loads(line)
                    records.append(record)
                    wanted.discard(current_line_number)
                    l_count += 1

                    if not wanted:
                        break

            except KeyboardInterrupt:
                if verbose:
                    print('=' * 70)
                    print('Stopping...')
                    print('=' * 70)

                return records

            except json.JSONDecodeError:
                if verbose:
                    print('=' * 70)
                    print('[ERROR] Line', current_line_number, 'is corrupted! Skipping it...')
                    print('=' * 70)

                continue

    if verbose:
        print('=' * 70)
        print('Loaded total of', l_count, 'jsonl records.')
        print('=' * 70)
        print('Done!')
        print('=' * 70)

    return records
|
| 12104 |
+
|
| 12105 |
+
###################################################################################
|
| 12106 |
+
|
| 12107 |
+
def compute_base(x: int, n: int) -> int:

    """Return the smallest base b >= 2 such that x fits in n base-b digits (b**n > x).

    Raises ValueError if x is negative or n is not positive.
    """

    if x < 0:
        raise ValueError("x must be non-negative.")
    if n <= 0:
        raise ValueError("n must be positive.")
    if x == 0:
        return 2

    # Float n-th root is only an estimate; correct it in both directions
    # with exact integer arithmetic so the result is right even when
    # x ** (1 / n) is off by more than one (the original single += 1
    # correction could miss).
    b = max(2, int(x ** (1 / n)))

    while b ** n <= x:
        b += 1

    while b > 2 and (b - 1) ** n > x:
        b -= 1

    return b
|
| 12120 |
+
|
| 12121 |
+
###################################################################################
|
| 12122 |
+
|
| 12123 |
+
def encode_int_auto(x: int, n: int) -> tuple[int, list[int]]:

    """Encode x as exactly n digits in the smallest base that can hold it.

    Returns (base, digits) with the most significant digit first.
    """

    base = compute_base(x, n)

    digits = []
    remainder = x

    for _ in range(n):
        remainder, digit = divmod(remainder, base)
        digits.append(digit)

    digits.reverse()

    return base, digits
|
| 12133 |
+
|
| 12134 |
+
###################################################################################
|
| 12135 |
+
|
| 12136 |
+
def decode_int_auto(base: int, digits: list[int]) -> int:

    """Decode a most-significant-first digit list back into an integer.

    Raises ValueError when any digit falls outside the range 0..base-1.
    """

    result = 0

    for digit in digits:
        if not 0 <= digit < base:
            raise ValueError(f"Each digit must be in the range 0 to {base - 1}. Invalid digit: {digit}")

        result = result * base + digit

    return result
|
| 12146 |
+
|
| 12147 |
+
###################################################################################
|
| 12148 |
+
|
| 12149 |
+
def encode_int_manual(x, base, n):

    """Encode x as exactly n base-*base* digits, most significant first.

    Values too large for n digits lose their high-order digits.
    """

    out = []

    for _ in range(n):
        x, d = divmod(x, base)
        out.append(d)

    return out[::-1]
|
| 12158 |
+
|
| 12159 |
+
###################################################################################
|
| 12160 |
+
|
| 12161 |
+
def escore_notes_pitches_chords_signature(escore_notes,
                                          max_patch=128,
                                          sort_by_counts=False,
                                          use_full_chords=False
                                          ):

    """Compute a pitches/chords signature for an enhanced score.

    Assumes escore_notes events are lists where e[3] is the channel,
    e[4] the pitch and e[6] the patch (patch 128 = drums) -- TODO confirm
    against the module's enhanced-score format.

    Parameters:
        max_patch      -- keep only events with patch <= max_patch % 129.
        sort_by_counts -- sort the signature by descending counts when True.
        use_full_chords -- passed through to check_and_fix_tones_chord().

    Returns a list of [token, count] pairs; token -1 holds the number of
    chords that had to be repaired. Returns [] when no events survive the
    patch filter.
    """

    escore_notes = [e for e in escore_notes if e[6] <= max_patch % 129]

    if escore_notes:

        cscore = chordify_score([1000, escore_notes])

        sig = []
        dsig = []

        # Drums tokens are shifted into their own, non-overlapping range.
        drums_offset = 321 + 128

        bad_chords_counter = 0

        for c in cscore:

            # Channel 9 (drums) pitches are offset by 128 so they do not
            # collide with melodic pitches in the combined set.
            all_pitches = [e[4] if e[3] != 9 else e[4]+128 for e in c]
            chord = sorted(set(all_pitches))

            pitches = sorted([p for p in chord if p < 128], reverse=True)
            drums = [(d+drums_offset)-128 for d in chord if d > 127]

            if pitches:
                if len(pitches) > 1:
                    tones_chord = sorted(set([p % 12 for p in pitches]))

                    try:
                        sig_token = ALL_CHORDS_SORTED.index(tones_chord) + 128
                    # Narrowed from a bare except: list.index raises
                    # ValueError when the chord is unknown.
                    except ValueError:
                        checked_tones_chord = check_and_fix_tones_chord(tones_chord, use_full_chords=use_full_chords)
                        sig_token = ALL_CHORDS_SORTED.index(checked_tones_chord) + 128
                        bad_chords_counter += 1

                elif len(pitches) == 1:
                    # Single pitch: the token is the pitch itself (0-127).
                    sig_token = pitches[0]

                sig.append(sig_token)

            if drums:
                dsig.extend(drums)

        # Count occurrences of every pitch/chord/drums token.
        sig_p = {}

        for item in sig+dsig:

            if item in sig_p:
                sig_p[item] += 1

            else:
                sig_p[item] = 1

        # Token -1 stores how many chords needed repairing.
        sig_p[-1] = bad_chords_counter

        fsig = [list(v) for v in sig_p.items()]

        if sort_by_counts:
            fsig.sort(key=lambda x: x[1], reverse=True)

        return fsig

    else:
        return []
|
| 12228 |
+
|
| 12229 |
###################################################################################
|
| 12230 |
# This is the end of the TMIDI X Python module
|
| 12231 |
###################################################################################
|