Spaces:
Sleeping
Sleeping
Upload 2 files
Browse files- TMIDIX.py +60 -5
- midi_to_colab_audio.py +171 -115
TMIDIX.py
CHANGED
|
@@ -1461,6 +1461,7 @@ import tqdm
|
|
| 1461 |
|
| 1462 |
from itertools import zip_longest
|
| 1463 |
from itertools import groupby
|
|
|
|
| 1464 |
|
| 1465 |
from operator import itemgetter
|
| 1466 |
|
|
@@ -3852,7 +3853,8 @@ ALL_CHORDS = [[0], [7], [5], [9], [2], [4], [11], [10], [8], [6], [3], [1], [0,
|
|
| 3852 |
[2, 5, 7, 9, 11], [1, 3, 5, 7, 10], [0, 2, 4, 7, 10], [1, 3, 5, 7, 9],
|
| 3853 |
[1, 3, 5, 9, 11], [1, 5, 7, 9, 11], [1, 3, 7, 9, 11], [3, 5, 7, 9, 11],
|
| 3854 |
[2, 4, 6, 8, 10], [0, 4, 6, 8, 10], [0, 2, 6, 8, 10], [1, 3, 5, 7, 11],
|
| 3855 |
-
[0, 2, 4, 8, 10], [0, 2, 4, 6, 8], [0, 2, 4, 6, 10]]
|
|
|
|
| 3856 |
|
| 3857 |
def find_exact_match_variable_length(list_of_lists, target_list, uncertain_indices):
|
| 3858 |
# Infer possible values for each uncertain index
|
|
@@ -3981,7 +3983,7 @@ def analyze_score_pitches(score, channels_to_analyze=[0]):
|
|
| 3981 |
|
| 3982 |
###################################################################################
|
| 3983 |
|
| 3984 |
-
ALL_CHORDS_GROUPED = [
|
| 3985 |
[[0, 2, 5, 7, 10], [0, 2, 4, 7, 9], [0, 2, 5, 7, 9], [1, 4, 6, 9, 11],
|
| 3986 |
[1, 3, 6, 8, 11], [1, 3, 6, 8, 10], [1, 4, 6, 8, 11], [1, 3, 5, 8, 10],
|
| 3987 |
[2, 4, 6, 9, 11], [2, 4, 7, 9, 11], [0, 3, 5, 7, 10], [0, 3, 5, 8, 10],
|
|
@@ -4427,12 +4429,15 @@ def ascii_texts_search(texts = ['text1', 'text2', 'text3'],
|
|
| 4427 |
deterministic_matching = False
|
| 4428 |
):
|
| 4429 |
|
|
|
|
|
|
|
| 4430 |
if not deterministic_matching:
|
| 4431 |
-
|
|
|
|
| 4432 |
|
| 4433 |
clean_texts = []
|
| 4434 |
|
| 4435 |
-
for t in
|
| 4436 |
text_words_list = [at.split(chr(32)) for at in t.split(chr(10))]
|
| 4437 |
|
| 4438 |
clean_text_words_list = []
|
|
@@ -4473,7 +4478,7 @@ def ascii_texts_search(texts = ['text1', 'text2', 'text3'],
|
|
| 4473 |
|
| 4474 |
if texts_match_ratios:
|
| 4475 |
max_text_match_ratio = max(texts_match_ratios)
|
| 4476 |
-
max_match_ratio_text =
|
| 4477 |
max_text_words_match_indexes = words_match_indexes[texts_match_ratios.index(max_text_match_ratio)]
|
| 4478 |
|
| 4479 |
return [max_match_ratio_text, max_text_match_ratio, max_text_words_match_indexes]
|
|
@@ -4507,6 +4512,56 @@ def ascii_text_words_counter(ascii_text):
|
|
| 4507 |
|
| 4508 |
###################################################################################
|
| 4509 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4510 |
# This is the end of the TMIDI X Python module
|
| 4511 |
|
| 4512 |
###################################################################################
|
|
|
|
| 1461 |
|
| 1462 |
from itertools import zip_longest
|
| 1463 |
from itertools import groupby
|
| 1464 |
+
from collections import Counter
|
| 1465 |
|
| 1466 |
from operator import itemgetter
|
| 1467 |
|
|
|
|
| 3853 |
[2, 5, 7, 9, 11], [1, 3, 5, 7, 10], [0, 2, 4, 7, 10], [1, 3, 5, 7, 9],
|
| 3854 |
[1, 3, 5, 9, 11], [1, 5, 7, 9, 11], [1, 3, 7, 9, 11], [3, 5, 7, 9, 11],
|
| 3855 |
[2, 4, 6, 8, 10], [0, 4, 6, 8, 10], [0, 2, 6, 8, 10], [1, 3, 5, 7, 11],
|
| 3856 |
+
[0, 2, 4, 8, 10], [0, 2, 4, 6, 8], [0, 2, 4, 6, 10], [0, 2, 4, 6, 8, 10],
|
| 3857 |
+
[1, 3, 5, 7, 9, 11]]
|
| 3858 |
|
| 3859 |
def find_exact_match_variable_length(list_of_lists, target_list, uncertain_indices):
|
| 3860 |
# Infer possible values for each uncertain index
|
|
|
|
| 3983 |
|
| 3984 |
###################################################################################
|
| 3985 |
|
| 3986 |
+
ALL_CHORDS_GROUPED = [[[1, 3, 5, 7, 9, 11], [0, 2, 4, 6, 8, 10]],
|
| 3987 |
[[0, 2, 5, 7, 10], [0, 2, 4, 7, 9], [0, 2, 5, 7, 9], [1, 4, 6, 9, 11],
|
| 3988 |
[1, 3, 6, 8, 11], [1, 3, 6, 8, 10], [1, 4, 6, 8, 11], [1, 3, 5, 8, 10],
|
| 3989 |
[2, 4, 6, 9, 11], [2, 4, 7, 9, 11], [0, 3, 5, 7, 10], [0, 3, 5, 8, 10],
|
|
|
|
| 4429 |
deterministic_matching = False
|
| 4430 |
):
|
| 4431 |
|
| 4432 |
+
texts_copy = texts
|
| 4433 |
+
|
| 4434 |
if not deterministic_matching:
|
| 4435 |
+
texts_copy = copy.deepcopy(texts)
|
| 4436 |
+
random.shuffle(texts_copy)
|
| 4437 |
|
| 4438 |
clean_texts = []
|
| 4439 |
|
| 4440 |
+
for t in texts_copy:
|
| 4441 |
text_words_list = [at.split(chr(32)) for at in t.split(chr(10))]
|
| 4442 |
|
| 4443 |
clean_text_words_list = []
|
|
|
|
| 4478 |
|
| 4479 |
if texts_match_ratios:
|
| 4480 |
max_text_match_ratio = max(texts_match_ratios)
|
| 4481 |
+
max_match_ratio_text = texts_copy[texts_match_ratios.index(max_text_match_ratio)]
|
| 4482 |
max_text_words_match_indexes = words_match_indexes[texts_match_ratios.index(max_text_match_ratio)]
|
| 4483 |
|
| 4484 |
return [max_match_ratio_text, max_text_match_ratio, max_text_words_match_indexes]
|
|
|
|
| 4512 |
|
| 4513 |
###################################################################################
|
| 4514 |
|
| 4515 |
+
def check_and_fix_tones_chord(tones_chord):

    """
    Return a thinned-out copy of a tones chord.

    The input is never modified; a new list is always returned.

    Rules applied (all taken directly from the comparisons below):

    * Empty input yields an empty list.
    * Two tones: if the second is exactly one above the first, only the
      second tone is kept. Otherwise both are kept, except that tone 0 is
      dropped when tone 11 is also present.
    * One tone, or three and more tones: the first tone is always kept.
      For three and more tones, an inner tone is kept only when it is not
      exactly one above its predecessor and not exactly one below its
      successor, and the last tone is always kept. Finally tone 0 is
      dropped when tone 11 is also present in the result.

    NOTE(review): the adjacency checks compare neighbouring list items, so
    the input is presumably sorted ascending pitch classes (0-11), with the
    0/11 rule covering the octave wrap-around -- confirm against callers.

    Parameters
    ----------
    tones_chord : sequence of int
        Tones of the chord.

    Returns
    -------
    list of int
        The fixed tones chord.
    """

    # Work on a copy so the caller's list is never mutated by remove().
    tones = list(tones_chord)

    if not tones:
        return []

    if len(tones) == 2:
        if tones[1] - tones[0] == 1:
            return [tones[1]]

        if 0 in tones and 11 in tones:
            tones.remove(0)

        return tones

    kept = [tones[0]]

    if len(tones) > 2:
        # Slide a (previous, current, next) window over the inner tones.
        for prev_tone, tone, next_tone in zip(tones, tones[1:], tones[2:]):
            if prev_tone + 1 != tone and tone + 1 != next_tone:
                kept.append(tone)

        kept.append(tones[-1])

    if 0 in kept and 11 in kept:
        kept.remove(0)

    return kept
|
| 4539 |
+
|
| 4540 |
+
###################################################################################
|
| 4541 |
+
|
| 4542 |
+
def create_similarity_matrix(list_of_values, matrix_length=0):

    """
    Build a count vector for the values and a pairwise count-ratio matrix.

    Each value is used directly as an index into the count vector, so the
    values are expected to be non-negative integers.

    Parameters
    ----------
    list_of_values : list of int
        Values to count.
    matrix_length : int, optional
        Requested minimum size. When greater than zero the size starts at
        max(matrix_length, len(list_of_values)); otherwise it starts at the
        number of distinct values. In both cases the size is then grown, if
        needed, so that the largest value is a valid index.

    Returns
    -------
    tuple
        (similarity_matrix, sim_matrix) where sim_matrix[v] is the number
        of occurrences of v, and similarity_matrix[i][j] is
        min(count_i, count_j) / max(count_i, count_j), or 0 when both
        counts are zero.
    """

    counts = Counter(list_of_values)

    if matrix_length > 0:
        size = max(matrix_length, len(list_of_values))
    else:
        size = len(counts)

    # Values are used as indexes, so the vector must reach the largest value.
    # Without this, inputs such as [0, 0, 3] raised IndexError.
    size = max(size, max(counts, default=-1) + 1)

    sim_matrix = [0] * size

    for value, count in counts.items():
        sim_matrix[value] = count

    similarity_matrix = [[0] * size for _ in range(size)]

    for i, count_i in enumerate(sim_matrix):
        for j, count_j in enumerate(sim_matrix):
            larger = max(count_i, count_j)
            # Skip pairs where both counts are zero to avoid dividing by zero.
            if larger != 0:
                similarity_matrix[i][j] = min(count_i, count_j) / larger

    return similarity_matrix, sim_matrix
|
| 4562 |
+
|
| 4563 |
+
###################################################################################
|
| 4564 |
+
|
| 4565 |
# This is the end of the TMIDI X Python module
|
| 4566 |
|
| 4567 |
###################################################################################
|
midi_to_colab_audio.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
#===================================================================================================================
|
| 2 |
#
|
| 3 |
# MIDI to Colab Audio Python Module
|
| 4 |
#
|
|
@@ -47,7 +47,6 @@
|
|
| 47 |
# could break compatibility, but there's not much else you can do to fix the bug
|
| 48 |
# https://en.wikipedia.org/wiki/Shift_JIS
|
| 49 |
|
| 50 |
-
r'''
|
| 51 |
This module offers functions: concatenate_scores(), grep(),
|
| 52 |
merge_scores(), mix_scores(), midi2opus(), midi2score(), opus2midi(),
|
| 53 |
opus2score(), play_score(), score2midi(), score2opus(), score2stats(),
|
|
@@ -2874,12 +2873,16 @@ def raw_audio_string(data):
|
|
| 2874 |
#===============================================================================
|
| 2875 |
|
| 2876 |
import numpy as np
|
|
|
|
| 2877 |
|
| 2878 |
def midi_opus_to_colab_audio(midi_opus,
|
| 2879 |
soundfont_path='/usr/share/sounds/sf2/FluidR3_GM.sf2',
|
| 2880 |
sample_rate=16000, # 44100
|
| 2881 |
volume_scale=10,
|
| 2882 |
-
|
|
|
|
|
|
|
|
|
|
| 2883 |
):
|
| 2884 |
|
| 2885 |
def normalize_volume(matrix, factor=10):
|
|
@@ -2889,68 +2892,95 @@ def midi_opus_to_colab_audio(midi_opus,
|
|
| 2889 |
final_matrix = np.clip(mult_matrix, -1.0, 1.0)
|
| 2890 |
return final_matrix
|
| 2891 |
|
| 2892 |
-
|
| 2893 |
-
|
| 2894 |
-
|
| 2895 |
-
|
| 2896 |
-
|
| 2897 |
-
|
| 2898 |
-
|
| 2899 |
-
|
| 2900 |
-
|
| 2901 |
-
|
| 2902 |
-
|
| 2903 |
-
|
| 2904 |
-
|
| 2905 |
-
|
| 2906 |
-
|
| 2907 |
-
|
| 2908 |
-
|
| 2909 |
-
|
| 2910 |
-
|
| 2911 |
-
|
| 2912 |
-
|
| 2913 |
-
|
| 2914 |
-
|
| 2915 |
-
|
| 2916 |
-
|
| 2917 |
-
|
| 2918 |
-
|
| 2919 |
-
|
| 2920 |
-
|
| 2921 |
-
|
| 2922 |
-
|
| 2923 |
-
|
| 2924 |
-
|
| 2925 |
-
|
| 2926 |
-
|
| 2927 |
-
|
| 2928 |
-
|
| 2929 |
-
|
| 2930 |
-
|
| 2931 |
-
|
| 2932 |
-
|
| 2933 |
-
|
| 2934 |
-
|
| 2935 |
-
|
| 2936 |
-
|
| 2937 |
-
|
| 2938 |
-
|
| 2939 |
-
|
| 2940 |
-
|
| 2941 |
-
|
| 2942 |
-
|
| 2943 |
-
|
| 2944 |
-
|
| 2945 |
-
|
| 2946 |
-
|
| 2947 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2948 |
|
| 2949 |
def midi_to_colab_audio(midi_file,
|
| 2950 |
soundfont_path='/usr/share/sounds/sf2/FluidR3_GM.sf2',
|
| 2951 |
sample_rate=16000, # 44100
|
| 2952 |
volume_scale=10,
|
| 2953 |
-
|
|
|
|
|
|
|
|
|
|
| 2954 |
):
|
| 2955 |
|
| 2956 |
'''
|
|
@@ -2965,8 +2995,6 @@ def midi_to_colab_audio(midi_file,
|
|
| 2965 |
|
| 2966 |
'''
|
| 2967 |
|
| 2968 |
-
midi_opus = midi2opus(open(midi_file, 'rb').read())
|
| 2969 |
-
|
| 2970 |
def normalize_volume(matrix, factor=10):
|
| 2971 |
norm = np.linalg.norm(matrix)
|
| 2972 |
matrix = matrix/norm # normalized matrix
|
|
@@ -2974,61 +3002,89 @@ def midi_to_colab_audio(midi_file,
|
|
| 2974 |
final_matrix = np.clip(mult_matrix, -1.0, 1.0)
|
| 2975 |
return final_matrix
|
| 2976 |
|
| 2977 |
-
|
| 2978 |
-
|
| 2979 |
-
|
| 2980 |
-
|
| 2981 |
-
|
| 2982 |
-
|
| 2983 |
-
|
| 2984 |
-
|
| 2985 |
-
|
| 2986 |
-
|
| 2987 |
-
|
| 2988 |
-
|
| 2989 |
-
|
| 2990 |
-
|
| 2991 |
-
|
| 2992 |
-
|
| 2993 |
-
|
| 2994 |
-
|
| 2995 |
-
|
| 2996 |
-
|
| 2997 |
-
|
| 2998 |
-
|
| 2999 |
-
|
| 3000 |
-
|
| 3001 |
-
|
| 3002 |
-
|
| 3003 |
-
|
| 3004 |
-
|
| 3005 |
-
|
| 3006 |
-
|
| 3007 |
-
|
| 3008 |
-
|
| 3009 |
-
|
| 3010 |
-
|
| 3011 |
-
|
| 3012 |
-
|
| 3013 |
-
|
| 3014 |
-
|
| 3015 |
-
|
| 3016 |
-
|
| 3017 |
-
|
| 3018 |
-
|
| 3019 |
-
|
| 3020 |
-
|
| 3021 |
-
|
| 3022 |
-
|
| 3023 |
-
|
| 3024 |
-
|
| 3025 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3026 |
return ss
|
| 3027 |
|
| 3028 |
-
|
| 3029 |
|
| 3030 |
-
|
| 3031 |
-
|
| 3032 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3033 |
|
| 3034 |
#===================================================================================================================
|
|
|
|
| 1 |
+
r'''#===================================================================================================================
|
| 2 |
#
|
| 3 |
# MIDI to Colab Audio Python Module
|
| 4 |
#
|
|
|
|
| 47 |
# could break compatibility, but there's not much else you can do to fix the bug
|
| 48 |
# https://en.wikipedia.org/wiki/Shift_JIS
|
| 49 |
|
|
|
|
| 50 |
This module offers functions: concatenate_scores(), grep(),
|
| 51 |
merge_scores(), mix_scores(), midi2opus(), midi2score(), opus2midi(),
|
| 52 |
opus2score(), play_score(), score2midi(), score2opus(), score2stats(),
|
|
|
|
| 2873 |
#===============================================================================
|
| 2874 |
|
| 2875 |
import numpy as np
|
| 2876 |
+
import wave
|
| 2877 |
|
| 2878 |
def midi_opus_to_colab_audio(midi_opus,
|
| 2879 |
soundfont_path='/usr/share/sounds/sf2/FluidR3_GM.sf2',
|
| 2880 |
sample_rate=16000, # 44100
|
| 2881 |
volume_scale=10,
|
| 2882 |
+
trim_silence=True,
|
| 2883 |
+
silence_threshold=0.1,
|
| 2884 |
+
output_for_gradio=False,
|
| 2885 |
+
write_audio_to_WAV=''
|
| 2886 |
):
|
| 2887 |
|
| 2888 |
def normalize_volume(matrix, factor=10):
|
|
|
|
| 2892 |
final_matrix = np.clip(mult_matrix, -1.0, 1.0)
|
| 2893 |
return final_matrix
|
| 2894 |
|
| 2895 |
+
if midi_opus[1]:
|
| 2896 |
+
|
| 2897 |
+
ticks_per_beat = midi_opus[0]
|
| 2898 |
+
event_list = []
|
| 2899 |
+
for track_idx, track in enumerate(midi_opus[1:]):
|
| 2900 |
+
abs_t = 0
|
| 2901 |
+
for event in track:
|
| 2902 |
+
abs_t += event[1]
|
| 2903 |
+
event_new = [*event]
|
| 2904 |
+
event_new[1] = abs_t
|
| 2905 |
+
event_list.append(event_new)
|
| 2906 |
+
event_list = sorted(event_list, key=lambda e: e[1])
|
| 2907 |
+
|
| 2908 |
+
tempo = int((60 / 120) * 10 ** 6) # default 120 bpm
|
| 2909 |
+
ss = np.empty((0, 2), dtype=np.int16)
|
| 2910 |
+
fl = Synth(samplerate=float(sample_rate))
|
| 2911 |
+
sfid = fl.sfload(soundfont_path)
|
| 2912 |
+
last_t = 0
|
| 2913 |
+
for c in range(16):
|
| 2914 |
+
fl.program_select(c, sfid, 128 if c == 9 else 0, 0)
|
| 2915 |
+
for event in event_list:
|
| 2916 |
+
name = event[0]
|
| 2917 |
+
sample_len = int(((event[1] / ticks_per_beat) * tempo / (10 ** 6)) * sample_rate)
|
| 2918 |
+
sample_len -= int(((last_t / ticks_per_beat) * tempo / (10 ** 6)) * sample_rate)
|
| 2919 |
+
last_t = event[1]
|
| 2920 |
+
if sample_len > 0:
|
| 2921 |
+
sample = fl.get_samples(sample_len).reshape(sample_len, 2)
|
| 2922 |
+
ss = np.concatenate([ss, sample])
|
| 2923 |
+
if name == "set_tempo":
|
| 2924 |
+
tempo = event[2]
|
| 2925 |
+
elif name == "patch_change":
|
| 2926 |
+
c, p = event[2:4]
|
| 2927 |
+
fl.program_select(c, sfid, 128 if c == 9 else 0, p)
|
| 2928 |
+
elif name == "control_change":
|
| 2929 |
+
c, cc, v = event[2:5]
|
| 2930 |
+
fl.cc(c, cc, v)
|
| 2931 |
+
elif name == "note_on" and event[3] > 0:
|
| 2932 |
+
c, p, v = event[2:5]
|
| 2933 |
+
fl.noteon(c, p, v)
|
| 2934 |
+
elif name == "note_off" or (name == "note_on" and event[3] == 0):
|
| 2935 |
+
c, p = event[2:4]
|
| 2936 |
+
fl.noteoff(c, p)
|
| 2937 |
+
|
| 2938 |
+
fl.delete()
|
| 2939 |
+
if ss.shape[0] > 0:
|
| 2940 |
+
max_val = np.abs(ss).max()
|
| 2941 |
+
if max_val != 0:
|
| 2942 |
+
ss = (ss / max_val) * np.iinfo(np.int16).max
|
| 2943 |
+
ss = ss.astype(np.int16)
|
| 2944 |
+
|
| 2945 |
+
if trim_silence:
|
| 2946 |
+
threshold = np.std(np.abs(ss)) * silence_threshold
|
| 2947 |
+
exceeded_thresh = np.abs(ss) > threshold
|
| 2948 |
+
if np.any(exceeded_thresh):
|
| 2949 |
+
last_idx = np.where(exceeded_thresh)[0][-1]
|
| 2950 |
+
ss = ss[:last_idx+1]
|
| 2951 |
+
|
| 2952 |
+
if output_for_gradio:
|
| 2953 |
+
return ss
|
| 2954 |
+
|
| 2955 |
+
ss = ss.swapaxes(1, 0)
|
| 2956 |
+
|
| 2957 |
+
raw_audio = normalize_volume(ss, volume_scale)
|
| 2958 |
+
|
| 2959 |
+
if write_audio_to_WAV != '':
|
| 2960 |
+
|
| 2961 |
+
r_audio = raw_audio.T
|
| 2962 |
+
|
| 2963 |
+
r_audio = np.int16(r_audio / np.max(np.abs(r_audio)) * 32767)
|
| 2964 |
+
|
| 2965 |
+
with wave.open(write_audio_to_WAV, 'w') as wf:
|
| 2966 |
+
wf.setframerate(sample_rate)
|
| 2967 |
+
wf.setsampwidth(2)
|
| 2968 |
+
wf.setnchannels(r_audio.shape[1])
|
| 2969 |
+
wf.writeframes(r_audio)
|
| 2970 |
+
|
| 2971 |
+
return raw_audio
|
| 2972 |
+
|
| 2973 |
+
else:
|
| 2974 |
+
return None
|
| 2975 |
|
| 2976 |
def midi_to_colab_audio(midi_file,
|
| 2977 |
soundfont_path='/usr/share/sounds/sf2/FluidR3_GM.sf2',
|
| 2978 |
sample_rate=16000, # 44100
|
| 2979 |
volume_scale=10,
|
| 2980 |
+
trim_silence=True,
|
| 2981 |
+
silence_threshold=0.1,
|
| 2982 |
+
output_for_gradio=False,
|
| 2983 |
+
write_audio_to_WAV=False
|
| 2984 |
):
|
| 2985 |
|
| 2986 |
'''
|
|
|
|
| 2995 |
|
| 2996 |
'''
|
| 2997 |
|
|
|
|
|
|
|
| 2998 |
def normalize_volume(matrix, factor=10):
|
| 2999 |
norm = np.linalg.norm(matrix)
|
| 3000 |
matrix = matrix/norm # normalized matrix
|
|
|
|
| 3002 |
final_matrix = np.clip(mult_matrix, -1.0, 1.0)
|
| 3003 |
return final_matrix
|
| 3004 |
|
| 3005 |
+
midi_opus = midi2opus(open(midi_file, 'rb').read())
|
| 3006 |
+
|
| 3007 |
+
if midi_opus[1]:
|
| 3008 |
+
|
| 3009 |
+
ticks_per_beat = midi_opus[0]
|
| 3010 |
+
event_list = []
|
| 3011 |
+
for track_idx, track in enumerate(midi_opus[1:]):
|
| 3012 |
+
abs_t = 0
|
| 3013 |
+
for event in track:
|
| 3014 |
+
abs_t += event[1]
|
| 3015 |
+
event_new = [*event]
|
| 3016 |
+
event_new[1] = abs_t
|
| 3017 |
+
event_list.append(event_new)
|
| 3018 |
+
event_list = sorted(event_list, key=lambda e: e[1])
|
| 3019 |
+
|
| 3020 |
+
tempo = int((60 / 120) * 10 ** 6) # default 120 bpm
|
| 3021 |
+
ss = np.empty((0, 2), dtype=np.int16)
|
| 3022 |
+
fl = Synth(samplerate=float(sample_rate))
|
| 3023 |
+
sfid = fl.sfload(soundfont_path)
|
| 3024 |
+
last_t = 0
|
| 3025 |
+
for c in range(16):
|
| 3026 |
+
fl.program_select(c, sfid, 128 if c == 9 else 0, 0)
|
| 3027 |
+
for event in event_list:
|
| 3028 |
+
name = event[0]
|
| 3029 |
+
sample_len = int(((event[1] / ticks_per_beat) * tempo / (10 ** 6)) * sample_rate)
|
| 3030 |
+
sample_len -= int(((last_t / ticks_per_beat) * tempo / (10 ** 6)) * sample_rate)
|
| 3031 |
+
last_t = event[1]
|
| 3032 |
+
if sample_len > 0:
|
| 3033 |
+
sample = fl.get_samples(sample_len).reshape(sample_len, 2)
|
| 3034 |
+
ss = np.concatenate([ss, sample])
|
| 3035 |
+
if name == "set_tempo":
|
| 3036 |
+
tempo = event[2]
|
| 3037 |
+
elif name == "patch_change":
|
| 3038 |
+
c, p = event[2:4]
|
| 3039 |
+
fl.program_select(c, sfid, 128 if c == 9 else 0, p)
|
| 3040 |
+
elif name == "control_change":
|
| 3041 |
+
c, cc, v = event[2:5]
|
| 3042 |
+
fl.cc(c, cc, v)
|
| 3043 |
+
elif name == "note_on" and event[3] > 0:
|
| 3044 |
+
c, p, v = event[2:5]
|
| 3045 |
+
fl.noteon(c, p, v)
|
| 3046 |
+
elif name == "note_off" or (name == "note_on" and event[3] == 0):
|
| 3047 |
+
c, p = event[2:4]
|
| 3048 |
+
fl.noteoff(c, p)
|
| 3049 |
+
|
| 3050 |
+
fl.delete()
|
| 3051 |
+
if ss.shape[0] > 0:
|
| 3052 |
+
max_val = np.abs(ss).max()
|
| 3053 |
+
if max_val != 0:
|
| 3054 |
+
ss = (ss / max_val) * np.iinfo(np.int16).max
|
| 3055 |
+
ss = ss.astype(np.int16)
|
| 3056 |
+
|
| 3057 |
+
if trim_silence:
|
| 3058 |
+
threshold = np.std(np.abs(ss)) * silence_threshold
|
| 3059 |
+
exceeded_thresh = np.abs(ss) > threshold
|
| 3060 |
+
if np.any(exceeded_thresh):
|
| 3061 |
+
last_idx = np.where(exceeded_thresh)[0][-1]
|
| 3062 |
+
ss = ss[:last_idx+1]
|
| 3063 |
+
|
| 3064 |
+
if output_for_gradio:
|
| 3065 |
return ss
|
| 3066 |
|
| 3067 |
+
ss = ss.swapaxes(1, 0)
|
| 3068 |
|
| 3069 |
+
raw_audio = normalize_volume(ss, volume_scale)
|
| 3070 |
+
|
| 3071 |
+
if write_audio_to_WAV:
|
| 3072 |
+
|
| 3073 |
+
filename = midi_file.split('.')[-2] + '.wav'
|
| 3074 |
+
|
| 3075 |
+
r_audio = raw_audio.T
|
| 3076 |
+
|
| 3077 |
+
r_audio = np.int16(r_audio / np.max(np.abs(r_audio)) * 32767)
|
| 3078 |
+
|
| 3079 |
+
with wave.open(filename, 'w') as wf:
|
| 3080 |
+
wf.setframerate(sample_rate)
|
| 3081 |
+
wf.setsampwidth(2)
|
| 3082 |
+
wf.setnchannels(r_audio.shape[1])
|
| 3083 |
+
wf.writeframes(r_audio)
|
| 3084 |
+
|
| 3085 |
+
return raw_audio
|
| 3086 |
+
|
| 3087 |
+
else:
|
| 3088 |
+
return None
|
| 3089 |
|
| 3090 |
#===================================================================================================================
|