midi-composer

Runtime error

App Files Files Community

skytnt committed on Sep 25, 2024

Commit

d5353b5

1 Parent(s): 9d7618f

update tokenizer

Browse files

Files changed (2) hide show

app.py +15 -6
midi_tokenizer.py +111 -9

app.py CHANGED Viewed

@@ -121,7 +121,8 @@ def send_msgs(msgs):
     return json.dumps(msgs)
-def run(model_name, tab, instruments, drum_kit, bpm, mid, midi_events, midi_opt, seed, seed_rand,
         gen_events, temp, top_p, top_k, allow_cc):
     mid_seq = []
     bpm = int(bpm)
@@ -153,8 +154,11 @@ def run(model_name, tab, instruments, drum_kit, bpm, mid, midi_events, midi_opt,
             disable_patch_change = True
             disable_channels = [i for i in range(16) if i not in patches]
     elif mid is not None:
-        eps = 4 if midi_opt else 0
-        mid = tokenizer.tokenize(MIDI.midi2score(mid), cc_eps=eps, tempo_eps=eps)
         mid = np.asarray(mid, dtype=np.int64)
         mid = mid[:int(midi_events)]
         for token_seq in mid:
@@ -306,7 +310,10 @@ if __name__ == "__main__":
                 input_midi_events = gr.Slider(label="use first n midi events as prompt", minimum=1, maximum=512,
                                               step=1,
                                               value=128)
-                input_midi_opt = gr.Checkbox(label="optimise midi (uncheck if your midi is generate from this model)", value=True)
                 example2 = gr.Examples([[file, 128] for file in glob.glob("example/*.mid")],
                                        [input_midi, input_midi_events])
@@ -330,8 +337,10 @@ if __name__ == "__main__":
         output_audio = gr.Audio(label="output audio", format="mp3", elem_id="midi_audio")
         output_midi = gr.File(label="output midi", file_types=[".mid"])
         run_event = run_btn.click(run, [input_model, tab_select, input_instruments, input_drum_kit, input_bpm,
-                                        input_midi, input_midi_events, input_midi_opt, input_seed, input_seed_rand,
-                                        input_gen_events, input_temp, input_top_p, input_top_k, input_allow_cc],
                                   [output_midi_seq, output_midi, output_audio, input_seed, js_msg],
                                   concurrency_limit=3)
         stop_btn.click(cancel_run, [output_midi_seq], [output_midi, output_audio, js_msg], cancels=run_event, queue=False)

     return json.dumps(msgs)
+def run(model_name, tab, instruments, drum_kit, bpm, mid, midi_events,
+        reduce_cc_st, remap_track_channel, add_default_instr, remove_empty_channels, seed, seed_rand,
         gen_events, temp, top_p, top_k, allow_cc):
     mid_seq = []
     bpm = int(bpm)
             disable_patch_change = True
             disable_channels = [i for i in range(16) if i not in patches]
     elif mid is not None:
+        eps = 4 if reduce_cc_st else 0
+        mid = tokenizer.tokenize(MIDI.midi2score(mid), cc_eps=eps, tempo_eps=eps,
+                                 remap_track_channel=remap_track_channel,
+                                 add_default_instr=add_default_instr,
+                                 remove_empty_channels=remove_empty_channels)
         mid = np.asarray(mid, dtype=np.int64)
         mid = mid[:int(midi_events)]
         for token_seq in mid:
                 input_midi_events = gr.Slider(label="use first n midi events as prompt", minimum=1, maximum=512,
                                               step=1,
                                               value=128)
+                input_reduce_cc_st = gr.Checkbox(label="reduce control_change and set_tempo events", value=True)
+                input_remap_track_channel = gr.Checkbox(label="remap tracks and channels to have only one channel per track", value=True)
+                input_add_default_instr = gr.Checkbox(label="add a default instrument to channels that don't have an instrument", value=True)
+                input_remove_empty_channels = gr.Checkbox(label="remove channels without notes", value=False)
                 example2 = gr.Examples([[file, 128] for file in glob.glob("example/*.mid")],
                                        [input_midi, input_midi_events])
         output_audio = gr.Audio(label="output audio", format="mp3", elem_id="midi_audio")
         output_midi = gr.File(label="output midi", file_types=[".mid"])
         run_event = run_btn.click(run, [input_model, tab_select, input_instruments, input_drum_kit, input_bpm,
+                                        input_midi, input_midi_events, input_reduce_cc_st, input_remap_track_channel,
+                                        input_add_default_instr, input_remove_empty_channels, input_seed,
+                                        input_seed_rand, input_gen_events, input_temp, input_top_p, input_top_k,
+                                        input_allow_cc],
                                   [output_midi_seq, output_midi, output_audio, input_seed, js_msg],
                                   concurrency_limit=3)
         stop_btn.click(cancel_run, [output_midi_seq], [output_midi, output_audio, js_msg], cancels=run_event, queue=False)

midi_tokenizer.py CHANGED Viewed

@@ -42,9 +42,16 @@ class MIDITokenizer:
         tempo = int((60 / bpm) * 10 ** 6)
         return tempo
-    def tokenize(self, midi_score, add_bos_eos=True, cc_eps=4, tempo_eps=4):
         ticks_per_beat = midi_score[0]
         event_list = {}
         for track_idx, track in enumerate(midi_score[1:129]):
             last_notes = {}
             patch_dict = {}
@@ -53,9 +60,18 @@ class MIDITokenizer:
             for event in track:
                 if event[0] not in self.events:
                     continue
                 t = round(16 * event[1] / ticks_per_beat)  # quantization
                 new_event = [event[0], t // 16, t % 16, track_idx] + event[2:]
                 if event[0] == "note":
                     new_event[4] = max(1, round(16 * new_event[4] / ticks_per_beat))
                 elif event[0] == "set_tempo":
                     if new_event[4] == 0: # invalid tempo
@@ -68,12 +84,18 @@ class MIDITokenizer:
                     key = tuple(new_event[:-1])
                 if event[0] == "patch_change":
                     c, p = event[2:]
                     last_p = patch_dict.setdefault(c, None)
                     if last_p == p:
                         continue
                     patch_dict[c] = p
                 elif event[0] == "control_change":
                     c, cc, v = event[2:]
                     last_v = control_dict.setdefault((c, cc), 0)
                     if abs(last_v - v) < cc_eps:
                         continue
@@ -84,6 +106,13 @@ class MIDITokenizer:
                         continue
                     last_tempo = tempo
                 if event[0] == "note":  # to eliminate note overlap due to quantization
                     cp = tuple(new_event[5:7])
                     if cp in last_notes:
@@ -95,8 +124,79 @@ class MIDITokenizer:
                     last_notes[cp] = (key, new_event)
                 event_list[key] = new_event
         event_list = list(event_list.values())
-        event_list = sorted(event_list, key=lambda e: e[1:4])
-        midi_seq = []
         setup_events = {}
         notes_in_setup = False
         for i, event in enumerate(event_list):  # optimise setup
@@ -113,7 +213,7 @@ class MIDITokenizer:
                 pre_event = event_list[i - 1]
                 has_pre = event[1] + event[2] == pre_event[1] + pre_event[2]
             if (event[0] == "note" and not has_next) or (notes_in_setup and not has_pre) :
-                event_list = sorted(setup_events.values(), key=lambda e: 1 if e[0] == "note" else 0) + event_list[i:]
                 break
             else:
                 if event[0] == "note":
@@ -122,7 +222,10 @@ class MIDITokenizer:
             setup_events[key] = new_event
         last_t1 = 0
         for event in event_list:
             cur_t1 = event[1]
             event[1] = event[1] - last_t1
             tokens = self.event2tokens(event)
@@ -181,7 +284,7 @@ class MIDITokenizer:
                 if track_idx not in tracks_dict:
                     tracks_dict[track_idx] = []
                 tracks_dict[track_idx].append([event[0], t] + event[4:])
-        tracks = list(tracks_dict.values())
         for i in range(len(tracks)):  # to eliminate note overlap
             track = tracks[i]
@@ -292,7 +395,6 @@ class MIDITokenizer:
         notes_bandwidth_list = []
         instruments = {}
         piano_channels = []
-        undef_instrument = False
         abs_t1 = 0
         last_t = 0
         for tsi, tokens in enumerate(midi_seq):
@@ -309,7 +411,9 @@ class MIDITokenizer:
                 time_hist[t2] += 1
                 if c != 9:  # ignore drum channel
                     if c not in instruments:
-                        undef_instrument = True
                     note_windows.setdefault(abs_t1 // note_window_size, []).append(p)
                 if last_t != t:
                     notes_sametime = [(et, p_) for et, p_ in notes_sametime if et > last_t]
@@ -330,8 +434,6 @@ class MIDITokenizer:
             reasons.append("total_min")
         if total_notes > total_notes_max:
             reasons.append("total_max")
-        if undef_instrument:
-            reasons.append("undef_instr")
         if len(note_windows) == 0 and total_notes > 0:
             reasons.append("drum_only")
         if reasons:

         tempo = int((60 / bpm) * 10 ** 6)
         return tempo
+    def tokenize(self, midi_score, add_bos_eos=True, cc_eps=4, tempo_eps=4,
+                 remap_track_channel=False, add_default_instr=False, remove_empty_channels=False):
         ticks_per_beat = midi_score[0]
         event_list = {}
+        track_idx_map = {i: dict() for i in range(16)}
+        track_idx_dict = {}
+        channels = []
+        patch_channels = []
+        empty_channels = [True]*16
+        channel_note_tracks = {i: list() for i in range(16)}
         for track_idx, track in enumerate(midi_score[1:129]):
             last_notes = {}
             patch_dict = {}
             for event in track:
                 if event[0] not in self.events:
                     continue
+                c = -1
                 t = round(16 * event[1] / ticks_per_beat)  # quantization
                 new_event = [event[0], t // 16, t % 16, track_idx] + event[2:]
                 if event[0] == "note":
+                    c = event[3]
+                    if c > 15 or c < 0:
+                        continue
+                    empty_channels[c] = False
+                    track_idx_dict.setdefault(c, track_idx)
+                    note_tracks = channel_note_tracks[c]
+                    if track_idx not in note_tracks:
+                        note_tracks.append(track_idx)
                     new_event[4] = max(1, round(16 * new_event[4] / ticks_per_beat))
                 elif event[0] == "set_tempo":
                     if new_event[4] == 0: # invalid tempo
                     key = tuple(new_event[:-1])
                 if event[0] == "patch_change":
                     c, p = event[2:]
+                    if c > 15 or c < 0:
+                        continue
                     last_p = patch_dict.setdefault(c, None)
                     if last_p == p:
                         continue
                     patch_dict[c] = p
+                    if c not in patch_channels:
+                        patch_channels.append(c)
                 elif event[0] == "control_change":
                     c, cc, v = event[2:]
+                    if c > 15 or c < 0:
+                        continue
                     last_v = control_dict.setdefault((c, cc), 0)
                     if abs(last_v - v) < cc_eps:
                         continue
                         continue
                     last_tempo = tempo
+                if c != -1:
+                    if c not in channels:
+                        channels.append(c)
+                    tr_map = track_idx_map[c]
+                    if track_idx not in tr_map:
+                        tr_map[track_idx] = 0
                 if event[0] == "note":  # to eliminate note overlap due to quantization
                     cp = tuple(new_event[5:7])
                     if cp in last_notes:
                     last_notes[cp] = (key, new_event)
                 event_list[key] = new_event
         event_list = list(event_list.values())
+        empty_channels = [c for c in channels if empty_channels[c]]
+        if remap_track_channel:
+            patch_channels = []
+            channels_count = 0
+            channels_map = {9: 9} if 9 in channels else {}
+            for c in channels:
+                if c == 9:
+                    continue
+                channels_map[c] = channels_count
+                channels_count += 1
+                if channels_count == 9:
+                    channels_count = 10
+            channels = list(channels_map.values())
+            track_count = 0
+            track_idx_map_order = [k for k,v in sorted(list(channels_map.items()), key=lambda x: x[1])]
+            for c in track_idx_map_order: # tracks not to remove
+                if remove_empty_channels and c in empty_channels:
+                    continue
+                tr_map = track_idx_map[c]
+                for track_idx in tr_map:
+                    note_tracks = channel_note_tracks[c]
+                    if len(note_tracks) != 0 and track_idx not in note_tracks:
+                        continue
+                    track_count += 1
+                    tr_map[track_idx] = track_count
+            for c in track_idx_map_order: # tracks to remove
+                if not (remove_empty_channels and c in empty_channels):
+                    continue
+                tr_map = track_idx_map[c]
+                for track_idx in tr_map:
+                    note_tracks = channel_note_tracks[c]
+                    if not (len(note_tracks) != 0 and track_idx not in note_tracks):
+                        continue
+                    track_count += 1
+                    tr_map[track_idx] = track_count
+            empty_channels = [channels_map[c] for c in empty_channels]
+            for event in event_list:
+                name = event[0]
+                track_idx = event[3]
+                if name == "note":
+                    c = event[5]
+                    event[5] = channels_map[c]
+                    event[3] = track_idx_map[c][track_idx]
+                    track_idx_dict[event[5]] = event[3]
+                elif name == "set_tempo":
+                    event[3] = 0
+                elif name == "control_change" or name == "patch_change":
+                    c = event[4]
+                    event[4] = channels_map[c]
+                    tr_map = track_idx_map[c]
+                    # move the event to the first track of the channel if its original track is empty
+                    note_tracks = channel_note_tracks[c]
+                    if len(note_tracks) != 0 and track_idx not in note_tracks:
+                        track_idx = channel_note_tracks[c][0]
+                    new_track_idx = tr_map.setdefault(track_idx, next(iter(tr_map.values())))
+                    event[3] = new_track_idx
+                    if name == "patch_change" and event[4] not in patch_channels:
+                        patch_channels.append(event[4])
+        if add_default_instr:
+            for c in channels:
+                if c not in patch_channels:
+                    event_list.append(["patch_change", 0,0, track_idx_dict[c], c, 0])
+        events_name_order = {"set_tempo":0, "patch_change":1, "control_change":2, "note":3}
+        events_order = lambda e: e[1:4] + [events_name_order[e[0]]]
+        event_list = sorted(event_list, key=events_order)
         setup_events = {}
         notes_in_setup = False
         for i, event in enumerate(event_list):  # optimise setup
                 pre_event = event_list[i - 1]
                 has_pre = event[1] + event[2] == pre_event[1] + pre_event[2]
             if (event[0] == "note" and not has_next) or (notes_in_setup and not has_pre) :
+                event_list = sorted(setup_events.values(), key=events_order) + event_list[i:]
                 break
             else:
                 if event[0] == "note":
             setup_events[key] = new_event
         last_t1 = 0
+        midi_seq = []
         for event in event_list:
+            if remove_empty_channels and event[0] in ["control_change", "patch_change"] and event[4] in empty_channels:
+                continue
             cur_t1 = event[1]
             event[1] = event[1] - last_t1
             tokens = self.event2tokens(event)
                 if track_idx not in tracks_dict:
                     tracks_dict[track_idx] = []
                 tracks_dict[track_idx].append([event[0], t] + event[4:])
+        tracks = [tr for idx, tr in sorted(list(tracks_dict.items()), key=lambda it: it[0])]
         for i in range(len(tracks)):  # to eliminate note overlap
             track = tracks[i]
         notes_bandwidth_list = []
         instruments = {}
         piano_channels = []
         abs_t1 = 0
         last_t = 0
         for tsi, tokens in enumerate(midi_seq):
                 time_hist[t2] += 1
                 if c != 9:  # ignore drum channel
                     if c not in instruments:
+                        instruments[c] = 0
+                        if c not in piano_channels:
+                            piano_channels.append(c)
                     note_windows.setdefault(abs_t1 // note_window_size, []).append(p)
                 if last_t != t:
                     notes_sametime = [(et, p_) for et, p_ in notes_sametime if et > last_t]
             reasons.append("total_min")
         if total_notes > total_notes_max:
             reasons.append("total_max")
         if len(note_windows) == 0 and total_notes > 0:
             reasons.append("drum_only")
         if reasons: