Spaces:
Build error
Build error
| #!/usr/bin/env python3 | |
| # Copyright (c) Facebook, Inc. and its affiliates. | |
| # | |
| # This source code is licensed under the MIT license found in the | |
| # LICENSE file in the root directory of this source tree. | |
| """ | |
| Helper script to generate word (.wrd) and letter (.ltr) label files from the transcriptions of the files listed in a tsv manifest | |
| """ | |
| import argparse | |
| import os | |
def _load_transcriptions(trans_path):
    """Parse one ``*.trans.txt`` file into an ``{utterance_id: text}`` dict.

    Each non-blank line is split on whitespace: the first token is the
    utterance id and the remaining tokens, re-joined with single spaces,
    form its transcription.

    Raises:
        FileNotFoundError: raised by ``open`` if ``trans_path`` is missing.
    """
    texts = {}
    with open(trans_path, "r") as trans_f:
        for tline in trans_f:
            items = tline.split()
            if not items:
                # Tolerate blank lines instead of failing on items[0].
                continue
            texts[items[0]] = " ".join(items[1:])
    return texts


def main():
    """Write ``.wrd`` and ``.ltr`` label files for the entries of a tsv manifest.

    Command-line arguments:
        tsv: manifest file. Its first line is the root directory; every
            following non-blank line holds a file path relative to it.
        --output-dir: directory for the outputs (created if missing).
        --output-name: output basename; ``<name>.wrd`` and ``<name>.ltr``
            are written inside ``--output-dir``.

    For every manifest entry the transcription is looked up in the file
    ``<root>/<dir>/<parent>-<leaf>.trans.txt``, where ``<dir>`` is the
    entry's directory and ``<parent>``/``<leaf>`` are its last two path
    components. The lookup key is the entry's file name up to the first
    ".". One line per entry is written to each output: the transcription
    itself to ``.wrd``, and to ``.ltr`` its characters separated by
    spaces, with word boundaries replaced by "|" and a trailing " |".

    Raises:
        ValueError: if the manifest is empty (no root line).
        FileNotFoundError: if a required ``.trans.txt`` file is missing.
        KeyError: if an entry has no line in its ``.trans.txt`` file.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("tsv")
    parser.add_argument("--output-dir", required=True)
    parser.add_argument("--output-name", required=True)
    args = parser.parse_args()

    os.makedirs(args.output_dir, exist_ok=True)

    # Cache of parsed transcript files, keyed by the entry's directory, so
    # each .trans.txt is read only once.
    transcriptions = {}

    with open(args.tsv, "r") as tsv, open(
        os.path.join(args.output_dir, args.output_name + ".ltr"), "w"
    ) as ltr_out, open(
        os.path.join(args.output_dir, args.output_name + ".wrd"), "w"
    ) as wrd_out:
        header = next(tsv, None)
        if header is None:
            raise ValueError(
                f"manifest {args.tsv} is empty; expected the root directory "
                "on its first line"
            )
        root = header.strip()

        for line in tsv:
            line = line.strip()
            if not line:
                # A blank (e.g. trailing) line names no file; skip it.
                continue

            rel_dir = os.path.dirname(line)
            if rel_dir not in transcriptions:
                # The transcript file is named after the last two
                # components of the entry's directory.
                parts = rel_dir.split(os.path.sep)
                trans_name = f"{parts[-2]}-{parts[-1]}.trans.txt"
                transcriptions[rel_dir] = _load_transcriptions(
                    os.path.join(root, rel_dir, trans_name)
                )

            utt_id = os.path.basename(line).split(".")[0]
            # Explicit check rather than `assert`, which is stripped
            # when Python runs with -O.
            if utt_id not in transcriptions[rel_dir]:
                raise KeyError(
                    f"no transcription for {utt_id!r} (manifest entry {line!r})"
                )
            text = transcriptions[rel_dir][utt_id]

            print(text, file=wrd_out)
            print(" ".join(text.replace(" ", "|")) + " |", file=ltr_out)
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()