Spaces:

jhj0517
/

Whisper-WebUI

Running

App Files Files Community

jhj0517 committed on Oct 30, 2024

Commit

79e6f08

1 Parent(s): a1b32c1

Refactor translation

Browse files

Files changed (3) hide show

modules/translation/deepl_api.py +17 -26
modules/translation/translation_base.py +18 -26
modules/utils/subtitle_manager.py +62 -69

modules/translation/deepl_api.py CHANGED Viewed

@@ -139,37 +139,28 @@ class DeepLAPI:
         )
         files_info = {}
-        for fileobj in fileobjs:
-            file_path = fileobj
-            file_name, file_ext = os.path.splitext(os.path.basename(fileobj))
-            if file_ext == ".srt":
-                parsed_dicts = parse_srt(file_path=file_path)
-            elif file_ext == ".vtt":
-                parsed_dicts = parse_vtt(file_path=file_path)
             batch_size = self.max_text_batch_size
-            for batch_start in range(0, len(parsed_dicts), batch_size):
-                batch_end = min(batch_start + batch_size, len(parsed_dicts))
-                sentences_to_translate = [dic["sentence"] for dic in parsed_dicts[batch_start:batch_end]]
                 translated_texts = self.request_deepl_translate(auth_key, sentences_to_translate, source_lang,
                                                                 target_lang, is_pro)
                 for i, translated_text in enumerate(translated_texts):
-                    parsed_dicts[batch_start + i]["sentence"] = translated_text["text"]
-                progress(batch_end / len(parsed_dicts), desc="Translating..")
-            if file_ext == ".srt":
-                subtitle = get_serialized_srt(parsed_dicts)
-            elif file_ext == ".vtt":
-                subtitle = get_serialized_vtt(parsed_dicts)
-            if add_timestamp:
-                timestamp = datetime.now().strftime("%m%d%H%M%S")
-                file_name += f"-{timestamp}"
-            output_path = os.path.join(self.output_dir, f"{file_name}{file_ext}")
-            write_file(subtitle, output_path)
             files_info[file_name] = {"subtitle": subtitle, "path": output_path}

         )
         files_info = {}
+        for file_path in fileobjs:
+            file_name, file_ext = os.path.splitext(os.path.basename(file_path))
+            writer = get_writer(file_ext, self.output_dir)
+            segments = writer.to_segments(file_path)
             batch_size = self.max_text_batch_size
+            for batch_start in range(0, len(segments), batch_size):
+                progress(batch_start / len(segments), desc="Translating..")
+                sentences_to_translate = segments[batch_start:batch_start+batch_size]
                 translated_texts = self.request_deepl_translate(auth_key, sentences_to_translate, source_lang,
                                                                 target_lang, is_pro)
                 for i, translated_text in enumerate(translated_texts):
+                    segments[batch_start + i].text = translated_text["text"]
+            print("DeepL Segments: ", segments)
+            subtitle, output_path = generate_file(
+                output_dir=self.output_dir,
+                output_file_name=file_name,
+                output_format=file_ext,
+                result=segments,
+                add_timestamp=add_timestamp
+            )
             files_info[file_name] = {"subtitle": subtitle, "path": output_path}

modules/translation/translation_base.py CHANGED Viewed

@@ -95,32 +95,22 @@ class TranslationBase(ABC):
             files_info = {}
             for fileobj in fileobjs:
                 file_name, file_ext = os.path.splitext(os.path.basename(fileobj))
-                if file_ext == ".srt":
-                    parsed_dicts = parse_srt(file_path=fileobj)
-                    total_progress = len(parsed_dicts)
-                    for index, dic in enumerate(parsed_dicts):
-                        progress(index / total_progress, desc="Translating..")
-                        translated_text = self.translate(dic["sentence"], max_length=max_length)
-                        dic["sentence"] = translated_text
-                    subtitle = get_serialized_srt(parsed_dicts)
-                elif file_ext == ".vtt":
-                    parsed_dicts = parse_vtt(file_path=fileobj)
-                    total_progress = len(parsed_dicts)
-                    for index, dic in enumerate(parsed_dicts):
-                        progress(index / total_progress, desc="Translating..")
-                        translated_text = self.translate(dic["sentence"], max_length=max_length)
-                        dic["sentence"] = translated_text
-                    subtitle = get_serialized_vtt(parsed_dicts)
-                if add_timestamp:
-                    timestamp = datetime.now().strftime("%m%d%H%M%S")
-                    file_name += f"-{timestamp}"
-                output_path = os.path.join(self.output_dir, f"{file_name}{file_ext}")
-                write_file(subtitle, output_path)
-                files_info[file_name] = {"subtitle": subtitle, "path": output_path}
             total_result = ''
             for file_name, info in files_info.items():
@@ -134,6 +124,8 @@ class TranslationBase(ABC):
         except Exception as e:
             print(f"Error: {str(e)}")
         finally:
             self.release_cuda_memory()

             files_info = {}
             for fileobj in fileobjs:
                 file_name, file_ext = os.path.splitext(os.path.basename(fileobj))
+                writer = get_writer(file_ext, self.output_dir)
+                segments = writer.to_segments(fileobj)
+                for i, segment in enumerate(segments):
+                    progress(i / len(segments), desc="Translating..")
+                    translated_text = self.translate(segment.text, max_length=max_length)
+                    segment.text = translated_text
+                subtitle, file_path = generate_file(
+                    output_dir=self.output_dir,
+                    output_file_name=file_name,
+                    output_format=file_ext,
+                    result=segments,
+                    add_timestamp=add_timestamp
+                )
+                files_info[file_name] = {"subtitle": subtitle, "path": file_path}
             total_result = ''
             for file_name, info in files_info.items():
         except Exception as e:
             print(f"Error: {str(e)}")
+            import traceback
+            traceback.print_exc()
         finally:
             self.release_cuda_memory()

modules/utils/subtitle_manager.py CHANGED Viewed

@@ -33,6 +33,18 @@ def format_timestamp(
     )
 def get_start(segments: List[dict]) -> Optional[float]:
     return next(
         (w["start"] for s in segments for w in s["words"]),
@@ -54,16 +66,12 @@ class ResultWriter:
         self.output_dir = output_dir
     def __call__(
-        self, result: Union[dict, List[Segment]], output_file_name: str, add_timestamp: bool = True,
             options: Optional[dict] = None, **kwargs
     ):
         if isinstance(result, List) and result and isinstance(result[0], Segment):
             result = {"segments": [seg.dict() for seg in result]}
-        if add_timestamp:
-            timestamp = datetime.now().strftime("%m%d%H%M%S")
-            output_file_name += f"-{timestamp}"
         output_path = os.path.join(
             self.output_dir, output_file_name + "." + self.extension
         )
@@ -216,6 +224,26 @@ class WriteVTT(SubtitlesWriter):
         for start, end, text in self.iterate_result(result, options, **kwargs):
             print(f"{start} --> {end}\n{text}\n", file=file, flush=True)
 class WriteSRT(SubtitlesWriter):
     extension: str = "srt"
@@ -230,6 +258,27 @@ class WriteSRT(SubtitlesWriter):
         ):
             print(f"{i}\n{start} --> {end}\n{text}\n", file=file, flush=True)
 class WriteTSV(ResultWriter):
     """
@@ -265,7 +314,7 @@ class WriteJSON(ResultWriter):
 def get_writer(
     output_format: str, output_dir: str
 ) -> Callable[[dict, TextIO, dict], None]:
-    output_format = output_format.strip().lower()
     writers = {
         "txt": WriteTXT,
@@ -292,75 +341,19 @@ def get_writer(
 def generate_file(
     output_format: str, output_dir: str, result: Union[dict, List[Segment]], output_file_name: str, add_timestamp: bool = True,
 ) -> Tuple[str, str]:
     file_path = os.path.join(output_dir, f"{output_file_name}.{output_format}")
     file_writer = get_writer(output_format=output_format, output_dir=output_dir)
-    file_writer(result=result, output_file_name=output_file_name, add_timestamp=add_timestamp)
     content = read_file(file_path)
     return content, file_path
-def parse_srt(file_path):
-    """Reads SRT file and returns as dict"""
-    with open(file_path, 'r', encoding='utf-8') as file:
-        srt_data = file.read()
-    data = []
-    blocks = srt_data.split('\n\n')
-    for block in blocks:
-        if block.strip() != '':
-            lines = block.strip().split('\n')
-            index = lines[0]
-            timestamp = lines[1]
-            sentence = ' '.join(lines[2:])
-            data.append({
-                "index": index,
-                "timestamp": timestamp,
-                "sentence": sentence
-            })
-    return data
-def parse_vtt(file_path):
-    """Reads WEBVTT file and returns as dict"""
-    with open(file_path, 'r', encoding='utf-8') as file:
-        webvtt_data = file.read()
-    data = []
-    blocks = webvtt_data.split('\n\n')
-    for block in blocks:
-        if block.strip() != '' and not block.strip().startswith("WEBVTT"):
-            lines = block.strip().split('\n')
-            timestamp = lines[0]
-            sentence = ' '.join(lines[1:])
-            data.append({
-                "timestamp": timestamp,
-                "sentence": sentence
-            })
-    return data
-def get_serialized_srt(dicts):
-    output = ""
-    for dic in dicts:
-        output += f'{dic["index"]}\n'
-        output += f'{dic["timestamp"]}\n'
-        output += f'{dic["sentence"]}\n\n'
-    return output
-def get_serialized_vtt(dicts):
-    output = "WEBVTT\n\n"
-    for dic in dicts:
-        output += f'{dic["timestamp"]}\n'
-        output += f'{dic["sentence"]}\n\n'
-    return output
 def safe_filename(name):
     INVALID_FILENAME_CHARS = r'[<>:"/\\|?*\x00-\x1f]'
     safe_name = re.sub(INVALID_FILENAME_CHARS, '_', name)

     )
+def time_str_to_seconds(time_str: str, decimal_marker: str = ",") -> float:
+    """
+    Convert a subtitle timestamp string into seconds.
+
+    Accepts "HH:MM:SS<marker>fff" as well as the shorter "MM:SS<marker>fff"
+    form (hours omitted). The fractional part is optional; when it is absent
+    the timestamp is treated as a whole number of seconds.
+
+    Args:
+        time_str: Timestamp text, e.g. "00:01:02,500". Surrounding whitespace is ignored.
+        decimal_marker: Character separating whole seconds from the fraction.
+
+    Returns:
+        The timestamp expressed in seconds.
+
+    Raises:
+        ValueError: If the clock part does not have two or three ":"-separated
+            integer fields.
+    """
+    clock, _, fractional = time_str.strip().partition(decimal_marker)
+    fields = clock.split(":")
+    if len(fields) == 2:
+        # Hours were omitted; treat them as zero.
+        fields.insert(0, "0")
+    if len(fields) != 3:
+        raise ValueError(f"Invalid timestamp: {time_str!r}")
+    hours, minutes, seconds = (int(field) for field in fields)
+    fractional_seconds = float("0." + fractional) if fractional else 0.0
+    return hours * 3600 + minutes * 60 + seconds + fractional_seconds
 def get_start(segments: List[dict]) -> Optional[float]:
     return next(
         (w["start"] for s in segments for w in s["words"]),
         self.output_dir = output_dir
     def __call__(
+        self, result: Union[dict, List[Segment]], output_file_name: str,
             options: Optional[dict] = None, **kwargs
     ):
         if isinstance(result, List) and result and isinstance(result[0], Segment):
             result = {"segments": [seg.dict() for seg in result]}
         output_path = os.path.join(
             self.output_dir, output_file_name + "." + self.extension
         )
         for start, end, text in self.iterate_result(result, options, **kwargs):
             print(f"{start} --> {end}\n{text}\n", file=file, flush=True)
+    def to_segments(self, file_path: str) -> List[Segment]:
+        """
+        Parse the WebVTT file at `file_path` into a list of `Segment` objects.
+
+        The file content is split into blocks on blank lines. A block yields a
+        segment only if one of its lines contains the " --> " timing separator,
+        so the "WEBVTT" header and blocks without timings are skipped. Lines
+        before the timing line (such as a cue identifier) are ignored, and
+        anything following the end timestamp on the timing line is discarded.
+        The lines after the timing line are joined with single spaces to form
+        the segment text.
+
+        Timestamps are converted with `time_str_to_seconds` using
+        `self.decimal_marker`.
+        """
+        segments = []
+        for block in read_file(file_path).split('\n\n'):
+            lines = block.strip().split('\n')
+            timing_index = next((i for i, line in enumerate(lines) if " --> " in line), None)
+            if timing_index is None:
+                continue
+            start_str, end_str = lines[timing_index].split(" --> ", 1)
+            # Keep only the timestamp token; cue settings may follow it on the same line.
+            end_str = (end_str.split() or [""])[0]
+            segments.append(Segment(
+                start=time_str_to_seconds(start_str.strip(), self.decimal_marker),
+                end=time_str_to_seconds(end_str, self.decimal_marker),
+                text=' '.join(lines[timing_index + 1:])
+            ))
+        return segments
 class WriteSRT(SubtitlesWriter):
     extension: str = "srt"
         ):
             print(f"{i}\n{start} --> {end}\n{text}\n", file=file, flush=True)
+    def to_segments(self, file_path: str) -> List[Segment]:
+        """
+        Parse the SRT file at `file_path` into a list of `Segment` objects.
+
+        The file content is split into blocks on blank lines. Within each block
+        the line containing the " --> " separator is taken as the timing line;
+        lines before it (the numeric cue index) are ignored and the lines after
+        it are joined with single spaces to form the segment text. Blocks
+        without a timing line are skipped.
+
+        Timestamps are converted with `time_str_to_seconds` using
+        `self.decimal_marker`.
+        """
+        segments = []
+        for block in read_file(file_path).split('\n\n'):
+            lines = block.strip().split('\n')
+            timing_index = next((i for i, line in enumerate(lines) if " --> " in line), None)
+            if timing_index is None:
+                continue
+            start_str, end_str = lines[timing_index].split(" --> ", 1)
+            segments.append(Segment(
+                start=time_str_to_seconds(start_str, self.decimal_marker),
+                end=time_str_to_seconds(end_str, self.decimal_marker),
+                text=' '.join(lines[timing_index + 1:])
+            ))
+        return segments
 class WriteTSV(ResultWriter):
     """
 def get_writer(
     output_format: str, output_dir: str
 ) -> Callable[[dict, TextIO, dict], None]:
+    output_format = output_format.strip().lower().replace(".", "")
     writers = {
         "txt": WriteTXT,
 def generate_file(
     output_format: str, output_dir: str, result: Union[dict, List[Segment]], output_file_name: str, add_timestamp: bool = True,
 ) -> Tuple[str, str]:
+    """
+    Write `result` to `output_dir` in the requested format and return the written content.
+
+    Args:
+        output_format: Format name or file extension; it is stripped, lower-cased
+            and has dots removed before use (so ".SRT" becomes "srt").
+        output_dir: Directory the file is written into.
+        result: Transcription result, either a dict or a list of `Segment`.
+        output_file_name: Base file name without extension.
+        add_timestamp: When True, "-<MMDDHHMMSS>" (current local time) is
+            appended to the base file name.
+
+    Returns:
+        A tuple of (file content read back from disk, path of the written file).
+    """
+    output_format = output_format.strip().lower().replace(".", "")
+    if add_timestamp:
+        timestamp = datetime.now().strftime("%m%d%H%M%S")
+        # Hyphen keeps the base name and the timestamp visually separate.
+        output_file_name += f"-{timestamp}"
     file_path = os.path.join(output_dir, f"{output_file_name}.{output_format}")
     file_writer = get_writer(output_format=output_format, output_dir=output_dir)
+    file_writer(result=result, output_file_name=output_file_name)
     content = read_file(file_path)
     return content, file_path
 def safe_filename(name):
     INVALID_FILENAME_CHARS = r'[<>:"/\\|?*\x00-\x1f]'
     safe_name = re.sub(INVALID_FILENAME_CHARS, '_', name)