Spaces:

artificialguybr
/

video-dubbing

Runtime error

App Files Files Community

artificialguybr committed on Jul 5, 2024

Commit

4fe6158

verified ·

1 Parent(s): 5fe7517

Update app.py

Browse files

Files changed (1) hide show

app.py +8 -6

app.py CHANGED Viewed

@@ -71,7 +71,8 @@ def check_for_faces(video_path):
         if not ret:
             break
         gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
-        if face_cascade.detectMultiScale(gray, 1.1, 4):
             return True
     return False
@@ -158,18 +159,19 @@ def process_video(radio, video, target_language, has_closeup_face):
         target_language_code, voice = language_mapping[target_language]
         translator = Translator()
         translated_text = translator.translate(whisper_text, dest=target_language_code).text
-        print(translated_text)
         asyncio.run(text_to_speech(translated_text, voice, f"{run_uuid}_output_synth.wav"))
         if has_closeup_face or check_for_faces(video_path):
             try:
                 subprocess.run(f"python Wav2Lip/inference.py --checkpoint_path 'Wav2Lip/checkpoints/wav2lip_gan.pth' --face '{video_path}' --audio '{run_uuid}_output_synth.wav' --pads 0 15 0 0 --resize_factor 1 --nosmooth --outfile '{run_uuid}_output_video.mp4'", shell=True, check=True)
-            except subprocess.CalledProcessError:
-                gr.Warning("Wav2lip didn't detect a face. Please try again with the option disabled.")
-                subprocess.run(f"ffmpeg -i {video_path} -i {run_uuid}_output_synth.wav -c:v copy -c:a aac -strict experimental -map 0:v:0 -map 1:a:0 {run_uuid}_output_video.mp4", shell=True)
         else:
-            subprocess.run(f"ffmpeg -i {video_path} -i {run_uuid}_output_synth.wav -c:v copy -c:a aac -strict experimental -map 0:v:0 -map 1:a:0 {run_uuid}_output_video.mp4", shell=True)
         output_video_path = f"{run_uuid}_output_video.mp4"
         if not os.path.exists(output_video_path):

         if not ret:
             break
         gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
+        faces = face_cascade.detectMultiScale(gray, 1.1, 4)
+        if len(faces) > 0:
             return True
     return False
         target_language_code, voice = language_mapping[target_language]
         translator = Translator()
         translated_text = translator.translate(whisper_text, dest=target_language_code).text
+        print(f"Translated text: {translated_text}")
         asyncio.run(text_to_speech(translated_text, voice, f"{run_uuid}_output_synth.wav"))
         if has_closeup_face or check_for_faces(video_path):
             try:
                 subprocess.run(f"python Wav2Lip/inference.py --checkpoint_path 'Wav2Lip/checkpoints/wav2lip_gan.pth' --face '{video_path}' --audio '{run_uuid}_output_synth.wav' --pads 0 15 0 0 --resize_factor 1 --nosmooth --outfile '{run_uuid}_output_video.mp4'", shell=True, check=True)
+            except subprocess.CalledProcessError as e:
+                print(f"Wav2Lip error: {str(e)}")
+                gr.Warning("Wav2lip didn't detect a face or encountered an error. Falling back to simple audio replacement.")
+                subprocess.run(f"ffmpeg -i {video_path} -i {run_uuid}_output_synth.wav -c:v copy -c:a aac -strict experimental -map 0:v:0 -map 1:a:0 {run_uuid}_output_video.mp4", shell=True, check=True)
         else:
+            subprocess.run(f"ffmpeg -i {video_path} -i {run_uuid}_output_synth.wav -c:v copy -c:a aac -strict experimental -map 0:v:0 -map 1:a:0 {run_uuid}_output_video.mp4", shell=True, check=True)
         output_video_path = f"{run_uuid}_output_video.mp4"
         if not os.path.exists(output_video_path):