Loren committed on
Commit
cc8013b
·
verified ·
1 Parent(s): 699000d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +45 -39
app.py CHANGED
@@ -31,50 +31,56 @@ def process_transcript(language: str, audio_path: str) -> str:
31
 
32
  @spaces.GPU
33
  def process_translate(language: str, audio_path: str) -> str:
34
- conversation = [
35
- {
36
- "role": "user",
37
- "content": [
38
- {
39
- "type": "audio",
40
- "path": audio_path,
41
- },
42
- {"type": "text", "text": "Translate this in "+language},
43
- ],
44
- }
45
- ]
46
-
47
- inputs = processor.apply_chat_template(conversation)
48
- inputs = inputs.to(device, dtype=torch.bfloat16)
49
-
50
- outputs = model.generate(**inputs, max_new_tokens=MAX_TOKENS)
51
- decoded_outputs = processor.batch_decode(outputs[:, inputs.input_ids.shape[1]:], skip_special_tokens=True)
 
 
 
52
 
53
- return decoded_outputs[0]
54
  ###
55
 
56
  @spaces.GPU
57
  def process_chat(question: str, audio_path: str) -> str:
58
- conversation = [
59
- {
60
- "role": "user",
61
- "content": [
62
- {
63
- "type": "audio",
64
- "path": audio_path,
65
- },
66
- {"type": "text", "text": question},
67
- ],
68
- }
69
- ]
70
-
71
- inputs = processor.apply_chat_template(conversation)
72
- inputs = inputs.to(device, dtype=torch.bfloat16)
73
-
74
- outputs = model.generate(**inputs, max_new_tokens=500)
75
- decoded_outputs = processor.batch_decode(outputs[:, inputs.input_ids.shape[1]:], skip_special_tokens=True)
76
-
77
- return decoded_outputs[0]
 
 
 
78
  ###
79
 
80
  def disable_buttons():
 
31
 
32
  @spaces.GPU
33
  def process_translate(language: str, audio_path: str) -> str:
34
+ if audio_path is None:
35
+ return "Please provide some input audio: either upload an audio file or use the microphone."
36
+ else:
37
+ conversation = [
38
+ {
39
+ "role": "user",
40
+ "content": [
41
+ {
42
+ "type": "audio",
43
+ "path": audio_path,
44
+ },
45
+ {"type": "text", "text": "Translate this in "+language},
46
+ ],
47
+ }
48
+ ]
49
+
50
+ inputs = processor.apply_chat_template(conversation)
51
+ inputs = inputs.to(device, dtype=torch.bfloat16)
52
+
53
+ outputs = model.generate(**inputs, max_new_tokens=MAX_TOKENS)
54
+ decoded_outputs = processor.batch_decode(outputs[:, inputs.input_ids.shape[1]:], skip_special_tokens=True)
55
 
56
+ return decoded_outputs[0]
57
  ###
58
 
59
  @spaces.GPU
60
  def process_chat(question: str, audio_path: str) -> str:
61
+ if audio_path is None:
62
+ return "Please provide some input audio: either upload an audio file or use the microphone."
63
+ else:
64
+ conversation = [
65
+ {
66
+ "role": "user",
67
+ "content": [
68
+ {
69
+ "type": "audio",
70
+ "path": audio_path,
71
+ },
72
+ {"type": "text", "text": question},
73
+ ],
74
+ }
75
+ ]
76
+
77
+ inputs = processor.apply_chat_template(conversation)
78
+ inputs = inputs.to(device, dtype=torch.bfloat16)
79
+
80
+ outputs = model.generate(**inputs, max_new_tokens=500)
81
+ decoded_outputs = processor.batch_decode(outputs[:, inputs.input_ids.shape[1]:], skip_special_tokens=True)
82
+
83
+ return decoded_outputs[0]
84
  ###
85
 
86
  def disable_buttons():