Shirochi committed on
Commit
207ab7d
·
verified ·
1 Parent(s): 6ad5ede

Upload google_free_translate.py

Browse files
Files changed (1) hide show
  1. google_free_translate.py +335 -0
google_free_translate.py ADDED
@@ -0,0 +1,335 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # google_free_translate.py
2
+ """
3
+ Free Google Translate API implementation using the web endpoint.
4
+ No API key or billing required!
5
+ """
6
+ import logging
7
+ import threading
8
+ import time
9
+ import json
10
+ import requests
11
+ from typing import Optional, Dict, Any
12
+ import urllib.parse
13
+ import random
14
+
15
class GoogleFreeTranslateNew:
    """Google Translate API for free translation using public web endpoints.

    Uses the same endpoints as the Google Translate web interface and mobile apps.
    No API key or Google Cloud credentials required!

    Successful translations are kept in a small in-memory cache, and
    ``translate`` enforces a minimum delay between calls. ``translate`` never
    raises for request or parse failures: it falls back to returning the
    original text together with an ``'error'`` entry.
    """
    name = 'Google(Free)New'
    free = True
    endpoint: str = 'https://translate.googleapis.com/translate_a/single'

    # Endpoints tried in order by translate(); none of them needs credentials.
    _ENDPOINTS = (
        'https://translate.googleapis.com/translate_a/single',  # Most reliable public endpoint
        'https://clients5.google.com/translate_a/t',  # Mobile client endpoint
        'https://translate.google.com/translate_a/single',  # Direct web endpoint
        'https://clients5.google.com/translate_a/single',  # Alternative client5
    )
    # Once the cache grows past the maximum, the oldest batch is evicted.
    _CACHE_MAX_ENTRIES = 1000
    _CACHE_EVICT_BATCH = 100

    def __init__(self, source_language: str = "auto", target_language: str = "en", logger=None):
        """Create a translator.

        Args:
            source_language: Source language code, or "auto" to let the service detect it.
            target_language: Target language code.
            logger: Optional logger; defaults to this module's logger.
        """
        self.source_language = source_language
        self.target_language = target_language
        self.logger = logger or logging.getLogger(__name__)
        self.cache = {}  # Simple in-memory cache: cache key -> result dict
        self.cache_lock = threading.Lock()
        self.request_lock = threading.Lock()
        self.last_request_time = 0
        self.rate_limit = 0.5  # 500ms between requests to avoid rate limiting

        # Multiple user agents to rotate through (helps avoid 403)
        self.user_agents = [
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
            'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
            'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:121.0) Gecko/20100101 Firefox/121.0',
            'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.1 Safari/605.1.15',
        ]

    def _get_source_code(self):
        """Get the source language code, mapping "zh" to "zh-CN"; other codes pass through."""
        lang_map = {
            "zh": "zh-CN",
            "ja": "ja",
            "ko": "ko",
            "en": "en",
            "auto": "auto",
        }
        return lang_map.get(self.source_language, self.source_language)

    def _get_target_code(self):
        """Get the target language code, mapping "zh" to "zh-CN"; other codes pass through."""
        lang_map = {
            "en": "en",
            "zh": "zh-CN",
            "ja": "ja",
            "ko": "ko",
        }
        return lang_map.get(self.target_language, self.target_language)

    def get_headers(self):
        """Get request headers that mimic a browser with a random user agent."""
        return {
            'Accept': '*/*',
            # 'br' is deliberately not advertised: brotli bodies can only be
            # decoded when an optional brotli package is installed, and an
            # undecoded body cannot be parsed as JSON.
            'Accept-Encoding': 'gzip, deflate',
            'Accept-Language': 'en-US,en;q=0.9',
            'Content-Type': 'application/x-www-form-urlencoded',
            'User-Agent': random.choice(self.user_agents),  # Random user agent to avoid detection
            'Referer': 'https://translate.google.com/',
            'Origin': 'https://translate.google.com',
        }

    def _get_cache_key(self, text: str) -> str:
        """Generate the cache key for translating ``text`` with the current language pair.

        The text itself is part of the key (rather than ``hash(text)``) so two
        different texts can never share a cache entry through a hash collision.
        """
        return f"{self.source_language}_{self.target_language}_{text}"

    def _rate_limit_delay(self):
        """Sleep as needed so calls are spaced at least ``rate_limit`` seconds apart.

        The sleep happens while ``request_lock`` is held, so concurrent callers
        are released one at a time.
        """
        with self.request_lock:
            current_time = time.time()
            time_since_last = current_time - self.last_request_time
            if time_since_last < self.rate_limit:
                time.sleep(self.rate_limit - time_since_last)
            self.last_request_time = time.time()

    def _store_in_cache(self, cache_key: str, result: Dict[str, Any]) -> None:
        """Store a copy of ``result`` under ``cache_key`` and evict old entries when over the limit."""
        with self.cache_lock:
            # Store a copy so later mutation of the returned dict cannot alter the cache.
            self.cache[cache_key] = dict(result)
            if len(self.cache) > self._CACHE_MAX_ENTRIES:
                # dicts preserve insertion order, so the first keys are the oldest entries.
                for key in list(self.cache)[:self._CACHE_EVICT_BATCH]:
                    del self.cache[key]

    def translate(self, text: str) -> Dict[str, Any]:
        """
        Translate text using Google's free web API.

        The rate-limit delay is applied once per call; each endpoint in
        ``_ENDPOINTS`` is then tried in order until one yields a result.

        Args:
            text: Text to translate

        Returns:
            Dict with 'translatedText' and 'detectedSourceLanguage' keys.
            Blank input yields an empty 'translatedText'. If every endpoint
            fails, 'translatedText' is the original text and an 'error' key
            holds the failure message.
        """
        if not text or not text.strip():
            return {
                'translatedText': '',
                'detectedSourceLanguage': self.source_language
            }

        # Check cache first
        cache_key = self._get_cache_key(text)
        with self.cache_lock:
            if cache_key in self.cache:
                self.logger.debug(f"Cache hit for translation: {text[:50]}...")
                # Hand out a copy so callers cannot mutate the cached entry.
                return dict(self.cache[cache_key])

        # Rate limiting
        self._rate_limit_delay()

        try:
            source_lang = self._get_source_code()
            target_lang = self._get_target_code()

            for endpoint_url in self._ENDPOINTS:
                try:
                    if 'clients5.google.com/translate_a/t' in endpoint_url:
                        # Use mobile client API format
                        result = self._translate_via_mobile_api(text, source_lang, target_lang, endpoint_url)
                    else:
                        # Use the public translate_a/single API format
                        result = self._translate_via_single_api(text, source_lang, target_lang, endpoint_url)
                except Exception as e:
                    self.logger.warning(f"Failed with endpoint {endpoint_url}: {e}")
                    continue

                if result:
                    self._store_in_cache(cache_key, result)
                    return result

            # If all endpoints failed
            raise Exception("All Google Translate endpoints failed")

        except Exception as e:
            self.logger.error(f"Translation failed: {e}")
            # Return original text as fallback
            return {
                'translatedText': text,
                'detectedSourceLanguage': self.source_language,
                'error': str(e)
            }

    def _translate_via_single_api(self, text: str, source_lang: str, target_lang: str, endpoint_url: str) -> Optional[Dict[str, Any]]:
        """Translate using the translate_a/single endpoint (older format).

        Several ``client`` values are tried in turn (to avoid 403 errors); the
        first one that produces a parsed result wins.

        Returns:
            The result dict, or None when every client type failed.
        """
        client_types = ['gtx', 'webapp', 't', 'dict-chrome-ex', 'android']

        for client_type in client_types:
            params = {
                'client': client_type,
                'sl': source_lang,
                'tl': target_lang,
                'dt': 't',
                'q': text
            }

            # Add additional parameters for certain clients
            if client_type == 'webapp':
                params['dj'] = '1'  # Return JSON format
                params['dt'] = ['t', 'bd', 'ex', 'ld', 'md', 'qca', 'rw', 'rm', 'ss', 'at']
            elif client_type == 'dict-chrome-ex':
                params['dt'] = ['t', 'bd']

            try:
                result = self._try_single_api_request(params, endpoint_url, client_type)
            except Exception as e:
                self.logger.debug(f"Client type {client_type} failed: {e}")
                continue
            if result:
                return result

        return None

    def _try_single_api_request(self, params: dict, endpoint_url: str, client_type: str) -> Optional[Dict[str, Any]]:
        """Issue one GET request with the given parameters and parse the reply.

        Returns:
            The result dict, or None for a non-200 status or an unparseable body.
        """
        response = requests.get(
            endpoint_url,
            params=params,
            headers=self.get_headers(),
            timeout=10
        )

        if response.status_code != 200:
            self.logger.debug(
                f"Endpoint {endpoint_url} returned HTTP {response.status_code} for client {client_type}"
            )
            return None

        try:
            return self._parse_single_response(response.json(), client_type, params.get('sl', 'auto'))
        except (ValueError, KeyError, IndexError, TypeError) as e:
            # ValueError is the base class of json.JSONDecodeError, so every
            # flavour of JSON decoding failure is covered here.
            self.logger.warning(f"Failed to parse single API response with client {client_type}: {e}")
            return None

    @staticmethod
    def _parse_single_response(result: Any, client_type: str, fallback_lang: str) -> Optional[Dict[str, Any]]:
        """Extract the translation from a decoded translate_a/single payload.

        Args:
            result: Decoded JSON payload (dict for the ``dj=1`` webapp format,
                list for the standard array format).
            client_type: The ``client`` value that produced the payload.
            fallback_lang: Language reported when the payload names none.

        Returns:
            The result dict, or None when the payload has an unexpected shape
            or contains no translated segment.
        """
        # Handle webapp format (dj=1 returns different structure)
        if client_type == 'webapp' and isinstance(result, dict):
            if 'sentences' not in result:
                return None
            parts = [
                sentence['trans']
                for sentence in result['sentences']
                if isinstance(sentence, dict) and isinstance(sentence.get('trans'), str)
            ]
            detected_lang = result.get('src', fallback_lang)

        # Handle standard array format
        elif isinstance(result, list) and len(result) > 0:
            segments = result[0]
            if not isinstance(segments, list):
                return None
            # Rows whose first element is not a string carry no translated
            # text; skipping them keeps the join below from raising.
            parts = [
                item[0]
                for item in segments
                if isinstance(item, list) and len(item) > 0 and isinstance(item[0], str)
            ]
            detected_lang = fallback_lang
            if len(result) > 2 and isinstance(result[2], str):
                detected_lang = result[2]
        else:
            return None

        if not parts:
            # No translated segment at all: report failure so the caller can
            # try the next client type or endpoint.
            return None

        return {
            'translatedText': ''.join(parts),
            'detectedSourceLanguage': detected_lang
        }

    def _translate_via_mobile_api(self, text: str, source_lang: str, target_lang: str, endpoint_url: str) -> Optional[Dict[str, Any]]:
        """Translate using the mobile client endpoint (clients5.google.com/translate_a/t).

        Returns:
            The result dict, or None for a non-200 status or an unparseable body.
        """
        params = {
            'client': 't',  # Mobile client
            'sl': source_lang,
            'tl': target_lang,
            'q': text
        }

        # Use different headers for mobile endpoint
        mobile_headers = {
            'Accept': '*/*',
            # See get_headers(): 'br' is not advertised because it may not be decodable.
            'Accept-Encoding': 'gzip, deflate',
            'Accept-Language': 'en-US,en;q=0.9',
            'User-Agent': 'GoogleTranslate/6.29.59279 (Linux; U; Android 11; Pixel 5)',
            'Content-Type': 'application/x-www-form-urlencoded',
        }

        response = requests.post(
            endpoint_url,
            data=params,
            headers=mobile_headers,
            timeout=10
        )

        if response.status_code != 200:
            self.logger.debug(f"Endpoint {endpoint_url} returned HTTP {response.status_code}")
            return None

        try:
            # This endpoint returns a simple JSON array
            return self._parse_mobile_response(response.json(), source_lang)
        except (ValueError, KeyError, IndexError, TypeError) as e:
            self.logger.warning(f"Failed to parse mobile API response: {e}")
            return None

    @staticmethod
    def _parse_mobile_response(result: Any, source_lang: str) -> Optional[Dict[str, Any]]:
        """Extract the translation from a decoded translate_a/t payload.

        Accepts either ``["text", ...]`` or ``[["text", ...], ...]``. In the
        nested form, a string second item is taken as the detected source
        language (observed reply shape when ``sl`` is "auto" - TODO confirm
        against the live endpoint); otherwise ``source_lang`` is reported.

        Returns:
            The result dict, or None when no string translation is present.
        """
        if not isinstance(result, list) or not result:
            return None

        first = result[0]
        detected_lang = source_lang
        if isinstance(first, str):
            translated_text = first
        elif isinstance(first, list) and len(first) > 0:
            translated_text = first[0]
            if len(first) > 1 and isinstance(first[1], str):
                detected_lang = first[1]
        else:
            return None

        if not isinstance(translated_text, str):
            return None

        return {
            'translatedText': translated_text,
            'detectedSourceLanguage': detected_lang
        }
301
+
302
+ # Convenience function for easy usage
303
def translate_text(text: str, source_lang: str = "auto", target_lang: str = "en") -> str:
    """
    Translate ``text`` in one call, without managing a translator object.

    A fresh ``GoogleFreeTranslateNew`` is built for every call, so nothing is
    cached between calls.

    Args:
        text: Text to translate
        source_lang: Source language code (default: auto-detect)
        target_lang: Target language code (default: en)

    Returns:
        The translated text, or ``text`` itself when the result dict has no
        'translatedText' entry
    """
    outcome = GoogleFreeTranslateNew(source_lang, target_lang).translate(text)
    return outcome.get('translatedText', text)
318
+
319
if __name__ == "__main__":
    # Manual smoke test: translate a few sample phrases into English and
    # print each result together with the reported source language.
    demo = GoogleFreeTranslateNew("auto", "en")

    samples = (
        "こんにちは、世界!",  # Japanese
        "안녕하세요 세계!",  # Korean
        "你好世界!",  # Chinese
        "Hola mundo",  # Spanish
    )
    divider = "-" * 50

    for sample in samples:
        outcome = demo.translate(sample)
        print(f"Original: {sample}")
        print(f"Translated: {outcome['translatedText']}")
        print(f"Detected language: {outcome['detectedSourceLanguage']}")
        print(divider)