-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtranslate.py
More file actions
141 lines (123 loc) · 4.9 KB
/
translate.py
File metadata and controls
141 lines (123 loc) · 4.9 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
import settings
import json
from deep_translator import GoogleTranslator
import zipfile
import re
import custom_translations
def load_translations_mapping():
    """Load previously saved translations into ``settings.translations_mapping``.

    Reads the JSON file at ``settings.translations_path``. When that file does
    not exist, the shared mapping is set to an empty dict so that later
    membership tests and assignments on it work on a first run.

    Returns:
        None when the file was loaded, or an empty dict when it was missing.
        These are the historical return values and are kept for backward
        compatibility; the loaded data lives in ``settings.translations_mapping``.
    """
    try:
        # Be explicit about the encoding instead of relying on the locale.
        with open(settings.translations_path, "r", encoding="utf-8") as json_file:
            # Parse the JSON data from the file
            settings.translations_mapping = json.load(json_file)
    except FileNotFoundError:
        # Previously the empty dict was only returned, never stored.
        settings.translations_mapping = {}
        return settings.translations_mapping
def translate_text(text):
    """Translate ``text`` with the module-level ``translator``.

    Args:
        text: The string to translate.

    Returns:
        The translated string with spaces removed from href attribute values,
        or None when anything raised during translation. Each failure is
        counted per message in ``settings.translations_errors`` and printed.
    """
    try:
        value = translator.translate(text)
        # Google tends to insert spaces into links
        return remove_spaces_in_href(value)
    except Exception as e:
        # Python 3 exceptions have no generic ``message`` attribute; an
        # exception may or may not define one. Fall back to str(e), then to
        # the class name, so this handler cannot raise AttributeError itself.
        message = getattr(e, "message", None) or str(e) or type(e).__name__
        if message not in settings.translations_errors:
            settings.translations_errors[message] = 1
        else:
            settings.translations_errors[message] += 1
        print("Error while translating text:", message)
        return None
def remove_spaces_in_href(input_string):
    """Return ``input_string`` with spaces removed inside double-quoted href values.

    Only attributes written as ``href="..."`` are touched; text outside those
    attribute values is returned unchanged.
    """

    def _compact(match):
        # Rebuild the attribute around the captured value, minus its spaces.
        target = match.group(1)
        return 'href="' + "".join(target.split(" ")) + '"'

    return re.sub(r"href=\"(.*?)\"", _compact, input_string)
def translate_value(value):
    """Translate ``value`` and record it in ``settings.translations_mapping``.

    The value is stripped first and the stripped text is used as the mapping
    key. Values that are already in the mapping or in
    ``settings.custom_translations`` are not translated again.

    Args:
        value: The source string to translate.

    Returns:
        The translation of the stripped value, or None when none is available.
        Earlier versions always returned None, which made the recursive
        handling of "/"-separated values a no-op; callers that ignore the
        return value are unaffected.
    """
    value = value.strip()

    # Already translated: hand back the stored translation so the recursive
    # "/" branch below can reuse it.
    if value in settings.translations_mapping:
        return settings.translations_mapping[value]

    # In custom: nothing is stored in the mapping for these values.
    if value in settings.custom_translations:
        custom = settings.custom_translations
        # NOTE(review): assumes that, when this is a dict, it maps source text
        # to translated text - confirm against custom_translations.
        custom_value = custom.get(value) if isinstance(custom, dict) else None
        return custom_value if isinstance(custom_value, str) else None

    translated_value = None
    # This is a bit of a hack, but it appears some references are in the JSON
    # with a "/" even though those are 2 separate folders.
    if "/" in value and not any(token in value for token in ("http", "</")):
        translated_parts = []
        any_translated = False
        for part in value.split("/"):
            # Empty segments (e.g. from "a//b") are kept without an API call.
            translated_part = translate_value(part) if part.strip() else None
            if translated_part:
                any_translated = True
                translated_parts.append(translated_part)
            else:
                # Keep the untranslated segment so the number and order of
                # segments stays the same as in the source value.
                translated_parts.append(part)
        if any_translated:
            translated_value = "/".join(translated_parts)
    # Too big for the Google API: translate it in chunks.
    elif len(value) > 4999:
        translated_value = process_chunked_value(value)
    else:
        translated_value = translate_text(value)

    if translated_value:
        settings.translations_mapping[value] = translated_value
    return translated_value
def process_chunk(chunk):
    """Translate one chunk of a long value.

    Returns:
        None for an empty chunk. The chunk itself when it is one of
        ``settings.split_markers``, when it contains no ASCII letter, or when
        translation failed (a message is printed in that last case).
        Otherwise the result of ``translate_text``.
    """
    if not len(chunk):
        return None

    is_marker = chunk in settings.split_markers
    # Chunks without at least one letter have nothing to translate.
    if is_marker or re.search(r"[a-zA-Z]", chunk) is None:
        return chunk

    translated = translate_text(chunk)
    if not translated:
        print("Failed to translate chunk:", chunk)
        return chunk
    return translated
def process_chunked_value(value):
    """Translate a long value piece by piece and join the results.

    The value is scanned character by character. A chunk is closed and passed
    to ``process_chunk`` as soon as it contains one of
    ``settings.split_markers``. A chunk that reaches 4999 characters without
    any marker is cut as well, preferably after its last space or newline, so
    that no single request exceeds the size limit this module applies to
    translation requests.

    Args:
        value: The source string to translate.

    Returns:
        The concatenated processed chunks, or ``value`` unchanged when no
        chunk produced any output.
    """
    max_chunk_length = 4999
    translated_chunks = []
    current_chunk = ""

    def emit(chunk):
        # process_chunk returns None for empty chunks; skip those.
        translated_chunk = process_chunk(chunk)
        if translated_chunk is not None:
            translated_chunks.append(translated_chunk)

    # We go from char to char until a split marker is found, then translate.
    for char in value:
        current_chunk += char
        if any(marker in current_chunk for marker in settings.split_markers):
            emit(current_chunk)
            current_chunk = ""
        elif len(current_chunk) >= max_chunk_length:
            # No marker within the limit: cut after the last whitespace so
            # words stay intact, or do a hard cut when there is none.
            cut = max(current_chunk.rfind(" "), current_chunk.rfind("\n")) + 1
            if cut == 0:
                cut = len(current_chunk)
            head = current_chunk[:cut]
            body = head.rstrip()
            emit(body)
            # Re-attach the trailing whitespace untouched so the words on
            # either side of the cut are not merged when chunks are joined.
            trailing = head[len(body):]
            if trailing:
                translated_chunks.append(trailing)
            current_chunk = current_chunk[cut:]

    # Whatever is left after the last marker or cut.
    emit(current_chunk)

    return "".join(translated_chunks) if translated_chunks else value
def process_json(data):
    """Walk a decoded JSON structure and translate the configured string fields.

    Lists are traversed element by element. For dicts, every key increments
    the module-level ``translations_count`` (a progress line is printed every
    10000 keys); keys in ``settings.ignore_list`` are skipped; non-empty
    string values whose key is in ``settings.keys_to_process`` go to
    ``translate_value``; nested dicts and lists are processed recursively.
    Any other kind of value is ignored.
    """
    global translations_count

    if isinstance(data, list):
        for element in data:
            process_json(element)
        return
    if not isinstance(data, dict):
        return

    for field, content in data.items():
        translations_count += 1
        if not translations_count % 10000:
            print(
                f" {translations_count} translations made. Still processing, please wait"
            )

        if field in settings.ignore_list:
            continue

        wants_translation = (
            field in settings.keys_to_process
            and isinstance(content, str)
            and content
        )
        if wants_translation:
            translate_value(content)
        elif isinstance(content, (dict, list)):
            process_json(content)
def translate_zip():
    """Translate every JSON file of the configured zip archive.

    Sets up the module-level ``translator`` (English to
    ``settings.target_language``) and resets ``translations_count``, loads the
    custom translations, then runs ``process_json`` on each ``.json`` archive
    member whose name does not start with ``__MACOSX``. The translations
    mapping is written to ``settings.translations_path`` in all cases, even
    when processing raises, so completed work is not lost.
    """
    global translator, translations_count
    translator = GoogleTranslator("en", settings.target_language)
    translations_count = 0
    custom_translations.load_custom_translations()

    try:
        with zipfile.ZipFile(settings.zip_file_path, "r") as archive:
            for member in archive.namelist():
                if not member.endswith(".json") or member.startswith("__MACOSX"):
                    continue
                print("Processing file:", member)
                raw_text = archive.read(member).decode("utf-8")
                process_json(json.loads(raw_text))
    finally:
        with open(settings.translations_path, "w") as out_file:
            json.dump(settings.translations_mapping, out_file, indent=4)