1234
This commit is contained in:
parent
b3267425a5
commit
018d647783
2
01.py
2
01.py
@ -22,7 +22,7 @@ def sort_json_keys(input_file, output_file):
|
||||
sorted_data = {k: v for k, v in sorted_items}
|
||||
|
||||
with open(output_file, "w", encoding="utf-8") as f:
|
||||
json.dump(sorted_data, f, ensure_ascii=False, indent=2)
|
||||
json.dump(sorted_data, f, ensure_ascii=False, indent=4)
|
||||
|
||||
|
||||
# 示例用法
|
||||
|
12
04.py
12
04.py
@ -6,7 +6,7 @@ import re
|
||||
# 清洗函数
|
||||
def clean_text(text):
|
||||
text = re.sub(r"[#&]", "\n", text)
|
||||
text = re.sub(r"^(\[?[A-Za-z0-9]{2}\]?)+", "", text)
|
||||
text = re.sub(r"^(\\[A-Za-z0-9]{2}\s*)+", "", text)
|
||||
text = re.sub(r"/%*\$", "", text)
|
||||
text = re.sub(r"\^1", "", text)
|
||||
return text
|
||||
@ -21,12 +21,12 @@ def restore_text(key, text, source_text):
|
||||
def replace_newline(m):
|
||||
return "#" if "#" in original else "&"
|
||||
|
||||
text = re.sub(r"\n", replace_newline, text)
|
||||
text = re.sub(r"\\\n", replace_newline, text)
|
||||
else:
|
||||
text = text.replace("\n", "&")
|
||||
text = text.replace("\\\n", "&")
|
||||
|
||||
# 2. 恢复前缀控制符 z* 或标签
|
||||
match_prefix = re.match(r"^(\[?[A-Za-z0-9]{2}\]?)+", source_text.get(key, ""))
|
||||
match_prefix = re.match(r"^(\\[A-Za-z0-9]{2}\s*)+", source_text.get(key, ""))
|
||||
if match_prefix:
|
||||
text = match_prefix.group(0) + text
|
||||
|
||||
@ -65,7 +65,7 @@ def process_all_jsons(original_root, processed_root, target_root):
|
||||
# 清洗处理并保存 processed 文件
|
||||
cleaned_data = {k: clean_text(v) for k, v in original_data.items()}
|
||||
with open(processed_path, "w", encoding="utf-8") as f:
|
||||
json.dump(cleaned_data, f, ensure_ascii=False, indent=2)
|
||||
json.dump(cleaned_data, f, ensure_ascii=False, indent=4)
|
||||
|
||||
# 再次读取 processed 文件进行还原
|
||||
with open(processed_path, "r", encoding="utf-8") as f:
|
||||
@ -75,7 +75,7 @@ def process_all_jsons(original_root, processed_root, target_root):
|
||||
k: restore_text(k, v, original_data) for k, v in processed_data.items()
|
||||
}
|
||||
with open(target_path, "w", encoding="utf-8") as f:
|
||||
json.dump(restored_data, f, ensure_ascii=False, indent=2)
|
||||
json.dump(restored_data, f, ensure_ascii=False, indent=4)
|
||||
|
||||
print(f"✅ 处理完成: {rel_path}")
|
||||
|
||||
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
13093
text_processed/ch1/zh_CN_new.json
Normal file
13093
text_processed/ch1/zh_CN_new.json
Normal file
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
12484
text_target/ch1/en.json
12484
text_target/ch1/en.json
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
26182
text_target/ch2/en.json
26182
text_target/ch2/en.json
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
Loading…
x
Reference in New Issue
Block a user