1234

2025-05-14 12:47:01 +08:00 · 2025-05-14 12:47:01 +08:00 · 018d647783
commit 018d647783
parent b3267425a5
11 changed files with 90432 additions and 77339 deletions
--- a/01.py
+++ b/01.py
@@ -22,7 +22,7 @@ def sort_json_keys(input_file, output_file):
    sorted_data = {k: v for k, v in sorted_items}

    with open(output_file, "w", encoding="utf-8") as f:
-        json.dump(sorted_data, f, ensure_ascii=False, indent=2)
+        json.dump(sorted_data, f, ensure_ascii=False, indent=4)


 # 示例用法
--- a/04.py
+++ b/04.py
@@ -6,7 +6,7 @@ import re
 # 清洗函数
 def clean_text(text):
    text = re.sub(r"[#&]", "\n", text)
-    text = re.sub(r"^(\[?[A-Za-z0-9]{2}\]?)+", "", text)
+    text = re.sub(r"^(\\[A-Za-z0-9]{2}\s*)+", "", text)
    text = re.sub(r"/%*\$", "", text)
    text = re.sub(r"\^1", "", text)
    return text
@@ -21,12 +21,12 @@ def restore_text(key, text, source_text):
        def replace_newline(m):
            return "#" if "#" in original else "&"

-        text = re.sub(r"\n", replace_newline, text)
+        text = re.sub(r"\\\n", replace_newline, text)
    else:
-        text = text.replace("\n", "&")
+        text = text.replace("\\\n", "&")

    # 2. 恢复前缀控制符 z* 或标签
-    match_prefix = re.match(r"^(\[?[A-Za-z0-9]{2}\]?)+", source_text.get(key, ""))
+    match_prefix = re.match(r"^(\\[A-Za-z0-9]{2}\s*)+", source_text.get(key, ""))
    if match_prefix:
        text = match_prefix.group(0) + text

@@ -65,7 +65,7 @@ def process_all_jsons(original_root, processed_root, target_root):
            # 清洗处理并保存 processed 文件
            cleaned_data = {k: clean_text(v) for k, v in original_data.items()}
            with open(processed_path, "w", encoding="utf-8") as f:
-                json.dump(cleaned_data, f, ensure_ascii=False, indent=2)
+                json.dump(cleaned_data, f, ensure_ascii=False, indent=4)

            # 再次读取 processed 文件进行还原
            with open(processed_path, "r", encoding="utf-8") as f:
@@ -75,7 +75,7 @@ def process_all_jsons(original_root, processed_root, target_root):
                k: restore_text(k, v, original_data) for k, v in processed_data.items()
            }
            with open(target_path, "w", encoding="utf-8") as f:
-                json.dump(restored_data, f, ensure_ascii=False, indent=2)
+                json.dump(restored_data, f, ensure_ascii=False, indent=4)

            print(f"✅ 处理完成: {rel_path}")

--- a/text_processed/ch1/en.json
+++ b/text_processed/ch1/en.json
--- a/text_processed/ch1/zh_CN.json
+++ b/text_processed/ch1/zh_CN.json
--- a/text_processed/ch1/zh_CN_new.json
+++ b/text_processed/ch1/zh_CN_new.json
--- a/text_processed/ch2/en.json
+++ b/text_processed/ch2/en.json
--- a/text_processed/ch2/zh_CN.json
+++ b/text_processed/ch2/zh_CN.json
--- a/text_target/ch1/en.json
+++ b/text_target/ch1/en.json
--- a/text_target/ch1/zh_CN.json
+++ b/text_target/ch1/zh_CN.json
--- a/text_target/ch2/en.json
+++ b/text_target/ch2/en.json
--- a/text_target/ch2/zh_CN.json
+++ b/text_target/ch2/zh_CN.json