InvertedFate-PSTVL/labelimg.py
2025-04-17 00:25:06 +08:00

210 lines
6.1 KiB
Python

import os
import json, re
from PIL import Image
import easyocr
import numpy as np
import pytesseract
from collections import OrderedDict
from paddleocr import PaddleOCR
COMIC_ROOT = "comic"
TEXT_OUTPUT_DIR = "text2"
reader = easyocr.Reader(["en"], gpu=True)
def get_last_frame_gif(path):
gif = Image.open(path)
try:
while True:
gif.seek(gif.tell() + 1)
except EOFError:
return gif.convert("RGB")
def load_image(path):
return (
get_last_frame_gif(path)
if path.lower().endswith(".gif")
else Image.open(path).convert("RGB")
)
def get_color(img, x, y):
return img.getpixel((x, y))
def is_color(c, hex_code):
rgb = tuple(int(hex_code[i : i + 2], 16) for i in (1, 3, 5))
return c == rgb
def check_diag(img, x1, y1, x2, y2, color_hex):
return is_color(get_color(img, x1, y1), color_hex) and is_color(
get_color(img, x2, y2), color_hex
)
def is_battle_type(img):
pts = [(32, 473), (185, 432), (454, 473), (609, 432)]
return all(
is_color(get_color(img, x, y), "#ff7f27")
or is_color(get_color(img, x, y), "#ffff00")
for x, y in pts
)
def has_textbox(img, base_y_offset):
return (
check_diag(img, 32, base_y_offset, 37, base_y_offset + 5, "#ffffff")
and check_diag(
img, 604, base_y_offset + 146, 609, base_y_offset + 151, "#ffffff"
)
and check_diag(img, 38, base_y_offset + 6, 42, base_y_offset + 10, "#000000")
)
def has_battle_textbox(img, base_y_offset):
return (
check_diag(img, 32, base_y_offset, 36, base_y_offset + 4, "#ffffff")
and check_diag(
img, 602, base_y_offset + 135, 606, base_y_offset + 139, "#ffffff"
)
and check_diag(img, 37, base_y_offset + 5, 41, base_y_offset + 9, "#000000")
)
def check_avatar_absent(img, y1, y2):
def line_black(y):
for x in range(60, 145):
if get_color(img, x, y) != (0, 0, 0):
return False
return True
return line_black(y1) and line_black(y2)
def crop_textbox(img, is_battle, has_avatar, base_y_offset):
x0 = 160 if has_avatar else (39 if is_battle else 40)
y0 = base_y_offset + (7 if is_battle else 8)
y1 = base_y_offset + (132 if is_battle else 143)
x1 = 600 if is_battle else 602
return img.crop((x0, y0, x1, y1))
# def ocr_text(img_region):
# np_img = np.array(img_region)
# result = reader.readtext(np_img, detail=0)
# return " ".join(result).strip()
ocr = PaddleOCR(use_angle_cls=True, lang="en")
def ocr_text(img_region):
result = ocr.ocr(np.array(img_region))
if result and result[0]:
return " ".join([line[1][0] for line in result[0]]).strip()
return ""
# def ocr_text(img_region):
# text = pytesseract.image_to_string(img_region, config="--psm 6")
# return text.strip().replace("=", "*")
def describe_result(file, pos, avatar, battle_type):
return f"{file}-{pos}{avatar}{battle_type}"
def process_image(path):
img = load_image(path)
file = os.path.basename(path)
entry = None
if is_battle_type(img):
has_txt = has_battle_textbox(img, 250)
if has_txt:
textbox_img = crop_textbox(
img, is_battle=True, has_avatar=False, base_y_offset=250
)
text = ocr_text(textbox_img)
if text:
key = describe_result(file, "战斗文本框", "无头像", "战斗")
entry = {key: text}
else:
top = has_textbox(img, 10)
bottom = has_textbox(img, 320)
if top:
avatar_absent = check_avatar_absent(img, 60, 100)
textbox_img = crop_textbox(
img, is_battle=False, has_avatar=not avatar_absent, base_y_offset=10
)
text = ocr_text(textbox_img)
if text:
key = describe_result(
file,
"顶部文本框",
"无头像" if avatar_absent else "有头像",
"非战斗",
)
entry = {key: text}
elif bottom:
avatar_absent = check_avatar_absent(img, 370, 410)
textbox_img = crop_textbox(
img, is_battle=False, has_avatar=not avatar_absent, base_y_offset=320
)
text = ocr_text(textbox_img)
if text:
key = describe_result(
file,
"底部文本框",
"无头像" if avatar_absent else "有头像",
"非战斗",
)
entry = {key: text}
return entry
def main():
os.makedirs(TEXT_OUTPUT_DIR, exist_ok=True)
for root, dirs, files in os.walk(COMIC_ROOT):
if os.path.basename(root).startswith("Ch"):
chapter = os.path.basename(root)
output = {}
# 如果输出文件存在,则跳过
if os.path.exists(os.path.join(TEXT_OUTPUT_DIR, f"{chapter}/OCR2.json")):
continue
for file in sorted(files):
if file.lower().endswith((".png", ".gif")):
path = os.path.join(root, file)
entry = process_image(path)
if entry:
output.update(entry)
def extract_number(s):
match = re.search(r"(\d+)", s)
return int(match.group(1)) if match else float("inf")
if output:
sorted_output = OrderedDict(
sorted(output.items(), key=lambda x: extract_number(x[0]))
)
if not os.path.exists(os.path.join(TEXT_OUTPUT_DIR, f"{chapter}")):
os.makedirs(os.path.join(TEXT_OUTPUT_DIR, f"{chapter}"))
with open(
os.path.join(TEXT_OUTPUT_DIR, f"{chapter}/OCR2.json"),
"w",
encoding="utf-8",
) as f:
json.dump(sorted_output, f, ensure_ascii=False, indent=2)
print(f"✅ 完成 {os.path.join(TEXT_OUTPUT_DIR, f"{chapter}/OCR2.json")}")
if __name__ == "__main__":
main()