import base64
import os
from typing import List, Optional, Union

import requests

import config.config


class LayoutParserClient_application:
    """Client that sends images to a remote parsing service and returns text.

    Each image is base64-encoded, POSTed as JSON to ``api_url`` and the
    recognised text lines found in the response are joined into one string.
    """

    DEFAULT_API_URL = "https://q2z8becfm967o4y7.aistudio-app.com/layout-parsing"
    # Seconds to wait for the service before giving up on a single request.
    DEFAULT_TIMEOUT = 60.0
    # Separator inserted between the texts of consecutive images in parse().
    PAGE_SEPARATOR = "\n\n--- Next Page ---\n\n"

    def __init__(
        self,
        api_url: Optional[str] = None,
        token: Optional[str] = None,
        timeout: Optional[float] = DEFAULT_TIMEOUT,
    ):
        """Store the endpoint, credentials and request timeout.

        Args:
            api_url: Service endpoint; falls back to ``DEFAULT_API_URL`` when
                empty or ``None``.
            token: Access token; falls back to ``config.config.PADDLE_TOKEN``
                when empty or ``None``.
            timeout: Per-request timeout in seconds handed to
                ``requests.post``. Pass ``None`` to wait indefinitely (the
                behaviour before this parameter existed).
        """
        self.api_url = api_url or self.DEFAULT_API_URL
        self.token = token or config.config.PADDLE_TOKEN
        self.timeout = timeout
        self.headers = {
            "Authorization": f"token {self.token}",
            "Content-Type": "application/json"
        }

    def _encode_image(self, file_path: str) -> str:
        """Read the file at ``file_path`` and return it base64-encoded.

        Raises:
            OSError: If the file cannot be opened or read.
        """
        with open(file_path, "rb") as file:
            return base64.b64encode(file.read()).decode("ascii")

    @staticmethod
    def _extract_texts(body) -> List[str]:
        """Collect every ``rec_texts`` entry from a decoded response body.

        NOTE(review): the endpoint path is ``/layout-parsing`` but the fields
        read here (``ocrResults`` / ``prunedResult`` / ``rec_texts``) look like
        the general OCR response schema - confirm the deployed service really
        answers in this shape.
        """
        result = body.get("result", {})
        texts = []
        for page in result.get("ocrResults", []):
            rec_texts = page.get("prunedResult", {}).get("rec_texts", [])
            if rec_texts:
                texts.extend(rec_texts)
        return texts

    def _process_single_file(self, file_path: str) -> str:
        """Send one image to the service and return its recognised text.

        Args:
            file_path: Path of the image to upload.

        Returns:
            The recognised text lines joined with ``"\\n"``, or ``""`` when the
            request fails or the response cannot be interpreted (the error is
            printed, not raised).

        Raises:
            OSError: If the image file itself cannot be read; the read happens
                before the guarded section, so this propagates to the caller.
        """
        file_data = self._encode_image(file_path)

        payload = {
            "file": file_data,
            # Presumably 1 = image (0 = PDF) per the PaddleOCR serving API;
            # TODO confirm against this deployment.
            "fileType": 1,
            "useDocOrientationClassify": False,
            "useDocUnwarping": False,
            "useTextlineOrientation": False,
        }

        try:
            # Without a timeout a stalled server would block this call forever.
            response = requests.post(
                self.api_url,
                json=payload,
                headers=self.headers,
                timeout=self.timeout,
            )
            response.raise_for_status()  # turn HTTP 4xx/5xx into an exception
            return "\n".join(self._extract_texts(response.json()))
        except (
            requests.RequestException,  # connection, timeout, HTTP status
            OSError,                    # low-level socket / TLS failures
            ValueError,                 # body is not valid JSON
            AttributeError,             # response shape is not the expected dicts
            TypeError,                  # e.g. non-iterable / non-string fields
        ) as e:
            # Best effort: report the failure and let the batch continue.
            print(f"处理文件 {file_path} 时出错: {e}")
            return ""

    def parse(self, inputs: Union[str, List[str]]) -> str:
        """Parse one image or a list of images and return the combined text.

        Args:
            inputs: A single image path, or a list of image paths that are
                processed in the given order.

        Returns:
            The text of every image that yielded a non-empty result, joined
            with ``PAGE_SEPARATOR``. Images whose result is empty (including
            failed requests) are left out.
        """
        # Normalise a single path to a one-element list.
        file_list = [inputs] if isinstance(inputs, str) else inputs

        combined_results = []
        for file_path in file_list:
            print(f"正在处理: {os.path.basename(file_path)}...")
            text = self._process_single_file(file_path)
            if text:
                combined_results.append(text)

        return self.PAGE_SEPARATOR.join(combined_results)


def main() -> None:
    """Run the client against the sample images and print the result."""
    client = LayoutParserClient_application()

    # Example 1: a single image
    # single_img = "E:\\project\\arbitration_system\\appplication_extractor\\test\\李述花\\李述花-申请书.png"
    # result_1 = client.parse(single_img)
    # print(result_1)

    # Example 2: several images, concatenated in order
    multi_imgs = [
        "E:\\project\\arbitration_system\\appplication_extractor\\test\\刘正新\\刘正新-申请书.png",
        "E:\\project\\arbitration_system\\appplication_extractor\\test\\刘正新\\刘正新-申请书续.png"
        # "E:\\project\\arbitration_system\\appplication_extractor\\test\\李述花\\李述花-申请书.png"
    ]

    result_2 = client.parse(multi_imgs)
    print(result_2)


if __name__ == '__main__':
    main()