| 12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697 |
- import os
- import base64
- import requests
- from typing import List, Union
- import config.config
class LayoutParserClient_application:
    """Client for a remote layout-parsing (OCR) HTTP endpoint.

    Each image is read from disk, base64-encoded and POSTed as JSON. The
    recognized text lines found in the response are joined into plain text.
    """

    # Endpoint used when the caller does not supply ``api_url``.
    DEFAULT_API_URL = "https://q2z8becfm967o4y7.aistudio-app.com/layout-parsing"
    # Marker inserted between the texts of consecutive images in ``parse``.
    PAGE_SEPARATOR = "\n\n--- Next Page ---\n\n"

    def __init__(
        self,
        api_url: Union[str, None] = None,
        token: Union[str, None] = None,
        timeout: Union[float, None] = 120.0,
    ):
        """Store the endpoint, credentials and request settings.

        :param api_url: Endpoint URL; falls back to ``DEFAULT_API_URL``.
        :param token: API token; falls back to ``config.config.PADDLE_TOKEN``.
        :param timeout: Seconds to wait for the server before giving up on a
            request. ``None`` waits indefinitely (the previous behaviour).
        """
        self.api_url = api_url or self.DEFAULT_API_URL
        # The config module is only consulted when no token is passed in.
        self.token = token or config.config.PADDLE_TOKEN
        self.timeout = timeout
        self.headers = {
            "Authorization": f"token {self.token}",
            "Content-Type": "application/json"
        }

    def _encode_image(self, file_path: str) -> str:
        """Read the file at *file_path* and return its bytes as base64 text.

        :raises OSError: if the file cannot be opened or read.
        """
        with open(file_path, "rb") as file:
            return base64.b64encode(file.read()).decode("ascii")

    @staticmethod
    def _extract_text(result) -> str:
        """Join every ``rec_texts`` entry found in *result* with newlines.

        *result* is the ``"result"`` object of the service response. Missing
        or ``null`` fields at any level are treated as empty, so an empty or
        partial response yields ``""`` instead of raising.
        """
        lines = []
        for ocr_result in (result or {}).get("ocrResults") or []:
            pruned = (ocr_result or {}).get("prunedResult") or {}
            lines.extend(pruned.get("rec_texts") or [])
        return "\n".join(lines)

    def _process_single_file(self, file_path: str) -> str:
        """Send one image to the service and return its recognized text.

        Request, HTTP-status and response-parsing failures are reported on
        stdout and produce ``""`` so that one bad page does not abort a
        batch. Errors while reading the file itself are raised by
        ``_encode_image`` before the request is attempted.
        """
        file_data = self._encode_image(file_path)
        payload = {
            "file": file_data,
            "fileType": 1,
            "useDocOrientationClassify": False,
            "useDocUnwarping": False,
            "useTextlineOrientation": False,
        }
        try:
            # Without a timeout a stalled server would block this call forever.
            response = requests.post(
                self.api_url,
                json=payload,
                headers=self.headers,
                timeout=self.timeout,
            )
            response.raise_for_status()  # turn HTTP error statuses into exceptions
            return self._extract_text(response.json().get("result"))
        except Exception as e:
            # Deliberate best-effort boundary: report the failure and move on.
            print(f"处理文件 {file_path} 时出错: {e}")
            return ""

    def parse(self, inputs: Union[str, List[str]]) -> str:
        """Parse one image or a sequence of images and return the combined text.

        :param inputs: A single image path (``str`` or ``os.PathLike``) or a
            list of image paths, processed in the given order.
        :return: The text of every image that produced output, joined with
            ``PAGE_SEPARATOR``; ``""`` when nothing was recognized.
        """
        # Normalize a single path into a one-element list.
        if isinstance(inputs, (str, os.PathLike)):
            file_list = [inputs]
        else:
            file_list = inputs

        combined_results = []
        for file_path in file_list:
            print(f"正在处理: {os.path.basename(file_path)}...")
            text = self._process_single_file(file_path)
            if text:  # images that yielded no text are left out entirely
                combined_results.append(text)

        return self.PAGE_SEPARATOR.join(combined_results)
if __name__ == '__main__':
    # Manual smoke run: send the sample application pages to the service and
    # print the combined text.
    parser_client = LayoutParserClient_application()

    # A single page can be parsed by passing one path string instead of a
    # list, e.g. parser_client.parse("<path to one image>").

    # Pages are parsed in list order and joined into one text.
    page_paths = [
        "E:\\project\\arbitration_system\\appplication_extractor\\test\\刘正新\\刘正新-申请书.png",
        "E:\\project\\arbitration_system\\appplication_extractor\\test\\刘正新\\刘正新-申请书续.png",
    ]
    print(parser_client.parse(page_paths))
|