# ocr_PP_StructureV3.py
  1. import os
  2. import base64
  3. import requests
  4. from typing import List, Union
  5. import config.config
  6. class LayoutParserClient_application:
  7. def __init__(self, api_url: str = None, token: str = None):
  8. self.api_url = api_url or "https://q2z8becfm967o4y7.aistudio-app.com/layout-parsing"
  9. self.token = token or config.config.PADDLE_TOKEN
  10. self.headers = {
  11. "Authorization": f"token {self.token}",
  12. "Content-Type": "application/json"
  13. }
  14. def _encode_image(self, file_path: str) -> str:
  15. """读取图片并转换为 base64 编码"""
  16. with open(file_path, "rb") as file:
  17. return base64.b64encode(file.read()).decode("ascii")
  18. def _process_single_file(self, file_path: str) -> str:
  19. """处理单张图片并返回解析后的文本"""
  20. file_data = self._encode_image(file_path)
  21. payload = {
  22. "file": file_data,
  23. "fileType": 1,
  24. "useDocOrientationClassify": False,
  25. "useDocUnwarping": False,
  26. "useTextlineOrientation": False,
  27. }
  28. try:
  29. response = requests.post(self.api_url, json=payload, headers=self.headers)
  30. response.raise_for_status() # 检查 HTTP 状态码
  31. result = response.json().get("result", {})
  32. full_text = []
  33. # 解析 OCR 结果
  34. for res in result.get("ocrResults", []):
  35. pruned = res.get("prunedResult", {})
  36. rec_texts = pruned.get("rec_texts", [])
  37. if rec_texts:
  38. full_text.extend(rec_texts)
  39. return "\n".join(full_text)
  40. except Exception as e:
  41. print(f"处理文件 {file_path} 时出错: {e}")
  42. return ""
  43. def parse(self, inputs: Union[str, List[str]]) -> str:
  44. """
  45. 主入口方法
  46. :param inputs: 可以是单张图片路径,也可以是图片路径列表
  47. :return: 拼接后的所有文本
  48. """
  49. if isinstance(inputs, str):
  50. # 如果输入是单个字符串,转为列表统一处理
  51. file_list = [inputs]
  52. else:
  53. file_list = inputs
  54. combined_results = []
  55. for file_path in file_list:
  56. print(f"正在处理: {os.path.basename(file_path)}...")
  57. text = self._process_single_file(file_path)
  58. if text:
  59. combined_results.append(text)
  60. # 将多张图片的结果按顺序拼接,中间用双换行分隔
  61. return "\n\n--- Next Page ---\n\n".join(combined_results)
  62. if __name__ == '__main__':
  63. # 实例化类
  64. client = LayoutParserClient_application()
  65. # 示例 1: 处理单张图片
  66. # single_img = "E:\\project\\arbitration_system\\appplication_extractor\\test\\李述花\\李述花-申请书.png"
  67. # result_1 = client.parse(single_img)
  68. # print(result_1)
  69. # 示例 2: 处理多张图片(按顺序拼接)
  70. multi_imgs = [
  71. "E:\\project\\arbitration_system\\appplication_extractor\\test\\刘正新\\刘正新-申请书.png",
  72. "E:\\project\\arbitration_system\\appplication_extractor\\test\\刘正新\\刘正新-申请书续.png"
  73. # "E:\\project\\arbitration_system\\appplication_extractor\\test\\李述花\\李述花-申请书.png"
  74. ]
  75. result_2 = client.parse(multi_imgs)
  76. print(result_2)