# ocr_paddle_ocr_vl.py (3.4 KB)
  1. import json
  2. import os
  3. import base64
  4. import requests
  5. from typing import List, Union
  6. import config.config
  7. class LayoutParserClient_evidence:
  8. def __init__(self, api_url: str = None, token: str = None):
  9. self.api_url = api_url or "https://q8d4u1u6c45dn7pd.aistudio-app.com/layout-parsing"
  10. self.token = token or config.config.PADDLE_TOKEN
  11. self.headers = {
  12. "Authorization": f"token {self.token}",
  13. "Content-Type": "application/json"
  14. }
  15. def _encode_image(self, file_path: str) -> str:
  16. """读取图片并转换为 base64 编码"""
  17. with open(file_path, "rb") as file:
  18. return base64.b64encode(file.read()).decode("ascii")
  19. def _process_single_file(self, file_path: str) -> str:
  20. """处理单张图片并返回解析后的文本"""
  21. file_data = self._encode_image(file_path)
  22. payload = {
  23. "file": file_data,
  24. "fileType": 1,
  25. "useDocOrientationClassify": False,
  26. "useDocUnwarping": False,
  27. "useTextlineOrientation": False,
  28. }
  29. try:
  30. response = requests.post(self.api_url, json=payload, headers=self.headers)
  31. response.raise_for_status() # 检查 HTTP 状态码
  32. result = response.json()["result"]
  33. # print(result)
  34. full_text = []
  35. # 提取 parsing_res_list
  36. for res in result.get("layoutParsingResults", []):
  37. pruned_result = res.get("prunedResult", {})
  38. parsing_res_list = pruned_result.get("parsing_res_list", [])
  39. if parsing_res_list:
  40. print("\n=== parsing_res_list ===")
  41. full_text.extend(parsing_res_list)
  42. return json.dumps(full_text, ensure_ascii=False, indent=2)
  43. except Exception as e:
  44. print(f"处理文件 {file_path} 时出错: {e}")
  45. return ""
  46. def parse(self, inputs: Union[str, List[str]]) -> str:
  47. """
  48. 主入口方法
  49. :param inputs: 可以是单张图片路径,也可以是图片路径列表
  50. :return: 拼接后的所有文本
  51. """
  52. if isinstance(inputs, str):
  53. # 如果输入是单个字符串,转为列表统一处理
  54. file_list = [inputs]
  55. else:
  56. file_list = inputs
  57. combined_results = []
  58. for file_path in file_list:
  59. print(f"正在处理: {os.path.basename(file_path)}...")
  60. text = self._process_single_file(file_path)
  61. if text:
  62. combined_results.append(text)
  63. # 将多张图片的结果按顺序拼接,中间用双换行分隔
  64. return "\n\n--- Next Page ---\n\n".join(combined_results)
  65. if __name__ == '__main__':
  66. # 实例化类
  67. client = LayoutParserClient_evidence()
  68. # 示例 1: 处理单张图片
  69. # single_img = "E:\\project\\arbitration_system\\appplication_extractor\\test\\李述花\\李述花-申请书.png"
  70. # result_1 = client.parse(single_img)
  71. # print(result_1)
  72. # 示例 2: 处理多张图片(按顺序拼接)
  73. multi_imgs = [
  74. "E:\\project\\arbitration_system\\evidence_extractor\\test\\F86-ZC1-2023-0001\\证人证言\\F86-ZC1-2023-0001-009_04.png",
  75. "E:\\project\\arbitration_system\\evidence_extractor\\test\F86-ZC1-2023-0001\\证人证言\\F86-ZC1-2023-0001-009_05.png"
  76. ]
  77. result_2 = client.parse(multi_imgs)
  78. print(result_2)