| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172 |
import base64
import os

import requests
# Command-line script: send one image to a hosted OCR endpoint, print the
# recognized text, and store the text and any result images under OUTPUT_DIR.

# Endpoint of the OCR service that receives the base64-encoded file.
API_URL = "https://c8pdr6l6q4eal165.aistudio-app.com/ocr"

# NOTE(review): a credential is committed in source. It is kept only as a
# fallback so existing runs keep working; set OCR_API_TOKEN in the environment
# instead, then rotate this token and delete the literal.
TOKEN = os.environ.get("OCR_API_TOKEN", "16455708d55afac2f074f4ae5a88fc6c7bae7920")

# Image that is submitted for recognition.
file_path = "E:\\project\\arbitration_system\\evidence_extractor\\test\\F86-ZC1-2023-0001\\考勤表\\F86-ZC1-2023-0001-010_00.png"

# Directory (relative to the working directory) that receives all outputs.
OUTPUT_DIR = "output"

# Seconds to wait for the server before giving up; without a timeout
# `requests` blocks forever on a stalled connection.
REQUEST_TIMEOUT = 120


def build_payload(file_bytes: bytes) -> dict:
    """Return the JSON body for the OCR request.

    Args:
        file_bytes: Raw content of the file to recognize.

    Returns:
        A dict holding the base64 (ASCII) encoded file, the file type, and
        the optional preprocessing switches, all of which are disabled.
    """
    return {
        "file": base64.b64encode(file_bytes).decode("ascii"),
        # presumably 1 selects "image" in the service's API -- TODO confirm
        "fileType": 1,
        "useDocOrientationClassify": False,
        "useDocUnwarping": False,
        "useTextlineOrientation": False,
    }


def request_ocr(file_bytes: bytes) -> dict:
    """POST the file to ``API_URL`` and return the ``"result"`` object.

    Raises:
        RuntimeError: If the service answers with any status other than 200.
            (An explicit check is used because ``assert`` is removed when
            Python runs with ``-O``.)
    """
    headers = {
        "Authorization": f"token {TOKEN}",
        "Content-Type": "application/json",
    }
    response = requests.post(
        API_URL,
        json=build_payload(file_bytes),
        headers=headers,
        timeout=REQUEST_TIMEOUT,
    )
    if response.status_code != 200:
        raise RuntimeError(
            f"OCR request failed with HTTP status {response.status_code}"
        )
    return response.json()["result"]


def save_texts(texts: list, text_filename: str) -> None:
    """Write each recognized line to ``text_filename`` as UTF-8, one per line."""
    with open(text_filename, "w", encoding="utf-8") as f:
        f.writelines(text + "\n" for text in texts)


def report_texts(result: dict, input_filename: str) -> None:
    """Print and save the top-level ``rec_texts`` of ``result``, if present."""
    if "rec_texts" not in result:
        return
    texts = result["rec_texts"]

    print("=== 识别文本内容 ===")
    for number, text in enumerate(texts, start=1):
        print(f"{number:2d}: {text}")

    # Persist the same lines next to the other outputs.
    text_filename = f"{OUTPUT_DIR}/{input_filename}_text.txt"
    save_texts(texts, text_filename)
    print(f"\n文本已保存到: {text_filename}")


def download_image(image_url: str, filename: str) -> None:
    """Fetch ``image_url`` and store it at ``filename``; report the outcome."""
    img_response = requests.get(image_url, timeout=REQUEST_TIMEOUT)
    if img_response.status_code != 200:
        print(f"图片下载失败,状态码: {img_response.status_code}")
        return
    with open(filename, "wb") as f:
        f.write(img_response.content)
    print(f"图片已保存到: {filename}")


def report_page(index: int, res: dict, input_filename: str) -> None:
    """Print one page's recognized text and download its result image.

    Args:
        index: Zero-based position of the page in ``ocrResults``.
        res: The per-page result entry.
        input_filename: Stem of the input file, used to name the image.
    """
    if "prunedResult" in res:
        pruned_result = res["prunedResult"]
        if "rec_texts" in pruned_result:
            print(f"\n=== 页面 {index + 1} 的识别文本 ===")
            for number, text in enumerate(pruned_result["rec_texts"], start=1):
                print(f" 行 {number}: {text}")

    if "ocrImage" in res:
        download_image(res["ocrImage"], f"{OUTPUT_DIR}/{input_filename}_{index}.jpg")


def main() -> None:
    """Run OCR on ``file_path`` and write all outputs to ``OUTPUT_DIR``."""
    input_filename = os.path.splitext(os.path.basename(file_path))[0]
    with open(file_path, "rb") as file:
        file_bytes = file.read()

    result = request_ocr(file_bytes)
    os.makedirs(OUTPUT_DIR, exist_ok=True)

    report_texts(result, input_filename)

    # Handle multi-page responses: one entry per page.
    for index, res in enumerate(result.get("ocrResults", [])):
        report_page(index, res, input_filename)


if __name__ == "__main__":
    main()
|