"""Send one image to a remote OCR endpoint and save what it recognises.

The script base64-encodes a local file, POSTs it as JSON to ``API_URL``,
prints the recognised text lines, writes them to ``output/<name>_text.txt``
and downloads any per-page result image the response refers to.

Usage::

    python this_script.py [path-to-image]

When no path is given, the module-level ``file_path`` default is used.
"""

import base64
import os
import sys

import requests

API_URL = "https://c8pdr6l6q4eal165.aistudio-app.com/ocr"

# SECURITY(review): an access token is committed in source. The environment
# variable takes precedence; the literal is kept only so existing runs keep
# working. Rotate it and drop the fallback once callers set OCR_API_TOKEN.
TOKEN = os.environ.get("OCR_API_TOKEN", "16455708d55afac2f074f4ae5a88fc6c7bae7920")

# Default input, used when no path is passed on the command line.
file_path = "E:\\project\\arbitration_system\\evidence_extractor\\test\\F86-ZC1-2023-0001\\考勤表\\F86-ZC1-2023-0001-010_00.png"
input_filename = os.path.splitext(os.path.basename(file_path))[0]

OUTPUT_DIR = "output"

# (connect, read) timeouts in seconds. Without a timeout, requests waits
# forever on a stalled server.
REQUEST_TIMEOUT = (10, 120)


def _encode_file(path):
    """Return the contents of *path* as an ASCII base64 string."""
    with open(path, "rb") as source:
        return base64.b64encode(source.read()).decode("ascii")


def _request_ocr(file_data):
    """POST *file_data* to the OCR service and return the ``result`` object.

    Raises:
        RuntimeError: if the service answers with any status other than 200.
        KeyError: if the JSON body has no ``"result"`` key.
        requests.RequestException: on connection problems or timeout.
    """
    headers = {
        "Authorization": f"token {TOKEN}",
        "Content-Type": "application/json",
    }
    required_payload = {
        "file": file_data,
        # NOTE(review): presumably 1 selects "image" input in the service
        # API; confirm against the endpoint's documentation.
        "fileType": 1,
    }
    optional_payload = {
        "useDocOrientationClassify": False,
        "useDocUnwarping": False,
        "useTextlineOrientation": False,
    }
    payload = {**required_payload, **optional_payload}

    response = requests.post(
        API_URL, json=payload, headers=headers, timeout=REQUEST_TIMEOUT
    )
    # An explicit check instead of `assert`: assertions are removed when
    # Python runs with -O, which would let error responses through.
    if response.status_code != 200:
        raise RuntimeError(
            f"OCR request failed with HTTP status {response.status_code}"
        )
    return response.json()["result"]


def _save_recognized_text(result, stem):
    """Print and save the top-level ``rec_texts`` list, if the result has one."""
    if "rec_texts" not in result:
        return
    texts = result["rec_texts"]

    print("=== 识别文本内容 ===")
    for number, text in enumerate(texts, start=1):
        print(f"{number:2d}: {text}")

    # Save the text to a file, one recognised line per row.
    text_filename = f"{OUTPUT_DIR}/{stem}_text.txt"
    with open(text_filename, "w", encoding="utf-8") as out:
        out.writelines(text + "\n" for text in texts)
    print(f"\n文本已保存到: {text_filename}")


def _process_pages(result, stem):
    """Print each page's recognised text and download its result image."""
    for page_index, page in enumerate(result.get("ocrResults", [])):
        pruned_result = page.get("prunedResult", {}) if "prunedResult" in page else {}
        if "rec_texts" in pruned_result:
            print(f"\n=== 页面 {page_index + 1} 的识别文本 ===")
            for line_number, text in enumerate(pruned_result["rec_texts"], start=1):
                print(f" 行 {line_number}: {text}")

        if "ocrImage" not in page:
            continue

        # The response carries a URL for the rendered image; fetch it.
        img_response = requests.get(page["ocrImage"], timeout=REQUEST_TIMEOUT)
        if img_response.status_code == 200:
            filename = f"{OUTPUT_DIR}/{stem}_{page_index}.jpg"
            with open(filename, "wb") as out:
                out.write(img_response.content)
            print(f"图片已保存到: {filename}")
        else:
            print(f"图片下载失败,状态码: {img_response.status_code}")


def main():
    """Run the OCR round trip for the chosen input file."""
    # An optional first argument overrides the built-in default path.
    path = sys.argv[1] if len(sys.argv) > 1 else file_path
    stem = os.path.splitext(os.path.basename(path))[0]

    result = _request_ocr(_encode_file(path))

    os.makedirs(OUTPUT_DIR, exist_ok=True)
    _save_recognized_text(result, stem)
    _process_pages(result, stem)


if __name__ == "__main__":
    main()