# Please make sure the requests library is installed:
#   pip install requests
import base64
import os

import requests
API_URL = "https://q2z8becfm967o4y7.aistudio-app.com/layout-parsing"
# NOTE(review): this access token is committed in plain text. It is kept as the
# fallback so the script behaves as before, but it should be rotated and then
# supplied through the LAYOUT_PARSING_TOKEN environment variable instead.
TOKEN = (
    os.environ.get("LAYOUT_PARSING_TOKEN")
    or "16455708d55afac2f074f4ae5a88fc6c7bae7920"
)
FILE_PATH = "E:\\project\\arbitration_system\\evidence_extractor\\test\\F86-ZC1-2023-0001\\考勤表\\F86-ZC1-2023-0001-010_00.png"
OUTPUT_DIR = "output"
# (connect, read) timeouts in seconds. requests applies no timeout by default,
# so without this a stalled server would block the script forever.
# NOTE(review): the read timeout is a guess; tune it to the service's latency.
REQUEST_TIMEOUT = (10, 300)
# Optional switches sent with every request; all of them are turned off.
OPTIONAL_PAYLOAD = {
    "useDocOrientationClassify": False,
    "useDocUnwarping": False,
    "useTextlineOrientation": False,
    "useChartRecognition": False,
}


def build_payload(file_bytes, file_type=1):
    """Return the JSON body for a layout-parsing request.

    Args:
        file_bytes: Raw content of the document to parse.
        file_type: 0 for PDF documents, 1 for images.

    Returns:
        A dict holding the base64-encoded file, the file type and the
        optional switches from ``OPTIONAL_PAYLOAD``.
    """
    required_payload = {
        "file": base64.b64encode(file_bytes).decode("ascii"),
        "fileType": file_type,
    }
    return {**required_payload, **OPTIONAL_PAYLOAD}


def is_inside_directory(directory, path):
    """Return True when *path* resolves to a location strictly inside *directory*.

    Used to reject file names taken from the service response that would
    otherwise be written outside the output directory (``..`` segments or
    absolute paths).
    """
    root = os.path.abspath(directory)
    target = os.path.abspath(path)
    if target == root:
        return False
    try:
        return os.path.commonpath([root, target]) == root
    except ValueError:
        # commonpath refuses to compare paths on different drives (Windows).
        return False


def download_image(url, destination):
    """Download *url* and write it to *destination*.

    Returns:
        True when the file was written, False when the server answered with a
        status other than 200 (a message is printed and nothing is written).
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    if response.status_code != 200:
        print(f"Failed to download image, status code: {response.status_code}")
        return False
    parent = os.path.dirname(destination)
    if parent:
        os.makedirs(parent, exist_ok=True)
    with open(destination, "wb") as img_file:
        img_file.write(response.content)
    print(f"Image saved to: {destination}")
    return True


def save_result(index, res, output_dir=OUTPUT_DIR):
    """Write one layout-parsing result to *output_dir*.

    Saves ``res["markdown"]["text"]`` as ``doc_<index>.md``, then downloads
    every entry of ``res["markdown"]["images"]`` (relative path -> URL) and of
    ``res["outputImages"]`` (name -> URL, stored as ``<name>_<index>.jpg``).
    """
    md_filename = os.path.join(output_dir, f"doc_{index}.md")
    with open(md_filename, "w", encoding="utf-8") as md_file:
        md_file.write(res["markdown"]["text"])
    print(f"Markdown document saved at {md_filename}")

    # Markdown images first, then the rendered output images.
    targets = [
        (os.path.join(output_dir, img_path), url)
        for img_path, url in res["markdown"]["images"].items()
    ]
    targets += [
        (os.path.join(output_dir, f"{img_name}_{index}.jpg"), url)
        for img_name, url in res["outputImages"].items()
    ]
    for destination, url in targets:
        # The names come from the remote response; never write outside output_dir.
        if not is_inside_directory(output_dir, destination):
            print(f"Skipping image with unsafe path: {destination}")
            continue
        download_image(url, destination)


def main():
    """Send ``FILE_PATH`` to the layout-parsing service and save the results.

    Raises:
        RuntimeError: If the service answers with a status other than 200.
    """
    with open(FILE_PATH, "rb") as file:
        file_bytes = file.read()

    headers = {
        "Authorization": f"token {TOKEN}",
        "Content-Type": "application/json",
    }
    response = requests.post(
        API_URL,
        json=build_payload(file_bytes),
        headers=headers,
        timeout=REQUEST_TIMEOUT,
    )
    print(response.status_code)
    # An explicit check instead of `assert`, which is removed under `python -O`.
    if response.status_code != 200:
        raise RuntimeError(
            f"Layout parsing request failed with status {response.status_code}"
        )

    result = response.json()["result"]
    print(result["layoutParsingResults"])

    os.makedirs(OUTPUT_DIR, exist_ok=True)
    for i, res in enumerate(result["layoutParsingResults"]):
        save_result(i, res, OUTPUT_DIR)


if __name__ == "__main__":
    main()