Update app.py
Browse files
app.py
CHANGED
|
@@ -76,7 +76,8 @@ def pdf_to_images(pdf_bytes: bytes, dpi: int = 200) -> List[Image.Image]:
|
|
| 76 |
|
| 77 |
for i in range(n_pages):
|
| 78 |
page = pdf[i]
|
| 79 |
-
|
|
|
|
| 80 |
images.append(pil_image)
|
| 81 |
|
| 82 |
return images
|
|
@@ -112,7 +113,6 @@ def run_ocr(file: Union[bytes, None], image: Union[Image.Image, None]) -> str:
|
|
| 112 |
# 2. 如果上传了图片
|
| 113 |
if image is not None:
|
| 114 |
text = call_ocr_model(image)
|
| 115 |
-
# 如果前面已经有 PDF 结果,就在后面追加,否则单独一段
|
| 116 |
if results:
|
| 117 |
results.append("===== 图片识别结果 =====\n" + text)
|
| 118 |
else:
|
|
|
|
| 76 |
|
| 77 |
for i in range(n_pages):
|
| 78 |
page = pdf[i]
|
| 79 |
+
# 72 dpi 是 PDF 默认分辨率,这里按比例放大到指定 dpi
|
| 80 |
+
pil_image = page.render(scale=dpi / 72).to_pil()
|
| 81 |
images.append(pil_image)
|
| 82 |
|
| 83 |
return images
|
|
|
|
| 113 |
# 2. 如果上传了图片
|
| 114 |
if image is not None:
|
| 115 |
text = call_ocr_model(image)
|
|
|
|
| 116 |
if results:
|
| 117 |
results.append("===== 图片识别结果 =====\n" + text)
|
| 118 |
else:
|