Spaces: Running on Zero
update app
app.py CHANGED
@@ -222,6 +222,15 @@ model_m = Qwen2_5_VLForConditionalGeneration.from_pretrained(
     torch_dtype=torch.float16
 ).to(device).eval()
 
+# Load Nanonets-OCR2-1.5B-exp
+MODEL_ID_Y = "nanonets/Nanonets-OCR2-1.5B-exp"
+processor_y = AutoProcessor.from_pretrained(MODEL_ID_Y, trust_remote_code=True)
+model_y = Qwen2VLForConditionalGeneration.from_pretrained(
+    MODEL_ID_Y,
+    trust_remote_code=True,
+    torch_dtype=torch.float16
+).to(device).eval()
+
 def downsample_video(video_path):
     """
     Downsamples the video to evenly spaced frames.
@@ -270,6 +279,9 @@ def generate_image(model_name: str, text: str, image: Image.Image,
     elif model_name == "olmOCR-7B-0725":
         processor = processor_w
         model = model_w
+    elif model_name == "Nanonets-OCR2-1.5B-exp":
+        processor = processor_y
+        model = model_y
     else:
         yield "Invalid model selected.", "Invalid model selected."
         return
@@ -332,6 +344,9 @@ def generate_video(model_name: str, text: str, video_path: str,
     elif model_name == "olmOCR-7B-0725":
         processor = processor_w
         model = model_w
+    elif model_name == "Nanonets-OCR2-1.5B-exp":
+        processor = processor_y
+        model = model_y
     else:
         yield "Invalid model selected.", "Invalid model selected."
         return
@@ -424,7 +439,7 @@ with gr.Blocks(css=css, theme=thistle_theme) as demo:
         markdown_output = gr.Markdown(label="(Result.Md)")
 
         model_choice = gr.Radio(
-            choices=["Nanonets-OCR2-3B", "olmOCR-7B-0725", "RolmOCR-7B",
+            choices=["Nanonets-OCR2-3B", "olmOCR-7B-0725", "RolmOCR-7B", "Nanonets-OCR2-1.5B-exp",
                      "Aya-Vision-8B", "Qwen2-VL-OCR-2B"],
             label="Select Model",
             value="Nanonets-OCR2-3B"
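For context, here is a minimal standalone sketch of how the newly added Nanonets-OCR2-1.5B-exp path can be exercised outside the Gradio app, using the same loading code the diff introduces and the standard Qwen2-VL chat-template flow. The input image path, prompt text, and generation settings are illustrative assumptions, not taken from app.py.

import torch
from PIL import Image
from transformers import AutoProcessor, Qwen2VLForConditionalGeneration

device = "cuda" if torch.cuda.is_available() else "cpu"

# Same loading code as the diff adds to app.py.
MODEL_ID_Y = "nanonets/Nanonets-OCR2-1.5B-exp"
processor_y = AutoProcessor.from_pretrained(MODEL_ID_Y, trust_remote_code=True)
model_y = Qwen2VLForConditionalGeneration.from_pretrained(
    MODEL_ID_Y,
    trust_remote_code=True,
    torch_dtype=torch.float16
).to(device).eval()

# Hypothetical input image and prompt (not from app.py).
image = Image.open("sample_page.png")
messages = [{
    "role": "user",
    "content": [
        {"type": "image"},
        {"type": "text", "text": "Extract the text from this page as Markdown."},
    ],
}]

# Build the chat prompt, run generation, and decode only the newly generated tokens.
prompt = processor_y.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
inputs = processor_y(text=[prompt], images=[image], return_tensors="pt").to(device)
with torch.inference_mode():
    output_ids = model_y.generate(**inputs, max_new_tokens=512)
trimmed = output_ids[:, inputs["input_ids"].shape[1]:]
print(processor_y.batch_decode(trimmed, skip_special_tokens=True)[0])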