Spaces:
Build error
Build error
Ankur Goyal
committed on
Commit
·
47b5f74
1
Parent(s):
98e826c
Add better examples and confidence threshold
Browse files
- acze.png +0 -0
- app.py +22 -13
- contract.jpeg +0 -0
- north_sea.pdf +0 -0
- north_sea.png +0 -0
- statement.pdf +0 -0
- statement.png +0 -0
acze.png
ADDED
|
app.py
CHANGED
|
@@ -73,19 +73,21 @@ def normalize_bbox(box, width, height, padding=0.005):
|
|
| 73 |
EXAMPLES = [
|
| 74 |
[
|
| 75 |
"invoice.png",
|
| 76 |
-
"Invoice
|
| 77 |
],
|
| 78 |
[
|
| 79 |
-
"
|
| 80 |
-
"
|
| 81 |
],
|
| 82 |
[
|
| 83 |
-
"
|
| 84 |
-
"
|
| 85 |
],
|
| 86 |
]
|
| 87 |
|
| 88 |
-
QUESTION_FILES = {
|
|
|
|
|
|
|
| 89 |
|
| 90 |
FIELDS = {
|
| 91 |
"Vendor Name": ["Vendor Name - Logo?", "Vendor Name - Address?"],
|
|
@@ -98,6 +100,8 @@ FIELDS = {
|
|
| 98 |
"Invoice Total": ["Invoice Total?"],
|
| 99 |
"Amount Due": ["Amount Due?"],
|
| 100 |
"Payment Terms": ["Payment Terms?"],
|
|
|
|
|
|
|
| 101 |
}
|
| 102 |
|
| 103 |
|
|
@@ -150,7 +154,7 @@ colors = ["#64A087", "green", "black"]
|
|
| 150 |
|
| 151 |
|
| 152 |
def annotate_page(prediction, pages, document):
|
| 153 |
-
if "word_ids" in prediction:
|
| 154 |
image = pages[prediction["page"]]
|
| 155 |
draw = ImageDraw.Draw(image, "RGBA")
|
| 156 |
word_boxes = lift_word_boxes(document, prediction["page"])
|
|
@@ -192,9 +196,14 @@ def process_fields(document, fields, model=list(CHECKPOINTS.keys())[0]):
|
|
| 192 |
table = []
|
| 193 |
|
| 194 |
for (field_name, questions) in fields.items():
|
| 195 |
-
answers = [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 196 |
answers.sort(key=lambda x: -x.get("score", 0) if x else 0)
|
| 197 |
-
top = answers[0]
|
| 198 |
annotate_page(top, pages, document)
|
| 199 |
ret[field_name] = top
|
| 200 |
table.append([field_name, top.get("answer") if top is not None else None])
|
|
@@ -400,8 +409,8 @@ with gr.Blocks(css=CSS) as demo:
|
|
| 400 |
None, # document
|
| 401 |
# {**FIELDS}, # fields
|
| 402 |
gr.update(visible=False, value=None), # output
|
| 403 |
-
|
| 404 |
-
|
| 405 |
None,
|
| 406 |
None,
|
| 407 |
None,
|
|
@@ -414,8 +423,8 @@ with gr.Blocks(css=CSS) as demo:
|
|
| 414 |
document,
|
| 415 |
# fields,
|
| 416 |
output,
|
| 417 |
-
|
| 418 |
-
|
| 419 |
example_image,
|
| 420 |
upload,
|
| 421 |
url,
|
|
|
|
| 73 |
EXAMPLES = [
|
| 74 |
[
|
| 75 |
"invoice.png",
|
| 76 |
+
"East Repair Invoice",
|
| 77 |
],
|
| 78 |
[
|
| 79 |
+
"acze.png",
|
| 80 |
+
"ACZE Invoice",
|
| 81 |
],
|
| 82 |
[
|
| 83 |
+
"north_sea.png",
|
| 84 |
+
"North Sea Invoice",
|
| 85 |
],
|
| 86 |
]
|
| 87 |
|
| 88 |
+
QUESTION_FILES = {
|
| 89 |
+
"North Sea Invoice": "north_sea.pdf",
|
| 90 |
+
}
|
| 91 |
|
| 92 |
FIELDS = {
|
| 93 |
"Vendor Name": ["Vendor Name - Logo?", "Vendor Name - Address?"],
|
|
|
|
| 100 |
"Invoice Total": ["Invoice Total?"],
|
| 101 |
"Amount Due": ["Amount Due?"],
|
| 102 |
"Payment Terms": ["Payment Terms?"],
|
| 103 |
+
"Remit To Name": ["Remit To Name?"],
|
| 104 |
+
"Remit To Address": ["Remit To Address?"],
|
| 105 |
}
|
| 106 |
|
| 107 |
|
|
|
|
| 154 |
|
| 155 |
|
| 156 |
def annotate_page(prediction, pages, document):
|
| 157 |
+
if prediction is not None and "word_ids" in prediction:
|
| 158 |
image = pages[prediction["page"]]
|
| 159 |
draw = ImageDraw.Draw(image, "RGBA")
|
| 160 |
word_boxes = lift_word_boxes(document, prediction["page"])
|
|
|
|
| 196 |
table = []
|
| 197 |
|
| 198 |
for (field_name, questions) in fields.items():
|
| 199 |
+
answers = [
|
| 200 |
+
a
|
| 201 |
+
for q in questions
|
| 202 |
+
for a in ensure_list(run_pipeline(model, q, document, top_k=1))
|
| 203 |
+
if a.get("score", 1) > 0.5
|
| 204 |
+
]
|
| 205 |
answers.sort(key=lambda x: -x.get("score", 0) if x else 0)
|
| 206 |
+
top = answers[0] if len(answers) > 0 else None
|
| 207 |
annotate_page(top, pages, document)
|
| 208 |
ret[field_name] = top
|
| 209 |
table.append([field_name, top.get("answer") if top is not None else None])
|
|
|
|
| 409 |
None, # document
|
| 410 |
# {**FIELDS}, # fields
|
| 411 |
gr.update(visible=False, value=None), # output
|
| 412 |
+
gr.update(**empty_table(fields.value)), # output_table
|
| 413 |
+
gr.update(visible=False),
|
| 414 |
None,
|
| 415 |
None,
|
| 416 |
None,
|
|
|
|
| 423 |
document,
|
| 424 |
# fields,
|
| 425 |
output,
|
| 426 |
+
output_table,
|
| 427 |
+
img_clear_button,
|
| 428 |
example_image,
|
| 429 |
upload,
|
| 430 |
url,
|
contract.jpeg
DELETED
|
Binary file (124 kB)
|
|
|
north_sea.pdf
ADDED
|
Binary file (70.9 kB). View file
|
|
|
north_sea.png
ADDED
|
statement.pdf
DELETED
|
The diff for this file is too large to render.
See raw diff
|
|
|
statement.png
DELETED
|
Binary file (140 kB)
|
|
|