Uploading Trashify box detection model app.py
Files changed:
- README.md (+1, -1)
- app.py (+41, -9)
- trashify_examples/trashify_example_1.jpeg (+2, -2)
- trashify_examples/trashify_example_2.jpeg (+2, -2)
- trashify_examples/trashify_example_3.jpeg (+2, -2)
README.md
CHANGED
@@ -33,4 +33,4 @@ The dataset can be found on Hugging Face as [`mrdbourke/trashify_manual_labelled
 
 ## Learn more
 
-See the full end-to-end code of how this demo was built at [learnhuggingface.com](https://www.learnhuggingface.com/notebooks/hugging_face_object_detection_tutorial).
+See the full end-to-end code of how this demo was built at [learnhuggingface.com](https://www.learnhuggingface.com/notebooks/hugging_face_object_detection_tutorial).
app.py
CHANGED
@@ -1,17 +1,24 @@
+
+# 1. Import the required libraries and packages
 import gradio as gr
 import torch
-from PIL import Image, ImageDraw, ImageFont
+from PIL import Image, ImageDraw, ImageFont # could also use torch utilities for drawing
 
 from transformers import AutoImageProcessor
 from transformers import AutoModelForObjectDetection
 
+### 2. Setup preprocessing and helper functions ###
+
+# Setup target model path to load
 # Note: Can load from Hugging Face or can load from local
 model_save_path = "mrdbourke/rt_detrv2_finetuned_trashify_box_detector_v1"
 
 # Load the model and preprocessor
+# Because this app.py file is running directly on Hugging Face Spaces, the model will be loaded from the Hugging Face Hub
 image_processor = AutoImageProcessor.from_pretrained(model_save_path)
 model = AutoModelForObjectDetection.from_pretrained(model_save_path)
 
+# Set the target device (use CUDA/GPU if it is available)
 device = "cuda" if torch.cuda.is_available() else "cpu"
 model = model.to(device)
 
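As a quick reference for the loading step in the hunk above, a minimal standalone sketch of pulling the same checkpoint from the Hub and checking which classes it detects might look like the following (the checkpoint name comes from the diff; the `.eval()` call and the `id2label` printout are additions for illustration, not part of the commit):

```python
import torch
from transformers import AutoImageProcessor, AutoModelForObjectDetection

# Checkpoint name as used in the commit above
model_save_path = "mrdbourke/rt_detrv2_finetuned_trashify_box_detector_v1"

# The processor handles resizing/normalisation, the model holds the fine-tuned detector weights
image_processor = AutoImageProcessor.from_pretrained(model_save_path)
model = AutoModelForObjectDetection.from_pretrained(model_save_path)

# Use a GPU if one is available (on CPU-only Spaces hardware this stays "cpu")
device = "cuda" if torch.cuda.is_available() else "cpu"
model = model.to(device).eval()

# Inspect the label map the detector was fine-tuned with (should include the trash, bin and hand classes)
print(model.config.id2label)
```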
@@ -31,30 +38,39 @@
 
 # Create helper functions for seeing if items from one list are in another
 def any_in_list(list_a, list_b):
-    "Returns True if any item from list_a is in list_b, otherwise False."
+    "Returns True if *any* item from list_a is in list_b, otherwise False."
     return any(item in list_b for item in list_a)
 
 def all_in_list(list_a, list_b):
-    "Returns True if all items from list_a are in list_b, otherwise False."
+    "Returns True if *all* items from list_a are in list_b, otherwise False."
     return all(item in list_b for item in list_a)
 
+### 3. Create function to predict on a given image with a given confidence threshold ###
 def predict_on_image(image, conf_threshold):
+    # Make sure model is in eval mode
+    model.eval()
+
+    # Make a prediction on target image
     with torch.no_grad():
         inputs = image_processor(images=[image], return_tensors="pt")
-
-
-        target_sizes = torch.tensor([[image.size[1], image.size[0]]]) # height, width
+        model_outputs = model(**inputs.to(device))
 
-
+        target_sizes = torch.tensor([[image.size[1], image.size[0]]]) # -> [batch_size, height, width]
+
+        # Post process the raw outputs from the model
+        results = image_processor.post_process_object_detection(model_outputs,
                                                                  threshold=conf_threshold,
                                                                  target_sizes=target_sizes)[0]
-
+
+    # Return all items in results to CPU (we'll want this for displaying outputs with matplotlib)
     for key, value in results.items():
         try:
             results[key] = value.item().cpu() # can't get scalar as .item() so add try/except block
         except:
             results[key] = value.cpu()
 
+    ### 4. Draw the predictions on the target image ###
+
     # Can return results as plotted on a PIL image (then display the image)
     draw = ImageDraw.Draw(image)
 
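For reference, the hunk above follows the standard transformers object-detection inference pattern. A self-contained sketch of that pattern (not the commit's exact code: the example image path and the 0.3 threshold are placeholder choices) might be:

```python
import torch
from PIL import Image
from transformers import AutoImageProcessor, AutoModelForObjectDetection

model_save_path = "mrdbourke/rt_detrv2_finetuned_trashify_box_detector_v1"
image_processor = AutoImageProcessor.from_pretrained(model_save_path)
model = AutoModelForObjectDetection.from_pretrained(model_save_path).eval()

device = "cuda" if torch.cuda.is_available() else "cpu"
model = model.to(device)

image = Image.open("trashify_examples/trashify_example_1.jpeg")  # placeholder: any RGB image works

# Forward pass without gradient tracking
with torch.no_grad():
    inputs = image_processor(images=[image], return_tensors="pt").to(device)
    model_outputs = model(**inputs)

# post_process_object_detection expects one (height, width) pair per image in the batch
target_sizes = torch.tensor([[image.size[1], image.size[0]]])
results = image_processor.post_process_object_detection(model_outputs,
                                                         threshold=0.3,  # placeholder confidence threshold
                                                         target_sizes=target_sizes)[0]

# results is a dict of tensors: "scores", "labels" and "boxes" (xyxy pixel coordinates)
for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
    print(f"{model.config.id2label[label.item()]}: {score.item():.3f} at {box.tolist()}")
```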
@@ -64,6 +80,7 @@ def predict_on_image(image, conf_threshold):
     # Get class names as text for print out
     class_name_text_labels = []
 
+    # Iterate through the predictions of the model and draw them on the target image
     for box, score, label in zip(results["boxes"], results["scores"], results["labels"]):
         # Create coordinates
         x, y, x2, y2 = tuple(box.tolist())
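The hunk above begins the loop that draws each detection onto the input image. As a rough, self-contained illustration of the PIL drawing calls involved (the placeholder image, colours, line width and label text are assumptions, not the commit's exact styling):

```python
from PIL import Image, ImageDraw

# Placeholder image and a made-up xyxy prediction to draw on it
image = Image.new(mode="RGB", size=(640, 480), color="lightgrey")
box = (50, 60, 220, 300)    # (x, y, x2, y2) in pixel coordinates
label_text = "trash: 0.87"  # class name plus confidence score

draw = ImageDraw.Draw(image)
draw.rectangle(xy=box, outline="red", width=3)                       # box outline
draw.text(xy=(box[0] + 4, box[1] + 4), text=label_text, fill="red")  # label near the box's top-left corner

image.save("annotated_example.jpg")
```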
@@ -96,6 +113,8 @@ def predict_on_image(image, conf_threshold):
     # Setup list of target items to discover
     target_items = ["trash", "bin", "hand"]
 
+    ### 5. Create logic for outputting information message ###
+
     # If no items detected or trash, bin, hand not in list, return notification
     if (len(class_name_text_labels) == 0) or not (any_in_list(list_a=target_items, list_b=class_name_text_labels)):
         return_string = f"No trash, bin or hand detected at confidence threshold {conf_threshold}. Try another image or lowering the confidence threshold."
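The hunk above starts the message logic: the helper functions from earlier check which of the target classes made it into the detected labels. A small self-contained sketch of that branching follows (the detected labels and all message strings other than the general idea of the "+1" reward are made up for illustration):

```python
target_items = ["trash", "bin", "hand"]

def any_in_list(list_a, list_b):
    "Returns True if *any* item from list_a is in list_b, otherwise False."
    return any(item in list_b for item in list_a)

def all_in_list(list_a, list_b):
    "Returns True if *all* items from list_a are in list_b, otherwise False."
    return all(item in list_b for item in list_a)

# Hypothetical class names detected above a given confidence threshold
class_name_text_labels = ["trash", "hand"]

if (len(class_name_text_labels) == 0) or not any_in_list(list_a=target_items, list_b=class_name_text_labels):
    return_string = "No trash, bin or hand detected. Try another image or a lower confidence threshold."
elif all_in_list(list_a=target_items, list_b=class_name_text_labels):
    return_string = "+1! Detected trash, bin and hand."
else:
    missing_items = [item for item in target_items if item not in class_name_text_labels]
    return_string = f"Detected {class_name_text_labels}, still missing {missing_items} for +1."

print(return_string)
```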
@@ -117,7 +136,20 @@ def predict_on_image(image, conf_threshold):
 
     return image, return_string
 
-
+### 6. Setup the demo application to take in image, make a prediction with our model, return the image with drawn predictions ###
+
+# Write description for our demo application
+description = """
+Help clean up your local area! Upload an image and get +1 if there is all of the following items detected: trash, bin, hand.
+
+Model is a fine-tuned version of [RT-DETRv2](https://huggingface.co/docs/transformers/main/en/model_doc/rt_detr_v2#transformers.RTDetrV2Config) on the [Trashify dataset](https://huggingface.co/datasets/mrdbourke/trashify_manual_labelled_images).
+
+See the full data loading and training code on [learnhuggingface.com](https://www.learnhuggingface.com/notebooks/hugging_face_object_detection_tutorial).
+
+This version is v4 because the first three versions were using a different model and did not perform as well, see the [README](https://huggingface.co/spaces/mrdbourke/trashify_demo_v4/blob/main/README.md) for more.
+"""
+
+# Create the Gradio interface to accept an image and confidence threshold and return an image with drawn prediction boxes
 demo = gr.Interface(
     fn=predict_on_image,
     inputs=[
trashify_examples/trashify_example_1.jpeg
CHANGED (Git LFS tracked file updated)

trashify_examples/trashify_example_2.jpeg
CHANGED (Git LFS tracked file updated)

trashify_examples/trashify_example_3.jpeg
CHANGED (Git LFS tracked file updated)