Spaces:
Running
Running
| # Ultralytics π AGPL-3.0 License - https://ultralytics.com/license | |
| """ | |
| Ultralytics Results, Boxes and Masks classes for handling inference results. | |
| Usage: See https://docs.ultralytics.com/modes/predict/ | |
| """ | |
| from copy import deepcopy | |
| from functools import lru_cache | |
| from pathlib import Path | |
| import numpy as np | |
| import torch | |
| from ultralytics.data.augment import LetterBox | |
| from ultralytics.utils import LOGGER, SimpleClass, ops | |
| from ultralytics.utils.checks import check_requirements | |
| from ultralytics.utils.plotting import Annotator, colors, save_one_box | |
| from ultralytics.utils.torch_utils import smart_inference_mode | |
| class BaseTensor(SimpleClass): | |
| """ | |
| Base tensor class with additional methods for easy manipulation and device handling. | |
| Attributes: | |
| data (torch.Tensor | np.ndarray): Prediction data such as bounding boxes, masks, or keypoints. | |
| orig_shape (Tuple[int, int]): Original shape of the image, typically in the format (height, width). | |
| Methods: | |
| cpu: Return a copy of the tensor stored in CPU memory. | |
| numpy: Returns a copy of the tensor as a numpy array. | |
| cuda: Moves the tensor to GPU memory, returning a new instance if necessary. | |
| to: Return a copy of the tensor with the specified device and dtype. | |
| Examples: | |
| >>> import torch | |
| >>> data = torch.tensor([[1, 2, 3], [4, 5, 6]]) | |
| >>> orig_shape = (720, 1280) | |
| >>> base_tensor = BaseTensor(data, orig_shape) | |
| >>> cpu_tensor = base_tensor.cpu() | |
| >>> numpy_array = base_tensor.numpy() | |
| >>> gpu_tensor = base_tensor.cuda() | |
| """ | |
| def __init__(self, data, orig_shape) -> None: | |
| """ | |
| Initialize BaseTensor with prediction data and the original shape of the image. | |
| Args: | |
| data (torch.Tensor | np.ndarray): Prediction data such as bounding boxes, masks, or keypoints. | |
| orig_shape (Tuple[int, int]): Original shape of the image in (height, width) format. | |
| Examples: | |
| >>> import torch | |
| >>> data = torch.tensor([[1, 2, 3], [4, 5, 6]]) | |
| >>> orig_shape = (720, 1280) | |
| >>> base_tensor = BaseTensor(data, orig_shape) | |
| """ | |
| assert isinstance(data, (torch.Tensor, np.ndarray)), "data must be torch.Tensor or np.ndarray" | |
| self.data = data | |
| self.orig_shape = orig_shape | |
| def shape(self): | |
| """ | |
| Returns the shape of the underlying data tensor. | |
| Returns: | |
| (Tuple[int, ...]): The shape of the data tensor. | |
| Examples: | |
| >>> data = torch.rand(100, 4) | |
| >>> base_tensor = BaseTensor(data, orig_shape=(720, 1280)) | |
| >>> print(base_tensor.shape) | |
| (100, 4) | |
| """ | |
| return self.data.shape | |
| def cpu(self): | |
| """ | |
| Returns a copy of the tensor stored in CPU memory. | |
| Returns: | |
| (BaseTensor): A new BaseTensor object with the data tensor moved to CPU memory. | |
| Examples: | |
| >>> data = torch.tensor([[1, 2, 3], [4, 5, 6]]).cuda() | |
| >>> base_tensor = BaseTensor(data, orig_shape=(720, 1280)) | |
| >>> cpu_tensor = base_tensor.cpu() | |
| >>> isinstance(cpu_tensor, BaseTensor) | |
| True | |
| >>> cpu_tensor.data.device | |
| device(type='cpu') | |
| """ | |
| return self if isinstance(self.data, np.ndarray) else self.__class__(self.data.cpu(), self.orig_shape) | |
| def numpy(self): | |
| """ | |
| Returns a copy of the tensor as a numpy array. | |
| Returns: | |
| (np.ndarray): A numpy array containing the same data as the original tensor. | |
| Examples: | |
| >>> data = torch.tensor([[1, 2, 3], [4, 5, 6]]) | |
| >>> orig_shape = (720, 1280) | |
| >>> base_tensor = BaseTensor(data, orig_shape) | |
| >>> numpy_array = base_tensor.numpy() | |
| >>> print(type(numpy_array)) | |
| <class 'numpy.ndarray'> | |
| """ | |
| return self if isinstance(self.data, np.ndarray) else self.__class__(self.data.numpy(), self.orig_shape) | |
| def cuda(self): | |
| """ | |
| Moves the tensor to GPU memory. | |
| Returns: | |
| (BaseTensor): A new BaseTensor instance with the data moved to GPU memory if it's not already a | |
| numpy array, otherwise returns self. | |
| Examples: | |
| >>> import torch | |
| >>> from ultralytics.engine.results import BaseTensor | |
| >>> data = torch.tensor([[1, 2, 3], [4, 5, 6]]) | |
| >>> base_tensor = BaseTensor(data, orig_shape=(720, 1280)) | |
| >>> gpu_tensor = base_tensor.cuda() | |
| >>> print(gpu_tensor.data.device) | |
| cuda:0 | |
| """ | |
| return self.__class__(torch.as_tensor(self.data).cuda(), self.orig_shape) | |
| def to(self, *args, **kwargs): | |
| """ | |
| Return a copy of the tensor with the specified device and dtype. | |
| Args: | |
| *args (Any): Variable length argument list to be passed to torch.Tensor.to(). | |
| **kwargs (Any): Arbitrary keyword arguments to be passed to torch.Tensor.to(). | |
| Returns: | |
| (BaseTensor): A new BaseTensor instance with the data moved to the specified device and/or dtype. | |
| Examples: | |
| >>> base_tensor = BaseTensor(torch.randn(3, 4), orig_shape=(480, 640)) | |
| >>> cuda_tensor = base_tensor.to("cuda") | |
| >>> float16_tensor = base_tensor.to(dtype=torch.float16) | |
| """ | |
| return self.__class__(torch.as_tensor(self.data).to(*args, **kwargs), self.orig_shape) | |
| def __len__(self): # override len(results) | |
| """ | |
| Returns the length of the underlying data tensor. | |
| Returns: | |
| (int): The number of elements in the first dimension of the data tensor. | |
| Examples: | |
| >>> data = torch.tensor([[1, 2, 3], [4, 5, 6]]) | |
| >>> base_tensor = BaseTensor(data, orig_shape=(720, 1280)) | |
| >>> len(base_tensor) | |
| 2 | |
| """ | |
| return len(self.data) | |
| def __getitem__(self, idx): | |
| """ | |
| Returns a new BaseTensor instance containing the specified indexed elements of the data tensor. | |
| Args: | |
| idx (int | List[int] | torch.Tensor): Index or indices to select from the data tensor. | |
| Returns: | |
| (BaseTensor): A new BaseTensor instance containing the indexed data. | |
| Examples: | |
| >>> data = torch.tensor([[1, 2, 3], [4, 5, 6]]) | |
| >>> base_tensor = BaseTensor(data, orig_shape=(720, 1280)) | |
| >>> result = base_tensor[0] # Select the first row | |
| >>> print(result.data) | |
| tensor([1, 2, 3]) | |
| """ | |
| return self.__class__(self.data[idx], self.orig_shape) | |
| class Results(SimpleClass): | |
| """ | |
| A class for storing and manipulating inference results. | |
| This class encapsulates the functionality for handling detection, segmentation, pose estimation, | |
| and classification results from YOLO models. | |
| Attributes: | |
| orig_img (numpy.ndarray): Original image as a numpy array. | |
| orig_shape (Tuple[int, int]): Original image shape in (height, width) format. | |
| boxes (Boxes | None): Object containing detection bounding boxes. | |
| masks (Masks | None): Object containing detection masks. | |
| probs (Probs | None): Object containing class probabilities for classification tasks. | |
| keypoints (Keypoints | None): Object containing detected keypoints for each object. | |
| obb (OBB | None): Object containing oriented bounding boxes. | |
| speed (Dict[str, float | None]): Dictionary of preprocess, inference, and postprocess speeds. | |
| names (Dict[int, str]): Dictionary mapping class IDs to class names. | |
| path (str): Path to the image file. | |
| _keys (Tuple[str, ...]): Tuple of attribute names for internal use. | |
| Methods: | |
| update: Updates object attributes with new detection results. | |
| cpu: Returns a copy of the Results object with all tensors on CPU memory. | |
| numpy: Returns a copy of the Results object with all tensors as numpy arrays. | |
| cuda: Returns a copy of the Results object with all tensors on GPU memory. | |
| to: Returns a copy of the Results object with tensors on a specified device and dtype. | |
| new: Returns a new Results object with the same image, path, and names. | |
| plot: Plots detection results on an input image, returning an annotated image. | |
| show: Shows annotated results on screen. | |
| save: Saves annotated results to file. | |
| verbose: Returns a log string for each task, detailing detections and classifications. | |
| save_txt: Saves detection results to a text file. | |
| save_crop: Saves cropped detection images. | |
| tojson: Converts detection results to JSON format. | |
| Examples: | |
| >>> results = model("path/to/image.jpg") | |
| >>> for result in results: | |
| ... print(result.boxes) # Print detection boxes | |
| ... result.show() # Display the annotated image | |
| ... result.save(filename="result.jpg") # Save annotated image | |
| """ | |
| def __init__( | |
| self, orig_img, path, names, boxes=None, masks=None, probs=None, keypoints=None, obb=None, speed=None | |
| ) -> None: | |
| """ | |
| Initialize the Results class for storing and manipulating inference results. | |
| Args: | |
| orig_img (numpy.ndarray): The original image as a numpy array. | |
| path (str): The path to the image file. | |
| names (Dict): A dictionary of class names. | |
| boxes (torch.Tensor | None): A 2D tensor of bounding box coordinates for each detection. | |
| masks (torch.Tensor | None): A 3D tensor of detection masks, where each mask is a binary image. | |
| probs (torch.Tensor | None): A 1D tensor of probabilities of each class for classification task. | |
| keypoints (torch.Tensor | None): A 2D tensor of keypoint coordinates for each detection. | |
| obb (torch.Tensor | None): A 2D tensor of oriented bounding box coordinates for each detection. | |
| speed (Dict | None): A dictionary containing preprocess, inference, and postprocess speeds (ms/image). | |
| Examples: | |
| >>> results = model("path/to/image.jpg") | |
| >>> result = results[0] # Get the first result | |
| >>> boxes = result.boxes # Get the boxes for the first result | |
| >>> masks = result.masks # Get the masks for the first result | |
| Notes: | |
| For the default pose model, keypoint indices for human body pose estimation are: | |
| 0: Nose, 1: Left Eye, 2: Right Eye, 3: Left Ear, 4: Right Ear | |
| 5: Left Shoulder, 6: Right Shoulder, 7: Left Elbow, 8: Right Elbow | |
| 9: Left Wrist, 10: Right Wrist, 11: Left Hip, 12: Right Hip | |
| 13: Left Knee, 14: Right Knee, 15: Left Ankle, 16: Right Ankle | |
| """ | |
| self.orig_img = orig_img | |
| self.orig_shape = orig_img.shape[:2] | |
| self.boxes = Boxes(boxes, self.orig_shape) if boxes is not None else None # native size boxes | |
| self.masks = Masks(masks, self.orig_shape) if masks is not None else None # native size or imgsz masks | |
| self.probs = Probs(probs) if probs is not None else None | |
| self.keypoints = Keypoints(keypoints, self.orig_shape) if keypoints is not None else None | |
| self.obb = OBB(obb, self.orig_shape) if obb is not None else None | |
| self.speed = speed if speed is not None else {"preprocess": None, "inference": None, "postprocess": None} | |
| self.names = names | |
| self.path = path | |
| self.save_dir = None | |
| self._keys = "boxes", "masks", "probs", "keypoints", "obb" | |
| def __getitem__(self, idx): | |
| """ | |
| Return a Results object for a specific index of inference results. | |
| Args: | |
| idx (int | slice): Index or slice to retrieve from the Results object. | |
| Returns: | |
| (Results): A new Results object containing the specified subset of inference results. | |
| Examples: | |
| >>> results = model("path/to/image.jpg") # Perform inference | |
| >>> single_result = results[0] # Get the first result | |
| >>> subset_results = results[1:4] # Get a slice of results | |
| """ | |
| return self._apply("__getitem__", idx) | |
| def __len__(self): | |
| """ | |
| Return the number of detections in the Results object. | |
| Returns: | |
| (int): The number of detections, determined by the length of the first non-empty attribute | |
| (boxes, masks, probs, keypoints, or obb). | |
| Examples: | |
| >>> results = Results(orig_img, path, names, boxes=torch.rand(5, 4)) | |
| >>> len(results) | |
| 5 | |
| """ | |
| for k in self._keys: | |
| v = getattr(self, k) | |
| if v is not None: | |
| return len(v) | |
| def update(self, boxes=None, masks=None, probs=None, obb=None): | |
| """ | |
| Updates the Results object with new detection data. | |
| This method allows updating the boxes, masks, probabilities, and oriented bounding boxes (OBB) of the | |
| Results object. It ensures that boxes are clipped to the original image shape. | |
| Args: | |
| boxes (torch.Tensor | None): A tensor of shape (N, 6) containing bounding box coordinates and | |
| confidence scores. The format is (x1, y1, x2, y2, conf, class). | |
| masks (torch.Tensor | None): A tensor of shape (N, H, W) containing segmentation masks. | |
| probs (torch.Tensor | None): A tensor of shape (num_classes,) containing class probabilities. | |
| obb (torch.Tensor | None): A tensor of shape (N, 5) containing oriented bounding box coordinates. | |
| Examples: | |
| >>> results = model("image.jpg") | |
| >>> new_boxes = torch.tensor([[100, 100, 200, 200, 0.9, 0]]) | |
| >>> results[0].update(boxes=new_boxes) | |
| """ | |
| if boxes is not None: | |
| self.boxes = Boxes(ops.clip_boxes(boxes, self.orig_shape), self.orig_shape) | |
| if masks is not None: | |
| self.masks = Masks(masks, self.orig_shape) | |
| if probs is not None: | |
| self.probs = probs | |
| if obb is not None: | |
| self.obb = OBB(obb, self.orig_shape) | |
| def _apply(self, fn, *args, **kwargs): | |
| """ | |
| Applies a function to all non-empty attributes and returns a new Results object with modified attributes. | |
| This method is internally called by methods like .to(), .cuda(), .cpu(), etc. | |
| Args: | |
| fn (str): The name of the function to apply. | |
| *args (Any): Variable length argument list to pass to the function. | |
| **kwargs (Any): Arbitrary keyword arguments to pass to the function. | |
| Returns: | |
| (Results): A new Results object with attributes modified by the applied function. | |
| Examples: | |
| >>> results = model("path/to/image.jpg") | |
| >>> for result in results: | |
| ... result_cuda = result.cuda() | |
| ... result_cpu = result.cpu() | |
| """ | |
| r = self.new() | |
| for k in self._keys: | |
| v = getattr(self, k) | |
| if v is not None: | |
| setattr(r, k, getattr(v, fn)(*args, **kwargs)) | |
| return r | |
| def cpu(self): | |
| """ | |
| Returns a copy of the Results object with all its tensors moved to CPU memory. | |
| This method creates a new Results object with all tensor attributes (boxes, masks, probs, keypoints, obb) | |
| transferred to CPU memory. It's useful for moving data from GPU to CPU for further processing or saving. | |
| Returns: | |
| (Results): A new Results object with all tensor attributes on CPU memory. | |
| Examples: | |
| >>> results = model("path/to/image.jpg") # Perform inference | |
| >>> cpu_result = results[0].cpu() # Move the first result to CPU | |
| >>> print(cpu_result.boxes.device) # Output: cpu | |
| """ | |
| return self._apply("cpu") | |
| def numpy(self): | |
| """ | |
| Converts all tensors in the Results object to numpy arrays. | |
| Returns: | |
| (Results): A new Results object with all tensors converted to numpy arrays. | |
| Examples: | |
| >>> results = model("path/to/image.jpg") | |
| >>> numpy_result = results[0].numpy() | |
| >>> type(numpy_result.boxes.data) | |
| <class 'numpy.ndarray'> | |
| Notes: | |
| This method creates a new Results object, leaving the original unchanged. It's useful for | |
| interoperability with numpy-based libraries or when CPU-based operations are required. | |
| """ | |
| return self._apply("numpy") | |
| def cuda(self): | |
| """ | |
| Moves all tensors in the Results object to GPU memory. | |
| Returns: | |
| (Results): A new Results object with all tensors moved to CUDA device. | |
| Examples: | |
| >>> results = model("path/to/image.jpg") | |
| >>> cuda_results = results[0].cuda() # Move first result to GPU | |
| >>> for result in results: | |
| ... result_cuda = result.cuda() # Move each result to GPU | |
| """ | |
| return self._apply("cuda") | |
| def to(self, *args, **kwargs): | |
| """ | |
| Moves all tensors in the Results object to the specified device and dtype. | |
| Args: | |
| *args (Any): Variable length argument list to be passed to torch.Tensor.to(). | |
| **kwargs (Any): Arbitrary keyword arguments to be passed to torch.Tensor.to(). | |
| Returns: | |
| (Results): A new Results object with all tensors moved to the specified device and dtype. | |
| Examples: | |
| >>> results = model("path/to/image.jpg") | |
| >>> result_cuda = results[0].to("cuda") # Move first result to GPU | |
| >>> result_cpu = results[0].to("cpu") # Move first result to CPU | |
| >>> result_half = results[0].to(dtype=torch.float16) # Convert first result to half precision | |
| """ | |
| return self._apply("to", *args, **kwargs) | |
| def new(self): | |
| """ | |
| Creates a new Results object with the same image, path, names, and speed attributes. | |
| Returns: | |
| (Results): A new Results object with copied attributes from the original instance. | |
| Examples: | |
| >>> results = model("path/to/image.jpg") | |
| >>> new_result = results[0].new() | |
| """ | |
| return Results(orig_img=self.orig_img, path=self.path, names=self.names, speed=self.speed) | |
| def plot( | |
| self, | |
| conf=True, | |
| line_width=None, | |
| font_size=None, | |
| font="Arial.ttf", | |
| pil=False, | |
| img=None, | |
| im_gpu=None, | |
| kpt_radius=5, | |
| kpt_line=True, | |
| labels=True, | |
| boxes=True, | |
| masks=True, | |
| probs=True, | |
| show=False, | |
| save=False, | |
| filename=None, | |
| color_mode="class", | |
| ): | |
| """ | |
| Plots detection results on an input RGB image. | |
| Args: | |
| conf (bool): Whether to plot detection confidence scores. | |
| line_width (float | None): Line width of bounding boxes. If None, scaled to image size. | |
| font_size (float | None): Font size for text. If None, scaled to image size. | |
| font (str): Font to use for text. | |
| pil (bool): Whether to return the image as a PIL Image. | |
| img (np.ndarray | None): Image to plot on. If None, uses original image. | |
| im_gpu (torch.Tensor | None): Normalized image on GPU for faster mask plotting. | |
| kpt_radius (int): Radius of drawn keypoints. | |
| kpt_line (bool): Whether to draw lines connecting keypoints. | |
| labels (bool): Whether to plot labels of bounding boxes. | |
| boxes (bool): Whether to plot bounding boxes. | |
| masks (bool): Whether to plot masks. | |
| probs (bool): Whether to plot classification probabilities. | |
| show (bool): Whether to display the annotated image. | |
| save (bool): Whether to save the annotated image. | |
| filename (str | None): Filename to save image if save is True. | |
| color_mode (bool): Specify the color mode, e.g., 'instance' or 'class'. Default to 'class'. | |
| Returns: | |
| (np.ndarray): Annotated image as a numpy array. | |
| Examples: | |
| >>> results = model("image.jpg") | |
| >>> for result in results: | |
| ... im = result.plot() | |
| ... im.show() | |
| """ | |
| assert color_mode in {"instance", "class"}, f"Expected color_mode='instance' or 'class', not {color_mode}." | |
| if img is None and isinstance(self.orig_img, torch.Tensor): | |
| img = (self.orig_img[0].detach().permute(1, 2, 0).contiguous() * 255).to(torch.uint8).cpu().numpy() | |
| names = self.names | |
| is_obb = self.obb is not None | |
| pred_boxes, show_boxes = self.obb if is_obb else self.boxes, boxes | |
| pred_masks, show_masks = self.masks, masks | |
| pred_probs, show_probs = self.probs, probs | |
| annotator = Annotator( | |
| deepcopy(self.orig_img if img is None else img), | |
| line_width, | |
| font_size, | |
| font, | |
| pil or (pred_probs is not None and show_probs), # Classify tasks default to pil=True | |
| example=names, | |
| ) | |
| # Plot Segment results | |
| if pred_masks and show_masks: | |
| if im_gpu is None: | |
| img = LetterBox(pred_masks.shape[1:])(image=annotator.result()) | |
| im_gpu = ( | |
| torch.as_tensor(img, dtype=torch.float16, device=pred_masks.data.device) | |
| .permute(2, 0, 1) | |
| .flip(0) | |
| .contiguous() | |
| / 255 | |
| ) | |
| idx = ( | |
| pred_boxes.id | |
| if pred_boxes.id is not None and color_mode == "instance" | |
| else pred_boxes.cls | |
| if pred_boxes and color_mode == "class" | |
| else reversed(range(len(pred_masks))) | |
| ) | |
| annotator.masks(pred_masks.data, colors=[colors(x, True) for x in idx], im_gpu=im_gpu) | |
| # Plot Detect results | |
| if pred_boxes is not None and show_boxes: | |
| for i, d in enumerate(reversed(pred_boxes)): | |
| c, d_conf, id = int(d.cls), float(d.conf) if conf else None, None if d.id is None else int(d.id.item()) | |
| name = ("" if id is None else f"id:{id} ") + names[c] | |
| label = (f"{name} {d_conf:.2f}" if conf else name) if labels else None | |
| box = d.xyxyxyxy.reshape(-1, 4, 2).squeeze() if is_obb else d.xyxy.squeeze() | |
| annotator.box_label( | |
| box, | |
| label, | |
| color=colors( | |
| c | |
| if color_mode == "class" | |
| else id | |
| if id is not None | |
| else i | |
| if color_mode == "instance" | |
| else None, | |
| True, | |
| ), | |
| rotated=is_obb, | |
| ) | |
| # Plot Classify results | |
| if pred_probs is not None and show_probs: | |
| text = ",\n".join(f"{names[j] if names else j} {pred_probs.data[j]:.2f}" for j in pred_probs.top5) | |
| x = round(self.orig_shape[0] * 0.03) | |
| annotator.text([x, x], text, txt_color=(255, 255, 255)) # TODO: allow setting colors | |
| # Plot Pose results | |
| if self.keypoints is not None: | |
| for i, k in enumerate(reversed(self.keypoints.data)): | |
| annotator.kpts( | |
| k, | |
| self.orig_shape, | |
| radius=kpt_radius, | |
| kpt_line=kpt_line, | |
| kpt_color=colors(i, True) if color_mode == "instance" else None, | |
| ) | |
| # Show results | |
| if show: | |
| annotator.show(self.path) | |
| # Save results | |
| if save: | |
| annotator.save(filename) | |
| return annotator.result() | |
| def show(self, *args, **kwargs): | |
| """ | |
| Display the image with annotated inference results. | |
| This method plots the detection results on the original image and displays it. It's a convenient way to | |
| visualize the model's predictions directly. | |
| Args: | |
| *args (Any): Variable length argument list to be passed to the `plot()` method. | |
| **kwargs (Any): Arbitrary keyword arguments to be passed to the `plot()` method. | |
| Examples: | |
| >>> results = model("path/to/image.jpg") | |
| >>> results[0].show() # Display the first result | |
| >>> for result in results: | |
| ... result.show() # Display all results | |
| """ | |
| self.plot(show=True, *args, **kwargs) | |
| def save(self, filename=None, *args, **kwargs): | |
| """ | |
| Saves annotated inference results image to file. | |
| This method plots the detection results on the original image and saves the annotated image to a file. It | |
| utilizes the `plot` method to generate the annotated image and then saves it to the specified filename. | |
| Args: | |
| filename (str | Path | None): The filename to save the annotated image. If None, a default filename | |
| is generated based on the original image path. | |
| *args (Any): Variable length argument list to be passed to the `plot` method. | |
| **kwargs (Any): Arbitrary keyword arguments to be passed to the `plot` method. | |
| Examples: | |
| >>> results = model("path/to/image.jpg") | |
| >>> for result in results: | |
| ... result.save("annotated_image.jpg") | |
| >>> # Or with custom plot arguments | |
| >>> for result in results: | |
| ... result.save("annotated_image.jpg", conf=False, line_width=2) | |
| """ | |
| if not filename: | |
| filename = f"results_{Path(self.path).name}" | |
| self.plot(save=True, filename=filename, *args, **kwargs) | |
| return filename | |
| def verbose(self): | |
| """ | |
| Returns a log string for each task in the results, detailing detection and classification outcomes. | |
| This method generates a human-readable string summarizing the detection and classification results. It includes | |
| the number of detections for each class and the top probabilities for classification tasks. | |
| Returns: | |
| (str): A formatted string containing a summary of the results. For detection tasks, it includes the | |
| number of detections per class. For classification tasks, it includes the top 5 class probabilities. | |
| Examples: | |
| >>> results = model("path/to/image.jpg") | |
| >>> for result in results: | |
| ... print(result.verbose()) | |
| 2 persons, 1 car, 3 traffic lights, | |
| dog 0.92, cat 0.78, horse 0.64, | |
| Notes: | |
| - If there are no detections, the method returns "(no detections), " for detection tasks. | |
| - For classification tasks, it returns the top 5 class probabilities and their corresponding class names. | |
| - The returned string is comma-separated and ends with a comma and a space. | |
| """ | |
| log_string = "" | |
| probs = self.probs | |
| if len(self) == 0: | |
| return log_string if probs is not None else f"{log_string}(no detections), " | |
| if probs is not None: | |
| log_string += f"{', '.join(f'{self.names[j]} {probs.data[j]:.2f}' for j in probs.top5)}, " | |
| if boxes := self.boxes: | |
| for c in boxes.cls.unique(): | |
| n = (boxes.cls == c).sum() # detections per class | |
| log_string += f"{n} {self.names[int(c)]}{'s' * (n > 1)}, " | |
| return log_string | |
| def save_txt(self, txt_file, save_conf=False): | |
| """ | |
| Save detection results to a text file. | |
| Args: | |
| txt_file (str | Path): Path to the output text file. | |
| save_conf (bool): Whether to include confidence scores in the output. | |
| Returns: | |
| (str): Path to the saved text file. | |
| Examples: | |
| >>> from ultralytics import YOLO | |
| >>> model = YOLO("yolo11n.pt") | |
| >>> results = model("path/to/image.jpg") | |
| >>> for result in results: | |
| ... result.save_txt("output.txt") | |
| Notes: | |
| - The file will contain one line per detection or classification with the following structure: | |
| - For detections: `class confidence x_center y_center width height` | |
| - For classifications: `confidence class_name` | |
| - For masks and keypoints, the specific formats will vary accordingly. | |
| - The function will create the output directory if it does not exist. | |
| - If save_conf is False, the confidence scores will be excluded from the output. | |
| - Existing contents of the file will not be overwritten; new results will be appended. | |
| """ | |
| is_obb = self.obb is not None | |
| boxes = self.obb if is_obb else self.boxes | |
| masks = self.masks | |
| probs = self.probs | |
| kpts = self.keypoints | |
| texts = [] | |
| if probs is not None: | |
| # Classify | |
| [texts.append(f"{probs.data[j]:.2f} {self.names[j]}") for j in probs.top5] | |
| elif boxes: | |
| # Detect/segment/pose | |
| for j, d in enumerate(boxes): | |
| c, conf, id = int(d.cls), float(d.conf), None if d.id is None else int(d.id.item()) | |
| line = (c, *(d.xyxyxyxyn.view(-1) if is_obb else d.xywhn.view(-1))) | |
| if masks: | |
| seg = masks[j].xyn[0].copy().reshape(-1) # reversed mask.xyn, (n,2) to (n*2) | |
| line = (c, *seg) | |
| if kpts is not None: | |
| kpt = torch.cat((kpts[j].xyn, kpts[j].conf[..., None]), 2) if kpts[j].has_visible else kpts[j].xyn | |
| line += (*kpt.reshape(-1).tolist(),) | |
| line += (conf,) * save_conf + (() if id is None else (id,)) | |
| texts.append(("%g " * len(line)).rstrip() % line) | |
| if texts: | |
| Path(txt_file).parent.mkdir(parents=True, exist_ok=True) # make directory | |
| with open(txt_file, "a") as f: | |
| f.writelines(text + "\n" for text in texts) | |
| def save_crop(self, save_dir, file_name=Path("im.jpg")): | |
| """ | |
| Saves cropped detection images to specified directory. | |
| This method saves cropped images of detected objects to a specified directory. Each crop is saved in a | |
| subdirectory named after the object's class, with the filename based on the input file_name. | |
| Args: | |
| save_dir (str | Path): Directory path where cropped images will be saved. | |
| file_name (str | Path): Base filename for the saved cropped images. Default is Path("im.jpg"). | |
| Notes: | |
| - This method does not support Classify or Oriented Bounding Box (OBB) tasks. | |
| - Crops are saved as 'save_dir/class_name/file_name.jpg'. | |
| - The method will create necessary subdirectories if they don't exist. | |
| - Original image is copied before cropping to avoid modifying the original. | |
| Examples: | |
| >>> results = model("path/to/image.jpg") | |
| >>> for result in results: | |
| ... result.save_crop(save_dir="path/to/crops", file_name="detection") | |
| """ | |
| if self.probs is not None: | |
| LOGGER.warning("WARNING β οΈ Classify task do not support `save_crop`.") | |
| return | |
| if self.obb is not None: | |
| LOGGER.warning("WARNING β οΈ OBB task do not support `save_crop`.") | |
| return | |
| for d in self.boxes: | |
| save_one_box( | |
| d.xyxy, | |
| self.orig_img.copy(), | |
| file=Path(save_dir) / self.names[int(d.cls)] / Path(file_name).with_suffix(".jpg"), | |
| BGR=True, | |
| ) | |
| def summary(self, normalize=False, decimals=5): | |
| """ | |
| Converts inference results to a summarized dictionary with optional normalization for box coordinates. | |
| This method creates a list of detection dictionaries, each containing information about a single | |
| detection or classification result. For classification tasks, it returns the top class and its | |
| confidence. For detection tasks, it includes class information, bounding box coordinates, and | |
| optionally mask segments and keypoints. | |
| Args: | |
| normalize (bool): Whether to normalize bounding box coordinates by image dimensions. Defaults to False. | |
| decimals (int): Number of decimal places to round the output values to. Defaults to 5. | |
| Returns: | |
| (List[Dict]): A list of dictionaries, each containing summarized information for a single | |
| detection or classification result. The structure of each dictionary varies based on the | |
| task type (classification or detection) and available information (boxes, masks, keypoints). | |
| Examples: | |
| >>> results = model("image.jpg") | |
| >>> summary = results[0].summary() | |
| >>> print(summary) | |
| """ | |
| # Create list of detection dictionaries | |
| results = [] | |
| if self.probs is not None: | |
| class_id = self.probs.top1 | |
| results.append( | |
| { | |
| "name": self.names[class_id], | |
| "class": class_id, | |
| "confidence": round(self.probs.top1conf.item(), decimals), | |
| } | |
| ) | |
| return results | |
| is_obb = self.obb is not None | |
| data = self.obb if is_obb else self.boxes | |
| h, w = self.orig_shape if normalize else (1, 1) | |
| for i, row in enumerate(data): # xyxy, track_id if tracking, conf, class_id | |
| class_id, conf = int(row.cls), round(row.conf.item(), decimals) | |
| box = (row.xyxyxyxy if is_obb else row.xyxy).squeeze().reshape(-1, 2).tolist() | |
| xy = {} | |
| for j, b in enumerate(box): | |
| xy[f"x{j + 1}"] = round(b[0] / w, decimals) | |
| xy[f"y{j + 1}"] = round(b[1] / h, decimals) | |
| result = {"name": self.names[class_id], "class": class_id, "confidence": conf, "box": xy} | |
| if data.is_track: | |
| result["track_id"] = int(row.id.item()) # track ID | |
| if self.masks: | |
| result["segments"] = { | |
| "x": (self.masks.xy[i][:, 0] / w).round(decimals).tolist(), | |
| "y": (self.masks.xy[i][:, 1] / h).round(decimals).tolist(), | |
| } | |
| if self.keypoints is not None: | |
| x, y, visible = self.keypoints[i].data[0].cpu().unbind(dim=1) # torch Tensor | |
| result["keypoints"] = { | |
| "x": (x / w).numpy().round(decimals).tolist(), # decimals named argument required | |
| "y": (y / h).numpy().round(decimals).tolist(), | |
| "visible": visible.numpy().round(decimals).tolist(), | |
| } | |
| results.append(result) | |
| return results | |
| def to_df(self, normalize=False, decimals=5): | |
| """ | |
| Converts detection results to a Pandas Dataframe. | |
| This method converts the detection results into Pandas Dataframe format. It includes information | |
| about detected objects such as bounding boxes, class names, confidence scores, and optionally | |
| segmentation masks and keypoints. | |
| Args: | |
| normalize (bool): Whether to normalize the bounding box coordinates by the image dimensions. | |
| If True, coordinates will be returned as float values between 0 and 1. Defaults to False. | |
| decimals (int): Number of decimal places to round the output values to. Defaults to 5. | |
| Returns: | |
| (DataFrame): A Pandas Dataframe containing all the information in results in an organized way. | |
| Examples: | |
| >>> results = model("path/to/image.jpg") | |
| >>> df_result = results[0].to_df() | |
| >>> print(df_result) | |
| """ | |
| import pandas as pd # scope for faster 'import ultralytics' | |
| return pd.DataFrame(self.summary(normalize=normalize, decimals=decimals)) | |
| def to_csv(self, normalize=False, decimals=5, *args, **kwargs): | |
| """ | |
| Converts detection results to a CSV format. | |
| This method serializes the detection results into a CSV format. It includes information | |
| about detected objects such as bounding boxes, class names, confidence scores, and optionally | |
| segmentation masks and keypoints. | |
| Args: | |
| normalize (bool): Whether to normalize the bounding box coordinates by the image dimensions. | |
| If True, coordinates will be returned as float values between 0 and 1. Defaults to False. | |
| decimals (int): Number of decimal places to round the output values to. Defaults to 5. | |
| *args (Any): Variable length argument list to be passed to pandas.DataFrame.to_csv(). | |
| **kwargs (Any): Arbitrary keyword arguments to be passed to pandas.DataFrame.to_csv(). | |
| Returns: | |
| (str): CSV containing all the information in results in an organized way. | |
| Examples: | |
| >>> results = model("path/to/image.jpg") | |
| >>> csv_result = results[0].to_csv() | |
| >>> print(csv_result) | |
| """ | |
| return self.to_df(normalize=normalize, decimals=decimals).to_csv(*args, **kwargs) | |
| def to_xml(self, normalize=False, decimals=5, *args, **kwargs): | |
| """ | |
| Converts detection results to XML format. | |
| This method serializes the detection results into an XML format. It includes information | |
| about detected objects such as bounding boxes, class names, confidence scores, and optionally | |
| segmentation masks and keypoints. | |
| Args: | |
| normalize (bool): Whether to normalize the bounding box coordinates by the image dimensions. | |
| If True, coordinates will be returned as float values between 0 and 1. Defaults to False. | |
| decimals (int): Number of decimal places to round the output values to. Defaults to 5. | |
| *args (Any): Variable length argument list to be passed to pandas.DataFrame.to_xml(). | |
| **kwargs (Any): Arbitrary keyword arguments to be passed to pandas.DataFrame.to_xml(). | |
| Returns: | |
| (str): An XML string containing all the information in results in an organized way. | |
| Examples: | |
| >>> results = model("path/to/image.jpg") | |
| >>> xml_result = results[0].to_xml() | |
| >>> print(xml_result) | |
| """ | |
| check_requirements("lxml") | |
| df = self.to_df(normalize=normalize, decimals=decimals) | |
| return '<?xml version="1.0" encoding="utf-8"?>\n<root></root>' if df.empty else df.to_xml(*args, **kwargs) | |
| def tojson(self, normalize=False, decimals=5): | |
| """Deprecated version of to_json().""" | |
| LOGGER.warning("WARNING β οΈ 'result.tojson()' is deprecated, replace with 'result.to_json()'.") | |
| return self.to_json(normalize, decimals) | |
| def to_json(self, normalize=False, decimals=5): | |
| """ | |
| Converts detection results to JSON format. | |
| This method serializes the detection results into a JSON-compatible format. It includes information | |
| about detected objects such as bounding boxes, class names, confidence scores, and optionally | |
| segmentation masks and keypoints. | |
| Args: | |
| normalize (bool): Whether to normalize the bounding box coordinates by the image dimensions. | |
| If True, coordinates will be returned as float values between 0 and 1. Defaults to False. | |
| decimals (int): Number of decimal places to round the output values to. Defaults to 5. | |
| Returns: | |
| (str): A JSON string containing the serialized detection results. | |
| Examples: | |
| >>> results = model("path/to/image.jpg") | |
| >>> json_result = results[0].to_json() | |
| >>> print(json_result) | |
| Notes: | |
| - For classification tasks, the JSON will contain class probabilities instead of bounding boxes. | |
| - For object detection tasks, the JSON will include bounding box coordinates, class names, and | |
| confidence scores. | |
| - If available, segmentation masks and keypoints will also be included in the JSON output. | |
| - The method uses the `summary` method internally to generate the data structure before | |
| converting it to JSON. | |
| """ | |
| import json | |
| return json.dumps(self.summary(normalize=normalize, decimals=decimals), indent=2) | |
| class Boxes(BaseTensor): | |
| """ | |
| A class for managing and manipulating detection boxes. | |
| This class provides functionality for handling detection boxes, including their coordinates, confidence scores, | |
| class labels, and optional tracking IDs. It supports various box formats and offers methods for easy manipulation | |
| and conversion between different coordinate systems. | |
| Attributes: | |
| data (torch.Tensor | numpy.ndarray): The raw tensor containing detection boxes and associated data. | |
| orig_shape (Tuple[int, int]): The original image dimensions (height, width). | |
| is_track (bool): Indicates whether tracking IDs are included in the box data. | |
| xyxy (torch.Tensor | numpy.ndarray): Boxes in [x1, y1, x2, y2] format. | |
| conf (torch.Tensor | numpy.ndarray): Confidence scores for each box. | |
| cls (torch.Tensor | numpy.ndarray): Class labels for each box. | |
| id (torch.Tensor | numpy.ndarray): Tracking IDs for each box (if available). | |
| xywh (torch.Tensor | numpy.ndarray): Boxes in [x, y, width, height] format. | |
| xyxyn (torch.Tensor | numpy.ndarray): Normalized [x1, y1, x2, y2] boxes relative to orig_shape. | |
| xywhn (torch.Tensor | numpy.ndarray): Normalized [x, y, width, height] boxes relative to orig_shape. | |
| Methods: | |
| cpu(): Returns a copy of the object with all tensors on CPU memory. | |
| numpy(): Returns a copy of the object with all tensors as numpy arrays. | |
| cuda(): Returns a copy of the object with all tensors on GPU memory. | |
| to(*args, **kwargs): Returns a copy of the object with tensors on specified device and dtype. | |
| Examples: | |
| >>> import torch | |
| >>> boxes_data = torch.tensor([[100, 50, 150, 100, 0.9, 0], [200, 150, 300, 250, 0.8, 1]]) | |
| >>> orig_shape = (480, 640) # height, width | |
| >>> boxes = Boxes(boxes_data, orig_shape) | |
| >>> print(boxes.xyxy) | |
| >>> print(boxes.conf) | |
| >>> print(boxes.cls) | |
| >>> print(boxes.xywhn) | |
| """ | |
| def __init__(self, boxes, orig_shape) -> None: | |
| """ | |
| Initialize the Boxes class with detection box data and the original image shape. | |
| This class manages detection boxes, providing easy access and manipulation of box coordinates, | |
| confidence scores, class identifiers, and optional tracking IDs. It supports multiple formats | |
| for box coordinates, including both absolute and normalized forms. | |
| Args: | |
| boxes (torch.Tensor | np.ndarray): A tensor or numpy array with detection boxes of shape | |
| (num_boxes, 6) or (num_boxes, 7). Columns should contain | |
| [x1, y1, x2, y2, confidence, class, (optional) track_id]. | |
| orig_shape (Tuple[int, int]): The original image shape as (height, width). Used for normalization. | |
| Attributes: | |
| data (torch.Tensor): The raw tensor containing detection boxes and their associated data. | |
| orig_shape (Tuple[int, int]): The original image size, used for normalization. | |
| is_track (bool): Indicates whether tracking IDs are included in the box data. | |
| Examples: | |
| >>> import torch | |
| >>> boxes = torch.tensor([[100, 50, 150, 100, 0.9, 0]]) | |
| >>> orig_shape = (480, 640) | |
| >>> detection_boxes = Boxes(boxes, orig_shape) | |
| >>> print(detection_boxes.xyxy) | |
| tensor([[100., 50., 150., 100.]]) | |
| """ | |
| if boxes.ndim == 1: | |
| boxes = boxes[None, :] | |
| n = boxes.shape[-1] | |
| assert n in {6, 7}, f"expected 6 or 7 values but got {n}" # xyxy, track_id, conf, cls | |
| super().__init__(boxes, orig_shape) | |
| self.is_track = n == 7 | |
| self.orig_shape = orig_shape | |
| def xyxy(self): | |
| """ | |
| Returns bounding boxes in [x1, y1, x2, y2] format. | |
| Returns: | |
| (torch.Tensor | numpy.ndarray): A tensor or numpy array of shape (n, 4) containing bounding box | |
| coordinates in [x1, y1, x2, y2] format, where n is the number of boxes. | |
| Examples: | |
| >>> results = model("image.jpg") | |
| >>> boxes = results[0].boxes | |
| >>> xyxy = boxes.xyxy | |
| >>> print(xyxy) | |
| """ | |
| return self.data[:, :4] | |
| def conf(self): | |
| """ | |
| Returns the confidence scores for each detection box. | |
| Returns: | |
| (torch.Tensor | numpy.ndarray): A 1D tensor or array containing confidence scores for each detection, | |
| with shape (N,) where N is the number of detections. | |
| Examples: | |
| >>> boxes = Boxes(torch.tensor([[10, 20, 30, 40, 0.9, 0]]), orig_shape=(100, 100)) | |
| >>> conf_scores = boxes.conf | |
| >>> print(conf_scores) | |
| tensor([0.9000]) | |
| """ | |
| return self.data[:, -2] | |
| def cls(self): | |
| """ | |
| Returns the class ID tensor representing category predictions for each bounding box. | |
| Returns: | |
| (torch.Tensor | numpy.ndarray): A tensor or numpy array containing the class IDs for each detection box. | |
| The shape is (N,), where N is the number of boxes. | |
| Examples: | |
| >>> results = model("image.jpg") | |
| >>> boxes = results[0].boxes | |
| >>> class_ids = boxes.cls | |
| >>> print(class_ids) # tensor([0., 2., 1.]) | |
| """ | |
| return self.data[:, -1] | |
| def id(self): | |
| """ | |
| Returns the tracking IDs for each detection box if available. | |
| Returns: | |
| (torch.Tensor | None): A tensor containing tracking IDs for each box if tracking is enabled, | |
| otherwise None. Shape is (N,) where N is the number of boxes. | |
| Examples: | |
| >>> results = model.track("path/to/video.mp4") | |
| >>> for result in results: | |
| ... boxes = result.boxes | |
| ... if boxes.is_track: | |
| ... track_ids = boxes.id | |
| ... print(f"Tracking IDs: {track_ids}") | |
| ... else: | |
| ... print("Tracking is not enabled for these boxes.") | |
| Notes: | |
| - This property is only available when tracking is enabled (i.e., when `is_track` is True). | |
| - The tracking IDs are typically used to associate detections across multiple frames in video analysis. | |
| """ | |
| return self.data[:, -3] if self.is_track else None | |
| # maxsize 1 should suffice | |
| def xywh(self): | |
| """ | |
| Convert bounding boxes from [x1, y1, x2, y2] format to [x, y, width, height] format. | |
| Returns: | |
| (torch.Tensor | numpy.ndarray): Boxes in [x_center, y_center, width, height] format, where x_center, y_center are the coordinates of | |
| the center point of the bounding box, width, height are the dimensions of the bounding box and the | |
| shape of the returned tensor is (N, 4), where N is the number of boxes. | |
| Examples: | |
| >>> boxes = Boxes(torch.tensor([[100, 50, 150, 100], [200, 150, 300, 250]]), orig_shape=(480, 640)) | |
| >>> xywh = boxes.xywh | |
| >>> print(xywh) | |
| tensor([[100.0000, 50.0000, 50.0000, 50.0000], | |
| [200.0000, 150.0000, 100.0000, 100.0000]]) | |
| """ | |
| return ops.xyxy2xywh(self.xyxy) | |
| def xyxyn(self): | |
| """ | |
| Returns normalized bounding box coordinates relative to the original image size. | |
| This property calculates and returns the bounding box coordinates in [x1, y1, x2, y2] format, | |
| normalized to the range [0, 1] based on the original image dimensions. | |
| Returns: | |
| (torch.Tensor | numpy.ndarray): Normalized bounding box coordinates with shape (N, 4), where N is | |
| the number of boxes. Each row contains [x1, y1, x2, y2] values normalized to [0, 1]. | |
| Examples: | |
| >>> boxes = Boxes(torch.tensor([[100, 50, 300, 400, 0.9, 0]]), orig_shape=(480, 640)) | |
| >>> normalized = boxes.xyxyn | |
| >>> print(normalized) | |
| tensor([[0.1562, 0.1042, 0.4688, 0.8333]]) | |
| """ | |
| xyxy = self.xyxy.clone() if isinstance(self.xyxy, torch.Tensor) else np.copy(self.xyxy) | |
| xyxy[..., [0, 2]] /= self.orig_shape[1] | |
| xyxy[..., [1, 3]] /= self.orig_shape[0] | |
| return xyxy | |
| def xywhn(self): | |
| """ | |
| Returns normalized bounding boxes in [x, y, width, height] format. | |
| This property calculates and returns the normalized bounding box coordinates in the format | |
| [x_center, y_center, width, height], where all values are relative to the original image dimensions. | |
| Returns: | |
| (torch.Tensor | numpy.ndarray): Normalized bounding boxes with shape (N, 4), where N is the | |
| number of boxes. Each row contains [x_center, y_center, width, height] values normalized | |
| to [0, 1] based on the original image dimensions. | |
| Examples: | |
| >>> boxes = Boxes(torch.tensor([[100, 50, 150, 100, 0.9, 0]]), orig_shape=(480, 640)) | |
| >>> normalized = boxes.xywhn | |
| >>> print(normalized) | |
| tensor([[0.1953, 0.1562, 0.0781, 0.1042]]) | |
| """ | |
| xywh = ops.xyxy2xywh(self.xyxy) | |
| xywh[..., [0, 2]] /= self.orig_shape[1] | |
| xywh[..., [1, 3]] /= self.orig_shape[0] | |
| return xywh | |
| class Masks(BaseTensor): | |
| """ | |
| A class for storing and manipulating detection masks. | |
| This class extends BaseTensor and provides functionality for handling segmentation masks, | |
| including methods for converting between pixel and normalized coordinates. | |
| Attributes: | |
| data (torch.Tensor | numpy.ndarray): The raw tensor or array containing mask data. | |
| orig_shape (tuple): Original image shape in (height, width) format. | |
| xy (List[numpy.ndarray]): A list of segments in pixel coordinates. | |
| xyn (List[numpy.ndarray]): A list of normalized segments. | |
| Methods: | |
| cpu(): Returns a copy of the Masks object with the mask tensor on CPU memory. | |
| numpy(): Returns a copy of the Masks object with the mask tensor as a numpy array. | |
| cuda(): Returns a copy of the Masks object with the mask tensor on GPU memory. | |
| to(*args, **kwargs): Returns a copy of the Masks object with the mask tensor on specified device and dtype. | |
| Examples: | |
| >>> masks_data = torch.rand(1, 160, 160) | |
| >>> orig_shape = (720, 1280) | |
| >>> masks = Masks(masks_data, orig_shape) | |
| >>> pixel_coords = masks.xy | |
| >>> normalized_coords = masks.xyn | |
| """ | |
| def __init__(self, masks, orig_shape) -> None: | |
| """ | |
| Initialize the Masks class with detection mask data and the original image shape. | |
| Args: | |
| masks (torch.Tensor | np.ndarray): Detection masks with shape (num_masks, height, width). | |
| orig_shape (tuple): The original image shape as (height, width). Used for normalization. | |
| Examples: | |
| >>> import torch | |
| >>> from ultralytics.engine.results import Masks | |
| >>> masks = torch.rand(10, 160, 160) # 10 masks of 160x160 resolution | |
| >>> orig_shape = (720, 1280) # Original image shape | |
| >>> mask_obj = Masks(masks, orig_shape) | |
| """ | |
| if masks.ndim == 2: | |
| masks = masks[None, :] | |
| super().__init__(masks, orig_shape) | |
| def xyn(self): | |
| """ | |
| Returns normalized xy-coordinates of the segmentation masks. | |
| This property calculates and caches the normalized xy-coordinates of the segmentation masks. The coordinates | |
| are normalized relative to the original image shape. | |
| Returns: | |
| (List[numpy.ndarray]): A list of numpy arrays, where each array contains the normalized xy-coordinates | |
| of a single segmentation mask. Each array has shape (N, 2), where N is the number of points in the | |
| mask contour. | |
| Examples: | |
| >>> results = model("image.jpg") | |
| >>> masks = results[0].masks | |
| >>> normalized_coords = masks.xyn | |
| >>> print(normalized_coords[0]) # Normalized coordinates of the first mask | |
| """ | |
| return [ | |
| ops.scale_coords(self.data.shape[1:], x, self.orig_shape, normalize=True) | |
| for x in ops.masks2segments(self.data) | |
| ] | |
| def xy(self): | |
| """ | |
| Returns the [x, y] pixel coordinates for each segment in the mask tensor. | |
| This property calculates and returns a list of pixel coordinates for each segmentation mask in the | |
| Masks object. The coordinates are scaled to match the original image dimensions. | |
| Returns: | |
| (List[numpy.ndarray]): A list of numpy arrays, where each array contains the [x, y] pixel | |
| coordinates for a single segmentation mask. Each array has shape (N, 2), where N is the | |
| number of points in the segment. | |
| Examples: | |
| >>> results = model("image.jpg") | |
| >>> masks = results[0].masks | |
| >>> xy_coords = masks.xy | |
| >>> print(len(xy_coords)) # Number of masks | |
| >>> print(xy_coords[0].shape) # Shape of first mask's coordinates | |
| """ | |
| return [ | |
| ops.scale_coords(self.data.shape[1:], x, self.orig_shape, normalize=False) | |
| for x in ops.masks2segments(self.data) | |
| ] | |
| class Keypoints(BaseTensor): | |
| """ | |
| A class for storing and manipulating detection keypoints. | |
| This class encapsulates functionality for handling keypoint data, including coordinate manipulation, | |
| normalization, and confidence values. | |
| Attributes: | |
| data (torch.Tensor): The raw tensor containing keypoint data. | |
| orig_shape (Tuple[int, int]): The original image dimensions (height, width). | |
| has_visible (bool): Indicates whether visibility information is available for keypoints. | |
| xy (torch.Tensor): Keypoint coordinates in [x, y] format. | |
| xyn (torch.Tensor): Normalized keypoint coordinates in [x, y] format, relative to orig_shape. | |
| conf (torch.Tensor): Confidence values for each keypoint, if available. | |
| Methods: | |
| cpu(): Returns a copy of the keypoints tensor on CPU memory. | |
| numpy(): Returns a copy of the keypoints tensor as a numpy array. | |
| cuda(): Returns a copy of the keypoints tensor on GPU memory. | |
| to(*args, **kwargs): Returns a copy of the keypoints tensor with specified device and dtype. | |
| Examples: | |
| >>> import torch | |
| >>> from ultralytics.engine.results import Keypoints | |
| >>> keypoints_data = torch.rand(1, 17, 3) # 1 detection, 17 keypoints, (x, y, conf) | |
| >>> orig_shape = (480, 640) # Original image shape (height, width) | |
| >>> keypoints = Keypoints(keypoints_data, orig_shape) | |
| >>> print(keypoints.xy.shape) # Access xy coordinates | |
| >>> print(keypoints.conf) # Access confidence values | |
| >>> keypoints_cpu = keypoints.cpu() # Move keypoints to CPU | |
| """ | |
| # avoid keypoints < conf in-place error | |
| def __init__(self, keypoints, orig_shape) -> None: | |
| """ | |
| Initializes the Keypoints object with detection keypoints and original image dimensions. | |
| This method processes the input keypoints tensor, handling both 2D and 3D formats. For 3D tensors | |
| (x, y, confidence), it masks out low-confidence keypoints by setting their coordinates to zero. | |
| Args: | |
| keypoints (torch.Tensor): A tensor containing keypoint data. Shape can be either: | |
| - (num_objects, num_keypoints, 2) for x, y coordinates only | |
| - (num_objects, num_keypoints, 3) for x, y coordinates and confidence scores | |
| orig_shape (Tuple[int, int]): The original image dimensions (height, width). | |
| Examples: | |
| >>> kpts = torch.rand(1, 17, 3) # 1 object, 17 keypoints (COCO format), x,y,conf | |
| >>> orig_shape = (720, 1280) # Original image height, width | |
| >>> keypoints = Keypoints(kpts, orig_shape) | |
| """ | |
| if keypoints.ndim == 2: | |
| keypoints = keypoints[None, :] | |
| if keypoints.shape[2] == 3: # x, y, conf | |
| mask = keypoints[..., 2] < 0.5 # points with conf < 0.5 (not visible) | |
| keypoints[..., :2][mask] = 0 | |
| super().__init__(keypoints, orig_shape) | |
| self.has_visible = self.data.shape[-1] == 3 | |
| def xy(self): | |
| """ | |
| Returns x, y coordinates of keypoints. | |
| Returns: | |
| (torch.Tensor): A tensor containing the x, y coordinates of keypoints with shape (N, K, 2), where N is | |
| the number of detections and K is the number of keypoints per detection. | |
| Examples: | |
| >>> results = model("image.jpg") | |
| >>> keypoints = results[0].keypoints | |
| >>> xy = keypoints.xy | |
| >>> print(xy.shape) # (N, K, 2) | |
| >>> print(xy[0]) # x, y coordinates of keypoints for first detection | |
| Notes: | |
| - The returned coordinates are in pixel units relative to the original image dimensions. | |
| - If keypoints were initialized with confidence values, only keypoints with confidence >= 0.5 are returned. | |
| - This property uses LRU caching to improve performance on repeated access. | |
| """ | |
| return self.data[..., :2] | |
| def xyn(self): | |
| """ | |
| Returns normalized coordinates (x, y) of keypoints relative to the original image size. | |
| Returns: | |
| (torch.Tensor | numpy.ndarray): A tensor or array of shape (N, K, 2) containing normalized keypoint | |
| coordinates, where N is the number of instances, K is the number of keypoints, and the last | |
| dimension contains [x, y] values in the range [0, 1]. | |
| Examples: | |
| >>> keypoints = Keypoints(torch.rand(1, 17, 2), orig_shape=(480, 640)) | |
| >>> normalized_kpts = keypoints.xyn | |
| >>> print(normalized_kpts.shape) | |
| torch.Size([1, 17, 2]) | |
| """ | |
| xy = self.xy.clone() if isinstance(self.xy, torch.Tensor) else np.copy(self.xy) | |
| xy[..., 0] /= self.orig_shape[1] | |
| xy[..., 1] /= self.orig_shape[0] | |
| return xy | |
| def conf(self): | |
| """ | |
| Returns confidence values for each keypoint. | |
| Returns: | |
| (torch.Tensor | None): A tensor containing confidence scores for each keypoint if available, | |
| otherwise None. Shape is (num_detections, num_keypoints) for batched data or (num_keypoints,) | |
| for single detection. | |
| Examples: | |
| >>> keypoints = Keypoints(torch.rand(1, 17, 3), orig_shape=(640, 640)) # 1 detection, 17 keypoints | |
| >>> conf = keypoints.conf | |
| >>> print(conf.shape) # torch.Size([1, 17]) | |
| """ | |
| return self.data[..., 2] if self.has_visible else None | |
| class Probs(BaseTensor): | |
| """ | |
| A class for storing and manipulating classification probabilities. | |
| This class extends BaseTensor and provides methods for accessing and manipulating | |
| classification probabilities, including top-1 and top-5 predictions. | |
| Attributes: | |
| data (torch.Tensor | numpy.ndarray): The raw tensor or array containing classification probabilities. | |
| orig_shape (tuple | None): The original image shape as (height, width). Not used in this class. | |
| top1 (int): Index of the class with the highest probability. | |
| top5 (List[int]): Indices of the top 5 classes by probability. | |
| top1conf (torch.Tensor | numpy.ndarray): Confidence score of the top 1 class. | |
| top5conf (torch.Tensor | numpy.ndarray): Confidence scores of the top 5 classes. | |
| Methods: | |
| cpu(): Returns a copy of the probabilities tensor on CPU memory. | |
| numpy(): Returns a copy of the probabilities tensor as a numpy array. | |
| cuda(): Returns a copy of the probabilities tensor on GPU memory. | |
| to(*args, **kwargs): Returns a copy of the probabilities tensor with specified device and dtype. | |
| Examples: | |
| >>> probs = torch.tensor([0.1, 0.3, 0.6]) | |
| >>> p = Probs(probs) | |
| >>> print(p.top1) | |
| 2 | |
| >>> print(p.top5) | |
| [2, 1, 0] | |
| >>> print(p.top1conf) | |
| tensor(0.6000) | |
| >>> print(p.top5conf) | |
| tensor([0.6000, 0.3000, 0.1000]) | |
| """ | |
| def __init__(self, probs, orig_shape=None) -> None: | |
| """ | |
| Initialize the Probs class with classification probabilities. | |
| This class stores and manages classification probabilities, providing easy access to top predictions and their | |
| confidences. | |
| Args: | |
| probs (torch.Tensor | np.ndarray): A 1D tensor or array of classification probabilities. | |
| orig_shape (tuple | None): The original image shape as (height, width). Not used in this class but kept for | |
| consistency with other result classes. | |
| Attributes: | |
| data (torch.Tensor | np.ndarray): The raw tensor or array containing classification probabilities. | |
| top1 (int): Index of the top 1 class. | |
| top5 (List[int]): Indices of the top 5 classes. | |
| top1conf (torch.Tensor | np.ndarray): Confidence of the top 1 class. | |
| top5conf (torch.Tensor | np.ndarray): Confidences of the top 5 classes. | |
| Examples: | |
| >>> import torch | |
| >>> probs = torch.tensor([0.1, 0.3, 0.2, 0.4]) | |
| >>> p = Probs(probs) | |
| >>> print(p.top1) | |
| 3 | |
| >>> print(p.top1conf) | |
| tensor(0.4000) | |
| >>> print(p.top5) | |
| [3, 1, 2, 0] | |
| """ | |
| super().__init__(probs, orig_shape) | |
| def top1(self): | |
| """ | |
| Returns the index of the class with the highest probability. | |
| Returns: | |
| (int): Index of the class with the highest probability. | |
| Examples: | |
| >>> probs = Probs(torch.tensor([0.1, 0.3, 0.6])) | |
| >>> probs.top1 | |
| 2 | |
| """ | |
| return int(self.data.argmax()) | |
| def top5(self): | |
| """ | |
| Returns the indices of the top 5 class probabilities. | |
| Returns: | |
| (List[int]): A list containing the indices of the top 5 class probabilities, sorted in descending order. | |
| Examples: | |
| >>> probs = Probs(torch.tensor([0.1, 0.2, 0.3, 0.4, 0.5])) | |
| >>> print(probs.top5) | |
| [4, 3, 2, 1, 0] | |
| """ | |
| return (-self.data).argsort(0)[:5].tolist() # this way works with both torch and numpy. | |
| def top1conf(self): | |
| """ | |
| Returns the confidence score of the highest probability class. | |
| This property retrieves the confidence score (probability) of the class with the highest predicted probability | |
| from the classification results. | |
| Returns: | |
| (torch.Tensor | numpy.ndarray): A tensor containing the confidence score of the top 1 class. | |
| Examples: | |
| >>> results = model("image.jpg") # classify an image | |
| >>> probs = results[0].probs # get classification probabilities | |
| >>> top1_confidence = probs.top1conf # get confidence of top 1 class | |
| >>> print(f"Top 1 class confidence: {top1_confidence.item():.4f}") | |
| """ | |
| return self.data[self.top1] | |
| def top5conf(self): | |
| """ | |
| Returns confidence scores for the top 5 classification predictions. | |
| This property retrieves the confidence scores corresponding to the top 5 class probabilities | |
| predicted by the model. It provides a quick way to access the most likely class predictions | |
| along with their associated confidence levels. | |
| Returns: | |
| (torch.Tensor | numpy.ndarray): A tensor or array containing the confidence scores for the | |
| top 5 predicted classes, sorted in descending order of probability. | |
| Examples: | |
| >>> results = model("image.jpg") | |
| >>> probs = results[0].probs | |
| >>> top5_conf = probs.top5conf | |
| >>> print(top5_conf) # Prints confidence scores for top 5 classes | |
| """ | |
| return self.data[self.top5] | |
| class OBB(BaseTensor): | |
| """ | |
| A class for storing and manipulating Oriented Bounding Boxes (OBB). | |
| This class provides functionality to handle oriented bounding boxes, including conversion between | |
| different formats, normalization, and access to various properties of the boxes. | |
| Attributes: | |
| data (torch.Tensor): The raw OBB tensor containing box coordinates and associated data. | |
| orig_shape (tuple): Original image size as (height, width). | |
| is_track (bool): Indicates whether tracking IDs are included in the box data. | |
| xywhr (torch.Tensor | numpy.ndarray): Boxes in [x_center, y_center, width, height, rotation] format. | |
| conf (torch.Tensor | numpy.ndarray): Confidence scores for each box. | |
| cls (torch.Tensor | numpy.ndarray): Class labels for each box. | |
| id (torch.Tensor | numpy.ndarray): Tracking IDs for each box, if available. | |
| xyxyxyxy (torch.Tensor | numpy.ndarray): Boxes in 8-point [x1, y1, x2, y2, x3, y3, x4, y4] format. | |
| xyxyxyxyn (torch.Tensor | numpy.ndarray): Normalized 8-point coordinates relative to orig_shape. | |
| xyxy (torch.Tensor | numpy.ndarray): Axis-aligned bounding boxes in [x1, y1, x2, y2] format. | |
| Methods: | |
| cpu(): Returns a copy of the OBB object with all tensors on CPU memory. | |
| numpy(): Returns a copy of the OBB object with all tensors as numpy arrays. | |
| cuda(): Returns a copy of the OBB object with all tensors on GPU memory. | |
| to(*args, **kwargs): Returns a copy of the OBB object with tensors on specified device and dtype. | |
| Examples: | |
| >>> boxes = torch.tensor([[100, 50, 150, 100, 30, 0.9, 0]]) # xywhr, conf, cls | |
| >>> obb = OBB(boxes, orig_shape=(480, 640)) | |
| >>> print(obb.xyxyxyxy) | |
| >>> print(obb.conf) | |
| >>> print(obb.cls) | |
| """ | |
| def __init__(self, boxes, orig_shape) -> None: | |
| """ | |
| Initialize an OBB (Oriented Bounding Box) instance with oriented bounding box data and original image shape. | |
| This class stores and manipulates Oriented Bounding Boxes (OBB) for object detection tasks. It provides | |
| various properties and methods to access and transform the OBB data. | |
| Args: | |
| boxes (torch.Tensor | numpy.ndarray): A tensor or numpy array containing the detection boxes, | |
| with shape (num_boxes, 7) or (num_boxes, 8). The last two columns contain confidence and class values. | |
| If present, the third last column contains track IDs, and the fifth column contains rotation. | |
| orig_shape (Tuple[int, int]): Original image size, in the format (height, width). | |
| Attributes: | |
| data (torch.Tensor | numpy.ndarray): The raw OBB tensor. | |
| orig_shape (Tuple[int, int]): The original image shape. | |
| is_track (bool): Whether the boxes include tracking IDs. | |
| Raises: | |
| AssertionError: If the number of values per box is not 7 or 8. | |
| Examples: | |
| >>> import torch | |
| >>> boxes = torch.rand(3, 7) # 3 boxes with 7 values each | |
| >>> orig_shape = (640, 480) | |
| >>> obb = OBB(boxes, orig_shape) | |
| >>> print(obb.xywhr) # Access the boxes in xywhr format | |
| """ | |
| if boxes.ndim == 1: | |
| boxes = boxes[None, :] | |
| n = boxes.shape[-1] | |
| assert n in {7, 8}, f"expected 7 or 8 values but got {n}" # xywh, rotation, track_id, conf, cls | |
| super().__init__(boxes, orig_shape) | |
| self.is_track = n == 8 | |
| self.orig_shape = orig_shape | |
| def xywhr(self): | |
| """ | |
| Returns boxes in [x_center, y_center, width, height, rotation] format. | |
| Returns: | |
| (torch.Tensor | numpy.ndarray): A tensor or numpy array containing the oriented bounding boxes with format | |
| [x_center, y_center, width, height, rotation]. The shape is (N, 5) where N is the number of boxes. | |
| Examples: | |
| >>> results = model("image.jpg") | |
| >>> obb = results[0].obb | |
| >>> xywhr = obb.xywhr | |
| >>> print(xywhr.shape) | |
| torch.Size([3, 5]) | |
| """ | |
| return self.data[:, :5] | |
| def conf(self): | |
| """ | |
| Returns the confidence scores for Oriented Bounding Boxes (OBBs). | |
| This property retrieves the confidence values associated with each OBB detection. The confidence score | |
| represents the model's certainty in the detection. | |
| Returns: | |
| (torch.Tensor | numpy.ndarray): A tensor or numpy array of shape (N,) containing confidence scores | |
| for N detections, where each score is in the range [0, 1]. | |
| Examples: | |
| >>> results = model("image.jpg") | |
| >>> obb_result = results[0].obb | |
| >>> confidence_scores = obb_result.conf | |
| >>> print(confidence_scores) | |
| """ | |
| return self.data[:, -2] | |
| def cls(self): | |
| """ | |
| Returns the class values of the oriented bounding boxes. | |
| Returns: | |
| (torch.Tensor | numpy.ndarray): A tensor or numpy array containing the class values for each oriented | |
| bounding box. The shape is (N,), where N is the number of boxes. | |
| Examples: | |
| >>> results = model("image.jpg") | |
| >>> result = results[0] | |
| >>> obb = result.obb | |
| >>> class_values = obb.cls | |
| >>> print(class_values) | |
| """ | |
| return self.data[:, -1] | |
| def id(self): | |
| """ | |
| Returns the tracking IDs of the oriented bounding boxes (if available). | |
| Returns: | |
| (torch.Tensor | numpy.ndarray | None): A tensor or numpy array containing the tracking IDs for each | |
| oriented bounding box. Returns None if tracking IDs are not available. | |
| Examples: | |
| >>> results = model("image.jpg", tracker=True) # Run inference with tracking | |
| >>> for result in results: | |
| ... if result.obb is not None: | |
| ... track_ids = result.obb.id | |
| ... if track_ids is not None: | |
| ... print(f"Tracking IDs: {track_ids}") | |
| """ | |
| return self.data[:, -3] if self.is_track else None | |
| def xyxyxyxy(self): | |
| """ | |
| Converts OBB format to 8-point (xyxyxyxy) coordinate format for rotated bounding boxes. | |
| Returns: | |
| (torch.Tensor | numpy.ndarray): Rotated bounding boxes in xyxyxyxy format with shape (N, 4, 2), where N is | |
| the number of boxes. Each box is represented by 4 points (x, y), starting from the top-left corner and | |
| moving clockwise. | |
| Examples: | |
| >>> obb = OBB(torch.tensor([[100, 100, 50, 30, 0.5, 0.9, 0]]), orig_shape=(640, 640)) | |
| >>> xyxyxyxy = obb.xyxyxyxy | |
| >>> print(xyxyxyxy.shape) | |
| torch.Size([1, 4, 2]) | |
| """ | |
| return ops.xywhr2xyxyxyxy(self.xywhr) | |
| def xyxyxyxyn(self): | |
| """ | |
| Converts rotated bounding boxes to normalized xyxyxyxy format. | |
| Returns: | |
| (torch.Tensor | numpy.ndarray): Normalized rotated bounding boxes in xyxyxyxy format with shape (N, 4, 2), | |
| where N is the number of boxes. Each box is represented by 4 points (x, y), normalized relative to | |
| the original image dimensions. | |
| Examples: | |
| >>> obb = OBB(torch.rand(10, 7), orig_shape=(640, 480)) # 10 random OBBs | |
| >>> normalized_boxes = obb.xyxyxyxyn | |
| >>> print(normalized_boxes.shape) | |
| torch.Size([10, 4, 2]) | |
| """ | |
| xyxyxyxyn = self.xyxyxyxy.clone() if isinstance(self.xyxyxyxy, torch.Tensor) else np.copy(self.xyxyxyxy) | |
| xyxyxyxyn[..., 0] /= self.orig_shape[1] | |
| xyxyxyxyn[..., 1] /= self.orig_shape[0] | |
| return xyxyxyxyn | |
| def xyxy(self): | |
| """ | |
| Converts oriented bounding boxes (OBB) to axis-aligned bounding boxes in xyxy format. | |
| This property calculates the minimal enclosing rectangle for each oriented bounding box and returns it in | |
| xyxy format (x1, y1, x2, y2). This is useful for operations that require axis-aligned bounding boxes, such | |
| as IoU calculation with non-rotated boxes. | |
| Returns: | |
| (torch.Tensor | numpy.ndarray): Axis-aligned bounding boxes in xyxy format with shape (N, 4), where N | |
| is the number of boxes. Each row contains [x1, y1, x2, y2] coordinates. | |
| Examples: | |
| >>> import torch | |
| >>> from ultralytics import YOLO | |
| >>> model = YOLO("yolov8n-obb.pt") | |
| >>> results = model("path/to/image.jpg") | |
| >>> for result in results: | |
| ... obb = result.obb | |
| ... if obb is not None: | |
| ... xyxy_boxes = obb.xyxy | |
| ... print(xyxy_boxes.shape) # (N, 4) | |
| Notes: | |
| - This method approximates the OBB by its minimal enclosing rectangle. | |
| - The returned format is compatible with standard object detection metrics and visualization tools. | |
| - The property uses caching to improve performance for repeated access. | |
| """ | |
| x = self.xyxyxyxy[..., 0] | |
| y = self.xyxyxyxy[..., 1] | |
| return ( | |
| torch.stack([x.amin(1), y.amin(1), x.amax(1), y.amax(1)], -1) | |
| if isinstance(x, torch.Tensor) | |
| else np.stack([x.min(1), y.min(1), x.max(1), y.max(1)], -1) | |
| ) | |