# File size: 2,410 Bytes
#
# 02667ce

from ultralytics import YOLO
import numpy as np
from PIL import Image
import io
import base64
import torch

# Weights file for the detector, resolved relative to the working directory.
_MODEL_PATH = 'best.pt'

# Labels for the class IDs this deployment was built around.
_CLASS_NAMES = ["card"]

# Process-wide cache so the weights are loaded once instead of on every request.
_MODEL_CACHE = {}


def _get_model(path=_MODEL_PATH):
    """Return the YOLO model for *path*, constructing it on first use only."""
    model = _MODEL_CACHE.get(path)
    if model is None:
        model = _MODEL_CACHE[path] = YOLO(path)
    return model


def _label_for(class_id, model_names):
    """Map an integer class ID to a label without ever raising.

    IDs covered by ``_CLASS_NAMES`` keep their hard-coded label; any other ID
    falls back to the model's own name table, and finally to the ID as text.
    """
    if 0 <= class_id < len(_CLASS_NAMES):
        return _CLASS_NAMES[class_id]
    try:
        return model_names[class_id]
    except (KeyError, IndexError, TypeError):
        # Unknown ID, or the result carried no usable name table.
        return str(class_id)


def hf_predict(image_bytes: bytes) -> dict:
    """
    Inference function for Hugging Face Inference API

    Decodes the image, runs the cached YOLO model on it and converts the
    first result's boxes into plain Python values.

    Args:
        image_bytes: Image file bytes from a POST request

    Returns:
        dict: ``{"detections": [...]}`` where each entry has a ``"box"`` dict
        (``xmin``/``ymin``/``xmax``/``ymax`` as ints, truncated toward zero),
        a float ``"score"`` and a string ``"label"``. The list is empty when
        nothing is detected.
    """
    # Load image
    image = Image.open(io.BytesIO(image_bytes)).convert('RGB')

    # Load model (cached: constructing YOLO per call re-reads the weights)
    model = _get_model()

    # Run inference; a single input image yields a single result
    result = model(image)[0]

    # Move tensors to host memory before converting to Python scalars
    boxes = result.boxes.xyxy.cpu().numpy()
    scores = result.boxes.conf.cpu().numpy()
    class_ids = result.boxes.cls.cpu().numpy()

    # Name table shipped with the weights, used for IDs outside _CLASS_NAMES
    model_names = getattr(result, "names", None)

    # Format results
    detections = []
    for (x1, y1, x2, y2), score, class_id in zip(boxes, scores, class_ids):
        detections.append({
            "box": {
                "xmin": int(x1),
                "ymin": int(y1),
                "xmax": int(x2),
                "ymax": int(y2)
            },
            "score": float(score),
            "label": _label_for(int(class_id), model_names)
        })

    return {"detections": detections}

def visualize(image_bytes, detections):
    """
    Visualize the detections on the image

    Draws a green rectangle and a ``"label: score"`` caption for every entry
    in ``detections["detections"]`` and re-encodes the image as JPEG.
    This is best-effort: any failure yields ``None`` rather than an exception.

    Args:
        image_bytes: Image file bytes
        detections: Detection results; a dict with a ``"detections"`` list
            whose items carry ``"box"`` (``xmin``/``ymin``/``xmax``/``ymax``),
            ``"score"`` and ``"label"``

    Returns:
        str: Base64 encoded image with visualizations, or ``None`` when
        OpenCV is unavailable, the bytes cannot be decoded, the detections
        are malformed, or JPEG encoding fails
    """
    import logging

    log = logging.getLogger(__name__)

    try:
        import cv2

        # Load image
        nparr = np.frombuffer(image_bytes, np.uint8)
        image = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
        if image is None:
            # imdecode signals undecodable input by returning None
            return None

        # Draw boxes
        for det in detections["detections"]:
            box = det["box"]
            score = det["score"]
            label = det["label"]

            # OpenCV drawing calls require integer pixel coordinates
            x1, y1 = int(box["xmin"]), int(box["ymin"])
            x2, y2 = int(box["xmax"]), int(box["ymax"])

            # Keep the caption baseline inside the image for boxes that
            # touch the top edge; otherwise the text is drawn off-canvas.
            text_y = max(y1 - 10, 15)

            cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)
            cv2.putText(image, f"{label}: {score:.2f}", (x1, text_y),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

        # Convert to base64
        ok, buffer = cv2.imencode('.jpg', image)
        if not ok:
            return None
        return base64.b64encode(buffer).decode('utf-8')
    except ImportError:
        # OpenCV is an optional dependency; without it nothing can be drawn.
        log.debug("cv2 is not available; skipping visualization")
        return None
    except Exception:
        # Deliberately broad: visualization must never break the caller, but
        # the failure is logged instead of being silently discarded.
        log.exception("Failed to render detections")
        return None