Skip to content

Functions

Main Functions

The core functions that can be used to visualise the different Class Activation Mapping (CAM) methods are given below.

yolov8_heatmap

This class is used to implement the YOLOv8 target layer.

Args: weight (str): The path to the checkpoint file. device (torch.device): The device to use for inference. Defaults to "cuda:0" if a GPU is available, otherwise "cpu". method (str): The method to use for computing the CAM. Defaults to "EigenGradCAM". layer (list): The indices of the layers to use for computing the CAM. Defaults to [12, 17, 21]. conf_threshold (float): The confidence threshold for detections. Defaults to 0.2. ratio (float): The ratio of maximum scores to return. Defaults to 0.02. show_box (bool): Whether to show bounding boxes with the CAM. Defaults to True. renormalize (bool): Whether to renormalize the CAM to be in the range [0, 1] across the entire image. Defaults to False.

Returns:

Type Description

Calling an instance returns a list of PIL images, one CAM visualization per processed input image.

Source code in YOLOv8_Explainer/core.py
class yolov8_heatmap:
    """
    Generate Class Activation Mapping (CAM) visualizations for a YOLOv8 model.

    Args:
        weight (str): The path to the checkpoint file.
        device (str | torch.device): The device to use for inference.
            Defaults to "cuda:0" if a GPU is available, otherwise "cpu".
        method (str): Name of the CAM class to instantiate. Defaults to "EigenGradCAM".
        layer (list | None): Indices into ``model.model`` of the layers used for
            computing the CAM. ``None`` (the default) selects [12, 17, 21].
        conf_threshold (float): The confidence threshold for detections. Defaults to 0.2.
        ratio (float): The ratio of maximum scores to return; forwarded to
            ``yolov8_target``. Defaults to 0.02.
        show_box (bool): Whether to show bounding boxes with the CAM. Defaults to True.
        renormalize (bool): Whether to renormalize the CAM to be in the range [0, 1]
            across the entire image. Defaults to False.

    Calling an instance with an image path (or a directory of images) returns a
    list with one visualization per processed path; see ``__call__``.
    """

    def __init__(
            self,
            weight: str,
            device=torch.device("cuda:0" if torch.cuda.is_available() else "cpu"),
            method="EigenGradCAM",
            layer=None,
            conf_threshold=0.2,
            ratio=0.02,
            show_box=True,
            renormalize=False,
    ) -> None:
        """
        Load the model, build the CAM object and store the configuration.

        The raw checkpoint is only read to obtain the class names and is not
        kept on the instance.
        """
        # Accept "cuda:0"-style strings as well as torch.device objects;
        # ``.type`` is read below and only exists on torch.device.
        device = torch.device(device)
        # A fresh list per instance avoids sharing one mutable default.
        layer = [12, 17, 21] if layer is None else list(layer)
        backward_type = "all"

        # Only the class names are needed from this load, so keep it on the CPU.
        ckpt = torch.load(weight, map_location="cpu")
        model_names = ckpt['model'].names
        del ckpt

        model = attempt_load_weights(weight, device)
        model.info()
        for param in model.parameters():
            param.requires_grad_(True)
        model.eval()

        target = yolov8_target(backward_type, conf_threshold, ratio)
        target_layers = [model.model[idx] for idx in layer]

        # NOTE(review): eval() resolves the CAM class from its name and will
        # execute arbitrary expressions; never pass untrusted text as `method`.
        cam = eval(method)(model, target_layers,
                           use_cuda=device.type == 'cuda')
        cam.activations_and_grads = ActivationsAndGradients(
            model, target_layers, None)

        # One random colour per class name, used when drawing boxes.
        colors = np.random.uniform(
            0, 255, size=(len(model_names), 3)).astype(int)

        # Explicit attributes instead of ``self.__dict__.update(locals())``,
        # which also stored ``self``, the loop variable and the checkpoint.
        self.weight = weight
        self.device = device
        self.method = cam
        self.layer = layer
        self.conf_threshold = conf_threshold
        self.ratio = ratio
        self.show_box = show_box
        self.renormalize = renormalize
        self.backward_type = backward_type
        self.model_names = model_names
        self.model = model
        self.target = target
        self.target_layers = target_layers
        self.colors = colors

    def post_process(self, result):
        """
        Perform non-maximum suppression on the detections and process results.

        Args:
            result (torch.Tensor): The raw detections from the model.

        Returns:
            torch.Tensor: Detections of the first image whose value in column 4
            is at least ``self.conf_threshold``; an empty ``(0, 6)`` tensor
            when nothing was detected.
        """
        processed_result = non_max_suppression(
            result,
            conf_thres=self.conf_threshold,
            iou_thres=0.45,
        )

        # No batches at all, or an empty first batch: nothing to return.
        if len(processed_result) == 0 or processed_result[0].numel() == 0:
            return torch.empty(0, 6)

        # Only the first image of the batch is considered.
        detections = processed_result[0]
        mask = detections[:, 4] >= self.conf_threshold
        return detections[mask]

    def draw_detections(self, box, color, name, img):
        """
        Draw one bounding box and its label on an image.

        Args:
            box (torch.Tensor or np.ndarray): The bounding box coordinates in the format [x1, y1, x2, y2]
            color (list): Three channel values for the box and label colour.
            name (str): The label for the bounding box.
            img (np.ndarray): The image on which to draw the bounding box

        Returns:
            np.ndarray: The image with the bounding box drawn.
        """
        font = cv2.FONT_HERSHEY_SIMPLEX
        font_scale = 0.8
        thickness = 2
        bgr = tuple(int(channel) for channel in color)

        # OpenCV needs integer pixel coordinates.
        xmin, ymin, xmax, ymax = map(int, box[:4])
        cv2.rectangle(img, (xmin, ymin), (xmax, ymax), bgr, thickness)

        # The label normally sits 5 px above the box; for boxes touching the
        # top edge that would be off-canvas, so keep the baseline low enough
        # for the text to stay inside the image.
        (_, text_height), _ = cv2.getTextSize(name, font, font_scale, thickness)
        label_y = max(ymin - 5, text_height + 2)
        cv2.putText(img, name, (xmin, label_y), font, font_scale, bgr,
                    thickness, lineType=cv2.LINE_AA)

        return img

    def renormalize_cam_in_bounding_boxes(
            self,
            boxes: np.ndarray,  # type: ignore
            image_float_np: np.ndarray,  # type: ignore
            grayscale_cam: np.ndarray,  # type: ignore
    ) -> np.ndarray:
        """
        Normalize the CAM inside every bounding box, leave it zero outside,
        and render the result on the image.

        Args:
            boxes (np.ndarray): The bounding boxes as rows of [x1, y1, x2, y2].
            image_float_np (np.ndarray): The image as a numpy array of floats in the range [0, 1].
            grayscale_cam (np.ndarray): The CAM as a numpy array of floats in the range [0, 1].

        Returns:
            np.ndarray: The output of ``show_cam_on_image`` for the image and
            the renormalized CAM.
        """
        height, width = grayscale_cam.shape[0], grayscale_cam.shape[1]
        renormalized_cam = np.zeros(grayscale_cam.shape, dtype=np.float32)
        for x1, y1, x2, y2 in boxes:
            # Clamp to the CAM extent; slice ends are exclusive, so the upper
            # bound is the full width/height.
            x1, y1 = max(int(x1), 0), max(int(y1), 0)
            x2, y2 = min(int(x2), width), min(int(y2), height)
            # A box with no area yields an empty slice that cannot be scaled.
            if x2 <= x1 or y2 <= y1:
                continue
            renormalized_cam[y1:y2, x1:x2] = scale_cam_image(
                grayscale_cam[y1:y2, x1:x2].copy())
        renormalized_cam = scale_cam_image(renormalized_cam)
        return show_cam_on_image(
            image_float_np, renormalized_cam, use_rgb=True)

    def renormalize_cam(self, boxes, image_float_np, grayscale_cam):
        """Normalize the CAM across the entire image and render it.

        ``boxes`` is accepted but not used by this method.
        """
        renormalized_cam = scale_cam_image(grayscale_cam)
        return show_cam_on_image(
            image_float_np, renormalized_cam, use_rgb=True)

    def process(self, img_path):
        """Process the input image and generate CAM visualization.

        Args:
            img_path (str): Path of the image file to read.

        Returns:
            PIL.Image.Image | None: The rendered visualization, or ``None``
            when the CAM computation raised ``AttributeError``.

        Raises:
            OSError: If ``cv2.imread`` could not read ``img_path``.
        """
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise OSError(f"Could not read image: {img_path!r}")
        img = letterbox(img)[0]
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = np.float32(img) / 255.0

        # HWC image -> 1 x C x H x W tensor on the configured device.
        tensor = (
            torch.from_numpy(np.transpose(img, axes=[2, 0, 1]))
            .unsqueeze(0)
            .to(self.device)
        )

        try:
            grayscale_cam = self.method(tensor, [self.target])
        except AttributeError as e:
            # Best effort: report the problem and give up on this image.
            print(e)
            return None

        grayscale_cam = grayscale_cam[0, :]

        raw_pred = self.model(tensor)[0]
        pred = non_max_suppression(
            raw_pred,
            conf_thres=self.conf_threshold,
            iou_thres=0.45,
        )[0]

        if self.renormalize:
            cam_image = self.renormalize_cam(
                pred[:, :4].cpu().detach().numpy().astype(np.int32),
                img,
                grayscale_cam,
            )
        else:
            cam_image = show_cam_on_image(img, grayscale_cam, use_rgb=True)

        if self.show_box:
            for detection in pred:
                detection = detection.cpu().detach().numpy()
                # Column 5 is read as the class index.
                class_index = int(detection[5])
                cam_image = self.draw_detections(
                    detection[:4],
                    self.colors[class_index],
                    f"{self.model_names[class_index]}",
                    cam_image,
                )

        return Image.fromarray(cam_image)

    def __call__(self, img_path):
        """Generate CAM visualizations for one or more images.

        Args:
            img_path (str): Path to the input image or directory containing images.

        Returns:
            list: One ``process`` result per image. Directory entries are
            processed in sorted name order.
        """
        if os.path.isdir(img_path):
            return [
                self.process(os.path.join(img_path, entry))
                for entry in sorted(os.listdir(img_path))
            ]
        return [self.process(img_path)]

__call__(img_path)

Generate CAM visualizations for one or more images.

Parameters:

Name Type Description Default
img_path str

Path to the input image or directory containing images.

required

Returns:

Type Description

list: One CAM visualization (the result of process) per input image.

Source code in YOLOv8_Explainer/core.py
def __call__(self, img_path):
    """Generate CAM visualizations for one or more images.

    Args:
        img_path (str): Path to the input image or directory containing images.

    Returns:
        list: One ``self.process`` result per image. For a directory, entries
        are processed in sorted name order so the output order is stable.
    """
    if os.path.isdir(img_path):
        # os.listdir gives no ordering guarantee; sort for reproducible output.
        return [
            self.process(os.path.join(img_path, entry))
            for entry in sorted(os.listdir(img_path))
        ]
    return [self.process(img_path)]

__init__(weight, device=torch.device('cuda:0' if torch.cuda.is_available() else 'cpu'), method='EigenGradCAM', layer=[12, 17, 21], conf_threshold=0.2, ratio=0.02, show_box=True, renormalize=False)

Initialize the YOLOv8 heatmap layer.

Source code in YOLOv8_Explainer/core.py
def __init__(
        self,
        weight: str,
        device=torch.device("cuda:0" if torch.cuda.is_available() else "cpu"),
        method="EigenGradCAM",
        layer=None,
        conf_threshold=0.2,
        ratio=0.02,
        show_box=True,
        renormalize=False,
) -> None:
    """
    Initialize the YOLOv8 heatmap layer.

    Args:
        weight (str): The path to the checkpoint file.
        device (str | torch.device): The device to use for inference.
        method (str): Name of the CAM class to instantiate.
        layer (list | None): Indices into ``model.model`` of the target layers.
            ``None`` (the default) selects [12, 17, 21].
        conf_threshold (float): Confidence threshold, forwarded to ``yolov8_target``.
        ratio (float): Forwarded to ``yolov8_target``.
        show_box (bool): Stored as ``self.show_box``.
        renormalize (bool): Stored as ``self.renormalize``.

    The raw checkpoint is only read to obtain the class names and is not kept
    on the instance.
    """
    # Accept "cuda:0"-style strings as well as torch.device objects;
    # ``.type`` is read below and only exists on torch.device.
    device = torch.device(device)
    # A fresh list per instance avoids sharing one mutable default.
    layer = [12, 17, 21] if layer is None else list(layer)
    backward_type = "all"

    # Only the class names are needed from this load, so keep it on the CPU.
    ckpt = torch.load(weight, map_location="cpu")
    model_names = ckpt['model'].names
    del ckpt

    model = attempt_load_weights(weight, device)
    model.info()
    for param in model.parameters():
        param.requires_grad_(True)
    model.eval()

    target = yolov8_target(backward_type, conf_threshold, ratio)
    target_layers = [model.model[idx] for idx in layer]

    # NOTE(review): eval() resolves the CAM class from its name and will
    # execute arbitrary expressions; never pass untrusted text as `method`.
    cam = eval(method)(model, target_layers,
                       use_cuda=device.type == 'cuda')
    cam.activations_and_grads = ActivationsAndGradients(
        model, target_layers, None)

    # One random colour per class name, used when drawing boxes.
    colors = np.random.uniform(
        0, 255, size=(len(model_names), 3)).astype(int)

    # Explicit attributes instead of ``self.__dict__.update(locals())``,
    # which also stored ``self``, the loop variable and the checkpoint.
    self.weight = weight
    self.device = device
    self.method = cam
    self.layer = layer
    self.conf_threshold = conf_threshold
    self.ratio = ratio
    self.show_box = show_box
    self.renormalize = renormalize
    self.backward_type = backward_type
    self.model_names = model_names
    self.model = model
    self.target = target
    self.target_layers = target_layers
    self.colors = colors

draw_detections(box, color, name, img)

Draw a bounding box and its label on an image for a single detection.

Parameters:

Name Type Description Default
box Tensor or ndarray

The bounding box coordinates in the format [x1, y1, x2, y2]

required
color list

The color of the bounding box in the format [B, G, R]

required
name str

The label for the bounding box.

required
img ndarray

The image on which to draw the bounding box

required

Returns:

Type Description

np.ndarray: The image with the bounding box drawn.

Source code in YOLOv8_Explainer/core.py
def draw_detections(self, box, color, name, img):
    """
    Draw one bounding box and its label on an image.

    Args:
        box (torch.Tensor or np.ndarray): The bounding box coordinates in the format [x1, y1, x2, y2]
        color (list): Three channel values for the box and label colour.
        name (str): The label for the bounding box.
        img (np.ndarray): The image on which to draw the bounding box

    Returns:
        np.ndarray: The image with the bounding box drawn.
    """
    font = cv2.FONT_HERSHEY_SIMPLEX
    font_scale = 0.8
    thickness = 2
    bgr = tuple(int(channel) for channel in color)

    # OpenCV needs integer pixel coordinates.
    xmin, ymin, xmax, ymax = map(int, box[:4])
    cv2.rectangle(img, (xmin, ymin), (xmax, ymax), bgr, thickness)

    # The label normally sits 5 px above the box; for boxes touching the top
    # edge that would be off-canvas, so keep the baseline low enough for the
    # text to stay inside the image.
    (_, text_height), _ = cv2.getTextSize(name, font, font_scale, thickness)
    label_y = max(ymin - 5, text_height + 2)
    cv2.putText(img, name, (xmin, label_y), font, font_scale, bgr,
                thickness, lineType=cv2.LINE_AA)

    return img

post_process(result)

Perform non-maximum suppression on the detections and process results.

Parameters:

Name Type Description Default
result Tensor

The raw detections from the model.

required

Returns:

Type Description

torch.Tensor: Filtered and processed detections.

Source code in YOLOv8_Explainer/core.py
def post_process(self, result):
    """
    Run non-maximum suppression and keep the confident detections.

    Args:
        result (torch.Tensor): The raw detections from the model.

    Returns:
        torch.Tensor: Rows of the first image's detections whose value in
        column 4 is at least ``self.conf_threshold``; an empty ``(0, 6)``
        tensor when there are no detections.
    """
    batches = non_max_suppression(
        result,
        conf_thres=self.conf_threshold,
        iou_thres=0.45,
    )

    # Either no batch came back or the first batch holds no rows.
    has_detections = len(batches) != 0 and batches[0].numel() != 0
    if not has_detections:
        return torch.empty(0, 6)

    # Single-image assumption: only the first batch entry is used.
    first_image = batches[0]
    keep = first_image[:, 4] >= self.conf_threshold
    return first_image[keep]

process(img_path)

Process the input image and generate CAM visualization.

Source code in YOLOv8_Explainer/core.py
def process(self, img_path):
    """Process the input image and generate CAM visualization.

    Args:
        img_path (str): Path of the image file to read.

    Returns:
        PIL.Image.Image | None: The rendered visualization, or ``None`` when
        the CAM computation raised ``AttributeError``.

    Raises:
        OSError: If ``cv2.imread`` could not read ``img_path``.
    """
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise OSError(f"Could not read image: {img_path!r}")
    img = letterbox(img)[0]
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = np.float32(img) / 255.0

    # HWC image -> 1 x C x H x W tensor on the configured device.
    tensor = (
        torch.from_numpy(np.transpose(img, axes=[2, 0, 1]))
        .unsqueeze(0)
        .to(self.device)
    )

    try:
        grayscale_cam = self.method(tensor, [self.target])
    except AttributeError as e:
        # Best effort: report the problem and give up on this image.
        print(e)
        return None

    grayscale_cam = grayscale_cam[0, :]

    raw_pred = self.model(tensor)[0]
    pred = non_max_suppression(
        raw_pred,
        conf_thres=self.conf_threshold,
        iou_thres=0.45,
    )[0]

    if self.renormalize:
        cam_image = self.renormalize_cam(
            pred[:, :4].cpu().detach().numpy().astype(np.int32),
            img,
            grayscale_cam,
        )
    else:
        cam_image = show_cam_on_image(img, grayscale_cam, use_rgb=True)

    if self.show_box:
        for detection in pred:
            detection = detection.cpu().detach().numpy()
            # Column 5 is read as the class index.
            class_index = int(detection[5])
            cam_image = self.draw_detections(
                detection[:4],
                self.colors[class_index],
                f"{self.model_names[class_index]}",
                cam_image,
            )

    return Image.fromarray(cam_image)

renormalize_cam(boxes, image_float_np, grayscale_cam)

Normalize the CAM to be in the range [0, 1] across the entire image.

Source code in YOLOv8_Explainer/core.py
def renormalize_cam(self, boxes, image_float_np, grayscale_cam):
    """Rescale the CAM over the whole image and render it on the image.

    ``boxes`` is accepted but not used by this method.
    """
    return show_cam_on_image(
        image_float_np, scale_cam_image(grayscale_cam), use_rgb=True)

renormalize_cam_in_bounding_boxes(boxes, image_float_np, grayscale_cam)

Normalize the CAM to be in the range [0, 1] inside every bounding boxes, and zero outside of the bounding boxes.

Parameters:

Name Type Description Default
boxes ndarray

The bounding boxes.

required
image_float_np ndarray

The image as a numpy array of floats in the range [0, 1].

required
grayscale_cam ndarray

The CAM as a numpy array of floats in the range [0, 1].

required

Returns:

Type Description
ndarray

np.ndarray: The result of show_cam_on_image applied to the input image and the renormalized CAM.

Source code in YOLOv8_Explainer/core.py
def renormalize_cam_in_bounding_boxes(
        self,
        boxes: np.ndarray,  # type: ignore
        image_float_np: np.ndarray,  # type: ignore
        grayscale_cam: np.ndarray,  # type: ignore
) -> np.ndarray:
    """
    Normalize the CAM inside every bounding box, leave it zero outside,
    and render the result on the image.

    Args:
        boxes (np.ndarray): The bounding boxes as rows of [x1, y1, x2, y2].
        image_float_np (np.ndarray): The image as a numpy array of floats in the range [0, 1].
        grayscale_cam (np.ndarray): The CAM as a numpy array of floats in the range [0, 1].

    Returns:
        np.ndarray: The output of ``show_cam_on_image`` for the image and the
        renormalized CAM.
    """
    height, width = grayscale_cam.shape[0], grayscale_cam.shape[1]
    renormalized_cam = np.zeros(grayscale_cam.shape, dtype=np.float32)
    for x1, y1, x2, y2 in boxes:
        # Clamp to the CAM extent; slice ends are exclusive, so the upper
        # bound is the full width/height.
        x1, y1 = max(int(x1), 0), max(int(y1), 0)
        x2, y2 = min(int(x2), width), min(int(y2), height)
        # A box with no area yields an empty slice that cannot be scaled.
        if x2 <= x1 or y2 <= y1:
            continue
        renormalized_cam[y1:y2, x1:x2] = scale_cam_image(
            grayscale_cam[y1:y2, x1:x2].copy())
    renormalized_cam = scale_cam_image(renormalized_cam)
    return show_cam_on_image(
        image_float_np, renormalized_cam, use_rgb=True)

Helper Functions

The functions that can be used to display images and provide various other functionalities can be found here.

display_images(images)

Display a list of PIL images in a grid.

Parameters:

Name Type Description Default
images list[Image]

A list of PIL images to display.

required

Returns:

Type Description

None

Source code in YOLOv8_Explainer/utils.py
def display_images(images):
    """
    Display a list of PIL images side by side in a single row.

    Args:
        images (list[PIL.Image]): A list of PIL images to display.

    Returns:
        None. Nothing is drawn when ``images`` is empty.
    """
    images = list(images)
    if not images:
        # plt.subplots rejects zero columns, so there is nothing to show.
        return

    # squeeze=False always yields a 2-D array of axes, so a single image
    # needs no special case.
    _, axes = plt.subplots(1, len(images), figsize=(15, 7), squeeze=False)
    for ax, img in zip(axes[0], images):
        ax.imshow(img)
        ax.axis('off')
    plt.show()

letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32)

Resize and pad image while meeting stride-multiple constraints.

Parameters:

Name Type Description Default
im ndarray

Input image.

required
new_shape tuple

Desired output shape. Defaults to (640, 640).

(640, 640)
color tuple

Color of the border. Defaults to (114, 114, 114).

(114, 114, 114)
auto bool

Whether to automatically determine padding. Defaults to True.

True
scaleFill bool

Whether to stretch the image to fill the new shape. Defaults to False.

False
scaleup bool

Whether to scale the image up if necessary. Defaults to True.

True
stride int

Stride used to reduce the padding (padding modulo stride) when auto is True. Defaults to 32.

32

Returns:

Name Type Description

numpy.ndarray: Letterboxed image.

tuple

Ratio of the resized image.

tuple

Padding sizes.

Source code in YOLOv8_Explainer/utils.py
def letterbox(
    im: np.ndarray,
    new_shape=(640, 640),
    color=(114, 114, 114),
    auto=True,
    scaleFill=False,
    scaleup=True,
    stride=32,
):
    """
    Resize an image to fit ``new_shape`` and pad the remainder with a border.

    Args:
        im (numpy.ndarray): Input image.
        new_shape (tuple, optional): Desired output shape as (height, width);
            a single int is used for both. Defaults to (640, 640).
        color (tuple, optional): Color of the border. Defaults to (114, 114, 114).
        auto (bool, optional): Reduce the padding to its remainder modulo
            ``stride``. Defaults to True.
        scaleFill (bool, optional): Stretch the image to ``new_shape`` without
            padding; only consulted when ``auto`` is False. Defaults to False.
        scaleup (bool, optional): Whether to scale the image up if necessary. Defaults to True.
        stride (int, optional): Modulus applied to the padding when ``auto``
            is True. Defaults to 32.

    Returns:
        numpy.ndarray: Letterboxed image.
        tuple: (width, height) scale ratios applied to the image.
        tuple: (dw, dh) padding applied to each side, before rounding.

    """
    shape = im.shape[:2]  # (height, width) of the input
    src_h, src_w = shape[0], shape[1]
    if isinstance(new_shape, int):
        new_shape = (new_shape, new_shape)
    dst_h, dst_w = new_shape[0], new_shape[1]

    # Largest uniform scale that still fits inside the target shape.
    scale = min(dst_h / src_h, dst_w / src_w)
    if not scaleup:
        # Shrink only; never enlarge the image.
        scale = min(scale, 1.0)

    ratio = (scale, scale)
    new_unpad = (int(round(src_w * scale)), int(round(src_h * scale)))
    dw = dst_w - new_unpad[0]
    dh = dst_h - new_unpad[1]
    if auto:
        # Keep only the padding remainder with respect to the stride.
        dw = np.mod(dw, stride)
        dh = np.mod(dh, stride)
    elif scaleFill:
        # Stretch to the exact target size, so no padding is left.
        dw = dh = 0.0
        new_unpad = (dst_w, dst_h)
        ratio = (dst_w / src_w, dst_h / src_h)

    # Half of the padding goes on each side.
    dw = dw / 2
    dh = dh / 2

    if (src_w, src_h) != new_unpad:
        im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)

    # The +/-0.1 nudges make an odd total padding split as floor/ceil.
    pad_top = int(round(dh - 0.1))
    pad_bottom = int(round(dh + 0.1))
    pad_left = int(round(dw - 0.1))
    pad_right = int(round(dw + 0.1))
    im = cv2.copyMakeBorder(
        im, pad_top, pad_bottom, pad_left, pad_right,
        cv2.BORDER_CONSTANT, value=color,
    )

    return im, ratio, (dw, dh)