Functions

Main Functions

The core functions that can be used to visualise the different Class Activation Mapping (CAM) methods are given below.

`yolov8_heatmap`

This class is used to implement the YOLOv8 target layer.

Args: weight (str): The path to the checkpoint file. device (str): The device to use for inference. Defaults to "cuda:0" if a GPU is available, otherwise "cpu". method (str): The method to use for computing the CAM. Defaults to "EigenGradCAM". layer (list): The indices of the layers to use for computing the CAM. Defaults to [12, 17, 21]. conf_threshold (float): The confidence threshold for detections. Defaults to 0.2. ratio (float): The ratio of maximum scores to return. Defaults to 0.02. show_box (bool): Whether to show bounding boxes with the CAM. Defaults to True. renormalize (bool): Whether to renormalize the CAM to be in the range [0, 1] across the entire image. Defaults to False.

Returns:

Type	Description
	Calling an instance with an image path returns a list of PIL images, one per processed image.

Source code in YOLOv8_Explainer/core.py

class yolov8_heatmap:
    """
    Generate Class Activation Mapping (CAM) visualisations for a YOLOv8 model.

    Args:
        weight (str): The path to the checkpoint file.
        device (str or torch.device): The device to use for inference.
            Defaults to "cuda:0" if a GPU is available, otherwise "cpu".
        method (str): Name of the CAM class to instantiate. Defaults to "EigenGradCAM".
        layer (list): Indices into ``model.model`` of the layers used for the CAM.
            Defaults to [12, 17, 21].
        conf_threshold (float): The confidence threshold for detections. Defaults to 0.2.
        ratio (float): The ratio of maximum scores to return (forwarded to
            ``yolov8_target``). Defaults to 0.02.
        show_box (bool): Whether to show bounding boxes with the CAM. Defaults to True.
        renormalize (bool): Whether to renormalize the CAM to be in the range [0, 1]
            across the entire image. Defaults to False.

    Calling an instance with an image path (or a directory of images) returns a
    list of PIL images, one per processed image.
    """

    def __init__(
            self,
            weight: str,
            device=torch.device("cuda:0" if torch.cuda.is_available() else "cpu"),
            method="EigenGradCAM",
            layer=None,
            conf_threshold=0.2,
            ratio=0.02,
            show_box=True,
            renormalize=False,
    ) -> None:
        """
        Load the model, build the CAM method and store the configuration.

        ``layer=None`` selects the default layers ``[12, 17, 21]``; a fresh list is
        created per instance so instances never share a mutable default.
        """
        # Accept plain strings such as "cpu" as well as torch.device objects;
        # ``device.type`` is needed further down.
        device = torch.device(device)
        layer = [12, 17, 21] if layer is None else list(layer)
        backward_type = "all"

        # The checkpoint is only read for the class names, so load it on the CPU
        # (works on machines without the device it was saved from) and do not
        # keep it on the instance afterwards.
        ckpt = torch.load(weight, map_location="cpu")
        model_names = ckpt['model'].names
        del ckpt

        model = attempt_load_weights(weight, device)
        model.info()
        # Gradients must flow through the weights for gradient-based CAM methods.
        for param in model.parameters():
            param.requires_grad_(True)
        model.eval()

        target = yolov8_target(backward_type, conf_threshold, ratio)
        target_layers = [model.model[idx] for idx in layer]

        # SECURITY NOTE(review): ``eval`` executes ``method`` as Python code. Only
        # pass trusted CAM class names here; never forward untrusted input.
        cam = eval(method)(model, target_layers,
                           use_cuda=device.type == 'cuda')
        cam.activations_and_grads = ActivationsAndGradients(
            model, target_layers, None)

        # One random colour per class name, used when drawing boxes.
        colors = np.random.uniform(
            0, 255, size=(len(model_names), 3)).astype(int)

        self.weight = weight
        self.device = device
        self.method = cam
        self.layer = layer
        self.conf_threshold = conf_threshold
        self.ratio = ratio
        self.show_box = show_box
        self.renormalize = renormalize
        self.backward_type = backward_type
        self.model_names = model_names
        self.model = model
        self.target = target
        self.target_layers = target_layers
        self.colors = colors

    def post_process(self, result):
        """
        Perform non-maximum suppression on the detections and process results.

        Args:
            result (torch.Tensor): The raw detections from the model.

        Returns:
            torch.Tensor: Detections of the first batch element whose column 4
            is at least ``self.conf_threshold``; an empty ``(0, 6)`` tensor when
            there are none.
        """
        processed_result = non_max_suppression(
            result,
            conf_thres=self.conf_threshold,
            iou_thres=0.45,
        )

        # No detections: return an empty tensor with 6 columns.
        if len(processed_result) == 0 or processed_result[0].numel() == 0:
            return torch.empty(0, 6)

        # Only the first batch element is considered (single-image usage).
        detections = processed_result[0]
        mask = detections[:, 4] >= self.conf_threshold
        return detections[mask]

    def draw_detections(self, box, color, name, img):
        """
        Draw one bounding box and its label on an image.

        Args:
            box (torch.Tensor or np.ndarray): Box coordinates; the first four
                values are read as [x1, y1, x2, y2].
            color (list): Three channel values for the box and label colour.
            name (str): The label for the bounding box.
            img (np.ndarray): The image on which to draw the bounding box.

        Returns:
            np.ndarray: The image that was passed in, with the box drawn on it.
        """
        # OpenCV needs plain Python ints for coordinates and colours.
        xmin, ymin, xmax, ymax = map(int, box[:4])
        line_color = tuple(int(x) for x in color)

        cv2.rectangle(img, (xmin, ymin), (xmax, ymax), line_color, 2)
        cv2.putText(img, name, (xmin, ymin - 5),
                    cv2.FONT_HERSHEY_SIMPLEX,
                    0.8, line_color, 2,
                    lineType=cv2.LINE_AA)

        return img

    def renormalize_cam_in_bounding_boxes(
            self,
            boxes: np.ndarray,  # type: ignore
            image_float_np: np.ndarray,  # type: ignore
            grayscale_cam: np.ndarray,  # type: ignore
    ) -> np.ndarray:
        """
        Rescale the CAM inside every bounding box and zero it elsewhere.

        Args:
            boxes (np.ndarray): Integer boxes as rows of (x1, y1, x2, y2).
            image_float_np (np.ndarray): The image as a numpy array of floats in the range [0, 1].
            grayscale_cam (np.ndarray): The CAM as a numpy array of floats in the range [0, 1].

        Returns:
            np.ndarray: The result of ``show_cam_on_image`` for the image and the
            renormalized CAM.
        """
        renormalized_cam = np.zeros(grayscale_cam.shape, dtype=np.float32)
        cam_h, cam_w = grayscale_cam.shape[:2]
        for x1, y1, x2, y2 in boxes:
            x1, y1 = max(x1, 0), max(y1, 0)
            x2, y2 = min(cam_w - 1, x2), min(cam_h - 1, y2)
            if x2 <= x1 or y2 <= y1:
                # Degenerate or fully out-of-frame box: the slice would be empty,
                # so there is nothing to rescale.
                continue
            renormalized_cam[y1:y2, x1:x2] = scale_cam_image(
                grayscale_cam[y1:y2, x1:x2].copy())
        renormalized_cam = scale_cam_image(renormalized_cam)
        return show_cam_on_image(
            image_float_np, renormalized_cam, use_rgb=True)

    def renormalize_cam(self, boxes, image_float_np, grayscale_cam):
        """Normalize the CAM to be in the range [0, 1]
        across the entire image.

        ``boxes`` is accepted for signature parity with
        ``renormalize_cam_in_bounding_boxes`` but is not used.
        """
        renormalized_cam = scale_cam_image(grayscale_cam)
        return show_cam_on_image(
            image_float_np, renormalized_cam, use_rgb=True)

    def process(self, img_path):
        """Process one input image and generate its CAM visualization.

        Args:
            img_path (str): Path to a single image file.

        Returns:
            PIL.Image.Image or None: The CAM image (with boxes when
            ``self.show_box`` is set), or None when the CAM method raised an
            AttributeError.

        Raises:
            ValueError: If the image cannot be read.
        """
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise ValueError(f"Could not read image: {img_path}")
        img = letterbox(img)[0]
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = np.float32(img) / 255.0

        # HWC float image -> 1xCxHxW tensor on the configured device.
        tensor = (
            torch.from_numpy(np.transpose(img, axes=[2, 0, 1]))
            .unsqueeze(0)
            .to(self.device)
        )

        try:
            grayscale_cam = self.method(tensor, [self.target])
        except AttributeError as e:
            # Best-effort: report the problem and yield no image.
            print(e)
            return

        grayscale_cam = grayscale_cam[0, :]

        pred1 = self.model(tensor)[0]
        pred = non_max_suppression(
            pred1,
            conf_thres=self.conf_threshold,
            iou_thres=0.45
        )[0]

        if self.renormalize:
            cam_image = self.renormalize_cam(
                pred[:, :4].cpu().detach().numpy().astype(np.int32),
                img,
                grayscale_cam
            )
        else:
            cam_image = show_cam_on_image(img, grayscale_cam, use_rgb=True)

        if self.show_box:
            for detection in pred:
                detection = detection.cpu().detach().numpy()
                # Column 5 holds the class index of the detection.
                class_index = int(detection[5])
                cam_image = self.draw_detections(
                    detection[:4],
                    self.colors[class_index],
                    f"{self.model_names[class_index]}",
                    cam_image,
                )

        return Image.fromarray(cam_image)

    def __call__(self, img_path):
        """Generate CAM visualizations for one or more images.

        Args:
            img_path (str): Path to the input image or directory containing images.

        Returns:
            list: The results of ``process`` — one entry per file of the
            directory (in sorted name order, sub-directories skipped), or a
            single-element list when ``img_path`` is a file.
        """
        if os.path.isdir(img_path):
            candidates = (
                os.path.join(img_path, entry)
                for entry in sorted(os.listdir(img_path))
            )
            return [self.process(path)
                    for path in candidates if os.path.isfile(path)]
        return [self.process(img_path)]

`__call__(img_path)`

Generate CAM visualizations for one or more images.

Parameters:

Name	Type	Description	Default
`img_path`	`str`	Path to the input image or directory containing images.	required

Returns:

Type	Description
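`list`	The generated CAM images, one per processed image (a single-element list when `img_path` is a file).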

Source code in YOLOv8_Explainer/core.py

def __call__(self, img_path):
    """Generate CAM visualizations for one or more images.

    Args:
        img_path (str): Path to the input image or directory containing images.

    Returns:
        list: The results of ``process`` — one entry per file of the
        directory (in sorted name order, sub-directories skipped), or a
        single-element list when ``img_path`` is a file.
    """
    if os.path.isdir(img_path):
        # os.path.join avoids doubled separators; sorting makes the output
        # order deterministic across platforms.
        candidates = (
            os.path.join(img_path, entry)
            for entry in sorted(os.listdir(img_path))
        )
        return [self.process(path)
                for path in candidates if os.path.isfile(path)]
    return [self.process(img_path)]

`__init__(weight, device=torch.device('cuda:0' if torch.cuda.is_available() else 'cpu'), method='EigenGradCAM', layer=[12, 17, 21], conf_threshold=0.2, ratio=0.02, show_box=True, renormalize=False)`

Initialize the YOLOv8 heatmap layer.

Source code in YOLOv8_Explainer/core.py

def __init__(
        self,
        weight: str,
        device=torch.device("cuda:0" if torch.cuda.is_available() else "cpu"),
        method="EigenGradCAM",
        layer=None,
        conf_threshold=0.2,
        ratio=0.02,
        show_box=True,
        renormalize=False,
) -> None:
    """
    Load the model, build the CAM method and store the configuration.

    Args:
        weight (str): The path to the checkpoint file.
        device (str or torch.device): The device to use for inference.
        method (str): Name of the CAM class to instantiate.
        layer (list): Indices into ``model.model`` of the layers used for the
            CAM. ``None`` selects the default ``[12, 17, 21]``.
        conf_threshold (float): The confidence threshold for detections.
        ratio (float): Forwarded to ``yolov8_target``.
        show_box (bool): Whether to draw bounding boxes on the CAM image.
        renormalize (bool): Whether to renormalize the CAM across the image.
    """
    # Accept plain strings such as "cpu" as well as torch.device objects;
    # ``device.type`` is needed further down.
    device = torch.device(device)
    # A fresh list per instance, so instances never share a mutable default.
    layer = [12, 17, 21] if layer is None else list(layer)
    backward_type = "all"

    # The checkpoint is only read for the class names, so load it on the CPU
    # (works on machines without the device it was saved from) and do not
    # keep it on the instance afterwards.
    ckpt = torch.load(weight, map_location="cpu")
    model_names = ckpt['model'].names
    del ckpt

    model = attempt_load_weights(weight, device)
    model.info()
    # Gradients must flow through the weights for gradient-based CAM methods.
    for param in model.parameters():
        param.requires_grad_(True)
    model.eval()

    target = yolov8_target(backward_type, conf_threshold, ratio)
    target_layers = [model.model[idx] for idx in layer]

    # SECURITY NOTE(review): ``eval`` executes ``method`` as Python code. Only
    # pass trusted CAM class names here; never forward untrusted input.
    cam = eval(method)(model, target_layers,
                       use_cuda=device.type == 'cuda')
    cam.activations_and_grads = ActivationsAndGradients(
        model, target_layers, None)

    # One random colour per class name, used when drawing boxes.
    colors = np.random.uniform(
        0, 255, size=(len(model_names), 3)).astype(int)

    self.weight = weight
    self.device = device
    self.method = cam
    self.layer = layer
    self.conf_threshold = conf_threshold
    self.ratio = ratio
    self.show_box = show_box
    self.renormalize = renormalize
    self.backward_type = backward_type
    self.model_names = model_names
    self.model = model
    self.target = target
    self.target_layers = target_layers
    self.colors = colors

`draw_detections(box, color, name, img)`

Draw a bounding box and its label on an image for a single detection.

Parameters:

Name	Type	Description	Default
`box`	`Tensor or ndarray`	The bounding box coordinates in the format [x1, y1, x2, y2]	required
`color`	`list`	The color of the bounding box in the format [B, G, R]	required
`name`	`str`	The label for the bounding box.	required
`img`	`ndarray`	The image on which to draw the bounding box	required

Returns:

Type	Description
	np.ndarray: The image with the bounding box drawn.

Source code in YOLOv8_Explainer/core.py

def draw_detections(self, box, color, name, img):
    """
    Draw one bounding box and its label on an image.

    Args:
        box (torch.Tensor or np.ndarray): Box coordinates; the first four
            values are read as [x1, y1, x2, y2].
        color (list): Three channel values for the box and label colour.
        name (str): The label for the bounding box.
        img (np.ndarray): The image on which to draw the bounding box.

    Returns:
        np.ndarray: The image that was passed in, with the box drawn on it.
    """
    # OpenCV needs plain Python ints for coordinates and colours.
    left, top, right, bottom = (int(coord) for coord in box[:4])
    box_color = tuple(int(channel) for channel in color)
    label_color = tuple(int(channel) for channel in color)

    # Box outline, 2 px thick.
    cv2.rectangle(img, (left, top), (right, bottom), box_color, 2)

    # Label anchored 5 px above the top-left corner of the box.
    label_origin = (left, top - 5)
    cv2.putText(
        img,
        name,
        label_origin,
        cv2.FONT_HERSHEY_SIMPLEX,
        0.8,
        label_color,
        2,
        lineType=cv2.LINE_AA,
    )

    return img

`post_process(result)`

Perform non-maximum suppression on the detections and process results.

Parameters:

Name	Type	Description	Default
`result`	`Tensor`	The raw detections from the model.	required

Returns:

Type	Description
	torch.Tensor: Filtered and processed detections.

Source code in YOLOv8_Explainer/core.py

def post_process(self, result):
    """
    Run non-maximum suppression and keep the confident detections.

    Args:
        result (torch.Tensor): The raw detections from the model.

    Returns:
        torch.Tensor: Detections of the first batch element whose column 4
        is at least ``self.conf_threshold``; an empty ``(0, 6)`` tensor when
        there are none.
    """
    nms_batches = non_max_suppression(
        result,
        conf_thres=self.conf_threshold,
        iou_thres=0.45,
    )

    # Nothing came back at all: hand out an empty 6-column tensor.
    if len(nms_batches) == 0:
        return torch.empty(0, 6)

    # Only the first batch element is considered (single-image usage).
    first_image = nms_batches[0]
    if first_image.numel() == 0:
        return torch.empty(0, 6)

    # Column 4 is compared against the configured confidence threshold.
    confident = first_image[:, 4] >= self.conf_threshold
    return first_image[confident]

`process(img_path)`

Process the input image and generate CAM visualization.

Source code in YOLOv8_Explainer/core.py

def process(self, img_path):
    """Process one input image and generate its CAM visualization.

    Args:
        img_path (str): Path to a single image file.

    Returns:
        PIL.Image.Image or None: The CAM image (with boxes when
        ``self.show_box`` is set), or None when the CAM method raised an
        AttributeError.

    Raises:
        ValueError: If the image cannot be read.
    """
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise ValueError(f"Could not read image: {img_path}")
    img = letterbox(img)[0]
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = np.float32(img) / 255.0

    # HWC float image -> 1xCxHxW tensor on the configured device.
    tensor = (
        torch.from_numpy(np.transpose(img, axes=[2, 0, 1]))
        .unsqueeze(0)
        .to(self.device)
    )

    try:
        grayscale_cam = self.method(tensor, [self.target])
    except AttributeError as e:
        # Best-effort: report the problem and yield no image.
        print(e)
        return

    grayscale_cam = grayscale_cam[0, :]

    pred1 = self.model(tensor)[0]
    pred = non_max_suppression(
        pred1,
        conf_thres=self.conf_threshold,
        iou_thres=0.45
    )[0]

    if self.renormalize:
        cam_image = self.renormalize_cam(
            pred[:, :4].cpu().detach().numpy().astype(np.int32),
            img,
            grayscale_cam
        )
    else:
        cam_image = show_cam_on_image(img, grayscale_cam, use_rgb=True)

    if self.show_box:
        for detection in pred:
            detection = detection.cpu().detach().numpy()
            # Column 5 holds the class index of the detection.
            class_index = int(detection[5])
            cam_image = self.draw_detections(
                detection[:4],  # Box coordinates
                self.colors[class_index],  # Color for this class
                f"{self.model_names[class_index]}",  # Class name as label
                cam_image,
            )

    return Image.fromarray(cam_image)

`renormalize_cam(boxes, image_float_np, grayscale_cam)`

Normalize the CAM to be in the range [0, 1] across the entire image.

Source code in YOLOv8_Explainer/core.py

def renormalize_cam(self, boxes, image_float_np, grayscale_cam):
    """Rescale the CAM over the whole image and render it on the image.

    Args:
        boxes: Accepted but not used by this method.
        image_float_np (np.ndarray): The image passed on to ``show_cam_on_image``.
        grayscale_cam (np.ndarray): The CAM passed through ``scale_cam_image``.

    Returns:
        The result of ``show_cam_on_image`` for the image and the rescaled CAM.
    """
    return show_cam_on_image(
        image_float_np,
        scale_cam_image(grayscale_cam),
        use_rgb=True,
    )

`renormalize_cam_in_bounding_boxes(boxes, image_float_np, grayscale_cam)`

Normalize the CAM to be in the range [0, 1] inside every bounding boxes, and zero outside of the bounding boxes.

Parameters:

Name	Type	Description	Default
`boxes`	`ndarray`	The bounding boxes.	required
`image_float_np`	`ndarray`	The image as a numpy array of floats in the range [0, 1].	required
`grayscale_cam`	`ndarray`	The CAM as a numpy array of floats in the range [0, 1].	required

Returns:

Type	Description
`ndarray`	The image returned by `show_cam_on_image` for the renormalized CAM.

Source code in YOLOv8_Explainer/core.py

def renormalize_cam_in_bounding_boxes(
        self,
        boxes: np.ndarray,  # type: ignore
        image_float_np: np.ndarray,  # type: ignore
        grayscale_cam: np.ndarray,  # type: ignore
) -> np.ndarray:
    """
    Rescale the CAM inside every bounding box and zero it elsewhere.

    Args:
        boxes (np.ndarray): Integer boxes as rows of (x1, y1, x2, y2).
        image_float_np (np.ndarray): The image as a numpy array of floats in the range [0, 1].
        grayscale_cam (np.ndarray): The CAM as a numpy array of floats in the range [0, 1].

    Returns:
        np.ndarray: The result of ``show_cam_on_image`` for the image and the
        renormalized CAM.
    """
    renormalized_cam = np.zeros(grayscale_cam.shape, dtype=np.float32)
    cam_h, cam_w = grayscale_cam.shape[:2]
    for x1, y1, x2, y2 in boxes:
        # Clamp the box to the CAM extent before slicing.
        x1, y1 = max(x1, 0), max(y1, 0)
        x2, y2 = min(cam_w - 1, x2), min(cam_h - 1, y2)
        if x2 <= x1 or y2 <= y1:
            # Degenerate or fully out-of-frame box: the slice would be empty,
            # so there is nothing to rescale.
            continue
        renormalized_cam[y1:y2, x1:x2] = scale_cam_image(
            grayscale_cam[y1:y2, x1:x2].copy())
    renormalized_cam = scale_cam_image(renormalized_cam)
    return show_cam_on_image(
        image_float_np, renormalized_cam, use_rgb=True)

Helper Functions

The functions that can be used to display images and provide various other functionalities can be found here.

`display_images(images)`

Display a list of PIL images in a grid.

Parameters:

Name	Type	Description	Default
`images`	`list[Image]`	A list of PIL images to display.	required

Returns:

Type	Description
	None

Source code in YOLOv8_Explainer/utils.py

def display_images(images):
    """
    Display a list of PIL images side by side in a single row.

    ``None`` entries are skipped; when nothing remains to show the function
    returns without opening a figure.

    Args:
        images (list[PIL.Image]): A list of PIL images to display.

    Returns:
        None
    """
    drawable = [img for img in images if img is not None]
    if not drawable:
        # plt.subplots rejects a zero-column grid, so bail out early.
        return

    fig, axes = plt.subplots(1, len(drawable), figsize=(15, 7))
    if len(drawable) == 1:
        # A single subplot comes back as a bare Axes, not a sequence.
        axes = [axes]
    for ax, img in zip(axes, drawable):
        ax.imshow(img)
        ax.axis('off')
    plt.show()

`letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32)`

Resize and pad image while meeting stride-multiple constraints.

Parameters:

Name	Type	Description	Default
`im`	`ndarray`	Input image.	required
`new_shape`	`tuple`	Desired output shape. Defaults to (640, 640).	`(640, 640)`
`color`	`tuple`	Color of the border. Defaults to (114, 114, 114).	`(114, 114, 114)`
`auto`	`bool`	Whether to automatically determine padding. Defaults to True.	`True`
`scaleFill`	`bool`	Whether to stretch the image to fill the new shape. Defaults to False.	`False`
`scaleup`	`bool`	Whether to scale the image up if necessary. Defaults to True.	`True`
`stride`	`int`	Stride of the sliding window. Defaults to 32.	`32`

Returns:

Name	Type	Description
		numpy.ndarray: Letterboxed image.
`tuple`		Ratio of the resized image.
`tuple`		Padding sizes.

Source code in YOLOv8_Explainer/utils.py

def letterbox(
    im: np.ndarray,
    new_shape=(640, 640),
    color=(114, 114, 114),
    auto=True,
    scaleFill=False,
    scaleup=True,
    stride=32,
):
    """
    Resize an image to fit ``new_shape`` and pad the remainder with a border.

    Args:
        im (numpy.ndarray): Input image; its first two dimensions are read as
            (height, width).
        new_shape (tuple or int, optional): Target (height, width); an int is
            used for both. Defaults to (640, 640).
        color (tuple, optional): Border colour. Defaults to (114, 114, 114).
        auto (bool, optional): Reduce the padding to its remainder modulo
            ``stride``. Defaults to True.
        scaleFill (bool, optional): Stretch to ``new_shape`` without padding;
            only consulted when ``auto`` is False. Defaults to False.
        scaleup (bool, optional): Allow scale factors above 1. Defaults to True.
        stride (int, optional): Modulus used for ``auto`` padding. Defaults to 32.

    Returns:
        tuple: ``(image, ratio, (dw, dh))`` — the padded image, the
        (width, height) scale ratios and the per-side padding.
    """
    src_h, src_w = im.shape[:2]
    if isinstance(new_shape, int):
        new_shape = (new_shape, new_shape)
    target_h, target_w = new_shape[0], new_shape[1]

    # Uniform scale factor (new / old) that fits both dimensions.
    r = min(target_h / src_h, target_w / src_w)
    if not scaleup:
        # Shrink only, never enlarge.
        r = min(r, 1.0)

    ratio = (r, r)
    new_unpad = (int(round(src_w * r)), int(round(src_h * r)))
    dw = target_w - new_unpad[0]
    dh = target_h - new_unpad[1]

    if auto:
        # Keep only the padding remainder modulo the stride.
        dw, dh = np.mod(dw, stride), np.mod(dh, stride)
    elif scaleFill:
        # Stretch to the target size: no padding, independent ratios.
        dw, dh = 0.0, 0.0
        new_unpad = (target_w, target_h)
        ratio = (target_w / src_w, target_h / src_h)

    # Split the padding between the two opposite sides.
    dw, dh = dw / 2, dh / 2

    if (src_w, src_h) != new_unpad:
        im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)

    # The +/-0.1 offsets decide which side gets the extra pixel for odd padding.
    top = int(round(dh - 0.1))
    bottom = int(round(dh + 0.1))
    left = int(round(dw - 0.1))
    right = int(round(dw + 0.1))
    im = cv2.copyMakeBorder(
        im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)

    return im, ratio, (dw, dh)

Functions

Main Functions

yolov8_heatmap

__call__(img_path)

__init__(weight, device=torch.device('cuda:0' if torch.cuda.is_available() else 'cpu'), method='EigenGradCAM', layer=[12, 17, 21], conf_threshold=0.2, ratio=0.02, show_box=True, renormalize=False)

draw_detections(box, color, name, img)

post_process(result)

process(img_path)

renormalize_cam(boxes, image_float_np, grayscale_cam)

renormalize_cam_in_bounding_boxes(boxes, image_float_np, grayscale_cam)

Helper Functions

display_images(images)

letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32)

`yolov8_heatmap`

`__call__(img_path)`

`__init__(weight, device=torch.device('cuda:0' if torch.cuda.is_available() else 'cpu'), method='EigenGradCAM', layer=[12, 17, 21], conf_threshold=0.2, ratio=0.02, show_box=True, renormalize=False)`

`draw_detections(box, color, name, img)`

`post_process(result)`

`process(img_path)`

`renormalize_cam(boxes, image_float_np, grayscale_cam)`

`renormalize_cam_in_bounding_boxes(boxes, image_float_np, grayscale_cam)`

`display_images(images)`

`letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32)`