淮安做网站seo,自己如何优化网站排名,如何做电商运营,网站首页收录突然没有了（a）Mask RCNN总体流程
一.Mask RCNN 架构
自己整理了一份Mask RCNN架构图如下，其中绿色模块只有推理过程才会涉及。核心模块包括：数据预处理，骨干网络，区域提议网络，FastRCNN分支，…（a）Mask RCNN总体流程
一.Mask RCNN 架构
自己整理了一份Mask RCNN架构图如下，其中绿色模块只有推理过程才会涉及。核心模块包括：数据预处理、骨干网络、区域提议网络、FastRCNN分支、Mask分支、数据后处理等。
二.网络核心流程
```python
class FasterRCNNBase(nn.Module):
    def __init__(self, backbone, rpn, roi_heads, transform):
        super(FasterRCNNBase, self).__init__()
        self.transform = transform
        self.backbone = backbone
        self.rpn = rpn
        self.roi_heads = roi_heads
        # used only on torchscript mode
        self._has_warned = False

    @torch.jit.unused
    def eager_outputs(self, losses, detections):
        # type: (Dict[str, Tensor], List[Dict[str, Tensor]]) -> Union[Dict[str, Tensor], List[Dict[str, Tensor]]]
        if self.training:
            return losses

        return detections

    def forward(self, images, targets=None):
        # type: (List[Tensor], Optional[List[Dict[str, Tensor]]]) -> Tuple[Dict[str, Tensor], List[Dict[str, Tensor]]]
        if self.training and targets is None:
            raise ValueError("In training mode, targets should be passed")

        if self.training:
            assert targets is not None
            for target in targets:  # 进一步判断传入的target的boxes参数是否符合规定
                boxes = target["boxes"]
                if isinstance(boxes, torch.Tensor):
                    if len(boxes.shape) != 2 or boxes.shape[-1] != 4:
                        raise ValueError("Expected target boxes to be a tensor"
                                         "of shape [N, 4], got {:}.".format(boxes.shape))
                else:
                    raise ValueError("Expected target boxes to be of type "
                                     "Tensor, got {:}.".format(type(boxes)))

        original_image_sizes = torch.jit.annotate(List[Tuple[int, int]], [])
        for img in images:
            val = img.shape[-2:]
            assert len(val) == 2  # 防止输入的是个一维向量
            original_image_sizes.append((val[0], val[1]))
        # original_image_sizes = [img.shape[-2:] for img in images]

        images, targets = self.transform(images, targets)  # 对图像进行预处理

        # print(images.tensors.shape)
        features = self.backbone(images.tensors)  # 将图像输入backbone得到特征图
        if isinstance(features, torch.Tensor):  # 若只在一层特征层上预测，将feature放入有序字典中，并编号为‘0’
            features = OrderedDict([("0", features)])  # 若在多层特征层上预测，传入的就是一个有序字典

        # 将特征层以及标注target信息传入rpn中
        # proposals: List[Tensor], Tensor_shape: [num_proposals, 4],
        # 每个proposals是绝对坐标，且为(x1, y1, x2, y2)格式
        proposals, proposal_losses = self.rpn(images, features, targets)

        # 将rpn生成的数据以及标注target信息传入fast rcnn后半部分
        detections, detector_losses = self.roi_heads(features, proposals, images.image_sizes, targets)

        # 对网络的预测结果进行后处理（主要将bboxes还原到原图像尺度上）
        detections = self.transform.postprocess(detections, images.image_sizes, original_image_sizes)

        losses = {}
        losses.update(detector_losses)
        losses.update(proposal_losses)

        if torch.jit.is_scripting():
            if not self._has_warned:
                warnings.warn("RCNN always returns a (Losses, Detections) tuple in scripting")
                self._has_warned = True
            return losses, detections
        else:
            return self.eager_outputs(losses, detections)

        # if self.training:
        #     return losses
        #
        # return detections
```

FasterRCNNBase是RCNN检测算法的基类，FasterRCNN类要继承FasterRCNNBase类，而MaskRCNN类又要继承FasterRCNN类，所以当实例化一个model并传入数据x时，会调用FasterRCNNBase的forward函数：
```python
model = MaskRCNN(backbone, num_classes)
model(images, targets)
```

FasterRCNNBase的 `__init__()` 函数：

```python
def __init__(self, backbone, rpn, roi_heads, transform):
    super(FasterRCNNBase, self).__init__()
    self.transform = transform
    self.backbone = backbone
    self.rpn = rpn
    self.roi_heads = roi_heads
    # used only on torchscript mode
    self._has_warned = False
```

传入参数包括：
（1）backbone：resnet50、resnet101、resnet50fpn、resnet101fpn；
（2）rpn：区域提议网络；
（3）roi_heads：box roi pooling/align、two MLP head、box predictor、mask roi pool、mask head、mask predictor；
（4）transform：GeneralizedRCNNTransform类的实例，用于数据预处理。
FasterRCNNBase的 `forward()` 函数：

```python
def forward(self, images, targets=None):
    # type: (List[Tensor], Optional[List[Dict[str, Tensor]]]) -> Tuple[Dict[str, Tensor], List[Dict[str, Tensor]]]
    if self.training and targets is None:
        raise ValueError("In training mode, targets should be passed")

    if self.training:
        assert targets is not None
        for target in targets:  # 进一步判断传入的target的boxes参数是否符合规定
            boxes = target["boxes"]
            if isinstance(boxes, torch.Tensor):
                if len(boxes.shape) != 2 or boxes.shape[-1] != 4:
                    raise ValueError("Expected target boxes to be a tensor"
                                     "of shape [N, 4], got {:}.".format(boxes.shape))
            else:
                raise ValueError("Expected target boxes to be of type "
                                 "Tensor, got {:}.".format(type(boxes)))

    original_image_sizes = torch.jit.annotate(List[Tuple[int, int]], [])
    for img in images:
        val = img.shape[-2:]
        assert len(val) == 2  # 防止输入的是个一维向量
        original_image_sizes.append((val[0], val[1]))
    # original_image_sizes = [img.shape[-2:] for img in images]

    images, targets = self.transform(images, targets)  # 对图像进行预处理

    # print(images.tensors.shape)
    features = self.backbone(images.tensors)  # 将图像输入backbone得到特征图
    if isinstance(features, torch.Tensor):  # 若只在一层特征层上预测，将feature放入有序字典中，并编号为‘0’
        features = OrderedDict([("0", features)])  # 若在多层特征层上预测，传入的就是一个有序字典

    # 将特征层以及标注target信息传入rpn中
    # proposals: List[Tensor], Tensor_shape: [num_proposals, 4],
    # 每个proposals是绝对坐标，且为(x1, y1, x2, y2)格式
    proposals, proposal_losses = self.rpn(images, features, targets)

    # 将rpn生成的数据以及标注target信息传入fast rcnn后半部分
    detections, detector_losses = self.roi_heads(features, proposals, images.image_sizes, targets)

    # 对网络的预测结果进行后处理（主要将bboxes还原到原图像尺度上）
    detections = self.transform.postprocess(detections, images.image_sizes, original_image_sizes)

    losses = {}
    losses.update(detector_losses)
    losses.update(proposal_losses)

    if torch.jit.is_scripting():
        if not self._has_warned:
            warnings.warn("RCNN always returns a (Losses, Detections) tuple in scripting")
            self._has_warned = True
        return losses, detections
    else:
        return self.eager_outputs(losses, detections)
```

首先增加一些容错机制，保证输入数据格式符合模型要求；然后将images和targets输入transform中进行数据格式的预处理；然后将images输入到backbone中得到特征图features；将features、images、targets输入rpn网络中得到proposals和proposal_losses；然后将proposals、images、features等输入到roi_heads得到detections和detector_losses；如果在训练模式下，则返回losses（包括proposal_losses和detector_losses），在推理模式下则返回detections。