"""Split the segmentation label set into trainval/train/val/test name lists.

Scans ``segfilepath`` for ``.png`` label files and writes four text files
(``trainval.txt``, ``test.txt``, ``train.txt``, ``val.txt``) into
``saveBasePath``, one image id (file name without extension) per line.
"""
import os
import random

segfilepath = r'./VOCdevkit/VOC2007/SegmentationClass'
saveBasePath = r"./VOCdevkit/VOC2007/ImageSets/Segmentation/"

# Fraction of all samples that go to train+val (the remainder is "test").
trainval_percent = 1
# Fraction of the train+val samples that go to train (the remainder is "val").
train_percent = 0.9


def split_names(names, trainval_percent, train_percent, rng=random):
    """Partition *names* into trainval / train / val / test lists.

    Args:
        names: sequence of image ids, in the order they should be written.
        trainval_percent: fraction of ``names`` assigned to trainval.
        train_percent: fraction of trainval assigned to train.
        rng: object exposing ``sample(population, k)``; defaults to the
            ``random`` module so seeding ``random`` keeps results reproducible.

    Returns:
        dict with keys ``"trainval"``, ``"train"``, ``"val"``, ``"test"``;
        every list preserves the relative order of ``names``.
    """
    num = len(names)
    tv = int(num * trainval_percent)
    tr = int(tv * train_percent)

    # Same two sampling calls, in the same order, as the original script.
    trainval_idx = rng.sample(range(num), tv)
    train_idx = rng.sample(trainval_idx, tr)

    # Sets give O(1) membership tests; the lists above made the loop O(n^2).
    trainval_set = set(trainval_idx)
    train_set = set(train_idx)

    splits = {"trainval": [], "train": [], "val": [], "test": []}
    for i, name in enumerate(names):
        if i in trainval_set:
            splits["trainval"].append(name)
            if i in train_set:
                splits["train"].append(name)
            else:
                splits["val"].append(name)
        else:
            splits["test"].append(name)
    return splits


def main():
    """Read the label directory and write the four split files."""
    total_seg = [seg for seg in os.listdir(segfilepath) if seg.endswith(".png")]
    # Strip the 4-character ".png" suffix to obtain the image id.
    splits = split_names([seg[:-4] for seg in total_seg],
                         trainval_percent, train_percent)

    print("train and val size", len(splits["trainval"]))
    print("train size", len(splits["train"]))

    # All four files are always (re)created, even when a split is empty.
    for split_name in ("trainval", "test", "train", "val"):
        out_path = os.path.join(saveBasePath, split_name + '.txt')
        with open(out_path, 'w') as f:
            f.writelines(name + '\n' for name in splits[split_name])


if __name__ == "__main__":
    main()
b/datasets/JPEGImages/1.jpg new file mode 100644 index 0000000..ebc7659 Binary files /dev/null and b/datasets/JPEGImages/1.jpg differ diff --git a/datasets/SegmentationClass/1.png b/datasets/SegmentationClass/1.png new file mode 100644 index 0000000..b5c8317 Binary files /dev/null and b/datasets/SegmentationClass/1.png differ diff --git a/datasets/before/1.jpg b/datasets/before/1.jpg new file mode 100644 index 0000000..797ee10 Binary files /dev/null and b/datasets/before/1.jpg differ diff --git a/datasets/before/1.json b/datasets/before/1.json new file mode 100644 index 0000000..27d580b --- /dev/null +++ b/datasets/before/1.json @@ -0,0 +1,135 @@ +{ + "version": "3.16.7", + "flags": {}, + "shapes": [ + { + "label": "cat", + "line_color": null, + "fill_color": null, + "points": [ + [ + 202.77358490566036, + 626.0943396226414 + ], + [ + 178.24528301886792, + 552.5094339622641 + ], + [ + 195.22641509433961, + 444.9622641509434 + ], + [ + 177.30188679245282, + 340.2452830188679 + ], + [ + 173.52830188679243, + 201.56603773584905 + ], + [ + 211.2641509433962, + 158.16981132075472 + ], + [ + 226.35849056603772, + 87.41509433962264 + ], + [ + 208.43396226415092, + 6.283018867924525 + ], + [ + 277.3018867924528, + 57.226415094339615 + ], + [ + 416.92452830188677, + 80.81132075471697 + ], + [ + 497.1132075471698, + 64.77358490566037 + ], + [ + 578.2452830188679, + 6.283018867924525 + ], + [ + 599.0, + 35.52830188679245 + ], + [ + 589.566037735849, + 96.84905660377359 + ], + [ + 592.3962264150944, + 133.64150943396226 + ], + [ + 679.188679245283, + 174.2075471698113 + ], + [ + 723.5283018867924, + 165.71698113207546 + ], + [ + 726.3584905660377, + 222.32075471698113 + ], + [ + 759.377358490566, + 262.88679245283015 + ], + [ + 782.9622641509434, + 350.62264150943395 + ], + [ + 766.9245283018868, + 428.92452830188677 + ], + [ + 712.2075471698113, + 465.71698113207543 + ], + [ + 695.2264150943396, + 538.3584905660377 + ], + [ + 657.4905660377358, + 601.566037735849 + ], + [ + 
"""Run the segmentation model over the validation list and save raw class-index PNGs."""
from pspnet import PSPNet
from torch import nn
from PIL import Image
from torch.autograd import Variable
import torch.nn.functional as F
import numpy as np
import colorsys
import torch
import copy
import os


class miou_Pspnet(PSPNet):
    """PSPNet wrapper whose ``detect_image`` returns class indices instead of colours."""

    def detect_image(self, image):
        """Segment *image* and return a PIL image of per-pixel class indices.

        The input is letterboxed to ``model_image_size``, run through the
        network, the padding is cropped away again, and the argmax map is
        resized back to the original resolution with nearest-neighbour
        interpolation so that class ids are never blended.
        """
        # PIL reports (width, height).
        orininal_w, orininal_h = image.size
        model_h, model_w = self.model_image_size[0], self.model_image_size[1]

        image, nw, nh = self.letterbox_image(image, (model_w, model_h))
        images = [np.array(image) / 255]
        # (batch, H, W, C) -> (batch, C, H, W)
        images = np.transpose(images, (0, 3, 1, 2))

        with torch.no_grad():
            images = Variable(torch.from_numpy(images).type(torch.FloatTensor))
            if self.cuda:
                images = images.cuda()
            pr = self.net(images)[0]
            pr = F.softmax(pr.permute(1, 2, 0), dim=-1).cpu().numpy().argmax(axis=-1)

        # Remove the grey letterbox border added by letterbox_image.
        top = int((model_h - nh) // 2)
        left = int((model_w - nw) // 2)
        pr = pr[top:top + nh, left:left + nw]

        image = Image.fromarray(np.uint8(pr)).resize((orininal_w, orininal_h), Image.NEAREST)

        return image


pspnet = miou_Pspnet()

# Build the path with os.path.join: the former backslash-separated raw string
# only resolved on Windows.
val_list_path = os.path.join("VOCdevkit", "VOC2007", "ImageSets", "Segmentation", "val.txt")
with open(val_list_path, 'r') as f:
    image_ids = f.read().splitlines()

os.makedirs("./miou_pr_dir", exist_ok=True)

for image_id in image_ids:
    image_path = "./VOCdevkit/VOC2007/JPEGImages/" + image_id + ".jpg"
    image = Image.open(image_path)
    image = pspnet.detect_image(image)
    image.save("./miou_pr_dir/" + image_id + ".png")
    print(image_id, " done!")
new file mode 100644 index 0000000..6750d37 Binary files /dev/null and b/img/street.jpg differ diff --git a/json_to_dataset.py b/json_to_dataset.py new file mode 100644 index 0000000..93e144c --- /dev/null +++ b/json_to_dataset.py @@ -0,0 +1,63 @@ +import argparse +import json +import os +import os.path as osp +import warnings + +import PIL.Image +import yaml +import numpy as np +from labelme import utils +import base64 + +if __name__ == '__main__': + jpgs_path = "datasets/JPEGImages" + pngs_path = "datasets/SegmentationClass" + classes = ["_background_","aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"] + # classes = ["_background_","cat","dog"] + + count = os.listdir("./datasets/before/") + for i in range(0, len(count)): + path = os.path.join("./datasets/before", count[i]) + + if os.path.isfile(path) and path.endswith('json'): + data = json.load(open(path)) + + if data['imageData']: + imageData = data['imageData'] + else: + imagePath = os.path.join(os.path.dirname(path), data['imagePath']) + with open(imagePath, 'rb') as f: + imageData = f.read() + imageData = base64.b64encode(imageData).decode('utf-8') + + img = utils.img_b64_to_arr(imageData) + label_name_to_value = {'_background_': 0} + for shape in data['shapes']: + label_name = shape['label'] + if label_name in label_name_to_value: + label_value = label_name_to_value[label_name] + else: + label_value = len(label_name_to_value) + label_name_to_value[label_name] = label_value + + # label_values must be dense + label_values, label_names = [], [] + for ln, lv in sorted(label_name_to_value.items(), key=lambda x: x[1]): + label_values.append(lv) + label_names.append(ln) + assert label_values == list(range(len(label_values))) + + lbl = utils.shapes_to_label(img.shape, data['shapes'], label_name_to_value) + + + PIL.Image.fromarray(img).save(osp.join(jpgs_path, 
"""Compute per-class IoU / pixel accuracy between predicted and ground-truth PNGs."""
import argparse
import json
from os.path import join

import numpy as np


def fast_hist(a, b, n):
    """Return the ``n x n`` confusion matrix of labels *a* against predictions *b*.

    Args:
        a: 1-D array of ground-truth class ids.
        b: 1-D array of predicted class ids, same length as ``a``.
        n: number of classes.

    Returns:
        Integer array of shape ``(n, n)``; entry ``[i, j]`` counts pixels whose
        label is ``i`` and whose prediction is ``j``. Pixels whose label lies
        outside ``[0, n)`` are ignored.
    """
    k = (a >= 0) & (a < n)
    # Encode each (label, prediction) pair as one integer in [0, n*n) and
    # count occurrences; the diagonal holds the correctly classified pixels.
    return np.bincount(n * a[k].astype(int) + b[k], minlength=n ** 2).reshape(n, n)


def per_class_iu(hist):
    """Return per-class IoU, shape ``(n,)``; classes with no pixels yield NaN."""
    # 0/0 for absent classes is expected; keep the NaN but silence the warning.
    with np.errstate(divide='ignore', invalid='ignore'):
        return np.diag(hist) / (hist.sum(1) + hist.sum(0) - np.diag(hist))


def per_class_PA(hist):
    """Return per-class pixel accuracy, shape ``(n,)``; absent classes yield NaN."""
    with np.errstate(divide='ignore', invalid='ignore'):
        return np.diag(hist) / hist.sum(1)


def compute_mIoU(gt_dir, pred_dir, png_name_list, num_classes, name_classes):
    """Accumulate a confusion matrix over all image pairs and report the metrics.

    Args:
        gt_dir: directory holding ground-truth ``<id>.png`` label images.
        pred_dir: directory holding predicted ``<id>.png`` images.
        png_name_list: iterable of image ids (without extension).
        num_classes: number of classes.
        name_classes: class names, indexed by class id, used for printing.

    Returns:
        Array of per-class IoU values, shape ``(num_classes,)``.
    """
    # Imported here so the NumPy-only helpers above stay importable without Pillow.
    from PIL import Image

    print('Num classes', num_classes)
    hist = np.zeros((num_classes, num_classes))

    gt_imgs = [join(gt_dir, x + ".png") for x in png_name_list]
    pred_imgs = [join(pred_dir, x + ".png") for x in png_name_list]

    for ind, (gt_path, pred_path) in enumerate(zip(gt_imgs, pred_imgs)):
        pred = np.array(Image.open(pred_path))
        label = np.array(Image.open(gt_path))

        # A prediction whose pixel count differs from its label is skipped.
        if len(label.flatten()) != len(pred.flatten()):
            print(
                'Skipping: len(gt) = {:d}, len(pred) = {:d}, {:s}, {:s}'.format(
                    len(label.flatten()), len(pred.flatten()), gt_path,
                    pred_path))
            continue

        hist += fast_hist(label.flatten(), pred.flatten(), num_classes)

        # Progress report every 10 images. nanmean (not mean) so classes not
        # seen yet do not turn the running averages into NaN.
        if ind > 0 and ind % 10 == 0:
            print('{:d} / {:d}: mIou-{:0.2f}; mPA-{:0.2f}'.format(ind, len(gt_imgs),
                                                    100 * np.nanmean(per_class_iu(hist)),
                                                    100 * np.nanmean(per_class_PA(hist))))

    mIoUs = per_class_iu(hist)
    mPA = per_class_PA(hist)

    for ind_class in range(num_classes):
        print('===>' + name_classes[ind_class] + ':\tmIou-' + str(round(mIoUs[ind_class] * 100, 2)) + '; mPA-' + str(round(mPA[ind_class] * 100, 2)))

    # Averages over all classes, ignoring NaN (absent classes).
    print('===> mIoU: ' + str(round(np.nanmean(mIoUs) * 100, 2)) + '; mPA: ' + str(round(np.nanmean(mPA) * 100, 2)))
    return mIoUs


if __name__ == "__main__":
    gt_dir = "./VOCdevkit/VOC2007/SegmentationClass"
    pred_dir = "./miou_pr_dir"
    # Portable path: the former backslash-separated literal only worked on Windows.
    val_list_path = join("VOCdevkit", "VOC2007", "ImageSets", "Segmentation", "val.txt")
    with open(val_list_path, 'r') as f:
        png_name_list = f.read().splitlines()

    num_classes = 21
    name_classes = ["background","aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"]
    compute_mIoU(gt_dir, pred_dir, png_name_list, num_classes, name_classes)
BatchNorm2d = nn.BatchNorm2d


def conv_bn(inp, oup, stride):
    """Return a 3x3 conv (padding 1, no bias) -> batch norm -> ReLU6 stack."""
    layers = [
        nn.Conv2d(inp, oup, 3, stride, 1, bias=False),
        BatchNorm2d(oup),
        nn.ReLU6(inplace=True),
    ]
    return nn.Sequential(*layers)


def conv_1x1_bn(inp, oup):
    """Return a 1x1 conv (no bias) -> batch norm -> ReLU6 stack."""
    layers = [
        nn.Conv2d(inp, oup, 1, 1, 0, bias=False),
        BatchNorm2d(oup),
        nn.ReLU6(inplace=True),
    ]
    return nn.Sequential(*layers)


class InvertedResidual(nn.Module):
    """MobileNetV2 inverted-residual block.

    With ``expand_ratio == 1`` the block is depthwise 3x3 -> linear 1x1;
    otherwise a 1x1 expansion is placed in front. A skip connection is added
    only when ``stride == 1`` and the input and output widths match.
    """

    def __init__(self, inp, oup, stride, expand_ratio):
        super(InvertedResidual, self).__init__()
        self.stride = stride
        assert stride in [1, 2]

        hidden_dim = round(inp * expand_ratio)
        self.use_res_connect = self.stride == 1 and inp == oup

        stack = []
        if expand_ratio != 1:
            # Pointwise expansion, present only when the block widens its input.
            stack += [
                nn.Conv2d(inp, hidden_dim, 1, 1, 0, bias=False),
                BatchNorm2d(hidden_dim),
                nn.ReLU6(inplace=True),
            ]
        # Depthwise 3x3 convolution (one group per channel).
        stack += [
            nn.Conv2d(hidden_dim, hidden_dim, 3, stride, 1, groups=hidden_dim, bias=False),
            BatchNorm2d(hidden_dim),
            nn.ReLU6(inplace=True),
        ]
        # Linear pointwise projection, with no activation afterwards.
        stack += [
            nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
            BatchNorm2d(oup),
        ]
        self.conv = nn.Sequential(*stack)

    def forward(self, x):
        """Apply the block, adding the identity shortcut when enabled."""
        out = self.conv(x)
        if not self.use_res_connect:
            return out
        return x + out
1], + ] + + assert input_size % 32 == 0 + # 建立stem层 + input_channel = int(input_channel * width_mult) + self.last_channel = int(last_channel * width_mult) if width_mult > 1.0 else last_channel + + self.features = [conv_bn(3, input_channel, 2)] + + # 根据上述列表进行循环,构建mobilenetv2的结构 + for t, c, n, s in interverted_residual_setting: + output_channel = int(c * width_mult) + for i in range(n): + if i == 0: + self.features.append(block(input_channel, output_channel, s, expand_ratio=t)) + else: + self.features.append(block(input_channel, output_channel, 1, expand_ratio=t)) + input_channel = output_channel + + # mobilenetv2结构的收尾工作 + self.features.append(conv_1x1_bn(input_channel, self.last_channel)) + self.features = nn.Sequential(*self.features) + + # 最后的分类部分 + self.classifier = nn.Sequential( + nn.Dropout(0.2), + nn.Linear(self.last_channel, n_class), + ) + + self._initialize_weights() + + def forward(self, x): + x = self.features(x) + x = x.mean(3).mean(2) + x = self.classifier(x) + return x + + def _initialize_weights(self): + for m in self.modules(): + if isinstance(m, nn.Conv2d): + n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels + m.weight.data.normal_(0, math.sqrt(2. 
def load_url(url, model_dir='./model_data', map_location=None):
    """Load a checkpoint from the local cache, downloading it on a cache miss.

    Args:
        url: checkpoint URL; its last path component is used as the cache
            file name.
        model_dir: cache directory, created if missing.
        map_location: forwarded to the loader on both the cached and the
            download path (it was previously dropped when downloading).

    Returns:
        The deserialised checkpoint object.
    """
    os.makedirs(model_dir, exist_ok=True)
    filename = url.split('/')[-1]
    cached_file = os.path.join(model_dir, filename)
    if os.path.exists(cached_file):
        return torch.load(cached_file, map_location=map_location)
    return model_zoo.load_url(url, model_dir=model_dir, map_location=map_location)


def mobilenetv2(pretrained=False, **kwargs):
    """Build a 1000-class MobileNetV2, optionally loading the pretrained weights."""
    model = MobileNetV2(n_class=1000, **kwargs)
    if pretrained:
        # Deserialise onto the CPU so a GPU-saved checkpoint also loads on a
        # CPU-only machine; load_state_dict copies the values into the model.
        state_dict = load_url(
            'http://sceneparsing.csail.mit.edu/model/pretrained_resnet/mobilenet_v2.pth.tar',
            map_location='cpu')
        model.load_state_dict(state_dict, strict=False)
    return model
def forward(self, x):
    """Run the ResNet backbone and return ``(layer3 output, layer4 output)``.

    The layer3 output is returned alongside the final features so the caller
    can attach an auxiliary head to it.
    """
    # Three-convolution stem, each convolution followed by BN and ReLU.
    stem = self.conv1(x)
    stem = self.bn1(stem)
    stem = self.relu1(stem)

    stem = self.conv2(stem)
    stem = self.bn2(stem)
    stem = self.relu2(stem)

    stem = self.conv3(stem)
    stem = self.bn3(stem)
    stem = self.relu3(stem)

    feat = self.maxpool(stem)

    # Residual stages.
    feat = self.layer1(feat)
    feat = self.layer2(feat)
    x_aux = self.layer3(feat)
    out = self.layer4(x_aux)
    return x_aux, out
class _PSPModule(nn.Module):
    """Pyramid pooling module.

    The input is average-pooled to each size in ``pool_sizes``, every pooled
    map is projected by a 1x1 conv, upsampled back to the input resolution and
    concatenated with the input; a 3x3 bottleneck then fuses the result.
    """

    def __init__(self, in_channels, pool_sizes, norm_layer):
        super(_PSPModule, self).__init__()
        branch_channels = in_channels // len(pool_sizes)

        branches = []
        for bin_size in pool_sizes:
            branches.append(
                self._make_stages(in_channels, branch_channels, bin_size, norm_layer))
        self.stages = nn.ModuleList(branches)

        # Input features plus one projected map per pooling branch.
        fused_channels = in_channels + (branch_channels * len(pool_sizes))
        self.bottleneck = nn.Sequential(
            nn.Conv2d(fused_channels, branch_channels,
                      kernel_size=3, padding=1, bias=False),
            norm_layer(branch_channels),
            nn.ReLU(inplace=True),
            nn.Dropout2d(0.1)
        )

    def _make_stages(self, in_channels, out_channels, bin_sz, norm_layer):
        """Build one branch: adaptive average pool -> 1x1 conv -> norm -> ReLU."""
        return nn.Sequential(
            nn.AdaptiveAvgPool2d(output_size=bin_sz),
            nn.Conv2d(in_channels, out_channels, kernel_size=1, bias=False),
            norm_layer(out_channels),
            nn.ReLU(inplace=True),
        )

    def forward(self, features):
        """Return the fused pyramid features at the input's spatial size."""
        target_size = (features.size(2), features.size(3))
        pyramids = [features]
        for stage in self.stages:
            pooled = stage(features)
            pyramids.append(
                F.interpolate(pooled, size=target_size, mode='bilinear',
                              align_corners=True))
        return self.bottleneck(torch.cat(pyramids, dim=1))
aux_branch + + if self.aux_branch: + self.auxiliary_branch = nn.Sequential( + nn.Conv2d(aux_channel, out_channel//8, kernel_size=3, padding=1, bias=False), + norm_layer(out_channel//8), + nn.ReLU(inplace=True), + nn.Dropout2d(0.1), + nn.Conv2d(out_channel//8, num_classes, kernel_size=1) + ) + + self.initialize_weights(self.master_branch) + + def forward(self, x): + input_size = (x.size()[2], x.size()[3]) + x_aux, x = self.backbone(x) + + output = self.master_branch(x) + output = F.interpolate(output, size=input_size, mode='bilinear', align_corners=True) + if self.aux_branch: + output_aux = self.auxiliary_branch(x_aux) + output_aux = F.interpolate(output_aux, size=input_size, mode='bilinear', align_corners=True) + return output_aux, output + else: + return output + + def initialize_weights(self, *models): + for model in models: + for m in model.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_(m.weight.data, nonlinearity='relu') + elif isinstance(m, nn.BatchNorm2d): + m.weight.data.fill_(1.) 
def CE_Loss(inputs, target, num_classes=21):
    """Pixel-wise cross-entropy between logits and integer labels.

    Args:
        inputs: logits indexed as ``(n, c, h, w)``.
        target: integer class ids indexed as ``(n, ht, wt)``; pixels equal to
            ``num_classes`` are ignored.
        num_classes: id used as the ignore index.

    Returns:
        Scalar loss tensor.
    """
    n, c, h, w = inputs.size()
    nt, ht, wt = target.size()
    # Resize when EITHER spatial dimension differs. The former `and` skipped
    # the resize when only one differed, then failed on the shape mismatch.
    if h != ht or w != wt:
        inputs = F.interpolate(inputs, size=(ht, wt), mode="bilinear", align_corners=True)

    # (n, c, h, w) -> (n*h*w, c)
    temp_inputs = inputs.permute(0, 2, 3, 1).contiguous().view(-1, c)
    temp_target = target.view(-1)

    CE_loss = nn.NLLLoss(ignore_index=num_classes)(F.log_softmax(temp_inputs, dim=-1), temp_target)
    return CE_loss


def Dice_loss(inputs, target, beta=1, smooth=1e-5):
    """Soft Dice (F-beta) loss between logits and one-hot labels.

    Args:
        inputs: logits indexed as ``(n, c, h, w)``.
        target: one-hot labels indexed as ``(n, ht, wt, ct)``; the last
            channel is dropped before scoring.
        beta: F-beta weighting between recall and precision.
        smooth: additive smoothing to avoid division by zero.

    Returns:
        Scalar tensor ``1 - mean(per-class score)``.
    """
    n, c, h, w = inputs.size()
    nt, ht, wt, ct = target.size()

    # Same fix as CE_Loss: resize when either dimension differs.
    if h != ht or w != wt:
        inputs = F.interpolate(inputs, size=(ht, wt), mode="bilinear", align_corners=True)
    temp_inputs = torch.softmax(inputs.permute(0, 2, 3, 1).contiguous().view(n, -1, c), -1)
    temp_target = target.view(n, -1, ct)

    # Per-class soft counts, reduced over the batch and pixel axes.
    tp = torch.sum(temp_target[..., :-1] * temp_inputs, dim=[0, 1])
    fp = torch.sum(temp_inputs, dim=[0, 1]) - tp
    fn = torch.sum(temp_target[..., :-1], dim=[0, 1]) - tp

    score = ((1 + beta ** 2) * tp + smooth) / ((1 + beta ** 2) * tp + beta ** 2 * fn + fp + smooth)
    dice_loss = 1 - torch.mean(score)
    return dice_loss
BatchNorm2d = nn.BatchNorm2d

model_urls = {
    'resnet50': 'http://sceneparsing.csail.mit.edu/model/pretrained_resnet/resnet50-imagenet.pth',
}


def conv3x3(in_planes, out_planes, stride=1):
    """Return a bias-free 3x3 convolution with padding 1."""
    return nn.Conv2d(
        in_planes,
        out_planes,
        kernel_size=3,
        stride=stride,
        padding=1,
        bias=False,
    )


class Bottleneck(nn.Module):
    """ResNet bottleneck: 1x1 reduce -> 3x3 (carries the stride) -> 1x1 expand, plus shortcut."""

    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(Bottleneck, self).__init__()
        widened = planes * 4

        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
                               padding=1, bias=False)
        self.bn2 = BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, widened, kernel_size=1, bias=False)
        self.bn3 = BatchNorm2d(widened)
        self.relu = nn.ReLU(inplace=True)
        # Optional projection applied to the shortcut when shapes differ.
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """Return ``relu(main_path(x) + shortcut(x))``."""
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))

        shortcut = x
        if self.downsample is not None:
            shortcut = self.downsample(x)

        out += shortcut
        return self.relu(out)
def load_url(url, model_dir='./model_data', map_location=None):
    """Load a checkpoint from the local cache, downloading it on a cache miss.

    Args:
        url: checkpoint URL; its last path component is used as the cache
            file name.
        model_dir: cache directory, created if missing.
        map_location: forwarded to the loader on both the cached and the
            download path (it was previously dropped when downloading).

    Returns:
        The deserialised checkpoint object.
    """
    os.makedirs(model_dir, exist_ok=True)
    filename = url.split('/')[-1]
    cached_file = os.path.join(model_dir, filename)
    if os.path.exists(cached_file):
        return torch.load(cached_file, map_location=map_location)
    return model_zoo.load_url(url, model_dir=model_dir, map_location=map_location)


def resnet50(pretrained=False, **kwargs):
    """Build a ResNet-50 (block counts 3-4-6-3), optionally loading the pretrained weights."""
    model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs)
    if pretrained:
        # Deserialise onto the CPU so a GPU-saved checkpoint also loads on a
        # CPU-only machine; load_state_dict copies the values into the model.
        state_dict = load_url(model_urls['resnet50'], map_location='cpu')
        model.load_state_dict(state_dict, strict=False)
    return model
from nets.pspnet import PSPNet as pspnet
from torch import nn
from PIL import Image
from torch.autograd import Variable
import torch.nn.functional as F
import numpy as np
import colorsys
import torch
import copy
import os


class PSPNet(object):
    """Inference wrapper: loads a trained PSPNet and colourises its predictions.

    ``model_path``, ``num_classes`` and ``backbone`` must match the checkpoint
    being loaded. Any key of ``_defaults`` can be overridden with a keyword
    argument to the constructor.
    """

    _defaults = {
        "model_path"        :   'model_data/pspnet_mobilenetv2.pth',
        "model_image_size"  :   (473, 473, 3),
        "backbone"          :   "mobilenet",
        "downsample_factor" :   16,
        "num_classes"       :   21,
        "cuda"              :   True,
        "blend"             :   True,
    }

    def __init__(self, **kwargs):
        """Apply defaults, then caller overrides, then build the network."""
        self.__dict__.update(self._defaults)
        # **kwargs used to be accepted but silently ignored.
        self.__dict__.update(kwargs)
        self.generate()

    def generate(self):
        """Build the network, load the checkpoint and prepare the colour palette."""
        os.environ["CUDA_VISIBLE_DEVICES"] = '0'
        self.net = pspnet(num_classes=self.num_classes, downsample_factor=self.downsample_factor, pretrained=False, backbone=self.backbone, aux_branch=False)
        self.net = self.net.eval()

        # Deserialise onto the CPU so a GPU-saved checkpoint also loads when
        # cuda=False; the model is moved to the GPU below when requested.
        state_dict = torch.load(self.model_path, map_location='cpu')
        self.net.load_state_dict(state_dict, strict=False)
        if self.cuda:
            self.net = nn.DataParallel(self.net)
            self.net = self.net.cuda()

        print('{} model, anchors, and classes loaded.'.format(self.model_path))

        if self.num_classes <= 21:
            # Fixed palette (last entry corrected from (128, 64, 12)).
            self.colors = [(0, 0, 0), (128, 0, 0), (0, 128, 0), (128, 128, 0), (0, 0, 128), (128, 0, 128), (0, 128, 128),
                    (128, 128, 128), (64, 0, 0), (192, 0, 0), (64, 128, 0), (192, 128, 0), (64, 0, 128), (192, 0, 128),
                    (64, 128, 128), (192, 128, 128), (0, 64, 0), (128, 64, 0), (0, 192, 0), (128, 192, 0), (0, 64, 128), (128, 64, 128)]
        else:
            # Evenly spaced hues, one per class. This branch used to read
            # self.class_names, which is never defined on this object.
            hsv_tuples = [(x / self.num_classes, 1., 1.)
                        for x in range(self.num_classes)]
            self.colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
            self.colors = list(
                map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)),
                    self.colors))

    def letterbox_image(self, image, size):
        '''Resize image with unchanged aspect ratio, padding the rest with grey.

        Returns ``(padded_image, new_width, new_height)`` where the new
        width/height describe the resized content inside the padded canvas.
        '''
        iw, ih = image.size
        w, h = size
        scale = min(w/iw, h/ih)
        nw = int(iw*scale)
        nh = int(ih*scale)

        image = image.resize((nw,nh), Image.BICUBIC)
        new_image = Image.new('RGB', size, (128,128,128))
        new_image.paste(image, ((w-nw)//2, (h-nh)//2))
        return new_image,nw,nh

    def detect_image(self, image):
        """Segment *image* and return a colourised (optionally blended) PIL image."""
        # RGB copy for blending: Image.blend needs both images in the same
        # mode, and the segmentation image built below is RGB.
        old_img = image.convert('RGB')
        orininal_w, orininal_h = image.size
        model_h, model_w = self.model_image_size[0], self.model_image_size[1]

        image, nw, nh = self.letterbox_image(image, (model_w, model_h))
        images = [np.array(image)/255]
        # (batch, H, W, C) -> (batch, C, H, W)
        images = np.transpose(images,(0,3,1,2))

        with torch.no_grad():
            images = Variable(torch.from_numpy(images).type(torch.FloatTensor))
            if self.cuda:
                images = images.cuda()

            pr = self.net(images)[0]
            pr = F.softmax(pr.permute(1,2,0),dim = -1).cpu().numpy().argmax(axis=-1)

        # Remove the grey letterbox border again.
        top = int((model_h - nh) // 2)
        left = int((model_w - nw) // 2)
        pr = pr[top:top + nh, left:left + nw]

        # Map every class id to its colour in a single palette lookup.
        palette = np.array(self.colors[:self.num_classes], dtype='uint8')
        seg_img = palette[pr]

        image = Image.fromarray(np.uint8(seg_img)).resize((orininal_w,orininal_h))
        if self.blend:
            image = Image.blend(old_img,image,0.7)

        return image
epoch_size: + break + imgs, pngs, labels = batch + + with torch.no_grad(): + imgs = Variable(torch.from_numpy(imgs).type(torch.FloatTensor)) + pngs = Variable(torch.from_numpy(pngs).type(torch.FloatTensor)).long() + labels = Variable(torch.from_numpy(labels).type(torch.FloatTensor)) + if cuda: + imgs = imgs.cuda() + pngs = pngs.cuda() + labels = labels.cuda() + + #-------------------------------# + # 判断是否使用辅助分支并回传 + #-------------------------------# + optimizer.zero_grad() + if aux_branch: + aux_outputs, outputs = net(imgs) + aux_loss = CE_Loss(aux_outputs, pngs, num_classes = NUM_CLASSES) + main_loss = CE_Loss(outputs, pngs, num_classes = NUM_CLASSES) + loss = aux_loss + main_loss + if dice_loss: + aux_dice = Dice_loss(aux_outputs, labels) + main_dice = Dice_loss(outputs, labels) + loss = loss + aux_dice + main_dice + + else: + outputs = net(imgs) + loss = CE_Loss(outputs, pngs, num_classes = NUM_CLASSES) + if dice_loss: + main_dice = Dice_loss(outputs, labels) + loss = loss + main_dice + + with torch.no_grad(): + #-------------------------------# + # 计算f_score + #-------------------------------# + _f_score = f_score(outputs, labels) + + loss.backward() + optimizer.step() + + total_loss += loss.item() + total_f_score += _f_score.item() + + waste_time = time.time() - start_time + pbar.set_postfix(**{'total_loss': total_loss / (iteration + 1), + 'f_score' : total_f_score / (iteration + 1), + 's/step' : waste_time, + 'lr' : get_lr(optimizer)}) + pbar.update(1) + + start_time = time.time() + + print('Start Validation') + with tqdm(total=epoch_size_val, desc=f'Epoch {epoch + 1}/{Epoch}',postfix=dict,mininterval=0.3) as pbar: + for iteration, batch in enumerate(genval): + if iteration >= epoch_size_val: + break + imgs, pngs, labels = batch + with torch.no_grad(): + imgs = Variable(torch.from_numpy(imgs).type(torch.FloatTensor)) + pngs = Variable(torch.from_numpy(pngs).type(torch.FloatTensor)).long() + labels = Variable(torch.from_numpy(labels).type(torch.FloatTensor)) + 
if cuda: + imgs = imgs.cuda() + pngs = pngs.cuda() + labels = labels.cuda() + #-------------------------------# + # 判断是否使用辅助分支 + #-------------------------------# + if aux_branch: + aux_outputs, outputs = net(imgs) + aux_loss = CE_Loss(aux_outputs, pngs, num_classes = NUM_CLASSES) + main_loss = CE_Loss(outputs, pngs, num_classes = NUM_CLASSES) + val_loss = aux_loss + main_loss + if dice_loss: + aux_dice = Dice_loss(aux_outputs, labels) + main_dice = Dice_loss(outputs, labels) + val_loss = val_loss + aux_dice + main_dice + + else: + outputs = net(imgs) + val_loss = CE_Loss(outputs, pngs, num_classes = NUM_CLASSES) + if dice_loss: + main_dice = Dice_loss(outputs, labels) + val_loss = val_loss + main_dice + + #-------------------------------# + # 计算f_score + #-------------------------------# + _f_score = f_score(outputs, labels) + + val_toal_loss += val_loss.item() + val_total_f_score += _f_score.item() + + + pbar.set_postfix(**{'total_loss': val_toal_loss / (iteration + 1), + 'f_score' : val_total_f_score / (iteration + 1), + 'lr' : get_lr(optimizer)}) + pbar.update(1) + + print('Finish Validation') + print('Epoch:'+ str(epoch+1) + '/' + str(Epoch)) + print('Total Loss: %.4f || Val Loss: %.4f ' % (total_loss/(epoch_size+1),val_toal_loss/(epoch_size_val+1))) + + print('Saving state, iter:', str(epoch+1)) + torch.save(model.state_dict(), 'logs/Epoch%d-Total_Loss%.4f-Val_Loss%.4f.pth'%((epoch+1),total_loss/(epoch_size+1),val_toal_loss/(epoch_size_val+1))) + + + +if __name__ == "__main__": + inputs_size = [473,473,3] + log_dir = "logs/" + #---------------------# + # 分类个数+1 + # 2+1 + #---------------------# + NUM_CLASSES = 21 + #--------------------------------------------------------------------# + # 建议选项: + # 种类少(几类)时,设置为True + # 种类多(十几类)时,如果batch_size比较大(10以上),那么设置为True + # 种类多(十几类)时,如果batch_size比较小(10以下),那么设置为False + #---------------------------------------------------------------------# + dice_loss = False + #-------------------------------# + # 主干网络预训练权重的使用 + # 
mobilenet和resnet50 + #-------------------------------# + pretrained = False + backbone = "mobilenet" + #---------------------# + # 是否使用辅助分支 + # 会占用大量显存 + #---------------------# + aux_branch = False + #---------------------# + # 下采样的倍数 + # 8和16 + #---------------------# + downsample_factor = 16 + #-------------------------------# + # Cuda的使用 + #-------------------------------# + Cuda = True + + model = PSPNet(num_classes=NUM_CLASSES, backbone=backbone, downsample_factor=downsample_factor, pretrained=pretrained, aux_branch=aux_branch).train() + + # voc数据集下进行训练的 + model_path = r"model_data/pspnet_mobilenetv2.pth" + # 加快模型训练的效率 + print('Loading weights into state dict...') + model_dict = model.state_dict() + pretrained_dict = torch.load(model_path) + pretrained_dict = {k: v for k, v in pretrained_dict.items() if np.shape(model_dict[k]) == np.shape(v)} + model_dict.update(pretrained_dict) + model.load_state_dict(model_dict) + print('Finished!') + + if Cuda: + net = torch.nn.DataParallel(model) + cudnn.benchmark = True + net = net.cuda() + + # 打开数据集的txt + with open(r"VOCdevkit/VOC2007/ImageSets/Segmentation/train.txt","r") as f: + train_lines = f.readlines() + + # 打开数据集的txt + with open(r"VOCdevkit/VOC2007/ImageSets/Segmentation/val.txt","r") as f: + val_lines = f.readlines() + + if True: + lr = 1e-4 + Init_Epoch = 0 + Interval_Epoch = 50 + Batch_size = 8 + optimizer = optim.Adam(model.parameters(),lr) + lr_scheduler = optim.lr_scheduler.StepLR(optimizer,step_size=1,gamma=0.9) + + train_dataset = PSPnetDataset(train_lines, inputs_size, NUM_CLASSES, True) + val_dataset = PSPnetDataset(val_lines, inputs_size, NUM_CLASSES, False) + gen = DataLoader(train_dataset, batch_size=Batch_size, num_workers=1, pin_memory=True, + drop_last=True, collate_fn=pspnet_dataset_collate) + gen_val = DataLoader(val_dataset, batch_size=Batch_size, num_workers=4,pin_memory=True, + drop_last=True, collate_fn=pspnet_dataset_collate) + + epoch_size = max(1, len(train_lines)//Batch_size) + 
epoch_size_val = max(1, len(val_lines)//Batch_size) + + for param in model.backbone.parameters(): + param.requires_grad = False + + for epoch in range(Init_Epoch,Interval_Epoch): + fit_one_epoch(model,epoch,epoch_size,epoch_size_val,gen,gen_val,Interval_Epoch,Cuda,aux_branch) + lr_scheduler.step() + + if True: + lr = 1e-5 + Interval_Epoch = 50 + Epoch = 100 + Batch_size = 4 + optimizer = optim.Adam(model.parameters(),lr) + lr_scheduler = optim.lr_scheduler.StepLR(optimizer,step_size=1,gamma=0.9) + + train_dataset = PSPnetDataset(train_lines, inputs_size, NUM_CLASSES, True) + val_dataset = PSPnetDataset(val_lines, inputs_size, NUM_CLASSES, False) + gen = DataLoader(train_dataset, batch_size=Batch_size, num_workers=4, pin_memory=True, + drop_last=True, collate_fn=pspnet_dataset_collate) + gen_val = DataLoader(val_dataset, batch_size=Batch_size, num_workers=4,pin_memory=True, + drop_last=True, collate_fn=pspnet_dataset_collate) + + epoch_size = max(1, len(train_lines)//Batch_size) + epoch_size_val = max(1, len(val_lines)//Batch_size) + + for param in model.backbone.parameters(): + param.requires_grad = True + + for epoch in range(Interval_Epoch,Epoch): + fit_one_epoch(model,epoch,epoch_size,epoch_size_val,gen,gen_val,Epoch,Cuda,aux_branch) + lr_scheduler.step() + diff --git a/utils/dataloader.py b/utils/dataloader.py new file mode 100644 index 0000000..37ac5ca --- /dev/null +++ b/utils/dataloader.py @@ -0,0 +1,144 @@ +from random import shuffle +import numpy as np +import torch +import torch.nn as nn +import math +import torch.nn.functional as F +from PIL import Image +from torch.autograd import Variable +from torch.utils.data import DataLoader +from torch.utils.data.dataset import Dataset +from matplotlib.colors import rgb_to_hsv, hsv_to_rgb +import cv2 + +def letterbox_image(image, label , size): + label = Image.fromarray(np.array(label)) + '''resize image with unchanged aspect ratio using padding''' + iw, ih = image.size + w, h = size + scale = min(w/iw, h/ih) + nw 
= int(iw*scale) + nh = int(ih*scale) + + image = image.resize((nw,nh), Image.BICUBIC) + new_image = Image.new('RGB', size, (128,128,128)) + new_image.paste(image, ((w-nw)//2, (h-nh)//2)) + + label = label.resize((nw,nh), Image.NEAREST) + new_label = Image.new('L', size, (0)) + new_label.paste(label, ((w-nw)//2, (h-nh)//2)) + + return new_image, new_label + +def rand(a=0, b=1): + return np.random.rand()*(b-a) + a + +class PSPnetDataset(Dataset): + def __init__(self,train_lines,image_size,num_classes,random_data): + super(PSPnetDataset, self).__init__() + + self.train_lines = train_lines + self.train_batches = len(train_lines) + self.image_size = image_size + self.num_classes = num_classes + self.random_data = random_data + + def __len__(self): + return self.train_batches + + def rand(self, a=0, b=1): + return np.random.rand() * (b - a) + a + + def get_random_data(self, image, label, input_shape, jitter=.3, hue=.1, sat=1.5, val=1.5): + label = Image.fromarray(np.array(label)) + + h, w = input_shape + # resize image + rand_jit1 = rand(1-jitter,1+jitter) + rand_jit2 = rand(1-jitter,1+jitter) + new_ar = w/h * rand_jit1/rand_jit2 + + scale = rand(0.5,1.5) + if new_ar < 1: + nh = int(scale*h) + nw = int(nh*new_ar) + else: + nw = int(scale*w) + nh = int(nw/new_ar) + image = image.resize((nw,nh), Image.BICUBIC) + label = label.resize((nw,nh), Image.NEAREST) + label = label.convert("L") + + # flip image or not + flip = rand()<.5 + if flip: + image = image.transpose(Image.FLIP_LEFT_RIGHT) + label = label.transpose(Image.FLIP_LEFT_RIGHT) + + # place image + dx = int(rand(0, w-nw)) + dy = int(rand(0, h-nh)) + new_image = Image.new('RGB', (w,h), (128,128,128)) + new_label = Image.new('L', (w,h), (0)) + new_image.paste(image, (dx, dy)) + new_label.paste(label, (dx, dy)) + image = new_image + label = new_label + + # distort image + hue = rand(-hue, hue) + sat = rand(1, sat) if rand()<.5 else 1/rand(1, sat) + val = rand(1, val) if rand()<.5 else 1/rand(1, val) + x = 
cv2.cvtColor(np.array(image,np.float32)/255, cv2.COLOR_RGB2HSV) + x[..., 0] += hue*360 + x[..., 0][x[..., 0]>1] -= 1 + x[..., 0][x[..., 0]<0] += 1 + x[..., 1] *= sat + x[..., 2] *= val + x[x[:,:, 0]>360, 0] = 360 + x[:, :, 1:][x[:, :, 1:]>1] = 1 + x[x<0] = 0 + image_data = cv2.cvtColor(x, cv2.COLOR_HSV2RGB)*255 + return image_data,label + + + def __getitem__(self, index): + if index == 0: + shuffle(self.train_lines) + + annotation_line = self.train_lines[index] + name = annotation_line.split()[0] + # 从文件中读取图像 + jpg = Image.open(r"./VOCdevkit/VOC2007/JPEGImages" + '/' + name + ".jpg") + png = Image.open(r"./VOCdevkit/VOC2007/SegmentationClass" + '/' + name + ".png") + + if self.random_data: + jpg, png = self.get_random_data(jpg,png,(int(self.image_size[1]),int(self.image_size[0]))) + else: + jpg, png = letterbox_image(jpg, png, (int(self.image_size[1]),int(self.image_size[0]))) + + # 从文件中读取图像 + png = np.array(png) + png[png >= self.num_classes] = self.num_classes + + # 转化成one_hot的形式 + seg_labels = np.eye(self.num_classes+1)[png.reshape([-1])] + seg_labels = seg_labels.reshape((int(self.image_size[1]),int(self.image_size[0]),self.num_classes+1)) + jpg = np.transpose(np.array(jpg),[2,0,1])/255 + + return jpg, png, seg_labels + + +# DataLoader中collate_fn使用 +def pspnet_dataset_collate(batch): + images = [] + pngs = [] + seg_labels = [] + for img, png, labels in batch: + images.append(img) + pngs.append(png) + seg_labels.append(labels) + images = np.array(images) + pngs = np.array(pngs) + seg_labels = np.array(seg_labels) + return images, pngs, seg_labels \ No newline at end of file diff --git a/utils/metrics.py b/utils/metrics.py new file mode 100644 index 0000000..f9fa969 --- /dev/null +++ b/utils/metrics.py @@ -0,0 +1,23 @@ +import torch +import torch.nn.functional as F + +def f_score(inputs, target, beta=1, smooth = 1e-5, threhold = 0.5): + n, c, h, w = inputs.size() + nt, ht, wt, ct = target.size() + + if h != ht and w != wt: + inputs = F.interpolate(inputs, 
size=(ht, wt), mode="bilinear", align_corners=True) + temp_inputs = torch.softmax(inputs.transpose(1, 2).transpose(2, 3).contiguous().view(n, -1, c),-1) + temp_target = target.view(n, -1, ct) + + #--------------------------------------------# + # 计算dice系数 + #--------------------------------------------# + temp_inputs = torch.gt(temp_inputs, threhold).float() + tp = torch.sum(temp_target[...,:-1] * temp_inputs, axis=[0,1]) + fp = torch.sum(temp_inputs , axis=[0,1]) - tp + fn = torch.sum(temp_target[...,:-1] , axis=[0,1]) - tp + + score = ((1 + beta ** 2) * tp + smooth) / ((1 + beta ** 2) * tp + beta ** 2 * fn + fp + smooth) + score = torch.mean(score) + return score \ No newline at end of file diff --git a/video.py b/video.py new file mode 100644 index 0000000..a29f71e --- /dev/null +++ b/video.py @@ -0,0 +1,39 @@ +#-------------------------------------# +# 调用摄像头检测 +#-------------------------------------# +from pspnet import PSPNet +from PIL import Image +import numpy as np +import cv2 +import time + +pspnet = PSPNet() +# 调用摄像头 +capture=cv2.VideoCapture(0) # capture=cv2.VideoCapture("1.mp4") + +fps = 0.0 +while(True): + t1 = time.time() + # 读取某一帧 + ref,frame=capture.read() + # 格式转变,BGRtoRGB + frame = cv2.cvtColor(frame,cv2.COLOR_BGR2RGB) + # 转变成Image + frame = Image.fromarray(np.uint8(frame)) + + # 进行检测 + frame = np.array(pspnet.detect_image(frame)) + + # RGBtoBGR满足opencv显示格式 + frame = cv2.cvtColor(frame,cv2.COLOR_RGB2BGR) + + fps = ( fps + (1./(time.time()-t1)) ) / 2 + print("fps= %.2f"%(fps)) + frame = cv2.putText(frame, "fps= %.2f"%(fps), (0, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2) + + cv2.imshow("video",frame) + + c= cv2.waitKey(30) & 0xff + if c==27: + capture.release() + break