本文主要讲解基于mxnet深度学习框架实现语义分割,鉴于之前写chainer的麻烦,本结构代码也类似chainer的目标检测框架,各个模型只需要修改网络结构即可,本次直接一篇博文写完语义分割框架及网络结构的搭建,让志同道合者不需要切换文章。
本次多模型实现语义分割,可按实际情况进行选择
环境配置:
python 3.8
mxnet 1.7.0
cuda 10.1
图像语义分割,它是将整个图像分成一个个像素组,然后对其进行标记和分类。特别地,语义分割试图在语义上理解图像中每个像素的角色。
图像语义分割可以说是图像理解的基石性技术,在自动驾驶系统中举足轻重。众所周知,图像是由一个个像素(Pixel)组成的,而语义分割就是按照图像中表达语义含义的不同对像素进行分组(Grouping)/分割(Segmentation)。语义图像分割就是将每个像素都标注上其对应的类别。需要注意的是,这里并不单独区分同一类别的不同个体,而是仅仅关心该像素属于哪个类别。
分割任务对于许多任务都非常有用,比如自动驾驶汽车(为了使自动驾驶汽车能够适应现存道路,其需要具有对周围环境的感知能力);医疗图像判断(可以通过机器辅助放射治疗师的分析,从而加速放射检查)
语义分割数据标注主要使用labelme工具,python安装只需要:pip install labelme 即可,然后在命令提示符输入:labelme即可,如图:
在这里只需要修改“OpenDir”,“OpenDir”主要是存放需要标注的图片的路径
选择好路径之后即可开始绘制:
我在平时标注的时候快捷键一般只用到:
createpolygons:(ctrl+N)开始绘制
a:上一张
d:下一张
绘制过程如图:
就只需要一次把目标绘制完成即可。
本语义分割框架目录结构如下:
core:此目录主要保存标准的py文件,功能如语义分割评估算法等计算
data:此目录主要保存标准py文件,功能如数据加载器,迭代器等
nets:快速生成pyd文件
utils:整个项目的一些预处理文件
Ctu_Segementation.py:语义分割主入口
import os, math,json, random,cv2, time,sys,warnings
import numpy as np
from tqdm import tqdm
from PIL import Image
from functools import partial
import mxnet as mx
from mxnet import gluon, autograd, ndarray as nd
from mxnet.gluon.data.vision import transforms
from data.data_loader import VOCSegmentation
from nets.fcn import get_fcn_Net
from nets.pspnet import get_psp_Net
from nets.deeplabv3 import get_deeplabv3_Net
from nets.deeplabv3_plus import get_deeplabv3_plus_Net
from nets.deeplabv3b_plus import get_deeplabv3b_plus_Net
from nets.fastscnn import get_fastscnn_Net
from nets.icnet import get_icnet_Net
from nets.danet import get_danet_Net
from nets.backbone.resnest import set_drop_prob
from nets.segbase import SegEvalModel
from core.softdog import CheckSoft
from core.loss import ICNetLoss, SegmentationMultiLosses, MixSoftmaxCrossEntropyLoss
from utils.parallel import DataParallelModel, DataParallelCriterion
from utils.lr_scheduler import LRScheduler, LRSequential
from utils.metrics import SegmentationMetric
# Pick the compute contexts from the USEGPU string ('-1' means CPU only,
# otherwise a comma-separated list of GPU ids).
if USEGPU == '-1':
    self.kvstore = 'local'
    self.ctx = [mx.cpu(0)]
    self.USEGPU = 0
    # Hide every CUDA device from the process in CPU mode.
    os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
else:
    # NOTE(review): only the *number* of ids in USEGPU is used here; the
    # contexts are always gpu(0) .. gpu(n-1) regardless of the ids listed.
    gpu_count = len(USEGPU.split(','))
    self.kvstore = 'device'
    self.ctx = [mx.gpu(i) for i in range(gpu_count)]
    self.USEGPU = gpu_count

# Input pipeline applied to every image: tensor conversion followed by
# per-channel mean/std normalisation.
self.input_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize([.485, .456, .406], [.229, .224, .225]),
])
def CreateDataList(self, DataDir):
    """Collect labelme annotation files and derive the class list from them.

    Args:
        DataDir: Directory that directly contains the ``*.json`` annotation
            files (sub-directories are not visited).

    Returns:
        A ``(DataList, All_label_name, voc_colormap)`` tuple where

        * ``DataList`` is the list of annotation file paths, sorted by name,
        * ``All_label_name`` starts with ``'_background_'`` followed by every
          label in order of first appearance,
        * ``voc_colormap`` holds one ``[i, i, i]`` entry per class, ``i``
          being the class index.

    Raises:
        OSError: If ``DataDir`` or one of the files cannot be read.
        KeyError: If an annotation has no ``'shapes'`` / ``'label'`` entry.
    """
    # Sorted so that class indices do not depend on the OS directory order,
    # and matched on the real suffix so that e.g. "x.json.bak" is ignored.
    DataList = [
        os.path.join(DataDir, file_name)
        for file_name in sorted(os.listdir(DataDir))
        if file_name.endswith('.json')
    ]

    All_label_name = ['_background_']
    voc_colormap = [[0, 0, 0]]
    for each_json in DataList:
        # Context manager closes the handle even when parsing fails.
        with open(each_json, encoding='utf-8') as f_json:
            data = json.load(f_json)
        for shape in sorted(data['shapes'], key=lambda x: x['label']):
            label_name = shape['label']
            if label_name not in All_label_name:
                All_label_name.append(label_name)
                class_index = len(voc_colormap)
                voc_colormap.append([class_index, class_index, class_index])
    return DataList, All_label_name, voc_colormap
def CreateDataList2(self, ImgDir, LabDir, ClassTxt,
                    ext_file=('bmp', 'BMP', 'jpg', 'JPG', 'png', 'PNG', 'jpeg', 'JPEG')):
    """Pair images with label masks by file stem and read the class list.

    Args:
        ImgDir: Directory holding the input images.
        LabDir: Directory holding the label images.
        ClassTxt: Text file with one class name per line; blank lines are
            skipped and surrounding whitespace is stripped.
        ext_file: Accepted file extensions (text after the last ``.``).

    Returns:
        A ``(DataList, All_label_name, voc_colormap)`` tuple where
        ``DataList`` is a list of ``[image_path, label_path]`` pairs sorted by
        image name, ``All_label_name`` starts with ``'_background_'`` and
        ``voc_colormap`` holds one ``[i, i, i]`` entry per class index ``i``.
        Images without a label of the same stem are left out.
    """
    def _accepted(file_name):
        return file_name.split('.')[-1] in ext_file

    def _stem(file_name):
        # Same stem rule as before: everything up to the first dot.
        return file_name.split('.')[0]

    # Index the labels once instead of rescanning the list for every image;
    # the first label (in sorted order) wins when several share a stem.
    label_by_stem = {}
    for each_lab in sorted(os.listdir(LabDir)):
        if _accepted(each_lab):
            label_by_stem.setdefault(_stem(each_lab), each_lab)

    DataList = []
    for each_img in sorted(os.listdir(ImgDir)):
        if not _accepted(each_img):
            continue
        each_lab = label_by_stem.get(_stem(each_img))
        if each_lab is not None:
            DataList.append([os.path.join(ImgDir, each_img), os.path.join(LabDir, each_lab)])

    All_label_name = ['_background_']
    voc_colormap = [[0, 0, 0]]
    with open(ClassTxt, 'r') as f_read:
        for each_class in f_read:
            c = each_class.strip()
            if c != '':
                All_label_name.append(c)
                class_index = len(voc_colormap)
                voc_colormap.append([class_index, class_index, class_index])
    return DataList, All_label_name, voc_colormap
# Training loader: batches of `batch_size` samples drawn from `trainset` in
# shuffled order, loaded by `num_workers` worker processes.
self.train_data = gluon.data.DataLoader(trainset, batch_size, shuffle=True, last_batch='rollover', num_workers=num_workers)
# Validation loader: same batch size and last-batch policy, but no shuffle
# argument is passed.
# NOTE(review): with last_batch='rollover' an incomplete final batch is not
# returned in the current epoch — confirm this is intended for evaluation.
self.eval_data = gluon.data.DataLoader(valset, batch_size, last_batch='rollover', num_workers=num_workers)
本次模型拥有多个形式,如下字典:
# Registry mapping the user-facing network name to its factory function.
# Only the 'icnet' entry is disabled; it is kept on its own line so that the
# comment marker does not swallow the 'fastscnn' and 'danet' entries.
self.networks = {
    'fcn': get_fcn_Net,
    'pspnet': get_psp_Net,
    'deeplabv3': get_deeplabv3_Net,
    'deeplabv3_plus': get_deeplabv3_plus_Net,
    'deeplabv3b_plus': get_deeplabv3b_plus_Net,
    # 'icnet': get_icnet_Net,
    'fastscnn': get_fastscnn_Net,
    'danet': get_danet_Net,
}
# Build the selected network on the first context; every factory receives
# the same positional and keyword arguments.
self.model = self.networks[self.network](
    self.backbone, self.classes_names,
    norm_layer=self.norm_layer, norm_kwargs=self.norm_kwargs, aux=self.aux,
    base_size=self.base_size, crop_size=self.image_size,
    ctx=self.ctx[0], alpha=self.alpha,
)
class FCN(SegBaseModel):
    """FCN segmentation model: an ``_FCNHead`` on top of the backbone features.

    Args:
        nclass: Number of classes, forwarded to the heads.
        backbone: Backbone name; 'resnet18' / 'resnet34' use 512 head input
            channels, every other name 2048 (before division by ``alpha``).
        aux: Forwarded to ``SegBaseModel``; when ``self.aux`` is true an
            auxiliary ``_FCNHead`` is attached to the ``c3`` feature map.
        ctx: Context on which the heads are initialised.
        base_size, crop_size: Forwarded to ``SegBaseModel``.
        alpha: Integer divisor applied to the channel widths.
        **kwargs: Forwarded to ``SegBaseModel`` and to the heads.
    """

    def __init__(self, nclass, backbone='resnet50', aux=True, ctx=cpu(), base_size=520, crop_size=480,
                 alpha=1, **kwargs):
        self.alpha = alpha
        super(FCN, self).__init__(nclass, aux, backbone, ctx=ctx, base_size=base_size,
                                  crop_size=crop_size, alpha=self.alpha, **kwargs)
        with self.name_scope():
            base_channels = 512 if backbone in ('resnet18', 'resnet34') else 2048
            in_channels = base_channels // self.alpha
            self.head = _FCNHead(in_channels, nclass, **kwargs)  # 2048
            self.head.initialize(ctx=ctx)
            # The freshly initialised heads train with a 10x learning rate.
            self.head.collect_params().setattr('lr_mult', 10)
            if self.aux:
                self.auxlayer = _FCNHead(1024 // self.alpha, nclass, **kwargs)  # 1024
                self.auxlayer.initialize(ctx=ctx)
                self.auxlayer.collect_params().setattr('lr_mult', 10)

    def hybrid_forward(self, F, x):
        """Return ``(main,)`` or ``(main, aux)`` score maps resized with ``_up_kwargs``."""
        c3, c4 = self.base_forward(x)
        main_out = F.contrib.BilinearResize2D(self.head(c4), **self._up_kwargs)
        if not self.aux:
            return (main_out,)
        aux_out = F.contrib.BilinearResize2D(self.auxlayer(c3), **self._up_kwargs)
        return (main_out, aux_out)
class PSPNet(SegBaseModel):
    """PSPNet segmentation model: a ``_PSPHead`` on top of the backbone features.

    Args:
        nclass: Number of classes, forwarded to the heads.
        backbone: Backbone name; 'resnet18' / 'resnet34' use 512 head input
            channels, every other name 2048 (before division by ``alpha``).
        aux: Forwarded to ``SegBaseModel``; when ``self.aux`` is true an
            auxiliary ``_FCNHead`` is attached to the ``c3`` feature map.
        ctx: Context on which the heads are initialised.
        base_size, crop_size: Forwarded to ``SegBaseModel``.
        alpha: Integer divisor applied to the channel widths.
        **kwargs: Forwarded to ``SegBaseModel`` and to the heads.
    """

    def __init__(self, nclass, backbone='resnet50', aux=True, ctx=cpu(), base_size=520, crop_size=480,
                 alpha=1, **kwargs):
        self.alpha = alpha
        super(PSPNet, self).__init__(nclass, aux, backbone, ctx=ctx, base_size=base_size,
                                     crop_size=crop_size, alpha=self.alpha, **kwargs)
        with self.name_scope():
            base_channels = 512 if backbone in ('resnet18', 'resnet34') else 2048
            in_channels = base_channels // self.alpha
            # The head works on a feature map 1/8 the size of the output.
            self.head = _PSPHead(in_channels, nclass,
                                 feature_map_height=self._up_kwargs['height'] // 8,
                                 feature_map_width=self._up_kwargs['width'] // 8,
                                 alpha=self.alpha, **kwargs)
            self.head.initialize(ctx=ctx)
            self.head.collect_params().setattr('lr_mult', 10)
            if self.aux:
                self.auxlayer = _FCNHead(1024 // self.alpha, nclass, **kwargs)
                self.auxlayer.initialize(ctx=ctx)
                self.auxlayer.collect_params().setattr('lr_mult', 10)

    def hybrid_forward(self, F, x):
        """Return ``(main,)`` or ``(main, aux)`` score maps resized with ``_up_kwargs``."""
        c3, c4 = self.base_forward(x)
        main_out = F.contrib.BilinearResize2D(self.head(c4), **self._up_kwargs)
        if not self.aux:
            return (main_out,)
        aux_out = F.contrib.BilinearResize2D(self.auxlayer(c3), **self._up_kwargs)
        return (main_out, aux_out)

    def demo(self, x):
        """Alias of :meth:`predict`."""
        return self.predict(x)

    def predict(self, x):
        """Run the main head on ``x`` and resize the scores to the input size.

        Overwrites ``self._up_kwargs`` with the height and width read from
        ``x.shape[2:]``.
        """
        self._up_kwargs['height'], self._up_kwargs['width'] = x.shape[2:]
        c3, c4 = self.base_forward(x)
        scores = self.head.demo(c4)
        import mxnet.ndarray as F
        return F.contrib.BilinearResize2D(scores, **self._up_kwargs)
class DeepLabV3(SegBaseModel):
    """DeepLabV3 segmentation model: a ``_DeepLabHead`` on the backbone features.

    Args:
        nclass: Number of classes, forwarded to the heads.
        backbone: Backbone name; 'resnet18' / 'resnet34' use 512 head input
            channels, every other name 2048 (before division by ``alpha``).
        aux: Forwarded to ``SegBaseModel``; when ``self.aux`` is true an
            auxiliary ``_FCNHead`` is attached to the ``c3`` feature map.
        ctx: Context on which the heads are initialised.
        height, width: Output size; each falls back to ``crop_size`` when None.
        base_size, crop_size: Forwarded to ``SegBaseModel``.
        alpha: Integer divisor applied to the channel widths.
        **kwargs: Forwarded to ``SegBaseModel`` and to the heads.
    """

    def __init__(self, nclass, backbone='resnet50', aux=True, ctx=cpu(), height=None, width=None,
                 base_size=520, crop_size=480, alpha=1, **kwargs):
        self.alpha = alpha
        super(DeepLabV3, self).__init__(nclass, aux, backbone, ctx=ctx, base_size=base_size,
                                        crop_size=crop_size, alpha=self.alpha, **kwargs)
        height = crop_size if height is None else height
        width = crop_size if width is None else width
        with self.name_scope():
            base_channels = 512 if backbone in ('resnet18', 'resnet34') else 2048
            in_channels = base_channels // self.alpha
            # The head works on a feature map 1/8 the size of the output.
            self.head = _DeepLabHead(in_channels, nclass, height=height // 8, width=width // 8,
                                     alpha=self.alpha, **kwargs)
            self.head.initialize(ctx=ctx)
            self.head.collect_params().setattr('lr_mult', 10)
            if self.aux:
                self.auxlayer = _FCNHead(1024 // self.alpha, nclass, **kwargs)
                self.auxlayer.initialize(ctx=ctx)
                self.auxlayer.collect_params().setattr('lr_mult', 10)
        self._up_kwargs = {'height': height, 'width': width}

    def hybrid_forward(self, F, x):
        """Return ``(main,)`` or ``(main, aux)`` score maps resized with ``_up_kwargs``."""
        c3, c4 = self.base_forward(x)
        main_out = F.contrib.BilinearResize2D(self.head(c4), **self._up_kwargs)
        if not self.aux:
            return (main_out,)
        aux_out = F.contrib.BilinearResize2D(self.auxlayer(c3), **self._up_kwargs)
        return (main_out, aux_out)

    def demo(self, x):
        """Alias of :meth:`predict`."""
        return self.predict(x)

    def predict(self, x):
        """Run the main head on ``x`` and resize the scores to the input size.

        Overwrites ``self._up_kwargs`` with the height and width read from
        ``x.shape[2:]``.
        """
        self._up_kwargs['height'], self._up_kwargs['width'] = x.shape[2:]
        c3, c4 = self.base_forward(x)
        scores = self.head.demo(c4)
        import mxnet.ndarray as F
        return F.contrib.BilinearResize2D(scores, **self._up_kwargs)
class DeepLabV3Plus(HybridBlock):
    """DeepLabV3+ segmentation model built on the layers of ``get_xcetption``.

    Args:
        nclass: Number of classes, forwarded to the backbone and the heads.
        backbone: Backbone name forwarded to ``get_xcetption``.
        aux: When true an auxiliary ``_FCNHead`` is attached to the
            middle-flow features.
        ctx: Context for the backbone and on which the heads are initialised.
        height, width: Output size; each falls back to ``crop_size`` when None.
        base_size, crop_size: Stored on the instance.
        dilated: Selects backbone output stride 8 (true) or 32 (false).
        alpha: Integer divisor applied to the channel widths.
        **kwargs: Forwarded to the backbone factory and the heads.
    """

    # Backbone layers copied onto this block, in registration order.
    _BACKBONE_LAYERS = (
        # base network
        'conv1', 'bn1', 'relu', 'conv2', 'bn2', 'block1', 'block2', 'block3',
        # Middle flow
        'midflow',
        # Exit flow
        'block20', 'conv3', 'bn3', 'conv4', 'bn4', 'conv5', 'bn5',
    )

    def __init__(self, nclass, backbone='xception65', aux=True, ctx=cpu(), height=None, width=None,
                 base_size=576, crop_size=512, dilated=True, alpha=1, **kwargs):
        super(DeepLabV3Plus, self).__init__()
        self.alpha = alpha
        self.aux = aux
        height = crop_size if height is None else height
        width = crop_size if width is None else width
        output_stride = 8 if dilated else 32
        with self.name_scope():
            pretrained = get_xcetption(num_classes=nclass, backbone=backbone,
                                       output_stride=output_stride, ctx=ctx,
                                       alpha=self.alpha, **kwargs)
            for layer_name in self._BACKBONE_LAYERS:
                setattr(self, layer_name, getattr(pretrained, layer_name))

            # deeplabv3 plus
            self.head = _DeepLabHead(nclass, height=height // 4, width=width // 4,
                                     alpha=self.alpha, **kwargs)
            self.head.initialize(ctx=ctx)
            self.head.collect_params().setattr('lr_mult', 10)
            if self.aux:
                self.auxlayer = _FCNHead(728 // self.alpha, nclass, **kwargs)
                self.auxlayer.initialize(ctx=ctx)
                self.auxlayer.collect_params().setattr('lr_mult', 10)
        self._up_kwargs = {'height': height, 'width': width}
        self.base_size = base_size
        self.crop_size = crop_size

    def base_forward(self, x):
        """Run the backbone; return ``(low_level_feat, mid_level_feat, x)``."""
        # Entry flow (an extra relu is applied after block1).
        for layer in (self.conv1, self.bn1, self.relu,
                      self.conv2, self.bn2, self.relu,
                      self.block1, self.relu):
            x = layer(x)
        low_level_feat = x
        # Rest of the entry flow, then the middle flow.
        for layer in (self.block2, self.block3, self.midflow):
            x = layer(x)
        mid_level_feat = x
        # Exit flow
        for layer in (self.block20, self.relu,
                      self.conv3, self.bn3, self.relu,
                      self.conv4, self.bn4, self.relu,
                      self.conv5, self.bn5, self.relu):
            x = layer(x)
        return low_level_feat, mid_level_feat, x

    def hybrid_forward(self, F, x):
        """Return ``(main,)`` or ``(main, aux)`` score maps resized with ``_up_kwargs``."""
        c1, c3, c4 = self.base_forward(x)
        main_out = F.contrib.BilinearResize2D(self.head(c4, c1), **self._up_kwargs)
        if not self.aux:
            return (main_out,)
        aux_out = F.contrib.BilinearResize2D(self.auxlayer(c3), **self._up_kwargs)
        return (main_out, aux_out)

    def demo(self, x):
        """Forward ``x`` after re-targeting all resize sizes to the input size.

        NOTE(review): only when ``self.aux`` is true is the first output
        unpacked; otherwise the tuple produced by ``forward`` is returned as is.
        """
        h, w = x.shape[2:]
        self._up_kwargs['height'] = h
        self._up_kwargs['width'] = w
        last_aspp_branch = self.head.aspp.concurent[-1]
        last_aspp_branch._up_kwargs['height'] = h // 8
        last_aspp_branch._up_kwargs['width'] = w // 8
        pred = self.forward(x)
        return pred[0] if self.aux else pred

    def evaluate(self, x):
        """Return the first output of ``forward``."""
        return self.forward(x)[0]
class ICNet(SegBaseModel):
    """ICNet segmentation model: three resolution branches fused by ``_ICHead``.

    Args:
        nclass: Number of classes, forwarded to the heads.
        backbone: Backbone name; 'resnet18' / 'resnet34' use 512 base
            channels, every other name 2048 (before division by ``alpha``).
        aux: Forwarded to ``SegBaseModel``.
        ctx: Context on which the added layers are initialised.
        height, width: Output size; each falls back to ``crop_size`` when None.
        base_size, crop_size: Forwarded to ``SegBaseModel`` and stored.
        lr_mult: Learning-rate multiplier set on every added layer.
        alpha: Integer divisor applied to the channel widths.
        **kwargs: Forwarded to ``SegBaseModel`` and to the added layers.
    """

    def __init__(self, nclass, backbone='resnet50', aux=False, ctx=cpu(), height=None, width=None,
                 base_size=520, crop_size=480, lr_mult=10, alpha=1, **kwargs):
        self.alpha = alpha
        super(ICNet, self).__init__(nclass, aux=aux, backbone=backbone, ctx=ctx, base_size=base_size,
                                    crop_size=crop_size, alpha=self.alpha, **kwargs)
        height = crop_size if height is None else height
        width = crop_size if width is None else width
        self._up_kwargs = {'height': height, 'width': width}
        self.base_size = base_size
        self.crop_size = crop_size

        def _prepare(block):
            # Initialise a newly added layer and apply the shared lr multiplier.
            block.initialize(ctx=ctx)
            block.collect_params().setattr('lr_mult', lr_mult)

        with self.name_scope():
            # Full-resolution branch: three stride-2 ConvBnRelu layers.
            self.conv_sub1 = nn.HybridSequential()
            with self.conv_sub1.name_scope():
                self.conv_sub1.add(
                    ConvBnRelu(3, 32 // self.alpha, 3, 2, 1, **kwargs),
                    ConvBnRelu(32 // self.alpha, 32 // self.alpha, 3, 2, 1, **kwargs),
                    ConvBnRelu(32 // self.alpha, 64 // self.alpha, 3, 2, 1, **kwargs),
                )
            _prepare(self.conv_sub1)

            base_channels = 512 if backbone in ('resnet18', 'resnet34') else 2048
            in_channels = base_channels // self.alpha

            self.psp_head = _PSPHead(in_channels, nclass,
                                     feature_map_height=self._up_kwargs['height'] // 32,
                                     feature_map_width=self._up_kwargs['width'] // 32,
                                     alpha=self.alpha, **kwargs)
            # Drop the last layer of the PSP head's block.
            self.psp_head.block = self.psp_head.block[:-1]
            _prepare(self.psp_head)

            self.head = _ICHead(nclass=nclass,
                                height=self._up_kwargs['height'],
                                width=self._up_kwargs['width'],
                                alpha=self.alpha, **kwargs)
            _prepare(self.head)

            # 1x1 reductions applied before fusing the branches.
            self.conv_sub4 = ConvBnRelu(512 // self.alpha, 256 // self.alpha, 1, **kwargs)
            _prepare(self.conv_sub4)

            self.conv_sub2 = ConvBnRelu(in_channels // 4, 256 // self.alpha, 1, **kwargs)
            _prepare(self.conv_sub2)

    def _branch_features(self, F, x):
        """Shared trunk of ``hybrid_forward`` and ``predict``.

        Returns ``(x_sub1_out, x_sub2_out, x_layer4)``: the full-resolution
        branch output, the ``layer2`` output of the half-resolution input and
        the ``layer4`` output computed from the 1/32-size resize of it.
        """
        x_sub1_out = self.conv_sub1(x)

        x_sub2 = F.contrib.BilinearResize2D(x, height=self._up_kwargs['height'] // 2,
                                            width=self._up_kwargs['width'] // 2)
        x = self.conv1(x_sub2)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)
        x = self.layer1(x)
        x_sub2_out = self.layer2(x)

        x_sub4 = F.contrib.BilinearResize2D(x_sub2_out, height=self._up_kwargs['height'] // 32,
                                            width=self._up_kwargs['width'] // 32)
        x = self.layer3(x_sub4)
        x = self.layer4(x)
        return x_sub1_out, x_sub2_out, x

    def hybrid_forward(self, F, x):
        """Return the output of ``_ICHead`` for the three branch features."""
        x_sub1_out, x_sub2_out, x = self._branch_features(F, x)
        x_sub4_out = self.psp_head(x)
        x_sub4_out = self.conv_sub4(x_sub4_out)
        x_sub2_out = self.conv_sub2(x_sub2_out)
        return self.head(x_sub1_out, x_sub2_out, x_sub4_out)

    def demo(self, x):
        """Alias of :meth:`predict`."""
        return self.predict(x)

    def predict(self, x):
        """Run the ``demo`` variants of the heads and return their first output.

        Overwrites ``self._up_kwargs`` with the height and width read from
        ``x.shape[2:]``.
        """
        h, w = x.shape[2:]
        self._up_kwargs['height'] = h
        self._up_kwargs['width'] = w

        import mxnet.ndarray as F
        x_sub1_out, x_sub2_out, x = self._branch_features(F, x)
        x_sub4_out = self.psp_head.demo(x)
        x_sub4_out = self.conv_sub4(x_sub4_out)
        x_sub2_out = self.conv_sub2(x_sub2_out)
        res = self.head.demo(x_sub1_out, x_sub2_out, x_sub4_out)
        return res[0]
class FastSCNN(HybridBlock):
    """Fast-SCNN segmentation model.

    The network chains ``LearningToDownsample``, ``GlobalFeatureExtractor``,
    ``FeatureFusionModule`` and ``Classifer``; an optional ``_auxHead`` works
    on the downsampled features.

    Args:
        nclass: Number of classes, forwarded to the classifier and aux head.
        aux: When true the auxiliary head is created and its output returned.
        ctx: Context on which every sub-module is initialised.
        height, width: Output size; each falls back to ``crop_size`` when None.
        base_size, crop_size: Stored on the instance.
        alpha: Integer divisor applied to the channel widths.
        **kwargs: Forwarded to every sub-module.
    """

    def __init__(self, nclass, aux=True, ctx=cpu(), height=None, width=None, base_size=2048,
                 crop_size=1024, alpha=1, **kwargs):
        super(FastSCNN, self).__init__()
        self.alpha = alpha
        height = crop_size if height is None else height
        width = crop_size if width is None else width
        self._up_kwargs = {'height': height, 'width': width}
        self.base_size = base_size
        self.crop_size = crop_size
        self.aux = aux

        def scaled(channels):
            # Channel width after applying the alpha divisor.
            return channels // self.alpha

        with self.name_scope():
            self.learning_to_downsample = LearningToDownsample(
                scaled(32), scaled(48), scaled(64), **kwargs)
            self.learning_to_downsample.initialize(ctx=ctx)

            self.global_feature_extractor = GlobalFeatureExtractor(
                scaled(64), [scaled(64), scaled(96), scaled(128)], scaled(128), 6, [3, 3, 3],
                height=height // 32, width=width // 32, **kwargs)
            self.global_feature_extractor.initialize(ctx=ctx)

            self.feature_fusion = FeatureFusionModule(
                scaled(64), scaled(128), scaled(128),
                height=height // 8, width=width // 8, **kwargs)
            self.feature_fusion.initialize(ctx=ctx)

            self.classifier = Classifer(scaled(128), nclass, **kwargs)
            self.classifier.initialize(ctx=ctx)

            if self.aux:
                self.auxlayer = _auxHead(in_channels=scaled(64), channels=scaled(64),
                                         nclass=nclass, **kwargs)
                self.auxlayer.initialize(ctx=ctx)
                self.auxlayer.collect_params().setattr('lr_mult', 10)

    def hybrid_forward(self, F, x):
        """Return ``(main,)`` or ``(main, aux)`` score maps resized with ``_up_kwargs``."""
        higher_res_features = self.learning_to_downsample(x)
        x = self.global_feature_extractor(higher_res_features)
        x = self.feature_fusion(higher_res_features, x)
        x = self.classifier(x)
        main_out = F.contrib.BilinearResize2D(x, **self._up_kwargs)
        if not self.aux:
            return (main_out,)
        aux_out = F.contrib.BilinearResize2D(self.auxlayer(higher_res_features), **self._up_kwargs)
        return (main_out, aux_out)

    def demo(self, x):
        """Run the main path on ``x`` and resize the scores to the input size.

        Re-targets ``self._up_kwargs`` and the resize sizes of the pyramid
        pooling and feature-fusion modules to the size read from ``x.shape[2:]``.
        """
        h, w = x.shape[2:]
        self._up_kwargs['height'] = h
        self._up_kwargs['width'] = w
        self.global_feature_extractor.ppm._up_kwargs = {'height': h // 32, 'width': w // 32}
        self.feature_fusion._up_kwargs = {'height': h // 8, 'width': w // 8}

        higher_res_features = self.learning_to_downsample(x)
        x = self.global_feature_extractor(higher_res_features)
        x = self.feature_fusion(higher_res_features, x)
        x = self.classifier(x)
        import mxnet.ndarray as F
        return F.contrib.BilinearResize2D(x, **self._up_kwargs)

    def predict(self, x):
        """Alias of :meth:`demo`."""
        return self.demo(x)

    def evaluate(self, x):
        """Return the first output of ``forward``."""
        return self.forward(x)[0]
class DANet(SegBaseModel):
    """DANet segmentation model: a ``DANetHead`` on the last backbone stage.

    Args:
        nclass: Number of classes, forwarded to the head.
        backbone: Backbone name; 'resnet18' / 'resnet34' use 512 head input
            channels, every other name 2048 (before division by ``alpha``).
        aux: Forwarded to ``SegBaseModel`` and stored on the instance.
        ctx: Context on which the head is initialised.
        height, width: Output size; each falls back to ``crop_size`` when None.
        base_size, crop_size: Forwarded to ``SegBaseModel``.
        dilated: Accepted for interface compatibility; not used in this class.
        alpha: Integer divisor applied to the channel widths.
        **kwargs: Forwarded to ``SegBaseModel`` and to the head.
    """

    def __init__(self, nclass, backbone='resnet50', aux=False, ctx=cpu(), height=None, width=None,
                 base_size=520, crop_size=480, dilated=True, alpha=1, **kwargs):
        self.alpha = alpha
        super(DANet, self).__init__(nclass, aux, backbone, ctx=ctx, base_size=base_size,
                                    crop_size=crop_size, alpha=self.alpha, **kwargs)
        self.aux = aux
        height = crop_size if height is None else height
        width = crop_size if width is None else width

        base_channels = 512 if backbone in ('resnet18', 'resnet34') else 2048
        in_channels = base_channels // self.alpha

        with self.name_scope():
            self.head = DANetHead(in_channels, nclass, backbone, alpha=self.alpha, **kwargs)
            self.head.initialize(ctx=ctx)
        self._up_kwargs = {'height': height, 'width': width}

    def hybrid_forward(self, F, x):
        """Return the first three head outputs, each resized with ``_up_kwargs``."""
        c3, c4 = self.base_forward(x)
        head_outputs = list(self.head(c4))
        return tuple(
            F.contrib.BilinearResize2D(head_outputs[index], **self._up_kwargs)
            for index in range(3)
        )
# Learning-rate schedule: a linear warm-up stage of zero epochs followed by
# polynomial decay (power 0.9) over all TrainNum epochs.
self.lr_scheduler = LRSequential([
    LRScheduler('linear', base_lr=0, target_lr=learning_rate,
                nepochs=0, iters_per_epoch=len(self.train_data)),
    LRScheduler(mode='poly', base_lr=learning_rate,
                nepochs=TrainNum - 0,
                iters_per_epoch=len(self.train_data),
                power=0.9),
])

# Settings shared by every optimizer; momentum is added for SGD only.
optimizer_params = {
    'lr_scheduler': self.lr_scheduler,
    'wd': 1e-4,
    'learning_rate': learning_rate,
}
if optim == 'sgd':
    optimizer_params['momentum'] = 0.9
if self.dtype == 'float16':
    optimizer_params['multi_precision'] = True

self.optimizer = gluon.Trainer(self.net.module.collect_params(), optim, optimizer_params, kvstore=kv)
# One pass over the training batches wrapped by `tbar`.
for i, (data, target) in enumerate(tbar):
    # External stop request: leave the epoch early.
    if self.TrainWhileFlag == False:
        break

    with autograd.record(True):
        outputs = self.net(data.astype(self.dtype, copy=False))
        losses = self.criterion(outputs, target)
        mx.nd.waitall()
        autograd.backward(losses)
    self.optimizer.step(self.batch_size)

    # Accumulate the mean loss, averaged over the entries of `losses`.
    for loss in losses:
        train_loss += np.mean(loss.asnumpy()) / len(losses)
    running_loss = train_loss / (i + 1)
    tbar.set_description('迭代:%d:%d -> 训练损失值:%.5f' % (epoch+1,TrainNum, running_loss))
    mx.nd.waitall()

    # Publish progress through the shared status dictionary.
    self.status_Data['train_loss'] = running_loss
    self.status_Data['train_progress'] = (epoch*len(self.train_data) + i + 1)/(len(self.train_data)*TrainNum)
# NOTE(review): placed after the loop because the statement stands on its own
# line in the article; confirm it was not meant to run once per batch.
print('迭代:%d:%04d/%04d -> 训练损失值:%.5f' % (epoch, i, len(self.train_data), train_loss/(i+1)))
def predict(self,img_cv):
    """Segment one BGR image and return masks, an overlay and class polygons.

    Args:
        img_cv: Image array in OpenCV BGR channel order. Contours are drawn
            onto it with ``cv2.drawContours``.

    Returns:
        ``None`` while ``self.status_Data['can_test']`` is false, otherwise a
        dict with the keys ``classes_names``, ``image_result``, ``colormap``,
        ``image_result_label``, ``polygon``, ``polygon_img``, ``img_add`` and
        ``time`` (elapsed milliseconds).
    """
    if self.status_Data['can_test'] == False:
        return None

    start_time = time.time()
    src_height, src_width = img_cv.shape[0], img_cv.shape[1]

    # Pre-processing: square resize, BGR -> RGB, tensor + normalisation.
    resized = cv2.resize(img_cv, (self.image_size, self.image_size))
    rgb = Image.fromarray(cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)).convert('RGB')
    data = self.input_transform(mx.ndarray.array(np.array(rgb), self.ctx[0]))
    data = data.as_in_context(self.ctx[0])
    if len(data.shape) < 4:
        # Add the batch axis when the transform returned a single sample.
        data = nd.expand_dims(data, axis=0)
    data = data.astype(self.dtype, copy=False)

    # Inference: per-pixel class index from the first model output.
    scores = self.model(data)[0]
    target = nd.argmax(scores, axis=1)

    # Render the colour mask and the label mask, then scale both back to the
    # source resolution.
    color_mask = self.predict2img(target, self.colormap)
    label_mask = self.predict2img(target, self.colormap_label)
    image_result = cv2.resize(color_mask, (src_width, src_height))
    label_mask = cv2.resize(label_mask, (src_width, src_height))
    img_add = cv2.addWeighted(img_cv, 1.0, image_result, 0.5, 0)
    predict_reslabel = cv2.cvtColor(label_mask, cv2.COLOR_BGR2GRAY)

    DataJson = {}
    for class_offset, class_name in enumerate(self.classes_names[1:]):
        polygons = []
        DataJson[class_name] = polygons
        # Two thresholds turn pixels equal to class_offset + 1 into 0 and
        # every other pixel into 255.
        _, thresh = cv2.threshold(predict_reslabel, class_offset + 1, 255, cv2.THRESH_TOZERO_INV)
        _, thresh = cv2.threshold(thresh, class_offset, 255, cv2.THRESH_BINARY_INV)
        contours, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)
        # The first contour is skipped.
        # NOTE(review): presumably it is the outline of the inverted
        # background — confirm.
        for contour in contours[1:]:
            img_cv = cv2.drawContours(img_cv, [contour], -1, 0, 2)
            polygons.append([[point[0][0], point[0][1]] for point in contour])

    return {
        "classes_names": self.classes_names,
        "image_result": image_result,
        "colormap": self.colormap,
        "image_result_label": predict_reslabel,
        "polygon": DataJson,
        "polygon_img": img_cv,
        "img_add": img_add,
        "time": (time.time() - start_time) * 1000,
    }
这里可以根据多个模型自定义选择,调用和修改极其方便
主入口代码:
if __name__ == '__main__':
    # Training example (disabled):
    # ctu = Ctu_Segmentation(USEGPU = '0', image_size = 512, aux=True)
    # ctu.InitModel(DataDir=r'E:\DL_Project\DataSet\DataSet_Segmentation\DataSet_YaoPian\DataImage', DataLabel = None,batch_size=2,num_workers = 0, Pre_Model= None,network='fcn',backbone='resnet18',dtype='float32',alpha=1)
    # ctu.train(TrainNum=120, learning_rate=0.0001,optim = 'adam', ModelPath='./Model_LSEC')

    # Inference demo: load a trained model and show the overlay for every
    # readable image below the data directory.
    ctu = Ctu_Segmentation(USEGPU = '0')
    ctu.LoadModel('./Model_LSEC_fcn')

    cv2.namedWindow("result", 0)
    cv2.resizeWindow("result", 640, 480)

    image_root = r'E:\DL_Project\DataSet\DataSet_Segmentation\DataSet_YaoPian\DataImage'
    for root, dirs, files in os.walk(image_root):
        for f in files:
            image_path = os.path.join(root, f)
            img_cv = ctu.read_image(image_path)
            if img_cv is None:
                continue
            res = ctu.predict(img_cv)
            if res is None:
                continue
            print(image_path)
            print("耗时:" + str(res['time']) + ' ms')
            # print(res['polygon'])
            cv2.imshow("result", res['img_add'])
            # Wait for a key press before moving on to the next image.
            cv2.waitKey()
因已训练模型文件已经删除(在模型测试时占用过多空间),后续有机会会上传模型训练结果,此处为模型训练过程,可观察到逐步收敛
训练生成文件