








def center_crop(img_array, crop_size=-1, resize=-1, write_path=None):
    """Crop a centered square from an image and optionally resize / save it.

    Args:
        img_array: image array indexed as [row, col, ...].
        crop_size: side length of the square crop. Any non-positive value
            (default: -1) or a value larger than the shorter image side
            falls back to min(height, width), so the crop is always square.
        resize: side length the crop is resized to (default: -1, keep the
            cropped size).
        write_path: path the result is written to (default: None, do not
            write to the disk).

    Returns:
        img_crop: the cropped (and possibly resized) image.
    """
    rows, cols = img_array.shape[:2]
    short_side = min(rows, cols)
    # Clamp against the SHORT side: clamping against the long side lets a
    # crop_size lying between the two sides produce a non-square crop.
    if crop_size <= 0 or crop_size > short_side:
        crop_size = short_side
    row_s = (rows - crop_size) // 2
    col_s = (cols - crop_size) // 2
    img_crop = img_array[row_s:row_s + crop_size, col_s:col_s + crop_size]
    if resize > 0:
        img_crop = cv2.resize(img_crop, (resize, resize))
    if write_path is not None:
        cv2.imwrite(write_path, img_crop)
    return img_crop

def crop_img_dir(img_dir, save_dir, crop_method="center", rename_pre=-1):
    """Crop a square from every "jpg" image in a directory and save it.

    Args:
        img_dir: image directory.
        save_dir: save directory.
        crop_method: crop method (default: "center"). Only "center" is
            implemented; any other value leaves the images unprocessed.
        rename_pre: name prefix of the saved images; the running index and
            ".jpg" are appended to it (default: -1, keep the original
            image name).

    Returns:
        None.
    """
    img_names = [name for name in os.listdir(img_dir)
                 if name.split(".")[-1] == "jpg"]
    total = len(img_names)
    for index, img_name in enumerate(img_names):
        img = cv2.imread(os.path.join(img_dir, img_name))
        if img is None:
            # cv2.imread reports failure by returning None, not by raising.
            print("skip unreadable image: %s" % img_name)
            continue
        if rename_pre == -1:
            rename = img_name
        else:
            rename = rename_pre + str(index) + ".jpg"
        img_out_path = os.path.join(save_dir, rename)
        if crop_method == "center":
            # Crops are saved at a fixed 640x640 resolution.
            center_crop(img, resize=640, write_path=img_out_path)
        if index % 100 == 0:
            print("total images number = %d, current image number = %d"
                  % (total, index))


标注信息采用和PASCAL VOC数据集一样的方式,对于正样本,直接使用labelImg工具进行标注,这里给出我用的一个版本的链接:https://pan.baidu.com/s/1Q0cqJI9Dnvxkj7159Be4Sw。对于负样本,可以使用python中的xml模块自己写xml标注文件,主要函数如下:

import os
from xml.dom.minidom import Document

import cv2


def _append_text_element(doc, parent, tag, text):
    """Append <tag>text</tag> to parent and return the new element."""
    element = doc.createElement(tag)
    element.appendChild(doc.createTextNode(text))
    parent.appendChild(element)
    return element


def write_img_to_xml(imgfile, xmlfile):
    """Write an annotation xml (folder, filename, size) without objects.

    Args:
        imgfile: image file.
        xmlfile: output xml file.

    Raises:
        IOError: if the image cannot be read.
    """
    img = cv2.imread(imgfile)
    if img is None:
        # cv2.imread returns None on failure; fail with a clear message
        # instead of an AttributeError on img.shape.
        raise IOError("cannot read image: %s" % imgfile)
    img_folder, img_name = os.path.split(imgfile)
    img_height, img_width, img_depth = img.shape

    doc = Document()
    annotation = doc.createElement("annotation")
    doc.appendChild(annotation)
    _append_text_element(doc, annotation, "folder", img_folder)
    _append_text_element(doc, annotation, "filename", img_name)

    size = doc.createElement("size")
    annotation.appendChild(size)
    _append_text_element(doc, size, "width", str(img_width))
    _append_text_element(doc, size, "height", str(img_height))
    _append_text_element(doc, size, "depth", str(img_depth))

    with open(xmlfile, "w") as f:
        doc.writexml(f, indent="\t", addindent="\t", newl="\n", encoding="utf-8")

def write_imgs_to_xmls(imgdir, xmldir):
    """Write one annotation xml file for every file found in imgdir.

    Args:
        imgdir: directory holding the images.
        xmldir: directory the xml files are written to. The xml name is the
            image name up to its first "." plus ".xml".
    """
    for img_name in os.listdir(imgdir):
        img_file = os.path.join(imgdir, img_name)
        xml_file = os.path.join(xmldir, img_name.split(".")[0] + ".xml")
        write_img_to_xml(img_file, xml_file)
        # Report only after the file has actually been written.
        print("%s has been written to xml file in %s" % (img_name, xml_file))



import os
import random
import shutil


def _copy_file(src_file, dst_file):
    """Copy src_file to dst_file, creating the destination directory.

    A missing source file is reported with a message and otherwise ignored.

    Args:
        src_file: path of the file to copy.
        dst_file: destination path.
    """
    if not os.path.isfile(src_file):
        print("%s not exist!" % (src_file))
        return
    fpath = os.path.split(dst_file)[0]
    # fpath is "" when dst_file has no directory part; os.makedirs("")
    # raises, so only create a directory when there is one to create.
    if fpath and not os.path.exists(fpath):
        os.makedirs(fpath)
    shutil.copyfile(src_file, dst_file)

def _copy_image_and_label(img_name, src_img_dir, src_xml_dir, dst_dir):
    """Copy one image and its xml annotation into the dst_dir dataset."""
    xml_name = img_name.split(".")[0] + ".xml"
    _copy_file(os.path.join(src_img_dir, img_name),
               os.path.join(dst_dir, "JPEGImages/", img_name))
    _copy_file(os.path.join(src_xml_dir, xml_name),
               os.path.join(dst_dir, "Annotations/", xml_name))


def split_data(data_dir, train_dir, test_dir, valid_dir, ratio=(0.7, 0.2, 0.1), shuffle=True):
    """Split a dataset into train, test and valid subsets by copying files.

    Images are read from data_dir/JPEGImages/ and annotations from
    data_dir/Annotations/; each target directory gets the same layout.

    Args:
        data_dir: data dir to be split.
        train_dir, test_dir, valid_dir: target directories of the subsets.
        ratio: [train_ratio, test_ratio, valid_ratio]; only the proportions
            matter, the values do not have to sum to 1.
        shuffle: shuffle the image list before splitting or not.
    """
    all_img_dir = os.path.join(data_dir, "JPEGImages/")
    all_xml_dir = os.path.join(data_dir, "Annotations/")

    all_imgs_name = os.listdir(all_img_dir)
    img_num = len(all_imgs_name)
    ratio_sum = sum(ratio)
    train_num = int(1.0 * img_num * ratio[0] / ratio_sum)
    test_num = int(1.0 * img_num * ratio[1] / ratio_sum)
    if shuffle:
        random.shuffle(all_imgs_name)

    test_end = train_num + test_num
    # The valid split is everything left after train and test. Slicing with
    # [-valid_num:] would return the WHOLE list when valid_num is 0.
    splits = (
        (train_dir, all_imgs_name[:train_num]),
        (test_dir, all_imgs_name[train_num:test_end]),
        (valid_dir, all_imgs_name[test_end:]),
    )
    for dst_dir, img_names in splits:
        for img_name in img_names:
            _copy_image_and_label(img_name, all_img_dir, all_xml_dir, dst_dir)









def preprocess_img(imgBGR, erode_dilate=True, kernel_size=3, iterations=2):
    """Threshold the blue and red areas of an image for contour detection.

    Args:
        imgBGR: source image (BGR).
        erode_dilate: erode and dilate the binary image or not.
        kernel_size: side length of the square erosion / dilation kernel
            (default: 3).
        iterations: number of erosion and of dilation passes (default: 2).

    Returns:
        img_bin: a binary image (blue and red).
    """
    imgHSV = cv2.cvtColor(imgBGR, cv2.COLOR_BGR2HSV)

    Bmin = np.array([100, 43, 46])
    Bmax = np.array([124, 255, 255])
    img_Bbin = cv2.inRange(imgHSV, Bmin, Bmax)

    # Red is thresholded with two hue intervals, one at each end of the
    # hue axis, and the two masks are merged.
    Rmin1 = np.array([0, 43, 46])
    Rmax1 = np.array([10, 255, 255])
    img_Rbin1 = cv2.inRange(imgHSV, Rmin1, Rmax1)

    Rmin2 = np.array([156, 43, 46])
    Rmax2 = np.array([180, 255, 255])
    img_Rbin2 = cv2.inRange(imgHSV, Rmin2, Rmax2)

    img_Rbin = np.maximum(img_Rbin1, img_Rbin2)
    img_bin = np.maximum(img_Bbin, img_Rbin)

    if erode_dilate:
        kernel = np.ones((kernel_size, kernel_size), np.uint8)
        img_bin = cv2.erode(img_bin, kernel, iterations=iterations)
        img_bin = cv2.dilate(img_bin, kernel, iterations=iterations)

    return img_bin

def contour_detect(img_bin, min_area=0, max_area=-1, wh_ratio=2.0):
    """Detect contours in a binary image and return their bounding rects.

    Args:
        img_bin: a binary image.
        min_area: the minimum area of the contours detected.
            (default: 0)
        max_area: the maximum area of the contours detected.
            (default: -1, no maximum area limitation)
        wh_ratio: upper bound (exclusive) on the ratio between the long edge
            and the short edge of a bounding rect.
            (default: 2.0)

    Returns:
        rects: a list of [x, y, w, h] rects enclosing the contours.
            If no contour is detected, rects=[].
    """
    # findContours returns (image, contours, hierarchy) in OpenCV 3 but
    # (contours, hierarchy) in OpenCV 2 and 4; the contours are always the
    # second-to-last item.
    contours = cv2.findContours(img_bin.copy(), cv2.RETR_EXTERNAL,
                                cv2.CHAIN_APPROX_NONE)[-2]
    if max_area < 0:
        max_area = img_bin.shape[0] * img_bin.shape[1]

    rects = []
    for contour in contours:
        area = cv2.contourArea(contour)
        if area < min_area or area > max_area:
            continue
        x, y, w, h = cv2.boundingRect(contour)
        if 1.0 * w / h < wh_ratio and 1.0 * h / w < wh_ratio:
            rects.append([x, y, w, h])
    return rects




HOG特征即梯度方向直方图。这里不多介绍,详细的原理可以看我的这篇博客:梯度方向直方图Histogram of Oriented Gradients (HOG)。在具体的实现上是利用skimage库中的feature模块,函数如下:

def hog_feature(img_array, resize=(64,64)):
    """Compute the HOG descriptor of a BGR image.

    The image is converted to grayscale and resized before the descriptor
    is extracted with skimage.

    Args:
        img_array: an image array (BGR).
        resize: size the image is resized to before extraction.

    Returns:
        features: the HOG feature vector returned by skimage.
    """
    hog_params = {
        "orientations": 9,
        "pixels_per_cell": (8, 8),
        "cells_per_block": (2, 2),
        "block_norm": "L2",
        "transform_sqrt": True,
    }
    gray = cv2.cvtColor(img_array, cv2.COLOR_BGR2GRAY)
    gray = cv2.resize(gray, resize)
    return ft.hog(gray, **hog_params)

def extra_hog_features_dir(img_dir, write_txt, resize=(64,64)):
    """Extract HOG features from all images in a directory into a txt file.

    One line is written per image, formatted as
    "img_path \\t label_num \\t f1 f2 ... fn ". The label name is the part of
    the file name before the first "_" and is mapped through img_label.

    Args:
        img_dir: image directory.
        write_txt: the path of a txt file used for saving the hog features
            of all images. An existing file is overwritten.
        resize: size of the image for extraction.

    Returns:
        None.
    """
    img_paths = [os.path.join(img_dir, name) for name in os.listdir(img_dir)]
    total = len(img_paths)
    # Mode "w" truncates an existing file, which replaces the former
    # remove-then-append sequence.
    with open(write_txt, "w") as f:
        for index, img_path in enumerate(img_paths):
            img_array = cv2.imread(img_path)
            if img_array is None:
                # cv2.imread returns None for files it cannot decode.
                print("skip unreadable image: %s" % img_path)
                continue
            features = hog_feature(img_array, resize)
            label_name = os.path.basename(img_path).split("_")[0]
            label_num = img_label[label_name]
            # Every value keeps a trailing space so the line format is the
            # same as before.
            feature_str = "".join(str(round(value, 3)) + " " for value in features)
            f.write(img_path + "\t" + str(label_num) + "\t" + feature_str + "\n")
            if index % 100 == 0:
                print("total image number = %d, current image number = %d"
                      % (total, index))

HOG特征提取的一些参数设置可以在函数中看到,如图像尺寸为64×64,设置了9个梯度方向(bins=9)进行梯度信息统计,cell的大小为8×8,每个block包含4个cell(cpb=(2, 2)),标准化方法采用L2标准化(norm="L2")。









def parse_xml(xml_file):
    """Parse an annotation xml file.

    Args:
        xml_file: the input xml file path.

    Returns:
        image_path: string, DATA_PATH/JPEGImages/<filename>; '' when the
            file has no <filename> element.
        labels: list of [xmin, ymin, xmax, ymax, class].
    """
    root = ET.parse(xml_file).getroot()
    image_path = ''
    labels = []

    for item in root:
        if item.tag == 'filename':
            image_path = os.path.join(DATA_PATH, "JPEGImages/", item.text)
        elif item.tag == 'object':
            # Look the children up by tag name instead of by position, so
            # the element order inside <object> does not matter.
            obj_num = classes_num[item.find('name').text]
            bndbox = item.find('bndbox')
            box = [int(bndbox.find(tag).text)
                   for tag in ('xmin', 'ymin', 'xmax', 'ymax')]
            labels.append(box + [obj_num])

    return image_path, labels


def produce_pos_proposals(img_path, write_dir, labels, min_size, square=False, proposal_num=None):
    """Crop the labeled boxes of an image and save them as positive proposals.

    Args:
        img_path: image path.
        write_dir: write directory.
        labels: a list of bounding boxes.
            [[x1, y1, x2, y2, cls_num], [x1, y1, x2, y2, cls_num], ...]
        min_size: the minimum size of the proposals.
        square: crop a square or not.
        proposal_num: dict mapping class name to the number of proposals
            written so far; it is updated in place. Anything that is not a
            dict (e.g. None or 0) starts a fresh count.

    Returns:
        proposal_num: the updated dict of proposal numbers.

    Raises:
        IOError: if the image cannot be read.
    """
    if not isinstance(proposal_num, dict):
        proposal_num = {}

    img = cv2.imread(img_path)
    if img is None:
        raise IOError("cannot read image: %s" % img_path)
    rows, cols = img.shape[:2]

    for label in labels:
        # Plain Python ints keep the slice indices and the cv2.resize
        # arguments integral on every Python / OpenCV version.
        xmin, ymin, xmax, ymax, cls_num = [int(value) for value in label]

        # remove the proposal with small area
        if xmax - xmin < min_size or ymax - ymin < min_size:
            continue

        if square:
            # Square of side max(w, h) around the box center, clipped to
            # the image and resized back to (size, size).
            xcenter = (xmin + xmax) // 2
            ycenter = (ymin + ymax) // 2
            size = max(xmax - xmin, ymax - ymin)
            half = size // 2
            xmin = max(xcenter - half, 0)
            xmax = min(xcenter + half, cols)
            ymin = max(ycenter - half, 0)
            ymax = min(ycenter + half, rows)

        proposal = img[ymin:ymax, xmin:xmax]
        if proposal.size == 0:
            # The box lies outside the image; there is nothing to write.
            continue
        if square:
            proposal = cv2.resize(proposal, (size, size))

        cls_name = classes_name[cls_num]
        proposal_num[cls_name] = proposal_num.get(cls_name, 0) + 1
        write_name = cls_name + "_" + str(proposal_num[cls_name]) + ".jpg"
        cv2.imwrite(os.path.join(write_dir, write_name), proposal)

    return proposal_num

def produce_neg_proposals(img_path, write_dir, min_size, square=False, proposal_num=0):
    """Produce negative proposals from a negative image.

    Blue and red areas are thresholded in HSV space and the bounding rect
    of every external contour is saved as a "background" proposal.

    Args:
        img_path: image path.
        write_dir: write directory.
        min_size: the minimum size of the proposals.
        square: crop a square or not.
        proposal_num: current negative proposal numbers.

    Returns:
        proposal_num: negative proposal numbers.

    Raises:
        IOError: if the image cannot be read.
    """
    img = cv2.imread(img_path)
    if img is None:
        raise IOError("cannot read image: %s" % img_path)
    rows, cols = img.shape[:2]

    imgHSV = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    imgBinBlue = cv2.inRange(imgHSV, np.array([100, 43, 46]), np.array([124, 255, 255]))
    imgBinRed1 = cv2.inRange(imgHSV, np.array([0, 43, 46]), np.array([10, 255, 255]))
    imgBinRed2 = cv2.inRange(imgHSV, np.array([156, 43, 46]), np.array([180, 255, 255]))
    imgBinRed = np.maximum(imgBinRed1, imgBinRed2)
    imgBin = np.maximum(imgBinRed, imgBinBlue)

    # [-2] selects the contours for both the 3-value (OpenCV 3) and the
    # 2-value (OpenCV 2 / 4) return of findContours.
    contours = cv2.findContours(imgBin, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)[-2]
    for contour in contours:
        x, y, w, h = cv2.boundingRect(contour)
        if w < min_size or h < min_size:
            continue

        if square:
            # Floor division keeps the slice bounds integral on Python 3.
            xcenter = x + w // 2
            ycenter = y + h // 2
            size = max(w, h)
            half = size // 2
            xmin = max(xcenter - half, 0)
            xmax = min(xcenter + half, cols)
            ymin = max(ycenter - half, 0)
            ymax = min(ycenter + half, rows)
            proposal = img[ymin:ymax, xmin:xmax]
            proposal = cv2.resize(proposal, (size, size))
        else:
            proposal = img[y:y + h, x:x + w]

        write_name = "background" + "_" + str(proposal_num) + ".jpg"
        proposal_num += 1
        cv2.imwrite(os.path.join(write_dir, write_name), proposal)

    return proposal_num

def produce_proposals(xml_dir, write_dir, square=False, min_size=30):
    """Produce proposals (examples for classification) and write them to disk.

    Every xml file in xml_dir is parsed. Images without labeled objects
    yield negative ("background") proposals, all other images yield one
    positive proposal per labeled box.

    Args:
        xml_dir: image xml file directory.
        write_dir: write directory of all proposals.
        square: crop a square or not. Applied to positive and negative
            proposals alike (positives used to be cropped square
            regardless of this argument).
        min_size: the minimum size of the proposals.

    Returns:
        proposal_num: a dict of proposal numbers.
    """
    proposal_num = dict((cls_name, 0) for cls_name in classes_name)

    xml_files = [name for name in os.listdir(xml_dir) if name.endswith(".xml")]
    total = len(xml_files)
    for index, xml_file in enumerate(xml_files):
        img_path, labels = parse_xml(os.path.join(xml_dir, xml_file))
        if not os.path.isfile(img_path):
            print("skip %s: image %s not found" % (xml_file, img_path))
            continue

        if not labels:
            proposal_num["background"] = produce_neg_proposals(
                img_path, write_dir, min_size, square, proposal_num["background"])
        else:
            proposal_num = produce_pos_proposals(
                img_path, write_dir, labels, min_size,
                square=square, proposal_num=proposal_num)

        if index % 100 == 0:
            print("total xml file number = %d, current xml file number = %d"
                  % (total, index))
            print("proposal num = %s" % (proposal_num,))

    return proposal_num


proposal_num = {'right': 117, 'straight': 334, 'stop': 224, 'no hook': 168, 'crosswalk': 128, 'left': 208, 'background': 1116}
从上面各个类别样本数量上来看,正样本的各类标志数量相对背景(负样本)很少。为了保持数据的平衡,我们对正样本进行了扩充。由于我们的数据中包含了向左、向右等标志,如果通过旋转或者镜像变换来扩充会出问题(当然可以进行小范围的旋转)。我也考虑过亮度变换,但是由于HOG特征中引入了归一化方法,使得HOG特征对光照不敏感。最终我选用的是仿射变换,这个可以通过OpenCV很方便地实现,具体的仿射变换理论和代码示例可以参考OpenCV官方教程中的Affine Transformations,这里也给出我对数据集仿射变换的函数:

def affine(img, delta_pix):
    """Apply an affine transformation defined by shifting three corners.

    The reference points are the top-left, top-right and bottom-left image
    corners; each is moved by the matching offset in delta_pix.

    Args:
        img: a numpy image array.
        delta_pix: the offset for affine, added to the 3x2 array of (x, y)
            reference points (a 3x2 array or a scalar).

    Returns:
        res: affined image, same width and height as img.
    """
    rows, cols = img.shape[:2]
    # OpenCV points are (x, y) and dsize is (width, height), so the column
    # count comes first in both places.
    pts1 = np.float32([[0, 0], [cols, 0], [0, rows]])
    # getAffineTransform expects float32 points; the sum may be promoted
    # to float64 depending on the dtype of delta_pix.
    pts2 = np.float32(pts1 + delta_pix)
    M = cv2.getAffineTransform(pts1, pts2)
    res = cv2.warpAffine(img, M, (cols, rows))
    return res


def affine_dir(img_dir, write_dir, max_delta_pix):
    """Apply a random affine transformation to every "jpg" image in a directory.

    Args:
        img_dir: image directory.
        write_dir: save directory of affined images. The saved name is the
            image name up to its first "." plus "f.jpg".
        max_delta_pix: the maximum offset for affine; every offset is drawn
            from the integers in [-max_delta_pix, max_delta_pix].
    """
    img_names = [name for name in os.listdir(img_dir)
                 if name.split(".")[-1] == "jpg"]
    for img_name in img_names:
        img = cv2.imread(os.path.join(img_dir, img_name))
        if img is None:
            # cv2.imread returns None for files it cannot decode.
            print("skip unreadable image: %s" % img_name)
            continue
        save_name = os.path.join(write_dir, img_name.split(".")[0] + "f.jpg")
        # One (dx, dy) integer offset per reference point.
        delta_pix = np.float32(
            np.random.randint(-max_delta_pix, max_delta_pix + 1, [3, 2]))
        img_a = affine(img, delta_pix)
        cv2.imwrite(save_name, img_a)

上面函数输入参数max_delta_pix用来控制随机仿射变换的最大强度(正整数),max_delta_pix越大,变换越明显(太大可能导致目标信息的完全丢失),我在扩充时这个参数取为10。需要注意的是,10只是变换的最大强度,在对每一张图片进行变换前,会在[-max_delta_pix, max_delta_pix]范围内随机生成一个3×2的整数偏移矩阵delta_pix,对应三个参考点各自的x、y偏移(当然你也可以多取几次不同的值来生成更多的变换图片),这个矩阵控制了当前图片变换的强度。以下是一些变换的结果示例:



def load_hog_data(hog_txt):
    """Load hog features from a txt file.

    Args:
        hog_txt: a txt file used to save hog features.
            one line data is formated as
            "img_path \\t cls_num \\t hog_feature_vector", the feature values
            being separated by spaces. Blank lines are skipped.

    Returns:
        img_names: a list of image names.
        labels: numpy array labels (1-dim).
        hog_features: numpy array hog features.
            formated as [[hog1], [hog2], ...]
    """
    img_names = []
    labels = []
    hog_features = []
    with open(hog_txt, "r") as f:
        for row_data in f:
            # Strip only the line ending: a bare rstrip() would also eat a
            # trailing tab separator.
            row_data = row_data.rstrip("\r\n")
            if not row_data.strip():
                continue
            img_path, label, hog_str = row_data.split("\t")
            img_names.append(img_path.split("/")[-1])
            labels.append(int(label))
            # split() without argument ignores the trailing space that
            # follows the last feature value.
            hog_features.append([float(hog) for hog in hog_str.split()])
    return img_names, np.array(labels), np.array(hog_features)


def svm_train(hog_features, labels, save_path="./svm_model.pkl"):
    """Train a SVM classifier on hog features and save it to disk.

    Args:
        hog_features: numpy array hog features.
            formated as [[hog1], [hog2], ...]
        labels: numpy array labels (1-dim).
        save_path: model save path.

    Returns:
        None.
    """
    # probability=True enables predict_proba on the trained model.
    clf = SVC(C=10, tol=1e-3, probability=True)
    clf.fit(hog_features, labels)
    joblib.dump(clf, save_path)
    print("finished.")


def svm_test(svm_model, hog_feature, labels):
    """Evaluate a saved SVM model.

    Args:
        svm_model: path of the saved model.
        hog_feature: numpy array hog features.
            formated as [[hog1], [hog2], ...]
        labels: numpy array labels (1-dim).

    Returns:
        accuracy: test accuracy.
    """
    clf = joblib.load(svm_model)
    accuracy = clf.score(hog_feature, labels)
    return accuracy




import os

import cv2
import numpy as np
from skimage import feature as ft

try:
    from sklearn.externals import joblib
except ImportError:
    # sklearn.externals.joblib is gone from recent scikit-learn releases;
    # the standalone joblib package provides the same API.
    import joblib

cls_names = ["straight", "left", "right", "stop", "nohonk", "crosswalk", "background"]
img_label = {"straight": 0, "left": 1, "right": 2, "stop": 3, "nohonk": 4, "crosswalk": 5, "background": 6}


def preprocess_img(imgBGR, erode_dilate=True, kernel_size=9):
    """Threshold the blue and red areas of an image for contour detection.

    Args:
        imgBGR: source image (BGR).
        erode_dilate: erode and dilate the binary image or not.
        kernel_size: side length of the square erosion / dilation kernel
            (default: 9).

    Returns:
        img_bin: a binary image (blue and red).
    """
    imgHSV = cv2.cvtColor(imgBGR, cv2.COLOR_BGR2HSV)

    Bmin = np.array([100, 43, 46])
    Bmax = np.array([124, 255, 255])
    img_Bbin = cv2.inRange(imgHSV, Bmin, Bmax)

    # Red is thresholded with two hue intervals, one at each end of the
    # hue axis, and the two masks are merged.
    Rmin1 = np.array([0, 43, 46])
    Rmax1 = np.array([10, 255, 255])
    img_Rbin1 = cv2.inRange(imgHSV, Rmin1, Rmax1)

    Rmin2 = np.array([156, 43, 46])
    Rmax2 = np.array([180, 255, 255])
    img_Rbin2 = cv2.inRange(imgHSV, Rmin2, Rmax2)

    img_Rbin = np.maximum(img_Rbin1, img_Rbin2)
    img_bin = np.maximum(img_Bbin, img_Rbin)

    if erode_dilate:
        kernel = np.ones((kernel_size, kernel_size), np.uint8)
        img_bin = cv2.erode(img_bin, kernel, iterations=2)
        img_bin = cv2.dilate(img_bin, kernel, iterations=2)

    return img_bin


def contour_detect(img_bin, min_area=0, max_area=-1, wh_ratio=2.0):
    """Detect contours in a binary image and return their bounding rects.

    Args:
        img_bin: a binary image.
        min_area: the minimum area of the contours detected.
            (default: 0)
        max_area: the maximum area of the contours detected.
            (default: -1, no maximum area limitation)
        wh_ratio: upper bound (exclusive) on the ratio between the long edge
            and the short edge of a bounding rect.
            (default: 2.0)

    Returns:
        rects: a list of [x, y, w, h] rects enclosing the contours.
            If no contour is detected, rects=[].
    """
    # [-2] selects the contours for both the 3-value (OpenCV 3) and the
    # 2-value (OpenCV 2 / 4) return of findContours.
    contours = cv2.findContours(img_bin.copy(), cv2.RETR_EXTERNAL,
                                cv2.CHAIN_APPROX_NONE)[-2]
    if max_area < 0:
        max_area = img_bin.shape[0] * img_bin.shape[1]

    rects = []
    for contour in contours:
        area = cv2.contourArea(contour)
        if area < min_area or area > max_area:
            continue
        x, y, w, h = cv2.boundingRect(contour)
        if 1.0 * w / h < wh_ratio and 1.0 * h / w < wh_ratio:
            rects.append([x, y, w, h])
    return rects


def draw_rects_on_img(img, rects):
    """Draw rects on a copy of an image.

    Args:
        img: an image where the rects are drawn on (left unmodified).
        rects: a list of [x, y, w, h] rects.

    Returns:
        img_rects: a copy of the image with the rects drawn in green.
    """
    img_copy = img.copy()
    for x, y, w, h in rects:
        cv2.rectangle(img_copy, (x, y), (x + w, y + h), (0, 255, 0), 2)
    return img_copy


def hog_extra_and_svm_class(proposal, clf, resize=(64, 64)):
    """Classify a region proposal with HOG features and a SVM model.

    Args:
        proposal: region proposal (numpy array, BGR).
        clf: a SVM model.
        resize: resize the region proposal
            (default: (64, 64))

    Returns:
        cls_prop: probability of all classes, shape (1, number of classes).
    """
    img = cv2.cvtColor(proposal, cv2.COLOR_BGR2GRAY)
    img = cv2.resize(img, resize)
    features = ft.hog(img, orientations=9, pixels_per_cell=(8, 8),
                      cells_per_block=(2, 2), block_norm="L2",
                      transform_sqrt=True)
    print("feature = %s" % (features.shape,))
    # predict_proba expects a 2-D array: one row per sample.
    features = np.reshape(features, (1, -1))
    cls_prop = clf.predict_proba(features)
    print("cls prop = %s" % (cls_prop,))
    return cls_prop


def main():
    """Detect and classify the traffic signs of one image and show the result."""
    img_path = "/home/meringue/Documents/traffic_sign_detection/svm_hog_classification/sign_89.jpg"
    img = cv2.imread(img_path)
    if img is None:
        raise IOError("cannot read image: %s" % img_path)
    rows, cols, _ = img.shape

    img_bin = preprocess_img(img, False)
    cv2.imshow("bin image", img_bin)
    cv2.imwrite("bin_image.jpg", img_bin)

    # Ignore contours smaller than 1/625 of the image area.
    min_area = img_bin.shape[0] * img.shape[1] // (25 * 25)
    rects = contour_detect(img_bin, min_area=min_area)
    img_rects = draw_rects_on_img(img, rects)
    cv2.imshow("image with rects", img_rects)
    cv2.imwrite("image_rects.jpg", img_rects)

    clf = joblib.load("./svm_model.pkl")
    img_bbx = img.copy()

    for rect in rects:
        x, y, w, h = rect
        # Square proposal of side max(w, h) around the rect center,
        # clipped to the image.
        xc = x + w // 2
        yc = y + h // 2
        half = max(w, h) // 2
        x1 = max(0, xc - half)
        y1 = max(0, yc - half)
        x2 = min(cols, xc + half)
        y2 = min(rows, yc + half)
        proposal = img[y1:y2, x1:x2]

        cls_prop = hog_extra_and_svm_class(proposal, clf)
        cls_prop = np.round(cls_prop, 2)[0]
        cls_num = np.argmax(cls_prop)
        cls_name = cls_names[cls_num]
        prop = cls_prop[cls_num]
        # Compare by value: "is not" tests object identity and only worked
        # through string interning.
        if cls_name != "background":
            cv2.rectangle(img_bbx, (x, y), (x + w, y + h), (0, 0, 255), 2)
            cv2.putText(img_bbx, cls_name + str(prop), (x, y),
                        cv2.FONT_HERSHEY_PLAIN, 1.5, (0, 0, 255), 2)

    cv2.imshow("detect result", img_bbx)
    cv2.imwrite("detect_result.jpg", img_bbx)
    cv2.waitKey(0)


if __name__ == "__main__":
    main()


标志 直行 (straight) 左转(left) 右转 (right) 禁止鸣笛(no-honk) 人行横道(crosswalk) 禁止通行(stop)
准确率(precision) 41.6% 45.8% 43.5% 45.3% 75.6% 45.7%
召回率 (recall) 37.1% 39.8% 43.5% 48.3% 50.8% 57.1%

对SVM输出的概率值依次设置0.1、0.2 …0.9的阈值,得到的平均准确率和召回率变化趋势如下:


(1)有很多标志所在的候选区域被漏检(详见Bad Cases Analysis),这直接导致最终的召回率很低。


