本文是我初学 OpenCV 时的一个阶段性小任务,主要用于练习,并没有过多地追求准确率和高可用性。比如,程序对输入的身份证照片有要求:必须是完整的身份证照片,不能带有背景;如需改进,可以通过增加轮廓检测和透视变换来裁剪出身份证区域。另外,对身份证号区域的检测,采用的是先裁剪出一个固定大小的模板、再进行模板匹配的做法;当时学得比较浅,所以直接草率地这样做了,其实也可以通过轮廓检测并排序来查找身份证号区域。
另外,利用 KNN 算法做识别,是我当时刚接触 KNN 算法时一时兴起做的。这算是我第一次做算法模型的训练,然后用它进行检测,也为后来学习各种深度学习算法打下了基础。识别效果并不是很好,因为训练用到的数据集只有最后那一张图片(trainum.png),并且只做了少量的数据增强。
KNN 训练程序(train.py):
# `cv2.cv2` is not importable on recent opencv-python wheels; plain `cv2`
# exposes the same API on old and new versions alike.
import cv2 as cv
import numpy as np

# Layout of the training sheet: one row per class label (0-10), 30 glyphs per
# row, each glyph flattened to 768 pixel values.
_NUM_CLASSES = 11
_GLYPHS_PER_CLASS = 30
_FEATURES_PER_GLYPH = 768


def _sheet_to_samples(sheet):
    """Split a glyph sheet into cells and flatten each cell to a float32 row."""
    cells = [
        np.hsplit(row, _GLYPHS_PER_CLASS)
        for row in np.vsplit(sheet, _NUM_CLASSES)
    ]
    return np.array(cells).reshape(-1, _FEATURES_PER_GLYPH).astype(np.float32)


def KNN():
    """Train a k-nearest-neighbours classifier from ``trainum.png``.

    The sheet is augmented with eroded and dilated copies (kernel sizes 1
    and 2), and the model is trained on all variants at once.

    Returns:
        tuple: ``(knn, train, train_label)`` where ``knn`` is the trained
        ``cv.ml.KNearest`` model, ``train`` is the (330, 768) float32 sample
        matrix of the *un-augmented* sheet and ``train_label`` holds the
        matching 330 class labels (0-10, each repeated 30 times).

    Raises:
        FileNotFoundError: if ``trainum.png`` cannot be read.
    """
    sheet = cv.imread("trainum.png", 0)
    if sheet is None:
        raise FileNotFoundError("cannot read training image 'trainum.png'")

    # Simple augmentation: thinner (erode) and thicker (dilate) strokes.
    variants = [sheet]
    for size in range(1, 3):
        kernel = np.ones((size, size), np.uint8)
        variants.append(cv.erode(sheet, kernel))
        variants.append(cv.dilate(sheet, kernel))

    train_label = np.repeat(np.arange(_NUM_CLASSES), _GLYPHS_PER_GLYPH_GUARD)
    samples = np.vstack([_sheet_to_samples(variant) for variant in variants])

    # KNearest.train() replaces the stored samples on every call unless the
    # update flag is passed, so calling it once per variant would keep only
    # the last variant. Train a single time on the stacked data instead.
    knn = cv.ml.KNearest_create()
    knn.train(samples, cv.ml.ROW_SAMPLE, np.tile(train_label, len(variants)))

    return knn, _sheet_to_samples(sheet), train_label


# Alias kept next to its single use so the repeat count reads explicitly.
_GLYPHS_PER_GLYPH_GUARD = _GLYPHS_PER_CLASS


def main():
    """Classify the un-augmented training samples and print the accuracy."""
    knn, train, train_label = KNN()
    _, result, _, _ = knn.findNearest(train, 3)
    # `result` has one column; flatten it to compare with the 1-D labels.
    right = np.count_nonzero(result.ravel() == train_label)
    ac = right / result.size
    print(f'正确率{ac*100:.2f}%')


if __name__ == '__main__':
    main()
    cv.waitKey(0)
KNN算法主程序:
# `cv2.cv2` is not importable on recent opencv-python wheels; plain `cv2`
# exposes the same API on old and new versions alike.
import cv2 as cv
import numpy as np

import train

# Character for each class label predicted by the KNN model (label 10 -> "X").
ID_CHARS = ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "X"]

# NOTE(review): the OCR script reads "0033.jpg"; confirm which name is intended.
idimg = cv.imread("033.jpg")
if idimg is None:
    raise FileNotFoundError("cannot read ID card image '033.jpg'")
idimg = cv.resize(idimg, (509, 321), interpolation=cv.INTER_CUBIC)

template = cv.imread("position1.jpg", 1)
if template is None:
    raise FileNotFoundError("cannot read template image 'position1.jpg'")

cv.imshow("idimg", idimg)
gray = cv.cvtColor(idimg, cv.COLOR_BGR2GRAY)
cv.imshow("gray", gray)

# Black-hat (closing minus image) with a 15x15 kernel, then a closing on the
# result; the closed image is what gets matched against the template.
kernel1 = np.ones((15, 15), np.uint8)
cvblackhat = cv.morphologyEx(gray, cv.MORPH_BLACKHAT, kernel1)
cv.imshow("black", cvblackhat)
cvclose1 = cv.morphologyEx(cvblackhat, cv.MORPH_CLOSE, kernel1)
cv.imshow("cvclose", cvclose1)

ref = cv.threshold(cvclose1, 0, 255, cv.THRESH_OTSU)[1]
# Binarised black-hat image: the digit cells are cut from this one.
twoimg = cv.threshold(cvblackhat, 0, 255, cv.THRESH_OTSU)[1]
cv.imshow("ref", ref)

# The template is loaded as a 3-channel image, so the single-channel mask is
# written out and read back with the same colour flag before matching.
cv.imwrite("ref.jpg", ref)
ref = cv.imread("ref.jpg", 1)

h, w = template.shape[:2]
res = cv.matchTemplate(ref, template, cv.TM_CCORR)
_, _, _, max_loc = cv.minMaxLoc(res)
top_left = max_loc
bottom_right = (top_left[0] + w, top_left[1] + h)
cv.rectangle(idimg, top_left, bottom_right, (0, 255, 0), 2)
cv.imshow("idimgOK", idimg)

# Normalise the number strip to 432x32 so that it splits into 18 cells of
# 768 pixels each, the feature length the KNN model was trained with.
rectangleid = cv.resize(
    twoimg[top_left[1]:bottom_right[1], top_left[0]:bottom_right[0]],
    (432, 32),
    interpolation=cv.INTER_CUBIC,
)
cv.imshow("rectangleid", rectangleid)

cells = [np.hsplit(row, 18) for row in np.vsplit(rectangleid, 1)]
x = np.array(cells)
cv.imshow("cell9", x[0][9])
cv.imshow("cell10", x[0][10])
test = x[:, :].reshape(-1, 768).astype(np.float32)

# train.KNN() returns (model, samples, labels); only the model is needed.
# Binding the whole tuple to `knn` made knn.findNearest raise AttributeError.
knn, _, _ = train.KNN()
_, result, _, _ = knn.findNearest(test, 2)
result = np.uint8(result).reshape(-1, 18)[0]

idstr = "".join(ID_CHARS[label] for label in result)
print(idstr)
cv.waitKey(0)
OCR算法主程序
# `cv2.cv2` is not importable on recent opencv-python wheels; plain `cv2`
# exposes the same API on old and new versions alike.
import cv2 as cv
import numpy as np
import pytesseract

# Extra pixels kept around the matched region before it is passed to OCR.
CROP_MARGIN = 2

idimg = cv.imread("0033.jpg")
if idimg is None:
    raise FileNotFoundError("cannot read ID card image '0033.jpg'")
idimg = cv.resize(idimg, (509, 321), interpolation=cv.INTER_CUBIC)
# Untouched copy: the OCR crop must not contain the rectangle drawn below.
idimgok = idimg.copy()

template = cv.imread("position1.jpg", 1)
if template is None:
    raise FileNotFoundError("cannot read template image 'position1.jpg'")

cv.imshow("idimg", idimg)
gray = cv.cvtColor(idimg, cv.COLOR_BGR2GRAY)
cv.imshow("gray", gray)

# Black-hat (closing minus image) with a 15x15 kernel, then a closing on the
# result; the closed image is what gets matched against the template.
kernel1 = np.ones((15, 15), np.uint8)
cvblackhat = cv.morphologyEx(gray, cv.MORPH_BLACKHAT, kernel1)
cv.imshow("black", cvblackhat)
cvclose1 = cv.morphologyEx(cvblackhat, cv.MORPH_CLOSE, kernel1)
cv.imshow("cvclose", cvclose1)

ref = cv.threshold(cvclose1, 0, 255, cv.THRESH_OTSU)[1]
cv.imshow("ref", ref)

# The template is loaded as a 3-channel image, so the single-channel mask is
# written out and read back with the same colour flag before matching.
cv.imwrite("ref.jpg", ref)
ref = cv.imread("ref.jpg", 1)

h, w = template.shape[:2]
res = cv.matchTemplate(ref, template, cv.TM_CCORR)
_, _, _, max_loc = cv.minMaxLoc(res)
top_left = max_loc
bottom_right = (top_left[0] + w, top_left[1] + h)
cv.rectangle(idimg, top_left, bottom_right, (0, 255, 0), 2)
cv.imshow("idimgOK", idimg)

# Clamp the padded crop to the image. Without this, a match within
# CROP_MARGIN pixels of the top/left edge gives a negative slice start, which
# wraps around and produces an empty or wrong crop.
img_h, img_w = idimgok.shape[:2]
x0 = max(top_left[0] - CROP_MARGIN, 0)
y0 = max(top_left[1] - CROP_MARGIN, 0)
x1 = min(bottom_right[0] + CROP_MARGIN, img_w)
y1 = min(bottom_right[1] + CROP_MARGIN, img_h)

rectangleid = cv.resize(
    idimgok[y0:y1, x0:x1],
    (436, 36),
    interpolation=cv.INTER_CUBIC,
)
cv.imshow("rectangleid", rectangleid)

text = pytesseract.image_to_string(rectangleid)
print(text)
cv.waitKey(0)
程序中所用到的图片
0033.jpg为标准身份证照片
position1.jpg