Denua 博客

PIL 识别简单验证码

发布时间: 2017-12-01 22:53   分类 : Python    标签: Python 图像识别 浏览: 764   


只使用了 PIL (Python Image Lib), 直接放代码, 具体细节查看知乎, [Python识别简单验证码](https://zhuanlan.zhihu.com/p/26181088) ``` # coding=utf-8 # /usr/bin/Python from __future__ import division, print_function import os from PIL import Image # import re H = 12 # 切片高 W = 8 # 切片宽 ST = (7, 7) # 左边开始位置, 顶部开始位置 PIECE = 5 # 切片 PAD = 1 # 间隔 END_Y = ST[1] + H # 底部 PAD_X = W + PAD # 偏移 DEBUG_LEAVE = False ''' LEAVE False: not detail LEAVE 3: LEAVE 5: all detail ''' EACH_RECT = ( (ST[0] + PAD_X * 0, ST[1], ST[0] + PAD_X * 0 + W, END_Y), # 左边距 上边距 右边距 下边距 (ST[0] + PAD_X * 1, ST[1], ST[0] + PAD_X * 1 + W, END_Y), (ST[0] + PAD_X * 2, ST[1], ST[0] + PAD_X * 2 + W, END_Y), (ST[0] + PAD_X * 3, ST[1], ST[0] + PAD_X * 3 + W, END_Y), (ST[0] + PAD_X * 4, ST[1], ST[0] + PAD_X * 4 + W, END_Y) ) def get_cut(file_name): img = Image.open(file_name) img = img.convert('L') cut_img = [] # 每个数字的范围 rct = EACH_RECT # 转换图片 bin_img = get_bin(img) # bin_img.crop(rct[0]).show() # pr(bin_img.crop(rct[0])) # 将图片分割为CAP部分 for part in range(PIECE): debug('\n', rct[part]) if DEBUG_LEAVE >= 5: bin_img.crop(rct[part]).save(part.__str__() + '.gif', 'gif') cut_img.append(bin_img.crop(rct[part])) return cut_img # 去除图片噪点并转为黑白 def get_bin(img): bin_img = Image.new('L', img.size, 255) # 新图片 模式 L灰度 大小 色彩深度 cvt_img = img.convert("L") # 转换模式为灰度图 for x in range(img.size[1]): for y in range(img.size[0]): pix = cvt_img.getpixel((y, x)) if pix < 110: # 去噪阈值过滤灰度大于 110 的像素点 bin_img.putpixel((y, x), 0) # 填充新图片 else: bin_img.putpixel((y, x), 255) return bin_img def pr(img): if img.mode != 'L': img = img.convert('L') size = img.size for x in range(size[1]): for y in range(size[0]): px = img.getpixel((y, x)) if px == 255: print ('-',) else: print ('@',) print() print ('━' * 50) # 去干扰 def denoise(img): cvt_im = Image.new('L', img.size, 255) img = img.convert('L') s = img.size for x in range(s[1]): for y in range(s[0]): if x == 0 or x == s[1]-1 or y == 0 or y == s[0]-1: cvt_im.putpixel((y, x), 255) else: if img.getpixel((y, x)) == 255: cvt_im.putpixel((y, x), 255) else: cvt_im.putpixel((y, x), 0) return cvt_im # 转换为矢量 def get_vector(img): img = img.convert('L') d1 = [] for i in range(H): for j in range(W): d1.append(0) index = i * W + j if img.getpixel((j, i)) == 255: d1[index] = 1 else: d1[index] = 0 return d1 # 获取列表最大数 def getmax(list): li = list max = -1 index = 0 for i in range(len(li)): if max < li[i]: max = li[i] index = i return index def getmini(list): li = list mini = 0 index = 0 for i in range(len(li)): if mini > li[i]: mini = li[i] index = i return index def prbin(i): for o in range(H): for j in range(W): ind = o * W + j if i[ind] == 1: print ('. ', end='') else: print('# ', end='') print() print() def get_dict(path, deep): dict_ = {} for i in range(10): count = 0 temp = [] imDir = (''.join([path, '\\', str(i)])) debug ('\n', imDir) for im in os.listdir(imDir): img = ''.join([path, '\\', str(i), '\\', im]) img = Image.open(img) temp.append(get_vector(denoise(img))) count += 1 if count >= deep: break dict_[i] = temp # print 'Finished: ', i return dict_ def createDict(labeledPath, savePath, dictDeepness): fil = open(savePath, 'w') d = get_dict(labeledPath, dictDeepness).values() fil.write('val = \\\n') fil.write(d.__str__()) fil.close() # 识别每个图片块的内容 def get_result(src_img, labeled): guess = [-1, []] src_img = src_img.convert('L') # 将图片转换为二值列表 srcimgbin = get_vector(src_img) # 用于储存每个对比数字的相似率 simalist = [] for i in range(H): simalist.append(0) # 与每个数字对比 for cp_numb in range(W): comp = labeled[cp_numb] cp_count = len(labeled[cp_numb]) for each in comp: xy = 0 yx = 0 # 纵向扫描对比 并统计相同 for x in range(W): for y in range(H): # 如果像素点一样 if each[x * y + y] == srcimgbin[x * y + y] == 1: xy += 1 else: if each[x * y + y] != srcimgbin[x * y + y]: xy -= 1 # 横向扫描对比 for y in range(H): for x in range(W): if each[y * x + x] == srcimgbin[y * x + x] == 1: yx += 1 else: if each[y * x + x] != srcimgbin[y * x + x]: yx -= 1 # 得出单张图片相似率 sima = (xy + yx) / 280 # 统计该数相似率 simalist[cp_numb] += sima # 求于个数平均值 simalist[cp_numb] /= cp_count debug('\n', simalist[cp_numb]) # 得到相似度最高的数字 guess[0] = getmax(simalist) # max(simalist) guess[1] = simalist return guess def scan(img, dict_val): import random global DEBUG_LEAVE try: dictionary = dict_val except: print ('dict Exception') return # 得到只有黑白值的图片分割块 cut_img = get_cut(img) result = '' # 识别每个图片块的内容 for im in cut_img: if DEBUG_LEAVE >= 3: prbin(get_vector(im)) # 将识别的结果连接 res = str(get_result(denoise(im), dictionary)[0]) # im.save(res + '\\' + str(random.randint(0, 2000)) + '.gif') # get_bin(denoise(img)) result = result + str(res) return result def detect(imFile, dict): import time for x in os.listdir(imFile): image = ''.join([imFile, '\\', x]) t = time.time() capRes = scan(image, dict) t = time.time() - t print (t, capRes) newName = ''.join([imFile, capRes, '.gif']) try: os.rename(image, newName) except WindowsError, Exception : os.rename(image, newName.join('0')) def make_piece(samplePath = 'img\\', pieceSavePath = 'classify\\'): import random if not os.path.isdir(samplePath): os.makedirs(samplePath) for labeledPath in range(10): labeledPath = ''.join([pieceSavePath, str(labeledPath)]) if not os.path.isdir(labeledPath): os.makedirs(labeledPath) for sample in os.listdir(samplePath): sample = ''.join([samplePath, sample]) if os.path.isfile(sample): for piece_ in get_cut(sample): pieceName = ''.join([pieceSavePath, random.randint(0, 9999).__str__(), '.gif']) piece_.save(pieceName, 'gif') piece_.close() def debug(end = '\n', *args): if not DEBUG_LEAVE: return for x in args: print(x, end=end) def cutPiece(Height_, Width_, ST_, Piece_, Pad_, samplePath_ = '\img', pieceSavePath_ = 'classify\\'): global H global W global ST global PIECE global PAD H = Height_ W = Width_ ST = ST_ PIECE = Piece_ PAD = Pad_ global END_Y global PAD_X global EACH_RECT END_Y = ST[1] + H PAD_X = W + PAD EACH_RECT = ( (ST[0] + PAD_X * 0, ST[1], ST[0] + PAD_X * 0 + W, END_Y), # 左边距 上边距 右边距 下边距 (ST[0] + PAD_X * 1, ST[1], ST[0] + PAD_X * 1 + W, END_Y), (ST[0] + PAD_X * 2, ST[1], ST[0] + PAD_X * 2 + W, END_Y), (ST[0] + PAD_X * 3, ST[1], ST[0] + PAD_X * 3 + W, END_Y), (ST[0] + PAD_X * 4, ST[1], ST[0] + PAD_X * 4 + W, END_Y) ) make_piece(samplePath_, pieceSavePath_) if __name__ == '__main__': import login_x as cap_dict # print (read_dict('login.dict')) # dictPath = 'labeled.dict' # # print scan('7.gif', dictPath) # count = 0 # for im in os.listdir('img\\'): # count += 1 # print scan('img\\' + im, dictPath) # # createDict('classify', 'login_x.py', 10) # detect('img\\', cap_dict.val) # make_piece() print('', '\nResult: ', scan('chk.gif', cap_dict.val), '\n') ```

评论    

Copyright denua denua.cn