# openvideo.py
import cv2

def openvideo(window_name, video_id):
    cv2.namedWindow(window_name)
    cap = cv2.VideoCapture(video_id)
    while cap.isOpened():
        ok, frame = cap.read()
        if not ok:
            break
        cv2.imshow(window_name, frame)
        # Wait 10 ms for a key press; if none arrives, fall through to the next
        # while iteration, giving roughly one frame every 10 ms. waitKey returns
        # the code of the key pressed on the keyboard.
        c = cv2.waitKey(10)
        if c & 0xFF == ord('q'):  # press 'q' to quit
            break
    cap.release()
    cv2.destroyWindow(window_name)
    print("camera closed")

if __name__ == '__main__':
    print('open camera...')
    openvideo('mycam', 0)
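If you are not sure which device index to pass as `video_id`, a minimal probe like the one below can help. This is just a sketch; the upper bound of 4 is an arbitrary assumption, raise it if you have more capture devices.

# probe_cameras.py -- sketch: find usable camera indices (range 0..3 is an assumption)
import cv2

def list_cameras(max_index=4):
    available = []
    for idx in range(max_index):
        cap = cv2.VideoCapture(idx)
        if cap.isOpened():
            ok, _ = cap.read()
            if ok:
                available.append(idx)
        cap.release()
    return available

if __name__ == '__main__':
    print('usable camera indices:', list_cameras())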
# getTrainingData.py
import cv2

def getTrainingData(window_name, camera_id, path_name, max_num):
    # path_name is the directory the images are saved to; max_num is how many images to capture
    cv2.namedWindow(window_name)
    cap = cv2.VideoCapture(camera_id)
    classifier = cv2.CascadeClassifier('haarcascade_frontalface_alt2.xml')
    color = (0, 255, 0)
    num = 0  # number of images saved so far
    while cap.isOpened():
        ok, frame = cap.read()
        if not ok:
            break
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        faceRects = classifier.detectMultiScale(gray, scaleFactor=1.2, minNeighbors=3, minSize=(32, 32))
        if len(faceRects) > 0:
            for faceRect in faceRects:
                x, y, w, h = faceRect
                # File name of the captured image, built with string formatting.
                # The extension is required here; without it imwrite later fails with:
                # "could not find a writer for the specified extension in function cv::imwrite_"
                image_name = '%s%d.jpg' % (path_name, num)
                image = frame[y:y+h, x:x+w]
                cv2.imwrite(image_name, image)
                num += 1
                # exit the loop once the requested number of images has been saved
                if num > max_num:
                    break
                cv2.rectangle(frame, (x, y), (x + w, y + h), color, 2)
                font = cv2.FONT_HERSHEY_SIMPLEX
                cv2.putText(frame, ('%d' % num), (x + 30, y + 30), font, 1, (255, 0, 255), 4)
        if num > max_num:
            break
        cv2.imshow(window_name, frame)
        c = cv2.waitKey(10)
        if c & 0xFF == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()
    print('Finished.')

if __name__ == '__main__':
    print('catching your face and writing it to disk...')
    getTrainingData('getTrainData', 0, 'training_data_other/', 100)  # create this folder first
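As the comment above notes, the target folder must exist before capturing, or imwrite will fail. A minimal sketch that creates the folders up front; the folder name 'training_data_me/' is an assumption mirroring the 'me' class used later, adjust both names to your setup:

# sketch: create the output folders before capturing
import os

for folder in ('training_data_me/', 'training_data_other/'):  # 'training_data_me/' is hypothetical
    os.makedirs(folder, exist_ok=True)  # no-op if the folder already exists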
# facedetect.py
import cv2

def facedetect(windowname, camera_id):
    cv2.namedWindow(windowname)
    cap = cv2.VideoCapture(camera_id)
    classfier = cv2.CascadeClassifier('haarcascade_frontalface_alt2.xml')
    color = (255, 0, 0)
    while cap.isOpened():
        ok, frame = cap.read()
        if not ok:
            break
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        faceRects = classfier.detectMultiScale(gray, scaleFactor=1.2, minNeighbors=3, minSize=(32, 32))
        """
        A face can appear anywhere in the image, so detection scans the image
        top-to-bottom and left-to-right with a fixed-size window, classifying each
        sub-image as face or non-face -- the sliding-window technique.
        To detect faces of different sizes, the image is also repeatedly scaled up
        or down to build an image pyramid, and every scaled image is scanned the
        same way. Because of the sliding window plus the repeated rescaling, the
        whole detection process is computationally expensive.
        """
        if len(faceRects) > 0:
            for faceRect in faceRects:
                x, y, w, h = faceRect
                # Expand the box by 10 pixels on each side so it is slightly larger
                # than the detected face, giving a more complete face image.
                cv2.rectangle(frame, (x - 10, y - 10), (x + w + 10, y + h + 10), color, 2)
        cv2.imshow(windowname, frame)
        c = cv2.waitKey(10)
        if c & 0xFF == ord('q'):
            break
    cap.release()  # release the camera
    cv2.destroyAllWindows()
    print("camera closed")

if __name__ == '__main__':
    print('face detecting... ')
    facedetect('facedetect', 0)
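The cost described in the docstring is governed mainly by scaleFactor: a smaller value means more pyramid levels (slower but more thorough), a larger one means fewer levels (faster but coarser). A minimal sketch of that trade-off on a single still image; 'sample.jpg' is a placeholder path:

# sketch: time detectMultiScale at different scaleFactor values
import time
import cv2

img = cv2.imread('sample.jpg')  # placeholder: any image containing a face
assert img is not None, 'sample.jpg not found'
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
clf = cv2.CascadeClassifier('haarcascade_frontalface_alt2.xml')

for sf in (1.05, 1.2, 1.4):
    t0 = time.time()
    faces = clf.detectMultiScale(gray, scaleFactor=sf, minNeighbors=3, minSize=(32, 32))
    print('scaleFactor=%.2f: %d face(s) in %.3f s' % (sf, len(faces), time.time() - t0))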
# load_face_dataset """ 由于这个分类器准确度有限,得到的人脸图片会有一些错误 所以在下一步处理数据之前就要手工检查数据 用OpenCV统一图片尺寸大小以便输入到卷积神经网络中 """ import os import numpy as np import cv2 IMAGE_SIZE = 160 # 指定图像大小 # 按指定图像大小调整尺寸 def resize_image(image, height=IMAGE_SIZE, width=IMAGE_SIZE): top, bottom, left, right = (0,0,0,0) # 获取图片尺寸 h, w, _ = image.shape # 对于长宽不等的图片,找到最长的一边 longest_edge = max(h, w) # 计算短边需要增加多少像素宽度才能与长边等长(相当于padding,长边的padding为0,短边才会有padding) if h < longest_edge: dh = longest_edge - h top = dh // 2 bottom = dh - top elif w < longest_edge: dw = longest_edge - w left = dw // 2 right = dw - left else: pass # RGB颜色 BLACK = [0,0,0] constant = cv2.copyMakeBorder(image, top, bottom, left, right, cv2.BORDER_CONSTANT, value = BLACK) # 调整图像大小并返回图像,目的是减少计算量和内存占用,提升训练速度 return cv2.resize(constant, (height, width)) # 读取训练数据到内存,这里数据结构是列表 # # path_name是当前工作目录,后面会由os.getcwd()获得 # def read_path(path_name): # images = [] # labels = [] # for dir_item in os.listdir(path_name): # os.listdir() 方法用于返回指定的文件夹包含的文件或文件夹的名字的列表 # # 从当前工作目录寻找训练集图片的文件夹 # full_path = os.path.abspath(os.path.join(path_name, dir_item)) # if os.path.isdir(full_path): # 如果是文件夹,继续递归调用,去读取文件夹里的内容 # read_path(full_path) # else: # 如果是文件了 # if dir_item.endswith('.jpg'): # image = cv2.imread(full_path) # if image is None: # 遇到部分数据有点问题,报错'NoneType' object has no attribute 'shape' # pass # else: # image = resize_image(image, IMAGE_SIZE, IMAGE_SIZE) # images.append(image) # labels.append(path_name) # print(images,labels) # return images, labels # # 读取训练数据并完成标注 # def load_dataset(path_name): # images,labels = read_path(path_name) # # 将lsit转换为numpy array # images = np.array(images, dtype='float') # 注意这里要将数据类型设为float,否则后面face_train_keras.py里图像归一化的时候会报错,TypeError: No loop matching the specified signature and casting was found for ufunc true_divide # # print(images.shape) # (1969, 64, 64, 3) # # 标注数据,me文件夹下是我,指定为0,其他指定为1,这里的0和1不是logistic regression二分类输出下的0和1,而是softmax下的多分类的类别 # labels = np.array([0 if label.endswith('me') else 1 for label in labels]) # # print(images,labels) # return images, labels def load_dataset(data_dir): images = [] # 用来存放图片 labels = [] # 用来存放类别标签 sample_nums = [] # 用来存放不同类别的人脸数据量 classes = os.listdir(data_dir) # 通过数据集路径下文件夹的数量得到所有类别 category = 0 # 分类标签计数 for person in classes: # person是不同分类人脸的文件夹名 person_dir = os.path.join(data_dir, person) # person_dir是某一分类人脸的路径名 if os.path.isdir(person_dir): person_pics = os.listdir(person_dir) # 某一类人脸路径下的全部人脸数据文件 for face in person_pics: # face是某一分类文件夹下人脸图片数据的文件名 img = cv2.imread(os.path.join(person_dir, face)) # 通过os.path.join得到人脸图片的绝对路径 if img is None: # 遇到部分数据有点问题,报错'NoneType' object has no attribute 'shape' pass else: img = resize_image(img, IMAGE_SIZE, IMAGE_SIZE) images.append(img) # 得到某一分类下的所有图片 labels.append(category) # 给某一分类下的所有图片赋予分类标签值 sample_nums.append(len(person_pics)) # 得到某一分类下的样本量 category += 1 images = np.array(images) labels = np.array(labels) print("Number of classes: ", len(classes)) # 输出分类数 for i in range(len(sample_nums)): print("Number of the sample of class ", i, ": ", sample_nums[i]) # 输出每个类别的样本量 return images, labels if __name__ == '__main__': path_name = os.getcwd() # 获取当前工作目录 print(path_name) images = load_dataset("./data/") print(images)
# face_train_keras.py
import random
import keras
import numpy as np
from sklearn.model_selection import train_test_split
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten, Conv2D, MaxPooling2D
from keras.optimizers import SGD
from keras.models import load_model
from keras import backend as K
from load_face_dataset import load_dataset, resize_image, IMAGE_SIZE
import cv2

'''
Dataset handling:
1. load the dataset
2. split it into a training set and a test set
3. adjust the dimension ordering to match the Keras backend tensor engine
4. one-hot encode the class labels
5. normalize the data
'''
class Dataset:
    def __init__(self, path_name):
        # training set
        self.train_images = None
        self.train_labels = None
        # test set
        self.test_images = None
        self.test_labels = None
        # dataset load path
        self.path_name = path_name
        # Dimension ordering used by the current backend (rows, cols, channels),
        # used later as the input_shape of the first convolutional layer.
        self.input_shape = None

    # Load the dataset, split it for cross-validation, and preprocess it.
    def load(self, img_rows=IMAGE_SIZE, img_cols=IMAGE_SIZE, img_channels=3, nb_classes=2):
        # load the dataset into memory
        images, labels = load_dataset(self.path_name)
        # The split below is random, so each run of the program trains on a
        # different split. In short: images and labels are split 70/30.
        train_images, test_images, train_labels, test_labels = train_test_split(
            images, labels, test_size=0.3, random_state=random.randint(0, 100))
        # With the TensorFlow backend the convention is channels_last, which matches
        # our data; with channels_first the dimensions must be reordered.
        if K.image_data_format() == 'channels_first':  # e.g. (100, 3, 16, 32)
            train_images = train_images.reshape(train_images.shape[0], img_channels, img_rows, img_cols)
            test_images = test_images.reshape(test_images.shape[0], img_channels, img_rows, img_cols)
            self.input_shape = (img_channels, img_rows, img_cols)
        else:  # e.g. (100, 16, 32, 3)
            train_images = train_images.reshape(train_images.shape[0], img_rows, img_cols, img_channels)
            test_images = test_images.reshape(test_images.shape[0], img_rows, img_cols, img_channels)
            self.input_shape = (img_rows, img_cols, img_channels)
        # print the sizes of the training and test sets
        print(train_images.shape[0], 'train samples')
        print(test_images.shape[0], 'test samples')
        # The model uses categorical_crossentropy as its loss, so the class labels
        # must be one-hot encoded here.
        train_labels = keras.utils.to_categorical(train_labels, nb_classes)
        test_labels = keras.utils.to_categorical(test_labels, nb_classes)
        train_images = train_images.astype('float32')
        test_images = test_images.astype('float32')
        # normalize the pixel values to the 0..1 range
        train_images /= 255
        test_images /= 255
        self.train_images = train_images
        self.test_images = test_images
        self.train_labels = train_labels
        self.test_labels = test_labels

# Build the convolutional neural network model.
class Model:
    def __init__(self):
        self.model = None

    # build the model
    def build_model(self, dataset, nb_classes=2):
        self.model = Sequential()
        # When a layer is the first layer of the model it needs the input_shape
        # argument (a tuple of integers, not including batch_size).
        self.model.add(Conv2D(32, (3, 3), padding='same', input_shape=dataset.input_shape))
        self.model.add(Activation('relu'))
        self.model.add(Conv2D(32, (3, 3)))
        self.model.add(Activation('relu'))
        self.model.add(MaxPooling2D(pool_size=(2, 2)))  # strides defaults to pool_size
        self.model.add(Conv2D(64, (3, 3), padding='same'))
        self.model.add(Activation('relu'))
        self.model.add(Conv2D(64, (3, 3)))
        self.model.add(Activation('relu'))
        self.model.add(MaxPooling2D(pool_size=(2, 2)))
        self.model.add(Dropout(0.25))
        self.model.add(Flatten())
        self.model.add(Dense(512))
        self.model.add(Activation('relu'))
        self.model.add(Dropout(0.25))
        self.model.add(Dense(nb_classes))
        self.model.add(Activation('softmax'))

    # train the model
    def train(self, dataset, batch_size=128, nb_epoch=15, data_augmentation=True):
        self.model.compile(loss='categorical_crossentropy',
                           optimizer='adam',
                           metrics=['accuracy'])
        if not data_augmentation:
            self.model.fit(dataset.train_images,
                           dataset.train_labels,
                           batch_size=batch_size,
                           epochs=nb_epoch,
                           shuffle=True)
        # image preprocessing with data augmentation
        else:
            # In every epoch each sample is replaced by an augmented variant
            # generated with the configuration below; with 1969 images and a 70/30
            # split that is about 1969 * 0.7 ≈ 1378 augmented samples per epoch.
            # Because most of these parameters are drawn at random within a range,
            # the samples differ from epoch to epoch. Other options (not used here)
            # include featurewise/samplewise centering, featurewise/samplewise std
            # normalization, ZCA whitening, and random vertical flips.
            datagen = ImageDataGenerator(
                rotation_range=20,       # random rotation of up to 20 degrees
                width_shift_range=0.2,   # random horizontal shift, as a fraction (0..1) of the image width
                height_shift_range=0.2,  # the same, vertically
                horizontal_flip=True)    # randomly flip some (not all) images horizontally
            # datagen.fit(...) would compute the statistics the augmentation needs
            # over the whole training set (for featurewise normalization, ZCA
            # whitening, etc.). It is required only when featurewise_center,
            # featurewise_std_normalization or zca_whitening is True, so it is
            # omitted here.
            # Train the model from the generator: flow() takes the raw training
            # data and yields batches of augmented data.
            self.model.fit_generator(
                datagen.flow(dataset.train_images, dataset.train_labels, batch_size=batch_size),
                steps_per_epoch=dataset.train_images.shape[0] // batch_size,
                epochs=nb_epoch)

    def evaluate(self, dataset):
        # evaluate returns a list whose two elements are test loss and test accuracy
        score = self.model.evaluate(dataset.test_images, dataset.test_labels)
        # The '%%' after '.3f' prints a literal percent sign; the other two percent
        # signs are ordinary format-specifier syntax.
        print("%s: %.3f%%" % (self.model.metrics_names[1], score[1] * 100))

    def save_model(self, file_path):
        self.model.save(file_path)

    def load_model(self, file_path):
        self.model = load_model(file_path)

    def face_predict(self, image):
        # reshape the detected face to the input size the model expects
        image = resize_image(image)
        image = image.reshape((1, IMAGE_SIZE, IMAGE_SIZE, 3))
        # convert to float and normalize
        # (float32 = single precision: sign bit, 8-bit exponent, 23-bit mantissa)
        image = image.astype('float32')
        image /= 255
        result = self.model.predict(image)
        # print('result:', result)
        # print(result.shape)  # (1, 2)
        # print(type(result))  # <class 'numpy.ndarray'>
        return result.argmax(axis=-1)  # axis=-1 in numpy is the last dimension

if __name__ == '__main__':
    dataset = Dataset('./data/')
    dataset.load()
    # train the model
    model = Model()
    model.build_model(dataset)
    model.train(dataset)
    model.evaluate(dataset)
    # Create the 'model' folder in the working directory first, otherwise save
    # fails with: Unable to create file, error message = 'No such file or directory'
    model.save_model('./model/me.face.model.h5')
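To see what the augmentation configured above actually produces, a minimal sketch (not part of the training script) that writes the first batch of augmented images to disk; it assumes the ./data/ layout used throughout this post:

# sketch: preview a batch of augmented training images
import cv2
from keras.preprocessing.image import ImageDataGenerator
from load_face_dataset import load_dataset

images, labels = load_dataset('./data/')
datagen = ImageDataGenerator(rotation_range=20, width_shift_range=0.2,
                             height_shift_range=0.2, horizontal_flip=True)
batch, _ = next(datagen.flow(images.astype('float32') / 255, labels, batch_size=9))
for i, img in enumerate(batch):
    # scale back to 0..255 for saving; BGR channel order matches cv2.imread
    cv2.imwrite('augmented_%d.jpg' % i, (img * 255).astype('uint8'))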
# face_recognition.py
import cv2
# import sys
from face_train_keras import Model

# load the model
model = Model()
model.load_model(file_path='./model/me.face.model.h5')

cv2.namedWindow('Detecting your face.')  # create the window
# color of the rectangle drawn around the face
color = (0, 255, 0)
classifier = cv2.CascadeClassifier('haarcascade_frontalface_alt2.xml')  # load the classifier

# capture a live video stream from the given camera
cap = cv2.VideoCapture(0)
while cap.isOpened():
    ok, frame = cap.read()  # type(frame) is <class 'numpy.ndarray'>
    if not ok:
        break
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)  # convert to grayscale
    faceRects = classifier.detectMultiScale(gray, scaleFactor=1.2, minNeighbors=3, minSize=(32, 32))
    if len(faceRects) > 0:
        for faceRect in faceRects:
            x, y, w, h = faceRect
            # Crop the face region and hand it to the model to identify who it is.
            # Clamp the expanded box to the frame: a detection near the border
            # would otherwise yield a bad crop, and resizing an empty crop fails
            # with "error (-215) ssize.width > 0 && ssize.height > 0 in function
            # cv::resize". (A numpy slice is never None, so check size instead.)
            image = frame[max(y - 10, 0): y + h + 10, max(x - 10, 0): x + w + 10]
            if image.size == 0:  # guard against the degenerate case
                break
            else:
                faceID = model.face_predict(image)
                # print(faceID)        # [0]
                # print(type(faceID))  # <class 'numpy.ndarray'>
                # print(faceID.shape)  # (1,)
                # if it is "me"
                if faceID[0] == 0:
                    cv2.rectangle(frame, (x - 10, y - 10), (x + w + 10, y + h + 10), color, thickness=2)
                    # label who it is
                    cv2.putText(frame, 'me',
                                (x + 30, y + 30),          # position
                                cv2.FONT_HERSHEY_SIMPLEX,  # font
                                1,                         # font scale
                                (255, 0, 255),             # color
                                2)                         # line width
                else:
                    cv2.rectangle(frame, (x - 10, y - 10), (x + w + 10, y + h + 10), color, thickness=2)
                    # label who it is
                    cv2.putText(frame, 'Unknown',
                                (x + 30, y + 30),          # position
                                cv2.FONT_HERSHEY_SIMPLEX,  # font
                                1,                         # font scale
                                (255, 0, 255),             # color
                                2)                         # line width
    cv2.imshow("Detecting your face.", frame)
    # wait 10 ms for a key press
    k = cv2.waitKey(10)
    # quit the loop on 'q'
    if k & 0xFF == ord('q'):
        break

# release the camera and destroy all windows
cap.release()
cv2.destroyAllWindows()
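Since load_dataset assigns label values in the order os.listdir returns the class folders, the mapping from a predicted index back to a folder name can be recovered at recognition time instead of hard-coding 'me'/'Unknown'. A sketch, assuming the same ./data/ directory:

# sketch: recover label-index -> class-folder-name mapping
import os

def label_names(data_dir='./data/'):
    # mirror load_dataset: only directories count, in os.listdir order
    return [d for d in os.listdir(data_dir) if os.path.isdir(os.path.join(data_dir, d))]

names = label_names()
print(names)
# e.g. cv2.putText(frame, names[faceID[0]], ...) instead of the hard-coded text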