dlib Setup and Usage

Setup

Python 3.6 + dlib: this combination currently installs on Windows without errors. dlib can be downloaded from the official site or from the PyPI index (https://pypi.org/simple/dlib/); download the package locally and then install it directly with pip.
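
After installing, a quick sanity check (a minimal sketch; dlib exposes a version string):

import dlib
print(dlib.__version__)  # should print the installed version, e.g. 19.x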

Usage

Preparation

dlib can locate facial landmarks because it ships pre-trained models (the 68-point shape predictor is an ensemble of regression trees, not a deep network). Before use, download a model from http://dlib.net/files/; the landmark models are listed on that page.

I chose shape_predictor_68_face_landmarks.dat.bz2 and extracted it; this model detects 68 landmarks per face. The index layout of those 68 points is summarized after Example 1 below.
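
The .bz2 archive can be extracted with any archiver; a small sketch using only the Python standard library (adjust paths as needed):

import bz2
import shutil

# decompress shape_predictor_68_face_landmarks.dat.bz2 in place
with bz2.BZ2File("shape_predictor_68_face_landmarks.dat.bz2") as src, \
        open("shape_predictor_68_face_landmarks.dat", "wb") as dst:
    shutil.copyfileobj(src, dst)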

Example 1

Below is an example that detects and draws the landmarks in a single image.

import cv2
import dlib
 
path = "img/meinv.png"
img = cv2.imread(path)
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
 
# HOG-based frontal face detector
detector = dlib.get_frontal_face_detector()
# 68-point facial landmark predictor
predictor = dlib.shape_predictor(
    "C:\\Python36\\Lib\\site-packages\\dlib-data\\shape_predictor_68_face_landmarks.dat"
)
 
dets = detector(gray, 1)
for face in dets:
    shape = predictor(img, face)  # locate the 68 facial landmarks
    # loop over the landmarks and draw a small circle at each
    for pt in shape.parts():
        pt_pos = (pt.x, pt.y)
        cv2.circle(img, pt_pos, 2, (0, 255, 0), 1)
    cv2.imshow("image", img)
 
cv2.waitKey(0)
cv2.destroyAllWindows()
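
The 68 points drawn above follow the standard iBUG layout. Since the original figure of the landmark positions is not reproduced here, the 0-based index ranges are (these match imutils' face_utils.FACIAL_LANDMARKS_IDXS):

# 0-based index ranges of the 68-point layout (end index exclusive)
FACIAL_LANDMARKS = {
    "jaw":            (0, 17),
    "right_eyebrow":  (17, 22),
    "left_eyebrow":   (22, 27),
    "nose":           (27, 36),
    "right_eye":      (36, 42),
    "left_eye":       (42, 48),
    "mouth":          (48, 68),
}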

Example 2

This example does liveness detection by checking for eye blinks, using the eye aspect ratio (EAR):

EAR = \frac{\lVert p_2 - p_6 \rVert + \lVert p_3 - p_5 \rVert}{2 \lVert p_1 - p_4 \rVert}

Here p_1, ..., p_6 are the six landmarks of one eye: p_1 and p_4 are the horizontal corners, and (p_2, p_6) and (p_3, p_5) are the two vertical pairs, as in the ASCII sketch inside eye_aspect_ratio() below. The EAR stays roughly constant while the eye is open and drops toward zero during a blink.

# USAGE
# python detect_blinks.py --shape-predictor shape_predictor_68_face_landmarks.dat --video blink_detection_demo.mp4
# python detect_blinks.py --shape-predictor shape_predictor_68_face_landmarks.dat

# import the necessary packages
from scipy.spatial import distance as dist
from imutils.video import FileVideoStream
from imutils.video import VideoStream
from imutils import face_utils
import numpy as np
import argparse
import imutils
import time
import dlib
import cv2

def eye_aspect_ratio(eye):
	# compute the euclidean distances between the two sets of
	# vertical eye landmarks (x, y)-coordinates
	A = dist.euclidean(eye[1], eye[5])
	B = dist.euclidean(eye[2], eye[4])

	# compute the euclidean distance between the horizontal
	# eye landmark (x, y)-coordinates
	C = dist.euclidean(eye[0], eye[3])
	'''
		[1]	[2]
	[0]			[3]
		[5]	[4]
	'''
	# compute the eye aspect ratio
	ear = (A + B) / (2.0 * C)

	# return the eye aspect ratio
	return ear



# argument parsing is commented out here; the paths are hard-coded below instead
# ap = argparse.ArgumentParser()
# ap.add_argument("-p", "--shape-predictor", required=True,
# 	help="path to facial landmark predictor")
# ap.add_argument("-v", "--video", type=str, default="",
# 	help="path to input video file")
# args = vars(ap.parse_args())

p = 'shape_predictor_68_face_landmarks.dat'
v = "blink_detection_demo.mp4"

# define two constants, one for the eye aspect ratio to indicate
# blink and then a second constant for the number of consecutive
# frames the eye must be below the threshold
EYE_AR_THRESH = 0.3
EYE_AR_CONSEC_FRAMES = 3

# initialize the frame counters and the total number of blinks
COUNTER = 0
TOTAL = 0

# initialize dlib's face detector (HOG-based) and create the facial landmark predictor
print("[INFO] loading facial landmark predictor...")
detector = dlib.get_frontal_face_detector() # create the HOG-based face detector
'''
print("detector:", help(detector))
This object represents a sliding window histogram-of-oriented-gradients based object detector.
'''

# predictor = dlib.shape_predictor(args["shape_predictor"])
predictor = dlib.shape_predictor(p) # load the trained landmark model
"""
print("predictor", help(predictor))
This object is a tool that takes in an image region containing some
object and outputs a set of point locations that define the pose of the object.
The classic example of this is human face pose prediction, where you take
an image of a human face as input and are expected to identify the locations of
important facial landmarks such as the corners of the mouth and eyes, tip of the nose, and so forth.
"""

# grab the indexes of the facial landmarks for the left and right eye, respectively
(lStart, lEnd) = face_utils.FACIAL_LANDMARKS_IDXS["left_eye"]
(rStart, rEnd) = face_utils.FACIAL_LANDMARKS_IDXS["right_eye"]
# face_utils.FACIAL_LANDMARKS_IDXS is an OrderedDict mapping region names to index ranges
# start reading the video stream
print("[INFO] starting video stream thread...")
# vs = FileVideoStream(args["video"]).start()
vs = FileVideoStream(v).start()  # start the threaded file video stream
fileStream = True
# vs = VideoStream(src=0).start()
# vs = VideoStream(usePiCamera=True).start()
# fileStream = False
time.sleep(1.0)



# loop over frames from the video stream
while True:
	# if this is a file video stream, then we need to check if
	# there any more frames left in the buffer to process
	if fileStream and not vs.more():  # vs.more() is True while frames remain in the buffer
		break

	# grab the frame from the threaded video file stream, resize
	# it, and convert it to grayscale
	# channels)
	frame = vs.read() # read one frame
	frame = imutils.resize(frame, width=450) # resize the frame to a width of 450 px
	gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

	# run the detector on the grayscale frame to speed up detection
	rects = detector(gray, 0)

	# loop over the face detections
	for rect in rects:
		# determine the facial landmarks for the face region, then
		# convert the facial landmark (x, y)-coordinates to a NumPy
		# array
		shape = predictor(gray, rect) # predict the 68 landmarks for this face (eyes, nose, mouth, ...)
		shape = face_utils.shape_to_np(shape)

		# extract the left and right eye coordinates, then use the
		# coordinates to compute the eye aspect ratio for both eyes
		leftEye = shape[lStart:lEnd]
		rightEye = shape[rStart:rEnd]
		leftEAR = eye_aspect_ratio(leftEye)
		rightEAR = eye_aspect_ratio(rightEye)

		# average the eye aspect ratio together for both eyes
		ear = (leftEAR + rightEAR) / 2.0

		# compute the convex hull for the left and right eye, then
		# visualize each of the eyes
		leftEyeHull = cv2.convexHull(leftEye)
		rightEyeHull = cv2.convexHull(rightEye)
		cv2.drawContours(frame, [leftEyeHull], -1, (0, 255, 0), 1)
		cv2.drawContours(frame, [rightEyeHull], -1, (0, 255, 0), 1)

		# check to see if the eye aspect ratio is below the blink
		# threshold, and if so, increment the blink frame counter
		if ear < EYE_AR_THRESH:
			COUNTER += 1

		# otherwise, the eye aspect ratio is not below the blink
		# threshold
		else:
			# if the eyes were closed for a sufficient number of
			# then increment the total number of blinks
			if COUNTER >= EYE_AR_CONSEC_FRAMES:
				TOTAL += 1

			# reset the eye frame counter
			COUNTER = 0

		# draw the total number of blinks on the frame along with
		# the computed eye aspect ratio for the frame
		cv2.putText(frame, "Blinks: {}".format(TOTAL), (10, 30),
			cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
		cv2.putText(frame, "EAR: {:.2f}".format(ear), (300, 30),
			cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
 
	# show the frame
	cv2.imshow("Frame", frame)
	key = cv2.waitKey(1) & 0xFF
 
	# if the `q` key was pressed, break from the loop
	if key == ord("q"):
		break

# do a bit of cleanup
cv2.destroyAllWindows()
vs.stop()

In the same way you can detect whether the mouth is open, but the formula needs to be modified: it uses landmarks 52-58-49-55 and 63-67-61-65 (1-based numbering). The rule below checks both the outer and the inner lip contour, because with thick or thin lips the closure of a single contour is not obvious. A small sketch of the check follows the formula.

IF \frac{\lVert P_{52}-P_{58}\rVert}{\lVert P_{49}-P_{55}\rVert} > T \;\text{or}\; \frac{\lVert P_{63}-P_{67}\rVert}{\lVert P_{61}-P_{65}\rVert} > T: open, ELSE: closed (T is an opening threshold; the original leaves its value unspecified).
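
A minimal sketch of that check, assuming shape is the 68-point array from face_utils.shape_to_np (0-based indices, so point 52 becomes index 51) and a hypothetical threshold MOUTH_AR_THRESH you would tune yourself:

from scipy.spatial import distance as dist

MOUTH_AR_THRESH = 0.5  # hypothetical threshold, tune on your own data

def mouth_is_open(shape):
    # outer lip: top 52, bottom 58, corners 49/55 (1-based) -> indices 51, 57, 48, 54
    outer = dist.euclidean(shape[51], shape[57]) / dist.euclidean(shape[48], shape[54])
    # inner lip: top 63, bottom 67, corners 61/65 (1-based) -> indices 62, 66, 60, 64
    inner = dist.euclidean(shape[62], shape[66]) / dist.euclidean(shape[60], shape[64])
    # mouth counts as open if either contour's ratio exceeds the threshold
    return outer > MOUTH_AR_THRESH or inner > MOUTH_AR_THRESH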


Example 3

For certain reasons I won't post working code for this one, just the idea. Everyone has seen the face-swap effect on Douyin; it can be implemented with these facial landmarks, and an ordinary PC is enough to try it. A rough sketch is below.
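
A rough sketch of the idea (not any app's actual pipeline; the file names src.png and dst.png are placeholders): detect the 68 landmarks on both faces, warp the source face onto the target with a similarity transform estimated from the landmarks, then blend it in with seamlessClone.

import cv2
import dlib
import numpy as np

detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor("shape_predictor_68_face_landmarks.dat")

def landmarks(img):
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    face = detector(gray, 1)[0]  # assume exactly one face per image
    return np.array([(p.x, p.y) for p in predictor(gray, face).parts()],
                    dtype=np.float32)

src = cv2.imread("src.png")   # face to paste (placeholder file name)
dst = cv2.imread("dst.png")   # face to replace (placeholder file name)
sp, dp = landmarks(src), landmarks(dst)

# similarity transform that maps the source landmarks onto the target's
M, _ = cv2.estimateAffinePartial2D(sp, dp)
warped = cv2.warpAffine(src, M, (dst.shape[1], dst.shape[0]))

# mask out the target face region (convex hull of its landmarks) and blend
hull = cv2.convexHull(dp.astype(np.int32))
mask = np.zeros(dst.shape[:2], dtype=np.uint8)
cv2.fillConvexPoly(mask, hull, 255)
x, y, w, h = cv2.boundingRect(hull)
out = cv2.seamlessClone(warped, dst, mask, (x + w // 2, y + h // 2),
                        cv2.NORMAL_CLONE)
cv2.imwrite("swapped.png", out)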

Example 4

Some games can sculpt a playable character's face from a photo of a real face; this is also implemented with facial landmarks. A sketch of extracting shape parameters from the landmarks is below.
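
A hedged sketch of how that could work: measure a few distances on the 68 landmarks, normalize them by face width, and map the ratios to the character creator's sliders (the parameter names here are made up for illustration):

import numpy as np

def face_params(pts):
    # pts: (68, 2) array from face_utils.shape_to_np, 0-based indices
    face_w = np.linalg.norm(pts[16] - pts[0])     # jaw corner to jaw corner
    eye_gap = np.linalg.norm(pts[42] - pts[39])   # inner eye corners
    nose_len = np.linalg.norm(pts[30] - pts[27])  # nose bridge top to tip
    mouth_w = np.linalg.norm(pts[54] - pts[48])   # mouth corners
    # normalized ratios a character creator could map to sliders
    return {"eye_gap": eye_gap / face_w,
            "nose_len": nose_len / face_w,
            "mouth_w": mouth_w / face_w}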