March 28, 2023
Plan to build the basic facial emotion recognition system using OpenCV and Haar cascades that take in a live feed from a webcam.
Haar cascades, first introduced by Viola and Jones in their seminal 2001 publication, Rapid Object Detection using a Boosted Cascade of Simple Features, are arguably OpenCV’s most popular object detection algorithm.
Sure, many algorithms are more accurate than Haar cascades (HOG + Linear SVM, SSDs, Faster R-CNN, YOLO, to name a few), but they are still relevant and useful today.
One of the primary benefits of Haar cascades is that they are just so fast — it’s hard to beat their speed.
The downside to Haar cascades is that they tend to be prone to false-positive detections, require parameter tuning when being applied for inference/detection, and just, in general, are not as accurate as the more “modern” algorithms we have today.
Code:
import os
import cv2
import numpy as np
from tensorflow import keras
from tensorflow.keras.preprocessing import image
import warnings
warnings.filterwarnings("ignore")
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from keras.models import load_model
import matplotlib.pyplot as plt
import numpy as np
# load model
model = load_model("best_model.h5")
face_haar_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
cap = cv2.VideoCapture(0)
while True:
ret, test_img = cap.read() # captures frame and returns boolean value and captured image
if not ret:
continue
gray_img = cv2.cvtColor(test_img, cv2.COLOR_BGR2RGB)
faces_detected = face_haar_cascade.detectMultiScale(gray_img, 1.32, 5)
for (x, y, w, h) in faces_detected:
cv2.rectangle(test_img, (x, y), (x + w, y + h), (255, 255, 255), thickness=3)
roi_gray = gray_img[y:y + w, x:x + h] # cropping region of interest i.e. face area from image
roi_gray = cv2.resize(roi_gray, (224, 224))
img_pixels = image.img_to_array(roi_gray)
img_pixels = np.expand_dims(img_pixels, axis=0)
img_pixels /= 255
predictions = model.predict(img_pixels)
# find max indexed array
max_index = np.argmax(predictions[0])
emotions = ('angry', 'disgust', 'fear', 'happy', 'sad', 'surprise', 'neutral')
predicted_emotion = emotions[max_index]
cv2.putText(test_img, predicted_emotion, (int(x), int(y)), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
resized_img = cv2.resize(test_img, (1280, 720))
cv2.imshow('Facial emotion analysis ', resized_img)
if cv2.waitKey(10) == ord('q'): # wait until 'q' key is pressed
break
cap.release()
cv2.destroyAllWindows
Demo:
Not running so accurately but it’s enough for me to do the test.