Hand Tracking

Objective: Use MediaPipe Hands to balance a pole on a cart and control an LED.

MediaPipe detects the shape and motion of hands. Hand gestures can be the basis for computer control of simulated or physical objects.

The purpose of this exercise is to demonstrate the use of hand detection for control of a simulated cart pole and a physical LED.

# https://google.github.io/mediapipe/solutions/hands
import cv2
import mediapipe as mp
import urllib.request

mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles
mp_hands = mp.solutions.hands

# download image as hands.jpg
url = 'http://apmonitor.com/pds/uploads/Main/hands.jpg'
urllib.request.urlretrieve(url, 'hands.jpg')
IMAGE_FILES = ['hands.jpg']
with mp_hands.Hands(
    static_image_mode=True,
    max_num_hands=2,
    min_detection_confidence=0.5) as hands:
  for idx, file in enumerate(IMAGE_FILES):
    # flip image around the y-axis for correct handedness output
    image = cv2.flip(cv2.imread(file), 1)

    # Convert the BGR image to RGB before processing.
    results = hands.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

    # Print handedness and draw hand landmarks on the image.
    print('Handedness:', results.multi_handedness)
    if not results.multi_hand_landmarks:
      continue
    image_height, image_width, _ = image.shape
    annotated_image = image.copy()
    for hand_landmarks in results.multi_hand_landmarks:
      tip = mp_hands.HandLandmark.INDEX_FINGER_TIP
      print('hand_landmarks:', hand_landmarks)
      print(
          f'Index finger tip coordinates: (',
          f'{hand_landmarks.landmark[tip].x * image_width}, '
          f'{hand_landmarks.landmark[tip].y * image_height})'
      )
      mp_drawing.draw_landmarks(
          annotated_image,
          hand_landmarks,
          mp_hands.HAND_CONNECTIONS,
          mp_drawing_styles.get_default_hand_landmarks_style(),
          mp_drawing_styles.get_default_hand_connections_style())
    cv2.imwrite('annotated_image' + str(idx) + '.png',
                cv2.flip(annotated_image, 1))
    # Draw hand world landmarks.
    if not results.multi_hand_world_landmarks:
      continue
    for hand_world_landmarks in results.multi_hand_world_landmarks:
      mp_drawing.plot_landmarks(
        hand_world_landmarks,
        mp_hands.HAND_CONNECTIONS, azimuth=5)
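
The script saves annotated_image0.png with the 21 detected hand landmarks drawn on each hand and prints the handedness (left or right) along with the index fingertip coordinates in pixels.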

Getting Started

These exercises require a few packages that can be installed with pip: OpenCV, MediaPipe from Google, and Gymnasium from the Farama Foundation.

pip install opencv-python
pip install mediapipe pyglet pygame
pip install gymnasium

The exercises also require a webcam for video input and the Temperature Control Lab for LED control.
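
Before starting, confirm that the webcam and TCLab respond. A minimal check (a sketch; it assumes the TCLab is connected over USB, and tclab.TCLab() raises an exception if no device is found):

import cv2
import tclab

# verify that a webcam is available
cap = cv2.VideoCapture(0)
print('Webcam detected:', cap.isOpened())
cap.release()

# verify that the TCLab responds (raises an exception if not connected)
lab = tclab.TCLab()
lab.LED(50)   # LED on at 50% as a visible check
lab.LED(0)    # LED off
lab.close()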


Cart Pole

A pole is attached by an un-actuated joint to a cart that moves along a frictionless track. The cart is controlled by applying a force to the left (action=0) or right (action=1). The pendulum starts upright, and a reward of +1 is provided for every timestep that the pole remains upright. The environment ends the episode (terminated=True) when the pole is more than 12° from vertical or the cart is more than 2.4 units from the center.

import gymnasium as gym
env = gym.make("CartPole-v1", render_mode='human')
observation, info = env.reset()
for _ in range(100):
  # render_mode='human' displays the environment automatically
  # Input:
  #   Force to the cart with actions: 0=left, 1=right
  # Returns:
  #   obs = cart position, cart velocity, pole angle, rot rate
  #   reward = +1 for every timestep
  #   terminated = True when abs(angle)>12 deg or abs(cart pos)>2.4
  #   truncated = True at the 500-step time limit
  action = env.action_space.sample() # random action
  observation, reward, terminated, truncated, info = env.step(action)

  if terminated or truncated:
    observation, info = env.reset()
env.close()
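
Gymnasium's env.step() returns five values: the observation, the reward, a terminated flag (the pole fell or the cart left the track), a truncated flag (the 500-step time limit of CartPole-v1 was reached), and an info dictionary.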

Use the webcam input to control the force on the cart. Maintaining the inverted pendulum with hand control is very challenging. This gym environment is a benchmark for Reinforcement Learning (RL), where algorithms successively improve based on experience. As a first step, practice moving your hand to center the cart position. As a next step, try to stop the pendulum motion while it is hanging down. As a final step, try to balance the pendulum.
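
The script below maps the normalized fingertip x position (0-1 from MediaPipe) to a cart action. The core mapping can be distilled into a small function (a sketch; hand_to_action is a name introduced here, not part of the script):

import random

def hand_to_action(x):
    # large deflection: deterministic push
    if x < 0.3:
        return 1   # push right (action=1)
    if x > 0.7:
        return 0   # push left (action=0)
    # near the center: random push with P(right) = 1-x
    return 1 if random.random() >= x else 0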

# may not work in Jupyter Notebook
# run as a script from the terminal
import cv2
import mediapipe as mp
import os
import gymnasium as gym
import random
import warnings
import time

env = gym.make("CartPole-v1",render_mode='rgb_array')
observation, info = env.reset()
warnings.filterwarnings("ignore", category=UserWarning)

mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles
mp_hands = mp.solutions.hands

# Webcam input
error_count = 0
cap = cv2.VideoCapture(0)
with mp_hands.Hands(model_complexity=0,min_detection_confidence=0.5,
                    min_tracking_confidence=0.5) as hands:
  while cap.isOpened():
    success, image = cap.read()
    if image is None:
      time.sleep(0.1)
      error_count += 1
      if error_count<100:
        continue
      else:
        print('Error reading from webcam')
        break
    else:
      error_count = 0
    image.flags.writeable = False
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    results = hands.process(image)

    # Draw the hand annotations on the image.
    image.flags.writeable = True
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    hand_landmarks = None
    if results.multi_hand_landmarks:
      for hand_landmarks in results.multi_hand_landmarks:
        mp_drawing.draw_landmarks(
            image,
            hand_landmarks,
            mp_hands.HAND_CONNECTIONS,
            mp_drawing_styles.get_default_hand_landmarks_style(),
            mp_drawing_styles.get_default_hand_connections_style())
    # Flip the image horizontally for a selfie-view display.
    image2 = cv2.flip(image, 1)
    # Resize image
    width = 800
    height = int(image2.shape[0]*(width/image2.shape[1]))
    dim = (width,height)
    image2 = cv2.resize(image2, dim)

    if hand_landmarks:
      h, w, _ = image.shape
      t = mp_hands.HandLandmark.INDEX_FINGER_TIP
      x = hand_landmarks.landmark[t].x
      y = hand_landmarks.landmark[t].y
    else:
      x = 0.5

    # determine action from the fingertip x position
    #   x<0.3: push right (action=1); x>0.7: push left (action=0)
    #   in between: random push with P(right) = 1-x
    r = random.random()
    if x<0.3:
      action = 1
    elif x>0.7:
      action = 0
    else:
      if r>=x:
        action = 1
      else:
        action = 0

    # add slider to webcam image
    pt1 = (int(400-(x-0.5)*700),20)
    pt2 = (400,20)
    color = (int(x*255),int((1-x)*255),255)
    cv2.line(image2, pt1, pt2, color, 10)

    cart = env.render()
    # take bottom 2/3 of image
    cart = cart[int(cart.shape[0]/3):]
    height = int(cart.shape[0] * (image2.shape[1]/cart.shape[1]))
    dim = (image2.shape[1], height)
    cart = cv2.resize(cart, dim)

    # show image
    image3 = cv2.vconcat([cart,image2])
    cv2.imshow('Hands - CartPole', image3)
    if cv2.waitKey(5) & 0xFF == 27:
      break

    # send action
    observation, reward, terminated, truncated, info = env.step(action)

    # reset only when the cart moves off the track
    if abs(observation[0])>3:
      observation, info = env.reset()

env.close()
cap.release()
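
Note that the script resets the environment only when the cart leaves the track (abs(position)>3) and filters the warnings that Gymnasium issues when stepping past termination, so the pole is free to swing below horizontal while you practice.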

Exercise

Adapt the webcam hand-tracking script to control the TCLab LED. Adjust the LED brightness up and down as the index fingertip moves up and down, and adjust the blinking rate as the fingertip moves left and right.
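
First, test the LED with a short blink script: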

import tclab
import time
a = tclab.TCLab()
# Blink LED 10 times
for i in range(10):
    a.LED(100)      # LED On, 100%
    time.sleep(0.5)
    a.LED(0)        # LED Off
    time.sleep(0.5)  
a.close()
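
One possible solution adapts the webcam hand-tracking loop to set the blink rate and brightness from the fingertip position: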

import cv2
import mediapipe as mp
import tclab
import time

mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles
mp_hands = mp.solutions.hands

lab = tclab.TCLab()

def blink(lab,pause,bright):
    lab.LED(bright)
    time.sleep(pause)
    lab.LED(0)
    return

# Webcam input
cap = cv2.VideoCapture(0)
with mp_hands.Hands(
    model_complexity=0,
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5) as hands:
  while cap.isOpened():
    success, image = cap.read()
    if not success:
      continue
    image.flags.writeable = False
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    results = hands.process(image)

    # Draw the hand annotations on the image.
    image.flags.writeable = True
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    hand_landmarks = None
    if results.multi_hand_landmarks:
      for hand_landmarks in results.multi_hand_landmarks:
        mp_drawing.draw_landmarks(
            image,
            hand_landmarks,
            mp_hands.HAND_CONNECTIONS,
            mp_drawing_styles.get_default_hand_landmarks_style(),
            mp_drawing_styles.get_default_hand_connections_style())
    # Flip the image horizontally for a selfie-view display.
    image2 = cv2.flip(image, 1)
    cv2.imshow('MediaPipe Hands', image2)
    if cv2.waitKey(5) & 0xFF == 27:
      break

    if hand_landmarks:
      h, w, _ = image.shape
      t = mp_hands.HandLandmark.INDEX_FINGER_TIP
      x = hand_landmarks.landmark[t].x
      y = hand_landmarks.landmark[t].y
    else:
      x = y = 0.5

    # map fingertip position to LED blink rate and brightness:
    #   left/right (x) sets the pause between blinks (0-0.5 s)
    #   up/down (y) sets the brightness (0-50%)
    pause = max(0,min(0.5,(1-x)*0.5))
    bright = max(0,min(50,(1-y)*50))
    blink(lab,pause,bright)

lab.close()
cap.release()
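
Because blink() sleeps while the LED is on, the video preview pauses for up to half a second on each loop pass; a non-blocking approach that compares time.time() against the last blink time would keep the preview smooth.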