Hand Tracking

Objective: Use MediaPipe Hands to balance a pole on a cart and control an LED.

MediaPipe detects the shape and motion of hands (see Online Demo). Hand gestures can be the basis for computer control of simulated or physical objects.

The purpose of this exercise is to demonstrate the use of hand detection for control of a simulated cart pole and a physical LED.

# https://google.github.io/mediapipe/solutions/hands
import cv2
import mediapipe as mp
import os
import urllib.request

mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles
mp_hands = mp.solutions.hands

# download image as hands.jpg
url = 'http://apmonitor.com/pds/uploads/Main/hands.jpg'
urllib.request.urlretrieve(url, 'hands.jpg')
IMAGE_FILES = ['hands.jpg']
with mp_hands.Hands(
    static_image_mode=True,
    max_num_hands=2,
    min_detection_confidence=0.5) as hands:
  for idx, file in enumerate(IMAGE_FILES):
    image = cv2.flip(cv2.imread(file), 1)

    # Convert the BGR image to RGB before processing.
    results = hands.process(cv2.cvtColor(image, \
                            cv2.COLOR_BGR2RGB))

    # Print handedness and draw hand landmarks on the image.
    print('Handedness:', results.multi_handedness)
    if not results.multi_hand_landmarks:
      continue
    image_height, image_width, _ = image.shape
    annotated_image = image.copy()
    for hand_landmarks in results.multi_hand_landmarks:
      tip = mp_hands.HandLandmark.INDEX_FINGER_TIP
      print('hand_landmarks:', hand_landmarks)
      print(
          f'Index finger tip coordinates: (',
          f'{hand_landmarks.landmark[tip].x * image_width}, '
          f'{hand_landmarks.landmark[tip].y * image_height})'
      )
      mp_drawing.draw_landmarks(
          annotated_image,
          hand_landmarks,
          mp_hands.HAND_CONNECTIONS,
          mp_drawing_styles.get_default_hand_landmarks_style(),
          mp_drawing_styles.get_default_hand_connections_style())
    cv2.imwrite('annotated_image' + str(idx) + '.png', \
                cv2.flip(annotated_image, 1))
    # Draw hand world landmarks.
    if not results.multi_hand_world_landmarks:
      continue
    for hand_world_landmarks in results.multi_hand_world_landmarks:
      mp_drawing.plot_landmarks(
        hand_world_landmarks, \
        mp_hands.HAND_CONNECTIONS, azimuth=5)

Getting Started

There are a few required packages for these exercises and can be installed with pip. These include OpenCV, MediaPipe from Google, and OpenAI Gym.

pip install opencv-python
pip install mediapipe pyglet
pip install gym

The exercises also require a webcam for video input and the Temperature Control Lab for LED control.


Cart Pole

A pole is attached by a frictionless joint and cart that moves along a track. The cart is controlled by applying a force to the left (action=0) or right (action=1). The pendulum starts upright and a reward of +1 is provided for every timestep that the pole remains upright. The gym returns done=True when the pole is more than 15Β° from vertical or the cart is more than 2.4 units from the starting location.

import gym
env = gym.make("CartPole-v1")
observation = env.reset()
for _ in range(1000):
  env.render()
  # Input:
  #   Force to the cart with actions: 0=left, 1=right
  # Returns:
  #   obs = cart position, cart velocity, pole angle, rot rate
  #   reward = +1 for every timestep
  #   done = True when abs(angle)>15 or abs(cart pos)>2.4
  action = env.action_space.sample() # random action
  observation, reward, done, info = env.step(action)

  if done:
    observation = env.reset()
env.close()

Use the webcam input to control the force on a cart. Maintaining the inverted pendulum is very challenging with hand control. This gym exercise is a benchmark for Reinforcement Learning (RL) where algorithms successively improve based on experience. As a first step, practice moving your hand to center the cart position. As a next step, try to stop the pendulum motion while it is facing down. As a final step, try to balance the pendulum.

import cv2
import mediapipe as mp
import os
import gym
import random
import warnings

env = gym.make("CartPole-v1")
observation = env.reset()
warnings.filterwarnings("ignore", category=UserWarning)

mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles
mp_hands = mp.solutions.hands

# Webcam input
cap = cv2.VideoCapture(0)
with mp_hands.Hands(
    model_complexity=0,
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5) as hands:
  while cap.isOpened():
    success, image = cap.read()
    image.flags.writeable = False
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    results = hands.process(image)

    # Draw the hand annotations on the image.
    image.flags.writeable = True
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    hand_landmarks = None
    if results.multi_hand_landmarks:
      for hand_landmarks in results.multi_hand_landmarks:
        mp_drawing.draw_landmarks(
            image,
            hand_landmarks,
            mp_hands.HAND_CONNECTIONS,
            mp_drawing_styles.get_default_hand_landmarks_style(),
            mp_drawing_styles.get_default_hand_connections_style())
    # Flip the image horizontally for a selfie-view display.
    image2 = cv2.flip(image, 1)
    cv2.imshow('MediaPipe Hands', image2)
    if cv2.waitKey(5) & 0xFF == 27:
      break

    if hand_landmarks:
      h, w, _ = image.shape
      t = mp_hands.HandLandmark.INDEX_FINGER_TIP
      x = hand_landmarks.landmark[t].x
      y = hand_landmarks.landmark[t].y
    else:
      x = 0.5

    env.render()
    r = random.random()
    if x<0.3:
      action = 1
    elif x>0.7:
      action = 0
    else:
      if r>=x:
        action = 1
      else:
        action = 0
    observation, reward, done, info = env.step(action)

    if abs(observation[0])>3:
      observation = env.reset()

env.close()    
cap.release()

Exercise

Adapt the webcam hand tracking script to control the TCLab LED. Adjust the TCLab brightness up and down as the index fingertip moves up and down. Adjust the blinking rate as the index fingertip moves to the left and right.

import tclab
import time
a = tclab.TCLab()
# Blink LED 10 times
for i in range(10):
    a.LED(100)      # LED On, 100%
    time.sleep(0.5)
    a.LED(0)        # LED Off
    time.sleep(0.5)  
a.close()

import cv2
import mediapipe as mp
import os
import tclab
import random
import time

mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles
mp_hands = mp.solutions.hands

lab = tclab.TCLab()

def blink(lab,pause,bright):
    lab.LED(bright)
    time.sleep(pause)
    lab.LED(0)
    return

# Webcam input
cap = cv2.VideoCapture(0)
with mp_hands.Hands(
    model_complexity=0,
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5) as hands:
  while cap.isOpened():
    success, image = cap.read()
    image.flags.writeable = False
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    results = hands.process(image)

    # Draw the hand annotations on the image.
    image.flags.writeable = True
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    hand_landmarks = None
    if results.multi_hand_landmarks:
      for hand_landmarks in results.multi_hand_landmarks:
        mp_drawing.draw_landmarks(
            image,
            hand_landmarks,
            mp_hands.HAND_CONNECTIONS,
            mp_drawing_styles.get_default_hand_landmarks_style(),
            mp_drawing_styles.get_default_hand_connections_style())
    # Flip the image horizontally for a selfie-view display.
    image2 = cv2.flip(image, 1)
    cv2.imshow('MediaPipe Hands', image2)
    if cv2.waitKey(5) & 0xFF == 27:
      break

    if hand_landmarks:
      h, w, _ = image.shape
      t = mp_hands.HandLandmark.INDEX_FINGER_TIP
      x = hand_landmarks.landmark[t].x
      y = hand_landmarks.landmark[t].y
    else:
      x = y = 0.5

    pause = max(0,min(0.5,(1-x)*0.5))
    bright = max(0,min(50,(1-y)*50))
    blink(lab,pause,bright)

lab.close()
cap.release()