MicroPython with Computer Vision on RTL8722DM_MINI (AMB23)

Computer Vision (CV) has been around for quite a while, but running a computer vision application is usually a little too demanding for a microcontroller (MCU): it requires a lot of computation and also burns a lot of power. It is therefore better to offload the CV-related computation to a more capable machine, for example our PC or a cloud server, and let the MCU control sensors/actuators in real time.

This little project has two main building blocks:

  • Video capture and running of computer vision algorithm -> PC
  • Running MicroPython for wireless data transmission and LED control -> AMB23

Let's go through them one by one.

1. Video Capture and Computer Vision Algorithm

To achieve hand gesture recognition, I chose the well-known [OpenCV](https://opencv.org/) and [MediaPipe](https://mediapipe.dev/) libraries: both are open-source projects available on GitHub, and both have Python bindings -- meaning I can write all my logic in Python for quick prototyping. Here is the flow chart for the logic:

[Flow chart of the logic]

The Python script:

import cv2
import time
import numpy as np
import math
import socket
import mediapipe as mp

########## Module #############
class handDetector():
    def __init__(self, mode=False, maxHands=2, detectionCon=0.5, trackCon=0.5):
        self.mode = mode
        self.maxHands = maxHands
        self.detectionCon = detectionCon
        self.trackCon = trackCon

        self.mpHands = mp.solutions.hands
        # Keyword arguments are used because newer MediaPipe releases insert a
        # model_complexity parameter after max_num_hands, which silently breaks
        # positional calls like Hands(mode, maxHands, detectionCon, trackCon)
        self.hands = self.mpHands.Hands(static_image_mode=self.mode,
                                        max_num_hands=self.maxHands,
                                        min_detection_confidence=self.detectionCon,
                                        min_tracking_confidence=self.trackCon)
        self.mpDraw = mp.solutions.drawing_utils

    def findHands(self, img, draw=True):
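        # OpenCV captures frames in BGR, but MediaPipe expects RGB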
        imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        self.results = self.hands.process(imgRGB)
        # print(results.multi_hand_landmarks)

        if self.results.multi_hand_landmarks:
            for handLms in self.results.multi_hand_landmarks:
                if draw:
                    self.mpDraw.draw_landmarks(img, handLms,
                                               self.mpHands.HAND_CONNECTIONS)
        return img

    def findPosition(self, img, handNo=0, draw=True):

        lmList = []
        if self.results.multi_hand_landmarks:
            myHand = self.results.multi_hand_landmarks[handNo]
            for id, lm in enumerate(myHand.landmark):
                # print(id, lm)
                h, w, c = img.shape
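                # Landmark coordinates are normalised to [0, 1];
                # scale them to pixel positions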
                cx, cy = int(lm.x * w), int(lm.y * h)
                # print(id, cx, cy)
                lmList.append([id, cx, cy])
                if draw:
                    cv2.circle(img, (cx, cy), 15, (255, 0, 255), cv2.FILLED)

        return lmList


############## Variables ##################
wCam, hCam = 640, 480  # capture resolution
pTime = 0              # timestamp of the previous frame, used for FPS
minBri = 0             # brightness range sent to the board
maxBri = 1
briArd = 0             # latest rounded brightness value


############## Declaration ##################

cap = cv2.VideoCapture(0)  # the default camera index is 0; use 1 for a second camera
cap.set(cv2.CAP_PROP_FRAME_WIDTH, wCam)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, hCam)
detector = handDetector(detectionCon=0.7)


########## Step 1 ###########
# Start a TCP server and bind to port 12345
# Use ipconfig to check the IP address of your PC
s = socket.socket()
print("Socket successfully created")
port = 12345
s.bind(('', port))
print("Socket bound to port %s" % (port))
s.listen(5)
print("Socket is listening")
c, addr = s.accept()  # blocks until the AMB23 connects
print('Got connection from', addr)


######### Step 2 ###############
# Image capture and processing using mediapipe and opencv
while True:
    success, img = cap.read()
    img = detector.findHands(img)
    lmList = detector.findPosition(img, draw=False)
    if len(lmList) != 0:
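        # Landmark 4 is the thumb tip, landmark 8 the index fingertip
        # in MediaPipe's 21-point hand model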
        x1, y1 = lmList[4][1], lmList[4][2]
        x2, y2 = lmList[8][1], lmList[8][2]
        cx, cy = (x1 + x2) // 2, (y1 + y2) // 2

        cv2.circle(img, (x1, y1), 15, (255, 0, 255), cv2.FILLED)
        cv2.circle(img, (x2, y2), 15, (255, 0, 255), cv2.FILLED)
        cv2.line(img, (x1, y1), (x2, y2), (255, 0, 255), 3)
        cv2.circle(img, (cx, cy), 15, (255, 0, 255), cv2.FILLED)

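        # Euclidean distance between thumb tip and index fingertip, in pixels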
        length = math.hypot(x2 - x1, y2 - y1)
        #print(length)

        # Map the finger distance (hand range is roughly 50-300 px)
        # linearly onto the brightness range 0.0-1.0
        brightness = np.interp(length, [50, 300], [minBri, maxBri])
        briArd = np.around(brightness, 2)

        #print(briArd, brightness, length)

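        # Fingertips pinched together: recolour the midpoint green as feedback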
        if length < 50:
            cv2.circle(img, (cx, cy), 15, (0, 255, 0), cv2.FILLED)

    # Print FPS
    cTime = time.time()
    fps = 1 / (cTime - pTime)
    pTime = cTime
    cv2.putText(img, f'FPS: {int(fps)}', (40, 50),
                cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 3)

    # Send the latest brightness value to the AMB23 over the TCP connection;
    # the exact message format here is an assumption -- any format works as
    # long as the MicroPython side parses it the same way
    c.send(str(briArd).encode())

    cv2.imshow("Image", img)
    cv2.waitKey(1)
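2. MicroPython for Wireless Data Transmission and LED Control

On the AMB23 side, the firmware only needs to do three things: join the same Wi-Fi network as the PC, connect to the TCP server on port 12345, and turn each received brightness value into a PWM duty cycle for the LED. The sketch below is a minimal illustration of that flow, not the exact firmware from this project: it is written against generic MicroPython APIs (network, socket and machine.PWM as found on the ESP/RP2 ports), while the Ameba MicroPython port exposes Wi-Fi and PWM through its own modules, so the exact calls, the SSID/password, the server IP and the LED pin are all placeholders to be checked against Realtek's documentation.

# Hedged sketch of the AMB23 TCP client, written against generic
# MicroPython APIs -- adapt the Wi-Fi and PWM calls to the Ameba port
import network
import socket
from machine import Pin, PWM

SSID = "your-ssid"           # placeholder
PASSWORD = "your-password"   # placeholder
SERVER_IP = "192.168.1.10"   # the PC running the OpenCV script (placeholder)
PORT = 12345                 # must match the Python server above

# Join the same Wi-Fi network as the PC
wlan = network.WLAN(network.STA_IF)
wlan.active(True)
wlan.connect(SSID, PASSWORD)
while not wlan.isconnected():
    pass

led = PWM(Pin(8))  # LED pin is board-specific (placeholder)

# Connect to the TCP server started by the Python script
sock = socket.socket()
sock.connect((SERVER_IP, PORT))

while True:
    data = sock.recv(16)           # brightness arrives as text, e.g. "0.42"
    if not data:
        break                      # server closed the connection
    try:
        brightness = float(data)   # 0.0 - 1.0, as computed on the PC
    except ValueError:
        continue                   # skip malformed or merged readings
    led.duty_u16(int(brightness * 65535))  # scale to a 16-bit duty cycle

Because TCP is a byte stream, consecutive readings can run together; a more robust version would delimit the messages, for example by sending newline-terminated values from the PC and splitting on the newline here.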