flipoff/src/flipoff/detector.py
NotAShelf f217384332
detector: convert BGR frame to RGB before passing to MediaPipe
Signed-off-by: NotAShelf <raf@notashelf.dev>
Change-Id: Id1b4bb7e9f34b13952f92c639ef0ba986a6a6964
2026-04-07 16:20:43 +03:00

53 lines
1.6 KiB
Python

import time
import cv2
import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision
from mediapipe.tasks.python.components.containers.landmark import NormalizedLandmark
import numpy as np
class HandDetector:
    """Thin wrapper around MediaPipe's ``HandLandmarker`` in VIDEO mode.

    The detector owns a native MediaPipe object; call :meth:`close` when
    done, or use the instance as a context manager.
    """

    def __init__(
        self,
        model_path: str,
        num_hands: int = 1,
    ) -> None:
        """Create the underlying hand landmarker.

        Args:
            model_path: Path to the hand-landmarker model asset.
            num_hands: Maximum number of hands to detect per frame.
        """
        base_options = python.BaseOptions(model_asset_path=model_path)
        options = vision.HandLandmarkerOptions(
            base_options=base_options,
            num_hands=num_hands,
            running_mode=vision.RunningMode.VIDEO,
        )
        self._detector = vision.HandLandmarker.create_from_options(options)
        # Last timestamp handed to MediaPipe; used to keep auto-generated
        # timestamps strictly increasing across calls.
        self._last_timestamp_ms: int | None = None

    def __enter__(self) -> "HandDetector":
        return self

    def __exit__(self, *exc_info: object) -> None:
        self.close()

    def detect(
        self,
        frame: np.ndarray,
        timestamp_ms: int | None = None,
    ) -> list[list[NormalizedLandmark]]:
        """Run hand-landmark detection on a single BGR frame.

        Args:
            frame: Image in OpenCV's BGR channel order.
            timestamp_ms: Frame timestamp in milliseconds. When omitted, the
                current wall-clock time is used, bumped if necessary so it
                is strictly greater than the previous timestamp. Explicit
                values are passed through unchanged.

        Returns:
            One list of landmarks per detected hand; an empty list when no
            hand was found.
        """
        if timestamp_ms is None:
            timestamp_ms = int(time.time() * 1000)
            last = self._last_timestamp_ms
            # VIDEO mode rejects non-increasing timestamps. Two calls inside
            # the same millisecond, or a wall-clock step backwards, would
            # otherwise produce one.
            if last is not None and timestamp_ms <= last:
                timestamp_ms = last + 1

        # OpenCV captures in BGR; MediaPipe expects RGB.
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=frame_rgb)
        result = self._detector.detect_for_video(mp_image, timestamp_ms)
        # Record only after a successful call so a failed frame does not
        # advance the internal clock.
        self._last_timestamp_ms = timestamp_ms
        return result.hand_landmarks or []

    def close(self) -> None:
        """Release the underlying MediaPipe landmarker."""
        self._detector.close()
class Camera:
def __init__(self, index: int = 0) -> None:
self._cap = cv2.VideoCapture(index)
if not self._cap.isOpened():
raise RuntimeError(f"Cannot open camera at index {index}")
def read(self) -> tuple[bool, np.ndarray]:
ret, frame = self._cap.read()
return ret, frame
def release(self) -> None:
self._cap.release()