Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .cspell-wordlist.txt
Original file line number Diff line number Diff line change
Expand Up @@ -193,3 +193,8 @@ BIOES
viterbi
argmaxes
unpadded
keypoint
keypoints
Keypoint
Keypoints
letterboxing
1 change: 1 addition & 0 deletions .eslintrc.js
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ const VALID_CATEGORIES = [
'Models - LLM',
'Models - Object Detection',
'Models - Instance Segmentation',
'Models - Pose Estimation',
'Models - Semantic Segmentation',
'Models - Speech To Text',
'Models - Style Transfer',
Expand Down
8 changes: 8 additions & 0 deletions apps/computer-vision/app/_layout.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,14 @@ export default function _layout() {
headerTitleStyle: { color: ColorPalette.primary },
}}
/>
<Drawer.Screen
name="pose_estimation/index"
options={{
drawerLabel: 'Pose Estimation',
title: 'Pose Estimation',
headerTitleStyle: { color: ColorPalette.primary },
}}
/>
<Drawer.Screen
name="ocr/index"
options={{
Expand Down
6 changes: 6 additions & 0 deletions apps/computer-vision/app/index.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,12 @@ export default function Home() {
>
<Text style={styles.buttonText}>Instance Segmentation</Text>
</TouchableOpacity>
<TouchableOpacity
style={styles.button}
onPress={() => router.navigate('pose_estimation/')}
>
<Text style={styles.buttonText}>Pose Estimation</Text>
</TouchableOpacity>
<TouchableOpacity
style={styles.button}
onPress={() => router.navigate('ocr/')}
Expand Down
259 changes: 259 additions & 0 deletions apps/computer-vision/app/pose_estimation/index.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,259 @@
import Spinner from '../../components/Spinner';
import { BottomBar } from '../../components/BottomBar';
import { getImage } from '../../utils';
import {
usePoseEstimation,
PoseDetections,
RnExecutorchError,
RnExecutorchErrorCode,
YOLO26N_POSE,
} from 'react-native-executorch';
import { View, StyleSheet, Image, Text } from 'react-native';
import React, { useContext, useEffect, useState } from 'react';
import { GeneratingContext } from '../../context';
import ScreenWrapper from '../../ScreenWrapper';
import { StatsBar } from '../../components/StatsBar';
import Svg, { Circle, Line } from 'react-native-svg';
import ErrorBanner from '../../components/ErrorBanner';
import { COCO_SKELETON_CONNECTIONS } from '../../components/utils/cocoSkeleton';

// Stroke colors cycled per detected person (index modulo length) so that
// overlapping skeletons remain visually distinguishable.
const PERSON_COLORS = ['lime', 'cyan', 'magenta', 'yellow', 'orange', 'pink'];

/**
 * Pose-estimation demo screen: lets the user pick/shoot an image, runs the
 * YOLO26N pose model on it, and draws a COCO-17 skeleton overlay (lines +
 * keypoint dots) on top of the preview, correcting for the letterboxing
 * introduced by resizeMode="contain".
 */
export default function PoseEstimationScreen() {
  // Currently selected image URI ('' = nothing picked yet).
  const [imageUri, setImageUri] = useState('');
  // Per-person keypoint detections from the most recent forward pass.
  const [results, setResults] = useState<PoseDetections>([]);
  const [error, setError] = useState<string | null>(null);
  // Intrinsic pixel size of the picked image — needed to map keypoint
  // coordinates (image space) onto the on-screen preview (layout space).
  const [imageDimensions, setImageDimensions] = useState<{
    width: number;
    height: number;
  }>();
  const [inferenceTime, setInferenceTime] = useState<number | null>(null);
  // Measured size of the preview container, filled in by onLayout.
  const [layout, setLayout] = useState({ width: 0, height: 0 });

  const model = usePoseEstimation({ model: YOLO26N_POSE });
  const { setGlobalGenerating } = useContext(GeneratingContext);

  // Mirror the model's busy flag into the app-wide "generating" indicator.
  useEffect(() => {
    setGlobalGenerating(model.isGenerating);
  }, [model.isGenerating, setGlobalGenerating]);

  // Surface hook-level errors (e.g. model download/load failures) in the banner.
  useEffect(() => {
    if (model.error) setError(String(model.error));
  }, [model.error]);

  /** Pick an image (camera or gallery), then reset any previous results. */
  const handleCameraPress = async (isCamera: boolean) => {
    const image = await getImage(isCamera);
    const uri = image?.uri;
    const width = image?.width;
    const height = image?.height;

    if (uri && width && height) {
      // 'uri' is already narrowed to string here — no 'as string' cast needed.
      setImageUri(uri);
      setImageDimensions({ width, height });
      setResults([]);
      setInferenceTime(null);
    }
  };

  /** Run pose estimation on the selected image and record wall-clock time. */
  const runForward = async () => {
    if (imageUri) {
      try {
        const start = Date.now();
        const output = await model.forward(imageUri, { inputSize: 384 });
        setInferenceTime(Date.now() - start);
        setResults(output);
      } catch (e) {
        // Map known library error codes to friendlier banner messages.
        if (e instanceof RnExecutorchError) {
          switch (e.code) {
            case RnExecutorchErrorCode.FileReadFailed:
              setError('Could not read the selected image.');
              break;
            case RnExecutorchErrorCode.ModelGenerating:
              setError('Model is busy — wait for the current run to finish.');
              break;
            case RnExecutorchErrorCode.InvalidUserInput:
            case RnExecutorchErrorCode.InvalidArgument:
              setError(`Invalid input: ${e.message}`);
              break;
            default:
              setError(e.message);
          }
        } else {
          setError(e instanceof Error ? e.message : String(e));
        }
      }
    }
  };

  // Block the whole screen with a progress spinner until the model is loaded.
  if (!model.isReady) {
    return (
      <Spinner
        visible={!model.isReady}
        textContent={`Loading the model ${(model.downloadProgress * 100).toFixed(0)} %`}
      />
    );
  }

  return (
    <ScreenWrapper>
      <ErrorBanner message={error} onDismiss={() => setError(null)} />
      <View style={styles.imageContainer}>
        <View style={styles.image}>
          {imageUri && imageDimensions?.width && imageDimensions?.height ? (
            <View
              style={styles.imageWrapper}
              onLayout={(e) =>
                setLayout({
                  width: e.nativeEvent.layout.width,
                  height: e.nativeEvent.layout.height,
                })
              }
            >
              <Image
                source={{ uri: imageUri }}
                style={styles.fullSizeImage}
                resizeMode="contain"
              />
              {results.length > 0 &&
                layout.width > 0 &&
                layout.height > 0 &&
                (() => {
                  // resizeMode="contain" scales the image *uniformly* to fit
                  // inside the container and centers it, leaving letterbox
                  // bars along one axis. A single scale factor therefore
                  // applies to both axes: the smaller of the two fit ratios.
                  const scale = Math.min(
                    layout.width / imageDimensions.width,
                    layout.height / imageDimensions.height
                  );
                  // Centering offsets for the letterboxed image area.
                  const offsetX =
                    (layout.width - imageDimensions.width * scale) / 2;
                  const offsetY =
                    (layout.height - imageDimensions.height * scale) / 2;
                  // Drop keypoints that fall outside the source image bounds.
                  const isInBounds = (kp: { x: number; y: number }) =>
                    kp.x >= 0 &&
                    kp.y >= 0 &&
                    kp.x <= imageDimensions.width &&
                    kp.y <= imageDimensions.height;
                  return (
                    <Svg style={StyleSheet.absoluteFill}>
                      {results.map((personKeypoints, personIdx) => {
                        const color =
                          PERSON_COLORS[personIdx % PERSON_COLORS.length];
                        return (
                          <React.Fragment key={`person-${personIdx}`}>
                            {/* Skeleton edges: skip any edge whose endpoint is
                                missing or out of the image bounds. */}
                            {COCO_SKELETON_CONNECTIONS.map(
                              ([from, to], lineIdx) => {
                                const kp1 = personKeypoints[from];
                                const kp2 = personKeypoints[to];
                                if (!kp1 || !kp2) return null;
                                if (!isInBounds(kp1) || !isInBounds(kp2))
                                  return null;
                                return (
                                  <Line
                                    key={`person-${personIdx}-line-${lineIdx}`}
                                    x1={kp1.x * scale + offsetX}
                                    y1={kp1.y * scale + offsetY}
                                    x2={kp2.x * scale + offsetX}
                                    y2={kp2.y * scale + offsetY}
                                    stroke={color}
                                    strokeWidth="2"
                                  />
                                );
                              }
                            )}
                            {/* Keypoint dots for the same person. */}
                            {Object.entries(personKeypoints)
                              .filter(([, kp]) => isInBounds(kp))
                              .map(([name, kp]) => (
                                <Circle
                                  key={`person-${personIdx}-kp-${name}`}
                                  cx={kp.x * scale + offsetX}
                                  cy={kp.y * scale + offsetY}
                                  r="4"
                                  fill="red"
                                />
                              ))}
                          </React.Fragment>
                        );
                      })}
                    </Svg>
                  );
                })()}
            </View>
          ) : (
            <Image
              style={styles.fullSizeImage}
              resizeMode="contain"
              source={require('../../assets/icons/executorch_logo.png')}
            />
          )}
        </View>
        {!imageUri && (
          <View style={styles.infoContainer}>
            <Text style={styles.infoTitle}>Pose Estimation</Text>
            <Text style={styles.infoText}>
              This model detects human body keypoints (17 COCO keypoints) and
              draws a skeleton overlay. Pick an image from your gallery or take
              one with your camera to get started.
            </Text>
          </View>
        )}
      </View>
      <StatsBar
        inferenceTime={inferenceTime}
        detectionCount={results.length > 0 ? results.length : null}
      />
      <BottomBar
        handleCameraPress={handleCameraPress}
        runForward={runForward}
        hasImage={!!imageUri}
        isGenerating={model.isGenerating}
      />
    </ScreenWrapper>
  );
}

// Screen-local styles. The preview fills most of the screen; the info box
// only shows before an image has been picked.
const styles = StyleSheet.create({
  // Outer container: takes the bulk of the vertical space above the bars.
  imageContainer: {
    flex: 6,
    width: '100%',
    padding: 16,
  },
  image: {
    flex: 2,
    borderRadius: 8,
    width: '100%',
  },
  // Wrapper measured via onLayout; the SVG overlay is absolutely positioned
  // inside it, so it must match the Image's box exactly.
  imageWrapper: {
    flex: 1,
    width: '100%',
    height: '100%',
  },
  fullSizeImage: {
    width: '100%',
    height: '100%',
  },
  // Intro text shown only while no image is selected.
  infoContainer: {
    alignItems: 'center',
    padding: 16,
    gap: 8,
  },
  infoTitle: {
    fontSize: 18,
    fontWeight: '600',
    color: 'navy',
  },
  infoText: {
    fontSize: 14,
    color: '#555',
    textAlign: 'center',
    lineHeight: 20,
  },
});
20 changes: 20 additions & 0 deletions apps/computer-vision/app/vision_camera/index.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ import SegmentationTask from '../../components/vision_camera/tasks/SegmentationT
import InstanceSegmentationTask from '../../components/vision_camera/tasks/InstanceSegmentationTask';
import OCRTask from '../../components/vision_camera/tasks/OCRTask';
import StyleTransferTask from '../../components/vision_camera/tasks/StyleTransferTask';
import PoseEstimationTask from '../../components/vision_camera/tasks/PoseEstimationTask';
Comment thread
chmjkb marked this conversation as resolved.
// 1. Import ErrorBanner
import ErrorBanner from '../../components/ErrorBanner';

Expand All @@ -36,6 +37,7 @@ type TaskId =
| 'objectDetection'
| 'segmentation'
| 'instanceSegmentation'
| 'poseEstimation'
| 'ocr'
| 'styleTransfer';
type ModelId =
Expand All @@ -52,6 +54,7 @@ type ModelId =
| 'segmentationSelfie'
| 'instanceSegmentationYolo26n'
| 'instanceSegmentationRfdetr'
| 'poseEstimationYolo26n'
| 'ocr'
| 'styleTransferCandy'
| 'styleTransferMosaic';
Expand Down Expand Up @@ -86,6 +89,11 @@ const TASKS: Task[] = [
{ id: 'instanceSegmentationRfdetr', label: 'RF-DETR Nano Seg' },
],
},
{
id: 'poseEstimation',
label: 'Pose',
variants: [{ id: 'poseEstimationYolo26n', label: 'YOLO26N Pose' }],
},
{
id: 'objectDetection',
label: 'Detect',
Expand Down Expand Up @@ -223,6 +231,12 @@ export default function VisionCameraScreen() {
outputs={frameOutput ? [frameOutput] : []}
isActive={isFocused}
orientationSource="device"
onError={(e) => {
console.warn('[Camera] onError', e);
setError(e.message);
}}
onStarted={() => console.log('[Camera] session started')}
onPreviewStarted={() => console.log('[Camera] preview got first frame')}
/>

<View
Expand Down Expand Up @@ -273,6 +287,12 @@ export default function VisionCameraScreen() {
}
/>
)}
{activeTask === 'poseEstimation' && (
<PoseEstimationTask
{...taskProps}
activeModel={activeModel as 'poseEstimationYolo26n'}
/>
)}
{activeTask === 'ocr' && <OCRTask {...taskProps} />}
{activeTask === 'styleTransfer' && (
<StyleTransferTask
Expand Down
18 changes: 18 additions & 0 deletions apps/computer-vision/components/utils/cocoSkeleton.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
// Edges of the COCO-17 skeleton: each entry is a [from, to] pair of keypoint
// names to connect when drawing the overlay. `as const` keeps the names as
// literal types so indexing a keypoint map with them stays type-checked.
// NOTE(review): edge order feeds the React line keys in the renderer — keep
// it stable.
export const COCO_SKELETON_CONNECTIONS = [
  // Head
  ['NOSE', 'LEFT_EYE'],
  ['NOSE', 'RIGHT_EYE'],
  ['LEFT_EYE', 'LEFT_EAR'],
  ['RIGHT_EYE', 'RIGHT_EAR'],
  // Torso
  ['LEFT_SHOULDER', 'RIGHT_SHOULDER'],
  // Arms
  ['LEFT_SHOULDER', 'LEFT_ELBOW'],
  ['LEFT_ELBOW', 'LEFT_WRIST'],
  ['RIGHT_SHOULDER', 'RIGHT_ELBOW'],
  ['RIGHT_ELBOW', 'RIGHT_WRIST'],
  // Hips
  ['LEFT_SHOULDER', 'LEFT_HIP'],
  ['RIGHT_SHOULDER', 'RIGHT_HIP'],
  ['LEFT_HIP', 'RIGHT_HIP'],
  // Legs
  ['LEFT_HIP', 'LEFT_KNEE'],
  ['LEFT_KNEE', 'LEFT_ANKLE'],
  ['RIGHT_HIP', 'RIGHT_KNEE'],
  ['RIGHT_KNEE', 'RIGHT_ANKLE'],
] as const;
Loading
Loading