Machine Learning with ml5.js
Use pre-trained models for pose estimation, hand tracking, image classification, and body segmentation — all in the browser.
What is ml5.js?
ml5.js is a high-level library built on top of TensorFlow.js. It wraps pre-trained models — pose detection, object detection, image classification, text generation, and more — in a p5.js-friendly API. No ML expertise required.
<script src="https://cdn.jsdelivr.net/npm/ml5@1/dist/ml5.min.js"></script>
Pose estimation — full-body keypoint tracking
MoveNet (the model loaded in the code below) detects 17 body keypoints from a webcam feed in real time:
let cam, pose, detector;

// Start the webcam and load the MoveNet pose model, then kick off the
// detection loop. NOTE: ml5.js 1.x (the ml5@1 CDN build used above)
// exposes pose estimation as ml5.bodyPose — ml5.poseDetection does not
// exist in v1.
async function setup() {
  createCanvas(640, 480);
  cam = createCapture(VIDEO);
  cam.size(640, 480);
  cam.hide();
  // Load the MoveNet model (faster than PoseNet classic)
  detector = await ml5.bodyPose('MoveNet', {
    modelType: 'SINGLEPOSE_THUNDER' // or SINGLEPOSE_LIGHTNING (faster)
  });
  detectPose();
}
// Poll the detector as fast as the browser allows, keeping the most recent
// single-person result in the sketch-level `pose` variable.
async function detectPose() {
  for (;;) {
    const [first] = await detector.detect(cam.elt);
    if (first !== undefined) pose = first;
    // Hand control back to the event loop so rendering stays responsive.
    await new Promise((resolve) => setTimeout(resolve, 0));
  }
}
// Render the camera frame, then overlay keypoints and the skeleton for the
// most recent pose (if any).
function draw() {
  image(cam, 0, 0);
  if (!pose) return;

  // Keypoints — skip low-confidence detections. Fill/stroke state is
  // loop-invariant, so set it once.
  fill(100, 255, 150);
  noStroke();
  for (let kp of pose.keypoints) {
    if (kp.score < 0.3) continue;
    circle(kp.x, kp.y, 10);
  }

  // Skeleton — ml5 v1 exposes the keypoint index pairs via getSkeleton().
  // (The original fell back to poseConnections(), which is undefined here.)
  let connections = detector.getSkeleton();
  stroke(255, 100, 50);
  strokeWeight(2);
  for (let [a, b] of connections) {
    let kpA = pose.keypoints[a];
    let kpB = pose.keypoints[b];
    if (kpA.score > 0.3 && kpB.score > 0.3) {
      line(kpA.x, kpA.y, kpB.x, kpB.y);
    }
  }
}
HandPose — hand landmark detection
21 keypoints per hand:
let cam;            // was an implicit global in this snippet — declare it
let handDetector;
let hands = [];

// Start the camera and continuous hand detection (up to two hands).
async function setup() {
  createCanvas(640, 480);
  cam = createCapture(VIDEO);
  cam.size(640, 480);
  cam.hide();
  handDetector = await ml5.handPose({ maxHands: 2, flipped: false });
  // detectStart keeps `hands` refreshed with the latest results.
  handDetector.detectStart(cam, results => hands = results);
}
// Draw each detected hand: 21 keypoints, the skeleton connections, and a
// highlight circle when the thumb and index fingertips pinch together.
function draw() {
  image(cam, 0, 0);
  for (let hand of hands) {
    // 21 keypoints per hand
    fill(200, 100, 255);
    noStroke();
    for (let kp of hand.keypoints) {
      circle(kp.x, kp.y, 8);
    }

    // Connections — ml5 v1 exposes the keypoint index pairs on the model
    // via getConnections(); individual hand results do not carry them.
    stroke(255, 255, 0);
    strokeWeight(1.5);
    for (let [a, b] of handDetector.getConnections()) {
      let kpA = hand.keypoints[a], kpB = hand.keypoints[b];
      line(kpA.x, kpA.y, kpB.x, kpB.y);
    }

    // Pinch gesture: thumb tip (index 4) close to index fingertip (index 8).
    let thumb = hand.keypoints[4];
    let index = hand.keypoints[8];
    let pinchDist = dist(thumb.x, thumb.y, index.x, index.y);
    if (pinchDist < 30) {
      fill(255, 255, 0, 150);
      noStroke();
      circle((thumb.x + index.x) / 2, (thumb.y + index.y) / 2, 30);
    }
  }
}
Image classification
let classifier;
let label = 'Loading...';
let confidence = 0;
let cam;

// Set up the webcam and MobileNet, then kick off continuous classification
// once the model has finished loading.
function setup() {
  createCanvas(640, 520);
  cam = createCapture(VIDEO);
  cam.size(640, 480);
  cam.hide();
  classifier = ml5.imageClassifier('MobileNet', () => classifyFrame());
}
// Classify the current camera frame and schedule the next classification.
// NOTE: ml5.js 1.x callbacks receive the results directly (errors are
// thrown, not passed); the error-first (error, results) signature used by
// the original is the ml5 0.x convention.
function classifyFrame() {
  classifier.classify(cam, (results) => {
    label = results[0].label;
    confidence = results[0].confidence;
    classifyFrame(); // continuous classification
  });
}
// Show the camera feed with the current top label in a bar along the bottom.
function draw() {
  image(cam, 0, 0);

  // Semi-transparent backdrop for the caption.
  noStroke();
  fill(0, 0, 0, 160);
  rect(0, height - 50, width, 50);

  // Label and confidence, centered in the bar.
  textAlign(CENTER, CENTER);
  textSize(18);
  fill(255);
  text(`${label} (${nf(confidence * 100, 1, 1)}%)`, width / 2, height - 20);
}
Body segmentation — remove background
let bodySegmentation;
let segmentation;
let cam;

// Load the SelfieSegmentation model and start continuous detection,
// storing the newest result in `segmentation`.
async function setup() {
  createCanvas(640, 480);
  cam = createCapture(VIDEO);
  cam.size(640, 480);
  cam.hide();
  const options = { maskType: 'person' };
  bodySegmentation = ml5.bodySegmentation('SelfieSegmentation', options);
  await bodySegmentation.ready;
  bodySegmentation.detectStart(cam, result => segmentation = result);
}
// Paint the replacement background, then the segmentation mask on top.
// The original drew the raw camera frame LAST, which completely covered
// both the background color and the mask — show the feed only while we
// are still waiting for the first segmentation result.
function draw() {
  background(20, 80, 150); // replacement background
  if (segmentation && segmentation.mask) {
    // Draw mask (white = person, black = background)
    image(segmentation.mask, 0, 0, width, height);
    // Use blending to composite person over background
    // (full compositing requires drawingBuffer techniques or a shader)
  } else {
    image(cam, 0, 0); // raw feed until the model produces a mask
  }
}
Custom classifier — teachable machine style
Train a simple image classifier on your own categories using a webcam. Note: the featureExtractor transfer-learning API shown here is from ml5 0.x; in ml5 1.x use ml5.neuralNetwork or a Teachable Machine export instead:
// One declaration per variable; `cam` and `currentLabel` were previously
// assigned in setup()/classifyLoop() without ever being declared, creating
// implicit globals (a ReferenceError in strict mode / ES modules).
let featureExtractor;
let classifier;
let isTraining = false;
let cam;           // webcam capture, assigned in setup()
let currentLabel;  // latest classification result, written by classifyLoop()
// Create the webcam feed and a MobileNet-based transfer-learning classifier
// that will learn from frames captured by `cam`.
// NOTE(review): ml5.featureExtractor is an ml5 0.x API and appears to have
// been removed in ml5 1.x (the ml5@1 CDN build referenced in this tutorial)
// — verify against the targeted version; current releases use
// ml5.neuralNetwork for transfer learning instead.
function setup() {
createCanvas(640, 520);
cam = createCapture(VIDEO);
cam.size(640, 480);
cam.hide();
featureExtractor = ml5.featureExtractor('MobileNet', () => {
// Attach a classification head whose training images are sampled from `cam`.
classifier = featureExtractor.classification(cam);
});
}
// Capture the current webcam frame as a training example for `label`.
// The video source was bound when featureExtractor.classification(cam) was
// created, so addImage only needs the label (ml5 0.x API).
function addExample(label) {
classifier.addImage(label);
print(`Added example for: ${label}`);
}
// Train the classifier on the collected examples. The callback fires with
// the current loss after each training step; the ml5 0.x API signals
// completion by passing null, at which point the continuous classification
// loop is started.
function trainModel() {
classifier.train((lossValue) => {
if (lossValue === null) {
print('Training complete');
classifyLoop();
}
});
}
// Continuously classify webcam frames, storing the top label in the
// sketch-global `currentLabel`. Note: `currentLabel` is assigned here
// without a declaration — declare it at the top of the sketch to avoid an
// implicit global. The error-first callback is the ml5 0.x convention.
function classifyLoop() {
classifier.classify((err, results) => {
if (!err) {
currentLabel = results[0].label;
}
// Re-arm immediately for the next frame (even after an error).
classifyLoop();
});
}
Key takeaways
- ml5.js wraps TensorFlow.js models in a friendly API — include via CDN
- MoveNet (SINGLEPOSE_LIGHTNING) is the fastest current pose model; use THUNDER for accuracy
- HandPose provides 21 landmarks per hand; measure finger distances for gesture recognition
- Image classification (MobileNet) runs continuously via a callback loop
- Body segmentation produces a mask that separates the person from the background
- Feature extraction + custom training lets you build Teachable Machine-style classifiers in code (ml5 0.x API — in ml5 1.x, use ml5.neuralNetwork or a Teachable Machine export)