p5.js Intermediate Article 5

Working with Video

Video file playback, webcam capture, pixel processing, frame differencing, and basic optical flow.

⏱ 22 min read video webcam pixels optical flow capture

Playing a video file

// Handle to the video element created in preload() and used by the rest of the sketch.
let vid;

/**
 * Creates the video element for the clip and stores it in `vid`.
 */
function preload() {
  const clipPath = 'clip.mp4';
  vid = createVideo(clipPath);
}

/**
 * Creates the canvas and starts muted, looping playback of `vid`.
 *
 * The element is muted and hidden BEFORE playback starts. Browsers may refuse
 * to start a video that would play with sound without a user gesture, so the
 * first play attempt must already be silent.
 */
function setup() {
  createCanvas(640, 480);
  vid.size(640, 480);
  vid.volume(0);  // mute first so the initial play attempt is silent
  vid.hide();     // hide the HTML element — we'll draw it to the canvas
  vid.loop();     // per the p5 reference, loop() also starts playback, so no separate play() call
}

/**
 * Paints the current frame of `vid` onto the canvas, anchored at the top-left corner.
 */
function draw() {
  const originX = 0;
  const originY = 0;
  image(vid, originX, originY);
}

Video controls

// Quick reference of the playback calls available on `vid`.
// This is an API listing, not a script meant to be run top to bottom.
vid.play();          // start playback
vid.pause();         // pause playback
vid.stop();          // stop playback — NOTE(review): p5 docs say this also rewinds to the start; confirm
vid.loop();          // play on repeat
vid.noLoop();        // turn repeat off
vid.volume(0.5);     // set volume, 0–1
vid.speed(0.5);      // playback rate
vid.time(3.5);       // with an argument: jump to 3.5 seconds
vid.duration();      // total length (presumably in seconds, matching time() — confirm)
vid.time();          // without an argument: current position
vid.elt.currentTime  // direct DOM access (the underlying element) for finer control

Webcam capture

// Webcam capture element, created in setup().
let cam;

/**
 * Creates a 640×480 canvas and a hidden webcam capture sized to match it.
 */
function setup() {
  const frameWidth = 640;
  const frameHeight = 480;

  createCanvas(frameWidth, frameHeight);

  cam = createCapture(VIDEO);
  cam.size(frameWidth, frameHeight);
  cam.hide();
}

/**
 * Paints the latest webcam frame onto the canvas, anchored at the top-left corner.
 */
function draw() {
  const originX = 0;
  const originY = 0;
  image(cam, originX, originY);
}

Mirror the feed

/**
 * Draws the webcam feed mirrored left-to-right.
 */
function draw() {
  // Move the origin to the right edge, then negate the x axis so the frame
  // is drawn back towards the left edge.
  const flipX = -1;
  const keepY = 1;

  translate(width, 0);
  scale(flipX, keepY);
  image(cam, 0, 0);
}

Pixel processing on video

You can read and modify individual pixels on every frame. This works the same way as still-image pixel manipulation:

/**
 * Draws the webcam frame, then turns the canvas into a hard black/white
 * threshold image by rewriting the canvas pixel buffer in place.
 */
function draw() {
  image(cam, 0, 0);
  loadPixels();

  const cutoff = 128;
  let offset = 0;

  // Each pixel occupies four consecutive entries: R, G, B, A.
  while (offset < pixels.length) {
    const red = pixels[offset];
    const green = pixels[offset + 1];
    const blue = pixels[offset + 2];

    // Average of the three channels decides black vs white; alpha is left untouched.
    const mean = (red + green + blue) / 3;
    let level = 0;
    if (mean > cutoff) {
      level = 255;
    }

    pixels[offset + 2] = level;
    pixels[offset + 1] = level;
    pixels[offset] = level;

    offset += 4;
  }

  updatePixels();
}

Processing every pixel every frame is expensive. Sample every 2nd or 4th pixel, or use WebGL/shaders for real-time filtering.

Frame differencing — motion detection

Compare the current frame with the previous one to find what moved:

// Copy of the previous camera frame; draw() compares each new frame against it.
let prev;

/**
 * Creates the canvas, a hidden 640×480 webcam capture, and an image buffer of
 * the same size that holds the previous frame.
 */
function setup() {
  // Force one canvas pixel per CSS pixel. draw() writes the canvas `pixels`
  // array using offsets taken from the camera buffer, and the two only line
  // up when the canvas is not scaled for a high-density display.
  pixelDensity(1);
  createCanvas(640, 480);
  cam = createCapture(VIDEO);
  cam.size(640, 480);
  cam.hide();
  prev = createImage(640, 480);
}

/**
 * Frame differencing: compares the current camera frame with the previous one,
 * paints changed pixels onto the canvas, and prints an overall motion level.
 *
 * The canvas `pixels` array is written with the same offsets used to read the
 * camera buffer, so this assumes the canvas has the camera's dimensions at
 * pixel density 1.
 */
function draw() {
  cam.loadPixels();
  prev.loadPixels();
  loadPixels();

  // Per-pixel mean channel difference above which a pixel counts as "moved".
  const threshold = 30;
  // Four array entries (R, G, B, A) per pixel.
  const pixelCount = cam.pixels.length / 4;
  let motionTotal = 0;

  for (let i = 0; i < cam.pixels.length; i += 4) {
    const dr = abs(cam.pixels[i]     - prev.pixels[i]);
    const dg = abs(cam.pixels[i + 1] - prev.pixels[i + 1]);
    const db = abs(cam.pixels[i + 2] - prev.pixels[i + 2]);
    const diff = (dr + dg + db) / 3;

    motionTotal += diff;

    // Changed pixels become (255, 0, 100); unchanged pixels become opaque black.
    const moved = diff > threshold;
    pixels[i]     = moved ? 255 : 0;
    pixels[i + 1] = 0;
    pixels[i + 2] = moved ? 100 : 0;
    pixels[i + 3] = 255;
  }

  updatePixels();

  // Remember this frame as the baseline for the next call.
  prev.copy(cam, 0, 0, cam.width, cam.height, 0, 0, prev.width, prev.height);

  // Motion level: mean difference scaled to 0–1. While the camera buffer is
  // still empty there is nothing to average, so report 0 instead of 0/0 (NaN).
  const level = pixelCount > 0 ? motionTotal / pixelCount / 255 : 0;
  fill(255);
  textSize(14);
  text(`Motion: ${nf(level, 1, 3)}`, 10, 20);
}

Pixelation and mosaic effect

Sample the video at a lower resolution and draw rectangles:

/**
 * Mosaic effect: covers the canvas with square tiles, each filled with the
 * colour of the camera pixel at the tile's centre.
 */
function draw() {
  cam.loadPixels();

  const tile = 16;
  const halfTile = tile / 2;
  noStroke();

  for (let tileX = 0; tileX < width; tileX += tile) {
    for (let tileY = 0; tileY < height; tileY += tile) {
      // Sample point: the tile centre, clamped so it stays inside the camera frame.
      const sampleX = floor(constrain(tileX + halfTile, 0, cam.width - 1));
      const sampleY = floor(constrain(tileY + halfTile, 0, cam.height - 1));

      // Four entries (R, G, B, A) per pixel, rows laid out one after another.
      const offset = (sampleX + sampleY * cam.width) * 4;

      fill(cam.pixels[offset], cam.pixels[offset + 1], cam.pixels[offset + 2]);
      rect(tileX, tileY, tile, tile);
    }
  }
}

ASCII video

Replace each tile’s brightness with an ASCII character:

// Glyph ramp ordered from sparsest (index 0) to densest (last index).
const CHARS = ' .:-=+*#@';

/**
 * ASCII video: samples the camera on a coarse grid and draws one coloured
 * character per cell on a black background.
 *
 * Brighter samples use denser glyphs. Because the background is black, the
 * opposite mapping would render highlights as blank space and draw the
 * densest glyph in near-black, making both ends of the range invisible.
 */
function draw() {
  background(0);
  cam.loadPixels();

  // Nothing to sample until the camera buffer has been filled.
  if (cam.pixels.length === 0) {
    return;
  }

  const tileW = 8;
  const tileH = 14;
  textFont('monospace');
  textSize(12);
  noStroke();

  for (let x = 0; x < width; x += tileW) {
    for (let y = 0; y < height; y += tileH) {
      // Four entries (R, G, B, A) per pixel; sample the cell's top-left pixel.
      const idx = (x + y * cam.width) * 4;
      const r = cam.pixels[idx];
      const g = cam.pixels[idx + 1];
      const b = cam.pixels[idx + 2];

      const bright = (r + g + b) / 3;
      const charIdx = floor(map(bright, 0, 255, 0, CHARS.length - 1));

      fill(r, g, b);
      // text() positions by baseline, so offset by one cell height.
      text(CHARS[charIdx], x, y + tileH);
    }
  }
}

Key takeaways

  • createVideo() plays files; createCapture(VIDEO) accesses the webcam — call .hide() on both when you draw them to the canvas yourself, otherwise the HTML element is shown as well
  • Video pixels work exactly like image pixels: loadPixels(), access pixels[], updatePixels()
  • Frame differencing compares current vs previous frame to detect motion
  • Tiled sampling (pixelation, ASCII, mosaic) is fast because you skip most pixels
  • Mirror the camera by translating to the right edge and scaling x by -1
  • Processing all pixels every frame is expensive — downsample or use shaders for production