p5.js Expert Article 3

Real-time Audio Visualisation

Web Audio API pipeline, pitch detection, beat detection, and building reactive visuals from live audio analysis.

⏱ 28 min read audio Web Audio API FFT beat detection pitch visualisation

The signal chain

Microphone / File → AnalyserNode → FFT / Waveform data → p5.js canvas
                 ↓
            GainNode → CompressorNode → DestinationNode (speakers)

p5.sound wraps this chain. For fine control, use the Web Audio API directly.

Direct Web Audio API access

p5.sound exposes the underlying audio context:

let mic, analyser, dataArray;
const FFT_SIZE = 2048;

// Set up the canvas and wire the microphone into an AnalyserNode.
// draw() reads FFT magnitudes out of `dataArray` each frame.
function setup() {
  createCanvas(800, 400);

  let audioCtx = getAudioContext();

  // Create analyser node: frequencyBinCount is always fftSize / 2
  analyser = audioCtx.createAnalyser();
  analyser.fftSize = FFT_SIZE;
  analyser.smoothingTimeConstant = 0.85;  // 0..1, higher = smoother bars
  dataArray = new Uint8Array(analyser.frequencyBinCount);

  // Connect microphone through analyser. Note: mic access is async and
  // requires user permission — the analyser reads silence until it resolves.
  navigator.mediaDevices.getUserMedia({ audio: true })
    .then(stream => {
      let source = audioCtx.createMediaStreamSource(stream);
      source.connect(analyser);
    })
    .catch(err => {
      // Without this, a denied/missing mic is a silent unhandled rejection.
      console.error('Microphone unavailable or permission denied:', err);
    });
}

// Per-frame render loop: snapshot the current spectrum, then draw it.
function draw() {
  background(15);

  // Fill dataArray with this frame's frequency-domain magnitudes (0–255).
  // Swap in analyser.getByteTimeDomainData(dataArray) to get the raw
  // waveform instead.
  analyser.getByteFrequencyData(dataArray);

  drawSpectrum();
}

// Draw a linear-frequency bar spectrum: one bar per FFT bin,
// hue sweeping blue → red with rising frequency.
function drawSpectrum() {
  let binCount = dataArray.length;
  let barW = width / binCount + 1;  // +1px overlap hides hairline gaps

  // Loop-invariant drawing state — setting these once per frame instead
  // of once per bin is both cheaper and clearer.
  colorMode(HSB, 360, 100, 100);
  noStroke();

  for (let i = 0; i < binCount; i++) {
    let x   = map(i, 0, binCount, 0, width);
    let h   = map(dataArray[i], 0, 255, 0, height);   // magnitude → bar height
    let hue = map(i, 0, binCount, 200, 360);          // bin index → hue

    fill(hue, 80, 90);
    rect(x, height - h, barW, h);
  }
}

Logarithmic frequency scaling

Human hearing is logarithmic — map bins to a log scale for a natural-looking spectrum:

// Draw a fixed number of bars whose bin mapping is logarithmic, matching
// the log-frequency response of human hearing.
function drawLogSpectrum() {
  let bins    = dataArray.length;
  let numBars = 120;
  let barW    = width / numBars;   // invariant — hoisted out of the loop

  // Invariant drawing state, set once per frame rather than per bar.
  colorMode(HSB, 360, 100, 100);
  noStroke();

  for (let i = 0; i < numBars; i++) {
    // Map bar index to bin index logarithmically: pow(bins, t) sweeps
    // bin 1 → bins as t goes 0 → 1, so each octave gets equal width.
    let t   = i / numBars;
    let bin = constrain(floor(pow(bins, t)), 0, bins - 1);

    let x = map(i, 0, numBars, 0, width);
    let h = map(dataArray[bin], 0, 255, 0, height);

    fill(map(i, 0, numBars, 180, 340), 75, 90);
    rect(x, height - h, barW - 1, h);   // -1px gutter between bars
  }
}

Beat detection

Detect kicks, snares, and hi-hats by monitoring specific frequency bands:

let kickBand   = [60, 150];    // Hz — kick drum fundamental
let snareBand  = [200, 2000];  // Hz — snare body + crack
let hihatBand  = [8000, 16000];  // Hz — hi-hat / cymbal sizzle

let kickHist   = [], snareHist = [], hihatHist = [];  // rolling energy history per band
const HIST_LEN = 43;  // 43 frames ≈ 0.7 s of history at 60 fps

// Average byte magnitude (0–255) of the FFT bins covering [lowHz, highHz].
// Bin index = hz / nyquist * binCount, clamped so a highHz at or above
// the Nyquist frequency can't index past the end of dataArray (reading
// dataArray[binCount] is undefined and would poison the sum with NaN).
function getEnergy(lowHz, highHz) {
  let ctx   = getAudioContext();
  let nyq   = ctx.sampleRate / 2;
  let bins  = analyser.frequencyBinCount;

  let binLo = constrain(floor(lowHz  / nyq * bins), 0, bins - 1);
  let binHi = constrain(floor(highHz / nyq * bins), binLo, bins - 1);

  let sum = 0;
  for (let i = binLo; i <= binHi; i++) sum += dataArray[i];
  return sum / (binHi - binLo + 1);
}

// Variance-adjusted energy threshold (Patin-style beat detection):
// a beat is an energy sample that pokes above ~1.5× the recent average,
// with the multiplier relaxed when the band is already volatile.
// Mutates `history` in place (push newest, drop oldest past HIST_LEN).
function detectBeat(energy, history) {
  history.push(energy);
  while (history.length > HIST_LEN) history.shift();

  const n = history.length;

  let mean = 0;
  for (const e of history) mean += e;
  mean /= n;

  let variance = 0;
  for (const e of history) variance += (e - mean) ** 2;
  variance /= n;

  // Linear sensitivity curve: high variance lowers the multiplier.
  const sensitivity = -0.0025714 * variance + 1.5142857;
  return energy > sensitivity * mean;
}

// Per-frame beat tracking: refresh the spectrum, test each band's
// energy against its own rolling history, fire effects on hits.
function draw() {
  analyser.getByteFrequencyData(dataArray);

  const kickHit  = detectBeat(getEnergy(...kickBand),  kickHist);
  const snareHit = detectBeat(getEnergy(...snareBand), snareHist);
  const hihatHit = detectBeat(getEnergy(...hihatBand), hihatHist);

  if (kickHit)  triggerKickEffect();
  if (snareHit) triggerSnareEffect();
  if (hihatHit) triggerHihatEffect();
}

Pitch detection with ACF

Autocorrelation gives a rough estimate of the fundamental frequency:

// Estimate the fundamental frequency (Hz) of the current mic frame via
// autocorrelation. Returns 0 when no pitch is found (e.g. silence).
//
// Bug fixed: the original peak search treated the FIRST downward step as
// "past the dip", so on any monotonically-declining start of the ACF it
// broke out while still inside the initial decline and returned a lag of
// 2–3 samples (≈ sampleRate/2 — nonsense). We now walk past the entire
// initial decline, then take the global maximum of the remainder.
function getPitch() {
  let timeDomain = new Float32Array(analyser.fftSize);
  analyser.getFloatTimeDomainData(timeDomain);

  let SIZE = timeDomain.length;
  let corr = new Float32Array(SIZE);

  // Autocorrelation: corr[lag] = Σ x[i]·x[i+lag]. corr[0] is total energy.
  // O(n²) — fine for a demo; use an FFT-based ACF or a library in production.
  for (let lag = 0; lag < SIZE; lag++) {
    for (let i = 0; i < SIZE - lag; i++) {
      corr[lag] += timeDomain[i] * timeDomain[i + lag];
    }
  }

  // Skip the whole initial decline of the ACF (it always falls away
  // from the lag-0 energy peak before rising to the period peak).
  let lag = 0;
  while (lag < SIZE - 1 && corr[lag] > corr[lag + 1]) lag++;

  // The strongest peak in the remainder sits at the fundamental period.
  let maxVal = -Infinity, maxLag = -1;
  for (let i = lag; i < SIZE; i++) {
    if (corr[i] > maxVal) { maxVal = corr[i]; maxLag = i; }
  }

  // Silence (flat ACF) leaves maxLag at 0 — avoid dividing by zero.
  if (maxLag <= 0 || maxVal <= 0) return 0;
  return getAudioContext().sampleRate / maxLag;
}

For production use, a dedicated library such as pitchfinder or Aubio.js provides more reliable pitch detection.

Circular spectrum visualiser

// Circular spectrum: each FFT bin is a radial line; angle = bin index,
// line length = bin energy. The translucent background leaves trails.
function draw() {
  // colorMode persists across frames — after frame 1 the HSB mode set
  // below would make background() read (15, 15, 20, 40) as hue/sat/
  // brightness instead of a translucent near-black. Reset to RGB first.
  colorMode(RGB, 255);
  background(15, 15, 20, 40);
  translate(width / 2, height / 2);

  analyser.getByteFrequencyData(dataArray);

  let bins  = dataArray.length;
  let rBase = 100;   // inner radius of the ring

  // Loop-invariant drawing state, hoisted out of the per-bin loop.
  colorMode(HSB, 360, 100, 100);
  noFill();
  strokeWeight(1.5);

  for (let i = 0; i < bins; i++) {
    let angle  = TWO_PI / bins * i - HALF_PI;   // -HALF_PI: start at 12 o'clock
    let energy = dataArray[i];
    let r      = rBase + map(energy, 0, 255, 0, 150);

    let x1 = cos(angle) * rBase;
    let y1 = sin(angle) * rBase;
    let x2 = cos(angle) * r;
    let y2 = sin(angle) * r;

    // Hue tracks frequency, brightness tracks energy.
    stroke(map(i, 0, bins, 180, 360), 80, map(energy, 0, 255, 20, 100));
    line(x1, y1, x2, y2);
  }
}

Key takeaways

  • Access Web Audio directly via getAudioContext() for maximum control
  • AnalyserNode.getByteFrequencyData() fills an array with frequency magnitudes per frame
  • Log-scale bin mapping gives perceptually natural spectrum displays
  • Beat detection compares current energy to a historical average with a dynamic threshold
  • Pitch detection via autocorrelation works for monophonic signals; use a library for polyphonic
  • Circular spectrum: map bin index to angle, energy to radius