This repository has been archived by the owner on Sep 11, 2024. It is now read-only.

Use a MediaElementSourceAudioNode to process large audio files #6436

Merged 2 commits on Jul 22, 2021
90 changes: 65 additions & 25 deletions src/voice/Playback.ts
@@ -59,9 +59,10 @@ export class Playback extends EventEmitter implements IDestroyable {
     public readonly thumbnailWaveform: number[];

     private readonly context: AudioContext;
-    private source: AudioBufferSourceNode;
+    private source: AudioBufferSourceNode | MediaElementAudioSourceNode;
     private state = PlaybackState.Decoding;
     private audioBuf: AudioBuffer;
+    private element: HTMLAudioElement;
     private resampledWaveform: number[];
     private waveformObservable = new SimpleObservable<number[]>();
     private readonly clock: PlaybackClock;
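For context on the widened field type: both `AudioBufferSourceNode` and `MediaElementAudioSourceNode` extend `AudioNode`, which is what lets the single `source` field drive the shared connect/disconnect/listener code later in the diff. A minimal standalone sketch of the two construction paths (the `ctx` and `audioEl` names are illustrative, not from the PR):

```typescript
// Both factories return subtypes of AudioNode, so either result can be
// connected to the destination and have listeners attached the same way.
const ctx = new AudioContext();

// Path 1: small files, fully decoded into memory.
const bufferSource: AudioBufferSourceNode = ctx.createBufferSource();

// Path 2: large files, streamed through an <audio> element.
const audioEl = new Audio();
const elementSource: MediaElementAudioSourceNode = ctx.createMediaElementSource(audioEl);

// Shared handling, as the union type in the diff allows:
let source: AudioBufferSourceNode | MediaElementAudioSourceNode = elementSource;
source.connect(ctx.destination);
```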
@@ -129,36 +130,59 @@ export class Playback extends EventEmitter implements IDestroyable {
         this.removeAllListeners();
         this.clock.destroy();
         this.waveformObservable.close();
+        if (this.element) {
+            URL.revokeObjectURL(this.element.src);
+            this.element.remove();
+        }
     }

     public async prepare() {
-        // Safari compat: promise API not supported on this function
-        this.audioBuf = await new Promise((resolve, reject) => {
-            this.context.decodeAudioData(this.buf, b => resolve(b), async e => {
-                // This error handler is largely for Safari as well, which doesn't support Opus/Ogg
-                // very well.
-                console.error("Error decoding recording: ", e);
-                console.warn("Trying to re-encode to WAV instead...");
-
-                const wav = await decodeOgg(this.buf);
-
-                // noinspection ES6MissingAwait - not needed when using callbacks
-                this.context.decodeAudioData(wav, b => resolve(b), e => {
-                    console.error("Still failed to decode recording: ", e);
-                    reject(e);
+        // The point where we use an audio element is fairly arbitrary, though we don't want
+        // it to be too low. As of writing, voice messages want to show a waveform but audio
+        // messages do not. Using an audio element means we can't show a waveform preview, so
+        // we try to target the difference between a voice message file and large audio file.
+        // Overall, the point of this is to avoid memory-related issues due to storing a massive
+        // audio buffer in memory, as that can balloon to far greater than the input buffer's
+        // byte length.
+        if (this.buf.byteLength > 5 * 1024 * 1024) { // 5mb
+            console.log("Audio file too large: processing through <audio /> element");
+            this.element = document.createElement("AUDIO") as HTMLAudioElement;
+            const prom = new Promise((resolve, reject) => {
+                this.element.onloadeddata = () => resolve(null);
+                this.element.onerror = (e) => reject(e);
+            });
+            this.element.src = URL.createObjectURL(new Blob([this.buf]));
+            await prom; // make sure the audio element is ready for us
+        } else {
+            // Safari compat: promise API not supported on this function
+            this.audioBuf = await new Promise((resolve, reject) => {
+                this.context.decodeAudioData(this.buf, b => resolve(b), async e => {
+                    // This error handler is largely for Safari as well, which doesn't support Opus/Ogg
+                    // very well.
+                    console.error("Error decoding recording: ", e);
+                    console.warn("Trying to re-encode to WAV instead...");
+
+                    const wav = await decodeOgg(this.buf);
+
+                    // noinspection ES6MissingAwait - not needed when using callbacks
+                    this.context.decodeAudioData(wav, b => resolve(b), e => {
+                        console.error("Still failed to decode recording: ", e);
+                        reject(e);
+                    });
                 });
             });
-        });

-        // Update the waveform to the real waveform once we have channel data to use. We don't
-        // exactly trust the user-provided waveform to be accurate...
-        const waveform = Array.from(this.audioBuf.getChannelData(0));
-        this.resampledWaveform = makePlaybackWaveform(waveform);
+            // Update the waveform to the real waveform once we have channel data to use. We don't
+            // exactly trust the user-provided waveform to be accurate...
+            const waveform = Array.from(this.audioBuf.getChannelData(0));
+            this.resampledWaveform = makePlaybackWaveform(waveform);
+        }

         this.waveformObservable.update(this.resampledWaveform);

         this.emit(PlaybackState.Stopped); // signal that we're not decoding anymore
         this.clock.flagLoadTime(); // must happen first because setting the duration fires a clock update
-        this.clock.durationSeconds = this.audioBuf.duration;
+        this.clock.durationSeconds = this.element ? this.element.duration : this.audioBuf.duration;
     }

     private onPlaybackEnd = async () => {
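The new large-file branch in `prepare()` boils down to a small pattern: attach the `loadeddata`/`error` handlers first, then point the element at an object URL for the in-memory blob. A self-contained sketch of that pattern, assuming only an encoded-audio `ArrayBuffer` (the function name is illustrative, not part of the PR):

```typescript
// Sketch of the <audio>-element loading path, outside the Playback class.
// `buf` (an ArrayBuffer of encoded audio) is assumed to come from elsewhere.
async function loadViaAudioElement(buf: ArrayBuffer): Promise<HTMLAudioElement> {
    const element = document.createElement("audio");
    const ready = new Promise<void>((resolve, reject) => {
        element.onloadeddata = () => resolve();
        element.onerror = (e) => reject(e);
    });
    // An object URL lets the element stream from the in-memory blob without
    // decoding the entire file into an AudioBuffer.
    element.src = URL.createObjectURL(new Blob([buf]));
    await ready;
    return element;
}
```

Attaching the handlers before assigning `src` avoids a race where the element finishes loading before the promise is wired up; the matching `URL.revokeObjectURL` call in `destroy()` releases the blob when playback is torn down.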
@@ -171,7 +195,11 @@ export class Playback extends EventEmitter implements IDestroyable {
         if (this.state === PlaybackState.Stopped) {
             this.disconnectSource();
             this.makeNewSourceBuffer();
-            this.source.start();
+            if (this.element) {
+                await this.element.play();
+            } else {
+                (this.source as AudioBufferSourceNode).start();
+            }
         }

         // We use the context suspend/resume functions because it allows us to pause a source
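The trailing comment is the crux of the pause strategy: `AudioBufferSourceNode.start()` may only be called once per node, so pausing is done by suspending the whole `AudioContext` rather than stopping the source. A minimal sketch of that mechanism:

```typescript
// Minimal sketch of pausing via the AudioContext rather than the source node.
// Suspending the context freezes the audio graph's clock; resuming continues
// playback from the same position, which a one-shot buffer source cannot do.
async function togglePause(ctx: AudioContext): Promise<void> {
    if (ctx.state === "running") {
        await ctx.suspend();
    } else if (ctx.state === "suspended") {
        await ctx.resume();
    }
}
```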
@@ -182,13 +210,21 @@
     }

     private disconnectSource() {
+        if (this.element) return; // leave connected, we can (and must) re-use it
         this.source?.disconnect();
         this.source?.removeEventListener("ended", this.onPlaybackEnd);
     }

     private makeNewSourceBuffer() {
-        this.source = this.context.createBufferSource();
-        this.source.buffer = this.audioBuf;
+        if (this.element && this.source) return; // leave connected, we can (and must) re-use it
+
+        if (this.element) {
+            this.source = this.context.createMediaElementSource(this.element);
+        } else {
+            this.source = this.context.createBufferSource();
+            this.source.buffer = this.audioBuf;
+        }
+
         this.source.addEventListener("ended", this.onPlaybackEnd);
         this.source.connect(this.context.destination);
     }
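The early returns in this hunk matter: a media element can only be captured once, and a second `createMediaElementSource()` call on the same element throws an `InvalidStateError`, so the node has to be cached and re-used rather than rebuilt per playback. A small sketch of that guard in isolation (the helper name and parameters are illustrative):

```typescript
// Why makeNewSourceBuffer() short-circuits when an element source exists:
// createMediaElementSource() throws InvalidStateError if the element is
// already associated with a source node, so we cache and re-use it.
function getOrCreateElementSource(
    ctx: AudioContext,
    element: HTMLAudioElement,
    cached?: MediaElementAudioSourceNode,
): MediaElementAudioSourceNode {
    if (cached) return cached; // re-use, as the guard in the diff does
    const node = ctx.createMediaElementSource(element);
    node.connect(ctx.destination);
    return node;
}
```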
Expand Down Expand Up @@ -241,7 +277,11 @@ export class Playback extends EventEmitter implements IDestroyable {
         // when it comes time to the user hitting play. After a couple jumps, the user
         // will have desynced the clock enough to be about 10-15 seconds off, while this
         // keeps it as close to perfect as humans can perceive.
-        this.source.start(now, timeSeconds);
+        if (this.element) {
+            this.element.currentTime = timeSeconds;
+        } else {
+            (this.source as AudioBufferSourceNode).start(now, timeSeconds);
+        }

         // Dev note: it's critical that the code gap between `this.source.start()` and
         // `this.pause()` is as small as possible: we do not want to delay *anything*
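The two seek paths in this last hunk differ for the same one-shot reason: an `<audio>` element is natively seekable through `currentTime`, while a buffer source cannot seek at all and must be rebuilt and started at an offset into the buffer. A standalone sketch of the branch (parameter names are illustrative; in the real class these are fields):

```typescript
// Sketch of the two seek strategies, assuming the caller supplies a fresh
// bufferSource when the element path is not in use.
function seek(
    timeSeconds: number,
    ctx: AudioContext,
    bufferSource: AudioBufferSourceNode,
    element?: HTMLAudioElement,
): void {
    if (element) {
        // Media elements seek in place; no new node is needed.
        element.currentTime = timeSeconds;
    } else {
        // start(when, offset): begin a *new* one-shot source partway
        // into the decoded buffer.
        bufferSource.start(ctx.currentTime, timeSeconds);
    }
}
```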