engine: client: voice: new version, move to Opus Custom codec

* Despite Opus Custom have strict requirements, it's more barebones, allowing us to use maximum frame size and custom sample rate, without resampling * Encode each frame size to network buffer, allowing smooth voice chat even in 10 FPS * Fix possible buffer overruns, underruns and races with platform side * Revise all usages of offset variables, samples vs bytes
2022-08-31 06:44:45 +03:00 · 2022-08-31 06:44:45 +03:00 · 82ab06efdd
parent 4110ee0928
commit 82ab06efdd
4 changed files with 206 additions and 160 deletions
--- a/engine/client/cl_main.c
+++ b/engine/client/cl_main.c
@ -3085,7 +3085,7 @@ void CL_Init( void )

 	VID_Init();	// init video
 	S_Init();	// init sound
-	Voice_Init( "opus", 3 ); // init voice
+	Voice_Init( VOICE_DEFAULT_CODEC, 3 ); // init voice

 	// unreliable buffer. unsed for unreliable commands and voice stream
 	MSG_Init( &cls.datagram, "cls.datagram", cls.datagram_buf, sizeof( cls.datagram_buf ));
--- a/engine/client/sound.h
+++ b/engine/client/sound.h
@ -27,7 +27,6 @@ extern poolhandle_t sndpool;
 #define SOUND_22k			22050	// 22khz sample rate
 #define SOUND_32k			32000	// 32khz sample rate
 #define SOUND_44k			44100	// 44khz sample rate
-#define SOUND_48k			48000	// 48khz sample rate
 #define DMA_MSEC_PER_SAMPLE		((float)(1000.0 / SOUND_DMA_SPEED))

 // fixed point stuff for real-time resampling
--- a/engine/client/voice.c
+++ b/engine/client/voice.c
@ -15,13 +15,11 @@ GNU General Public License for more details.
 */

 #include <opus.h>
+#include <opus_custom.h>
 #include "common.h"
 #include "client.h"
 #include "voice.h"

-static wavdata_t *input_file;
-static fs_offset_t input_pos;
-
 voice_state_t voice = { 0 };

 CVAR_DEFINE_AUTO( voice_enable, "1", FCVAR_PRIVILEGED|FCVAR_ARCHIVE, "enable voice chat" );
@ -31,6 +29,8 @@ CVAR_DEFINE_AUTO( voice_avggain, "0.5", FCVAR_PRIVILEGED|FCVAR_ARCHIVE, "automat
 CVAR_DEFINE_AUTO( voice_maxgain, "5.0", FCVAR_PRIVILEGED|FCVAR_ARCHIVE, "automatic voice gain control (maximum)" );
 CVAR_DEFINE_AUTO( voice_inputfromfile, "0", FCVAR_PRIVILEGED, "input voice from voice_input.wav" );

+static void Voice_ApplyGainAdjust( int16_t *samples, int count );
+
 /*
 ===============================================================================

@ -39,25 +39,6 @@ CVAR_DEFINE_AUTO( voice_inputfromfile, "0", FCVAR_PRIVILEGED, "input voice from
 ===============================================================================
 */

-/*
-=========================
-Voice_GetBandwithTypeName
-
-=========================
-*/
-static const char* Voice_GetBandwidthTypeName( int bandwidthType )
-{
-	switch( bandwidthType )
-	{
-	case OPUS_BANDWIDTH_FULLBAND: return "Full Band (20 kHz)";
-	case OPUS_BANDWIDTH_SUPERWIDEBAND: return "Super Wide Band (12 kHz)";
-	case OPUS_BANDWIDTH_WIDEBAND: return "Wide Band (8 kHz)";
-	case OPUS_BANDWIDTH_MEDIUMBAND: return "Medium Band (6 kHz)";
-	case OPUS_BANDWIDTH_NARROWBAND: return "Narrow Band (4 kHz)";
-	default: return "Unknown";
-	}
-}
-
 /*
 =========================
 Voice_CodecInfo_f
@ -68,7 +49,6 @@ static void Voice_CodecInfo_f( void )
 {
 	int encoderComplexity;
 	opus_int32 encoderBitrate;
-	opus_int32 encoderBandwidthType;

 	if( !voice.initialized )
 	{
@ -76,72 +56,12 @@ static void Voice_CodecInfo_f( void )
 		return;
 	}

-	opus_encoder_ctl( voice.encoder, OPUS_GET_BITRATE( &encoderBitrate ));
-	opus_encoder_ctl( voice.encoder, OPUS_GET_COMPLEXITY( &encoderComplexity ));
-	opus_encoder_ctl( voice.encoder, OPUS_GET_BANDWIDTH( &encoderBandwidthType ));
+	opus_custom_encoder_ctl( voice.encoder, OPUS_GET_BITRATE( &encoderBitrate ));
+	opus_custom_encoder_ctl( voice.encoder, OPUS_GET_COMPLEXITY( &encoderComplexity ));

 	Con_Printf( "Encoder:\n" );
 	Con_Printf( "  Bitrate: %.3f kbps\n", encoderBitrate / 1000.0f );
 	Con_Printf( "  Complexity: %d\n", encoderComplexity );
-	Con_Printf( "  Bandwidth: %s\n", Voice_GetBandwidthTypeName( encoderBandwidthType ));
-}
-
-/*
-=========================
-Voice_GetFrameSize
-
-=========================
-*/
-static uint Voice_GetFrameSize( float durationMsec )
-{
-	return voice.channels * voice.width * (( float )voice.samplerate / ( 1000.0f / durationMsec ));
-}
-
-/*
-=========================
-Voice_ApplyGainAdjust
-
-=========================
-*/
-static void Voice_ApplyGainAdjust( opus_int16 *samples, int count )
-{
-	float gain, modifiedMax;
-	int average, adjustedSample;
-	int blockOffset = 0;
-
-	for( ;;)
-	{
-		int i;
-		int localMax = 0;
-		int localSum = 0;
-		int blockSize = Q_min( count - ( blockOffset + voice.autogain.block_size ), voice.autogain.block_size );
-		
-		if( blockSize < 1 )
-			break;
-
-		for( i = 0; i < blockSize; ++i )
-		{
-			int sample = samples[blockOffset + i];
-			if( abs( sample ) > localMax ) {
-				localMax = abs( sample );
-			}
-			localSum += sample;
-
-			gain = voice.autogain.current_gain + i * voice.autogain.gain_multiplier;
-			adjustedSample = Q_min( 32767, Q_max(( int )( sample * gain ), -32768 ));
-			samples[blockOffset + i] = adjustedSample;
-		}
-
-		if( blockOffset % voice.autogain.block_size == 0 )
-		{
-			average = localSum / blockSize;
-			modifiedMax = average + ( localMax - average ) * voice_avggain.value;
-			voice.autogain.current_gain = voice.autogain.next_gain * voice_scale.value;
-			voice.autogain.next_gain = Q_min( 32767.0f / modifiedMax, voice_maxgain.value ) * voice_scale.value;
-			voice.autogain.gain_multiplier = ( voice.autogain.next_gain - voice.autogain.current_gain ) / ( voice.autogain.block_size - 1 );
-		}
-		blockOffset += blockSize;
-	}
 }

 /*
@ -153,11 +73,22 @@ Voice_InitOpusDecoder
 static qboolean Voice_InitOpusDecoder( void )
 {
 	int err;
-	voice.decoder = opus_decoder_create( voice.samplerate, voice.channels, &err );

+	voice.width      = sizeof( opus_int16 );
+	voice.samplerate = VOICE_OPUS_CUSTOM_SAMPLERATE;
+	voice.frame_size = VOICE_OPUS_CUSTOM_FRAME_SIZE;
+
+	voice.custom_mode = opus_custom_mode_create( SOUND_44k, voice.frame_size, &err );
+	if( !voice.custom_mode )
+	{
+		Con_Printf( S_ERROR "Can't create Opus Custom mode: %s\n", opus_strerror( err ));
+		return false;
+	}
+
+	voice.decoder = opus_custom_decoder_create( voice.custom_mode, VOICE_PCM_CHANNELS, &err );
 	if( !voice.decoder )
 	{
-		Con_Printf( S_ERROR "Can't create Opus encoder: %s", opus_strerror( err ));
+		Con_Printf( S_ERROR "Can't create Opus encoder: %s\n", opus_strerror( err ));
 		return false;
 	}

@ -173,37 +104,30 @@ Voice_InitOpusEncoder
 static qboolean Voice_InitOpusEncoder( int quality )
 {
 	int err;
-	int app = quality == 5 ? OPUS_APPLICATION_AUDIO : OPUS_APPLICATION_VOIP;
-
-	voice.encoder = opus_encoder_create( voice.samplerate, voice.channels, app, &err );

+	voice.encoder = opus_custom_encoder_create( voice.custom_mode, VOICE_PCM_CHANNELS, &err );
 	if( !voice.encoder )
 	{
-		Con_Printf( S_ERROR "Can't create Opus encoder: %s", opus_strerror( err ));
+		Con_Printf( S_ERROR "Can't create Opus encoder: %s\n", opus_strerror( err ));
 		return false;
 	}

 	switch( quality )
 	{
-	case 1: // 6 kbps, <6 kHz bandwidth
-		opus_encoder_ctl( voice.encoder, OPUS_SET_BITRATE( 6000 ));
-		opus_encoder_ctl( voice.encoder, OPUS_SET_BANDWIDTH( OPUS_BANDWIDTH_MEDIUMBAND ));
+	case 1: // 6 kbps
+		opus_custom_encoder_ctl( voice.encoder, OPUS_SET_BITRATE( 6000 ));
 		break;
-	case 2: // 12 kbps, <12 kHz bandwidth
-		opus_encoder_ctl( voice.encoder, OPUS_SET_BITRATE( 12000 ));
-		opus_encoder_ctl( voice.encoder, OPUS_SET_BANDWIDTH( OPUS_BANDWIDTH_SUPERWIDEBAND ));
+	case 2: // 12 kbps
+		opus_custom_encoder_ctl( voice.encoder, OPUS_SET_BITRATE( 12000 ));
 		break;
-	case 4: // 64 kbps, full band (20 kHz)
-		opus_encoder_ctl( voice.encoder, OPUS_SET_BITRATE( 64000 ));
-		opus_encoder_ctl( voice.encoder, OPUS_SET_BANDWIDTH( OPUS_BANDWIDTH_FULLBAND ));
+	case 4: // 64 kbps
+		opus_custom_encoder_ctl( voice.encoder, OPUS_SET_BITRATE( 64000 ));
 		break;
-	case 5: // 96 kbps, full band (20 kHz)
-		opus_encoder_ctl( voice.encoder, OPUS_SET_BITRATE( 96000 ));
-		opus_encoder_ctl( voice.encoder, OPUS_SET_BANDWIDTH( OPUS_BANDWIDTH_FULLBAND ));
+	case 5: // 96 kbps
+		opus_custom_encoder_ctl( voice.encoder, OPUS_SET_BITRATE( 96000 ));
 		break;
-	default: // 36 kbps, <12 kHz bandwidth
-		opus_encoder_ctl( voice.encoder, OPUS_SET_BITRATE( 36000 ));
-		opus_encoder_ctl( voice.encoder, OPUS_SET_BANDWIDTH( OPUS_BANDWIDTH_SUPERWIDEBAND ));
+	default: // 36 kbps
+		opus_custom_encoder_ctl( voice.encoder, OPUS_SET_BITRATE( 36000 ));
 		break;
 	}

@ -220,7 +144,7 @@ static void Voice_ShutdownOpusDecoder( void )
 {
 	if( voice.decoder )
 	{
-		opus_decoder_destroy( voice.decoder );
+		opus_custom_decoder_destroy( voice.decoder );
 		voice.decoder = NULL;
 	}
 }
@ -235,9 +159,15 @@ static void Voice_ShutdownOpusEncoder( void )
 {
 	if( voice.encoder )
 	{
-		opus_encoder_destroy( voice.encoder );
+		opus_custom_encoder_destroy( voice.encoder );
 		voice.encoder = NULL;
 	}
+
+	if( voice.custom_mode )
+	{
+		opus_custom_mode_destroy( voice.custom_mode );
+		voice.custom_mode = NULL;
+	}
 }

 /*
@ -248,44 +178,72 @@ Voice_GetOpusCompressedData
 */
 static uint Voice_GetOpusCompressedData( byte *out, uint maxsize, uint *frames )
 {
-	uint ofs, size = 0;
+	uint ofs = 0, size = 0;
+	uint frame_size_bytes = voice.frame_size * voice.width;

-	if( input_file )
+	if( voice.input_file )
 	{
 		uint numbytes;
 		double updateInterval;

 		updateInterval = cl.mtime[0] - cl.mtime[1];
-		numbytes = updateInterval * voice.samplerate * voice.width * voice.channels;
-		numbytes = Q_min( numbytes, input_file->size - input_pos );
+		numbytes = updateInterval * voice.samplerate * voice.width * VOICE_PCM_CHANNELS;
+		numbytes = Q_min( numbytes, voice.input_file->size - voice.input_file_pos );
 		numbytes = Q_min( numbytes, sizeof( voice.input_buffer ) - voice.input_buffer_pos );

-		memcpy( voice.input_buffer + voice.input_buffer_pos, input_file->buffer + input_pos, numbytes );
+		memcpy( voice.input_buffer + voice.input_buffer_pos, voice.input_file->buffer + voice.input_file_pos, numbytes );
 		voice.input_buffer_pos += numbytes;
-		input_pos += numbytes;
+		voice.input_file_pos += numbytes;
 	}

-	for( ofs = 0; voice.input_buffer_pos - ofs >= voice.frame_size && ofs <= voice.input_buffer_pos; ofs += voice.frame_size )
+	if( !voice.input_file )
+		VoiceCapture_Lock( true );
+
+	for( ofs = 0; voice.input_buffer_pos - ofs >= frame_size_bytes && ofs <= voice.input_buffer_pos; ofs += frame_size_bytes )
 	{
 		int bytes;

-		if( !input_file )
+#if 1
+		if( !voice.input_file )
 		{
 			// adjust gain before encoding, but only for input from voice
-			Voice_ApplyGainAdjust((opus_int16*)voice.input_buffer + ofs, voice.frame_size);
+			Voice_ApplyGainAdjust((opus_int16*)(voice.input_buffer + ofs), voice.frame_size);
 		}
+#endif

-		bytes = opus_encode( voice.encoder, (const opus_int16*)(voice.input_buffer + ofs), voice.frame_size / voice.width, out + size, maxsize );
-		memmove( voice.input_buffer, voice.input_buffer + voice.frame_size, sizeof( voice.input_buffer ) - voice.frame_size );
-		voice.input_buffer_pos -= voice.frame_size;
+		bytes = opus_custom_encode( voice.encoder, (const opus_int16 *)( voice.input_buffer + ofs ),
+			voice.frame_size, out + size + sizeof( uint16_t ), maxsize );

 		if( bytes > 0 )
 		{
-			size += bytes;
+			// write compressed frame size
+			*((uint16_t *)&out[size]) = bytes;
+
+			size += bytes + sizeof( uint16_t );
+			maxsize -= bytes + sizeof( uint16_t );
+
 			(*frames)++;
 		}
+		else
+		{
+			Con_Printf( S_ERROR "%s: failed to encode frame: %s\n", __func__, opus_strerror( bytes ));
+		}
 	}

+	// did we compress anything? update counters
+	if( ofs )
+	{
+		fs_offset_t remaining = voice.input_buffer_pos - ofs;
+
+		// move remaining samples to the beginning of buffer
+		memmove( voice.input_buffer, voice.input_buffer + ofs, remaining );
+
+		voice.input_buffer_pos = remaining;
+	}
+
+	if( !voice.input_file )
+		VoiceCapture_Lock( false );
+
 	return size;
 }

@ -297,6 +255,53 @@ static uint Voice_GetOpusCompressedData( byte *out, uint maxsize, uint *frames )
 ===============================================================================
 */

+/*
+=========================
+Voice_ApplyGainAdjust
+
+=========================
+*/
+static void Voice_ApplyGainAdjust( int16_t *samples, int count )
+{
+	float gain, modifiedMax;
+	int average, adjustedSample, blockOffset = 0;
+
+	for( ;; )
+	{
+		int i, localMax = 0, localSum = 0;
+		int blockSize = Q_min( count - ( blockOffset + voice.autogain.block_size ), voice.autogain.block_size );
+
+		if( blockSize < 1 )
+			break;
+
+		for( i = 0; i < blockSize; ++i )
+		{
+			int sample = samples[blockOffset + i];
+			int absSample = abs( sample );
+
+			if( absSample > localMax )
+				localMax = absSample;
+
+			localSum += absSample;
+
+			gain = voice.autogain.current_gain + i * voice.autogain.gain_multiplier;
+			adjustedSample = Q_min( SHRT_MAX, Q_max(( int )( sample * gain ), SHRT_MIN ));
+			samples[blockOffset + i] = adjustedSample;
+		}
+
+		if( blockOffset % voice.autogain.block_size == 0 )
+		{
+			average = localSum / blockSize;
+			modifiedMax = average + ( localMax - average ) * voice_avggain.value;
+
+			voice.autogain.current_gain = voice.autogain.next_gain * voice_scale.value;
+			voice.autogain.next_gain = Q_min( (float)SHRT_MAX / modifiedMax, voice_maxgain.value ) * voice_scale.value;
+			voice.autogain.gain_multiplier = ( voice.autogain.next_gain - voice.autogain.current_gain ) / ( voice.autogain.block_size - 1 );
+		}
+		blockOffset += blockSize;
+	}
+}
+
 /*
 =========================
 Voice_Status
@ -367,21 +372,19 @@ Voice_RecordStop
 */
 void Voice_RecordStop( void )
 {
-	if( input_file )
+	if( voice.input_file )
 	{
-		FS_FreeSound( input_file );
-		input_file = NULL;
+		FS_FreeSound( voice.input_file );
+		voice.input_file = NULL;
 	}

+	VoiceCapture_Activate( false );
+	voice.is_recording = false;
+
+	Voice_Status( VOICE_LOCALCLIENT_INDEX, false );
+
 	voice.input_buffer_pos = 0;
 	memset( voice.input_buffer, 0, sizeof( voice.input_buffer ));
-
-	if( Voice_IsRecording( ))
-		Voice_Status( VOICE_LOCALCLIENT_INDEX, false );
-	
-	VoiceCapture_RecordStop();
-	
-	voice.is_recording = false;
 }

 /*
@ -396,25 +399,25 @@ void Voice_RecordStart( void )

 	if( voice_inputfromfile.value )
 	{
-		input_file = FS_LoadSound( "voice_input.wav", NULL, 0 );
+		voice.input_file = FS_LoadSound( "voice_input.wav", NULL, 0 );

-		if( input_file )
+		if( voice.input_file )
 		{
-			Sound_Process( &input_file, voice.samplerate, voice.width, SOUND_RESAMPLE );
-			input_pos = 0;
+			Sound_Process( &voice.input_file, voice.samplerate, voice.width, SOUND_RESAMPLE );
+			voice.input_file_pos = 0;
 			
 			voice.start_time = Sys_DoubleTime();
 			voice.is_recording = true;
 		}
 		else
 		{
-			FS_FreeSound( input_file );
-			input_file = NULL;
+			FS_FreeSound( voice.input_file );
+			voice.input_file = NULL;
 		}
 	}

 	if( !Voice_IsRecording() )
-		voice.is_recording = VoiceCapture_RecordStart();
+		voice.is_recording = VoiceCapture_Activate( true );

 	if( Voice_IsRecording() )
 		Voice_Status( VOICE_LOCALCLIENT_INDEX, true );
@ -460,7 +463,7 @@ Feed the decoded data to engine sound subsystem
 static void Voice_StartChannel( uint samples, byte *data, int entnum )
 {
 	SND_ForceInitMouth( entnum );
-	S_RawEntSamples( entnum, samples, voice.samplerate, voice.width, voice.channels, data, 255 );
+	S_RawEntSamples( entnum, samples, voice.samplerate, voice.width, VOICE_PCM_CHANNELS, data, 255 );
 }

 /*
@ -472,12 +475,35 @@ Received encoded voice data, decode it
 */
 void Voice_AddIncomingData( int ent, const byte *data, uint size, uint frames )
 {
-	int samples;
+	int samples = 0;
+	int ofs = 0;

 	if( !voice.decoder )
 		return;

-	samples = opus_decode( voice.decoder, data, size, (short *)voice.decompress_buffer, voice.frame_size / voice.width * frames, false );
+	// decode frame by frame
+	for( ;; )
+	{
+		int frame_samples;
+		uint16_t compressed_size;
+
+		// no compressed size mark
+		if( ofs + sizeof( uint16_t ) > size )
+			break;
+
+		compressed_size = *(const uint16_t *)(data + ofs);
+		ofs += sizeof( uint16_t );
+
+		// no frame data
+		if( ofs + compressed_size > size )
+			break;
+
+		frame_samples = opus_custom_decode( voice.decoder, data + ofs, compressed_size,
+			(opus_int16*)voice.decompress_buffer + samples, voice.frame_size );
+
+		ofs += compressed_size;
+		samples += frame_samples;
+	}

 	if( samples > 0 )
 		Voice_StartChannel( samples, voice.decompress_buffer, ent );
@ -566,7 +592,7 @@ void Voice_Idle( double frametime )
 {
 	int i;

-	if( !voice_enable.value )
+	if( FBitSet( voice_enable.flags, FCVAR_CHANGED ) && !voice_enable.value )
 	{
 		Voice_Shutdown();
 		return;
@ -591,19 +617,16 @@ qboolean Voice_Init( const char *pszCodecName, int quality )
 	if( !voice_enable.value )
 		return false;

-	Voice_Shutdown();
-
-	if( Q_strcmp( pszCodecName, "opus" ))
+	if( Q_strcmp( pszCodecName, VOICE_OPUS_CUSTOM_CODEC ))
 	{
-		Con_Printf( S_ERROR "Server requested unsupported codec: %s", pszCodecName );
+		Con_Printf( S_ERROR "Server requested unsupported codec: %s\n", pszCodecName );
 		return false;
 	}

-	voice.initialized = false;
-	voice.channels = 1;
-	voice.width = 2;
-	voice.samplerate = SOUND_48k;
-	voice.frame_size = Voice_GetFrameSize( 40.0f );
+	// reinitialize only if codec parameters are different
+	if( Q_strcmp( voice.codec, pszCodecName ) && voice.quality != quality )
+		Voice_Shutdown();
+
 	voice.autogain.block_size = 128;

 	if( !Voice_InitOpusDecoder( ))
@ -617,6 +640,7 @@ qboolean Voice_Init( const char *pszCodecName, int quality )

 	// we can hear others players, so it's fine to fail now
 	voice.initialized = true;
+	Q_strncpy( voice.codec, pszCodecName, sizeof( voice.codec ));

 	if( !Voice_InitOpusEncoder( quality ))
 	{
@ -624,6 +648,8 @@ qboolean Voice_Init( const char *pszCodecName, int quality )
 		return false;
 	}

+	voice.quality = quality;
+
 	if( !VoiceCapture_Init( ))
 		Con_Printf( S_WARN "No microphone is available.\n" );

--- a/engine/client/voice.h
+++ b/engine/client/voice.h
@ -17,15 +17,29 @@ GNU General Public License for more details.
 #ifndef VOICE_H
 #define VOICE_H

+#include "common.h"
 #include "protocol.h" // MAX_CLIENTS
 #include "sound.h"

-typedef struct OpusDecoder OpusDecoder;
-typedef struct OpusEncoder OpusEncoder;
+typedef struct OpusCustomEncoder OpusCustomEncoder;
+typedef struct OpusCustomDecoder OpusCustomDecoder;
+typedef struct OpusCustomMode OpusCustomMode;

 #define VOICE_LOOPBACK_INDEX (-2)
 #define VOICE_LOCALCLIENT_INDEX (-1)

+#define VOICE_PCM_CHANNELS 1 // always mono
+
+// never change these parameters when using opuscustom
+#define VOICE_OPUS_CUSTOM_SAMPLERATE SOUND_44k
+// must follow opus custom requirements
+// also be divisible with MAX_RAW_SAMPLES
+#define VOICE_OPUS_CUSTOM_FRAME_SIZE 1024
+#define VOICE_OPUS_CUSTOM_CODEC "opus_custom_44k_512"
+
+// a1ba: do not change, we don't have any re-encoding support now
+#define VOICE_DEFAULT_CODEC VOICE_OPUS_CUSTOM_CODEC
+
 typedef struct voice_status_s
 {
 	qboolean talking_ack;
@ -34,6 +48,9 @@ typedef struct voice_status_s

 typedef struct voice_state_s
 {
+	string codec;
+	int quality;
+
 	qboolean initialized;
 	qboolean is_recording;
 	double start_time;
@ -42,20 +59,24 @@ typedef struct voice_state_s
 	voice_status_t players_status[MAX_CLIENTS];

 	// opus stuff
-	OpusEncoder *encoder;
-	OpusDecoder *decoder;
+	OpusCustomMode    *custom_mode;
+	OpusCustomEncoder *encoder;
+	OpusCustomDecoder *decoder;

 	// audio info
-	uint channels;
 	uint width;
 	uint samplerate;
-	uint frame_size;
+	uint frame_size; // in samples

 	// buffers
 	byte input_buffer[MAX_RAW_SAMPLES];
 	byte output_buffer[MAX_RAW_SAMPLES];
 	byte decompress_buffer[MAX_RAW_SAMPLES];
-	fs_offset_t input_buffer_pos;
+	fs_offset_t input_buffer_pos; // in bytes
+
+	// input from file
+	wavdata_t *input_file;
+	fs_offset_t input_file_pos; // in bytes

 	// automatic gain control
 	struct {