engine: client: voice: new version, move to Opus Custom codec

* Although Opus Custom has strict requirements, it's more bare-bones, allowing us to use the maximum
frame size and a custom sample rate without resampling
* Write each encoded frame's size to the network buffer, allowing smooth voice chat even at 10 FPS
* Fix possible buffer overruns, underruns and races with platform side
* Revise all usages of offset variables, samples vs bytes
This commit is contained in:
Alibek Omarov 2022-08-31 06:44:45 +03:00
parent 4110ee0928
commit 82ab06efdd
4 changed files with 206 additions and 160 deletions

View File

@ -3085,7 +3085,7 @@ void CL_Init( void )
VID_Init(); // init video
S_Init(); // init sound
Voice_Init( "opus", 3 ); // init voice
Voice_Init( VOICE_DEFAULT_CODEC, 3 ); // init voice
// unreliable buffer. used for unreliable commands and voice stream
MSG_Init( &cls.datagram, "cls.datagram", cls.datagram_buf, sizeof( cls.datagram_buf ));

View File

@ -27,7 +27,6 @@ extern poolhandle_t sndpool;
#define SOUND_22k 22050 // 22khz sample rate
#define SOUND_32k 32000 // 32khz sample rate
#define SOUND_44k 44100 // 44khz sample rate
#define SOUND_48k 48000 // 48khz sample rate
#define DMA_MSEC_PER_SAMPLE ((float)(1000.0 / SOUND_DMA_SPEED))
// fixed point stuff for real-time resampling

View File

@ -15,13 +15,11 @@ GNU General Public License for more details.
*/
#include <opus.h>
#include <opus_custom.h>
#include "common.h"
#include "client.h"
#include "voice.h"
static wavdata_t *input_file;
static fs_offset_t input_pos;
voice_state_t voice = { 0 };
CVAR_DEFINE_AUTO( voice_enable, "1", FCVAR_PRIVILEGED|FCVAR_ARCHIVE, "enable voice chat" );
@ -31,6 +29,8 @@ CVAR_DEFINE_AUTO( voice_avggain, "0.5", FCVAR_PRIVILEGED|FCVAR_ARCHIVE, "automat
CVAR_DEFINE_AUTO( voice_maxgain, "5.0", FCVAR_PRIVILEGED|FCVAR_ARCHIVE, "automatic voice gain control (maximum)" );
CVAR_DEFINE_AUTO( voice_inputfromfile, "0", FCVAR_PRIVILEGED, "input voice from voice_input.wav" );
static void Voice_ApplyGainAdjust( int16_t *samples, int count );
/*
===============================================================================
@ -39,25 +39,6 @@ CVAR_DEFINE_AUTO( voice_inputfromfile, "0", FCVAR_PRIVILEGED, "input voice from
===============================================================================
*/
/*
=========================
Voice_GetBandwidthTypeName
=========================
*/
static const char* Voice_GetBandwidthTypeName( int bandwidthType )
{
	// human-readable labels for the Opus bandwidth constants this module reports;
	// anything not listed here is printed as "Unknown"
	static const struct
	{
		int type;
		const char *label;
	} knownBandwidths[] =
	{
		{ OPUS_BANDWIDTH_FULLBAND, "Full Band (20 kHz)" },
		{ OPUS_BANDWIDTH_SUPERWIDEBAND, "Super Wide Band (12 kHz)" },
		{ OPUS_BANDWIDTH_WIDEBAND, "Wide Band (8 kHz)" },
		{ OPUS_BANDWIDTH_MEDIUMBAND, "Medium Band (6 kHz)" },
		{ OPUS_BANDWIDTH_NARROWBAND, "Narrow Band (4 kHz)" },
	};
	unsigned int idx;

	for( idx = 0; idx < sizeof( knownBandwidths ) / sizeof( knownBandwidths[0] ); idx++ )
	{
		if( knownBandwidths[idx].type == bandwidthType )
			return knownBandwidths[idx].label;
	}

	return "Unknown";
}
/*
=========================
Voice_CodecInfo_f
@ -68,7 +49,6 @@ static void Voice_CodecInfo_f( void )
{
int encoderComplexity;
opus_int32 encoderBitrate;
opus_int32 encoderBandwidthType;
if( !voice.initialized )
{
@ -76,72 +56,12 @@ static void Voice_CodecInfo_f( void )
return;
}
opus_encoder_ctl( voice.encoder, OPUS_GET_BITRATE( &encoderBitrate ));
opus_encoder_ctl( voice.encoder, OPUS_GET_COMPLEXITY( &encoderComplexity ));
opus_encoder_ctl( voice.encoder, OPUS_GET_BANDWIDTH( &encoderBandwidthType ));
opus_custom_encoder_ctl( voice.encoder, OPUS_GET_BITRATE( &encoderBitrate ));
opus_custom_encoder_ctl( voice.encoder, OPUS_GET_COMPLEXITY( &encoderComplexity ));
Con_Printf( "Encoder:\n" );
Con_Printf( " Bitrate: %.3f kbps\n", encoderBitrate / 1000.0f );
Con_Printf( " Complexity: %d\n", encoderComplexity );
Con_Printf( " Bandwidth: %s\n", Voice_GetBandwidthTypeName( encoderBandwidthType ));
}
/*
=========================
Voice_GetFrameSize
=========================
*/
static uint Voice_GetFrameSize( float durationMsec )
{
	// size of one sample across all channels (channels * width)
	const uint sampleStride = voice.channels * voice.width;

	// samplerate / ( 1000 / ms ) is the sample count covering durationMsec;
	// the float product is truncated to uint on return
	return sampleStride * (( float )voice.samplerate / ( 1000.0f / durationMsec ));
}
/*
=========================
Voice_ApplyGainAdjust
=========================
*/
/*
Applies automatic gain control in place to `count` 16-bit PCM samples.

The buffer is walked in chunks of at most voice.autogain.block_size samples.
Within a chunk the gain starts at autogain.current_gain and grows by
autogain.gain_multiplier per sample; every result is clamped to the int16 range.
After a chunk that started on a block boundary, its mean and peak magnitude are
used to pick the next gain targets (weighted by voice_avggain, capped by
voice_maxgain, scaled by voice_scale).
*/
static void Voice_ApplyGainAdjust( opus_int16 *samples, int count )
{
	float gain, modifiedMax, cappedGain;
	int average, adjustedSample;
	int blockOffset = 0;

	for( ;; )
	{
		int i;
		int localMax = 0;
		int localSum = 0;
		// count - blockOffset is what is still unprocessed; subtracting block_size
		// as well would leave the final block of every buffer without gain applied
		int blockSize = Q_min( count - blockOffset, voice.autogain.block_size );

		if( blockSize < 1 )
			break;

		for( i = 0; i < blockSize; ++i )
		{
			int sample = samples[blockOffset + i];
			int absSample = abs( sample );

			if( absSample > localMax )
				localMax = absSample;

			// accumulate magnitudes: signed samples cancel out and would
			// make the average level meaningless
			localSum += absSample;

			gain = voice.autogain.current_gain + i * voice.autogain.gain_multiplier;
			adjustedSample = Q_min( 32767, Q_max(( int )( sample * gain ), -32768 ));
			samples[blockOffset + i] = adjustedSample;
		}

		if( blockOffset % voice.autogain.block_size == 0 )
		{
			average = localSum / blockSize;
			modifiedMax = average + ( localMax - average ) * voice_avggain.value;

			// a silent block has no peak to normalise against: use the gain cap
			// directly instead of dividing by zero
			cappedGain = voice_maxgain.value;
			if( modifiedMax > 0.0f )
				cappedGain = Q_min( 32767.0f / modifiedMax, voice_maxgain.value );

			voice.autogain.current_gain = voice.autogain.next_gain * voice_scale.value;
			voice.autogain.next_gain = cappedGain * voice_scale.value;
			voice.autogain.gain_multiplier = ( voice.autogain.next_gain - voice.autogain.current_gain ) / ( voice.autogain.block_size - 1 );
		}

		blockOffset += blockSize;
	}
}
/*
@ -153,11 +73,22 @@ Voice_InitOpusDecoder
static qboolean Voice_InitOpusDecoder( void )
{
int err;
voice.decoder = opus_decoder_create( voice.samplerate, voice.channels, &err );
voice.width = sizeof( opus_int16 );
voice.samplerate = VOICE_OPUS_CUSTOM_SAMPLERATE;
voice.frame_size = VOICE_OPUS_CUSTOM_FRAME_SIZE;
voice.custom_mode = opus_custom_mode_create( SOUND_44k, voice.frame_size, &err );
if( !voice.custom_mode )
{
Con_Printf( S_ERROR "Can't create Opus Custom mode: %s\n", opus_strerror( err ));
return false;
}
voice.decoder = opus_custom_decoder_create( voice.custom_mode, VOICE_PCM_CHANNELS, &err );
if( !voice.decoder )
{
Con_Printf( S_ERROR "Can't create Opus encoder: %s", opus_strerror( err ));
Con_Printf( S_ERROR "Can't create Opus encoder: %s\n", opus_strerror( err ));
return false;
}
@ -173,37 +104,30 @@ Voice_InitOpusEncoder
static qboolean Voice_InitOpusEncoder( int quality )
{
int err;
int app = quality == 5 ? OPUS_APPLICATION_AUDIO : OPUS_APPLICATION_VOIP;
voice.encoder = opus_encoder_create( voice.samplerate, voice.channels, app, &err );
voice.encoder = opus_custom_encoder_create( voice.custom_mode, VOICE_PCM_CHANNELS, &err );
if( !voice.encoder )
{
Con_Printf( S_ERROR "Can't create Opus encoder: %s", opus_strerror( err ));
Con_Printf( S_ERROR "Can't create Opus encoder: %s\n", opus_strerror( err ));
return false;
}
switch( quality )
{
case 1: // 6 kbps, <6 kHz bandwidth
opus_encoder_ctl( voice.encoder, OPUS_SET_BITRATE( 6000 ));
opus_encoder_ctl( voice.encoder, OPUS_SET_BANDWIDTH( OPUS_BANDWIDTH_MEDIUMBAND ));
case 1: // 6 kbps
opus_custom_encoder_ctl( voice.encoder, OPUS_SET_BITRATE( 6000 ));
break;
case 2: // 12 kbps, <12 kHz bandwidth
opus_encoder_ctl( voice.encoder, OPUS_SET_BITRATE( 12000 ));
opus_encoder_ctl( voice.encoder, OPUS_SET_BANDWIDTH( OPUS_BANDWIDTH_SUPERWIDEBAND ));
case 2: // 12 kbps
opus_custom_encoder_ctl( voice.encoder, OPUS_SET_BITRATE( 12000 ));
break;
case 4: // 64 kbps, full band (20 kHz)
opus_encoder_ctl( voice.encoder, OPUS_SET_BITRATE( 64000 ));
opus_encoder_ctl( voice.encoder, OPUS_SET_BANDWIDTH( OPUS_BANDWIDTH_FULLBAND ));
case 4: // 64 kbps
opus_custom_encoder_ctl( voice.encoder, OPUS_SET_BITRATE( 64000 ));
break;
case 5: // 96 kbps, full band (20 kHz)
opus_encoder_ctl( voice.encoder, OPUS_SET_BITRATE( 96000 ));
opus_encoder_ctl( voice.encoder, OPUS_SET_BANDWIDTH( OPUS_BANDWIDTH_FULLBAND ));
case 5: // 96 kbps
opus_custom_encoder_ctl( voice.encoder, OPUS_SET_BITRATE( 96000 ));
break;
default: // 36 kbps, <12 kHz bandwidth
opus_encoder_ctl( voice.encoder, OPUS_SET_BITRATE( 36000 ));
opus_encoder_ctl( voice.encoder, OPUS_SET_BANDWIDTH( OPUS_BANDWIDTH_SUPERWIDEBAND ));
default: // 36 kbps
opus_custom_encoder_ctl( voice.encoder, OPUS_SET_BITRATE( 36000 ));
break;
}
@ -220,7 +144,7 @@ static void Voice_ShutdownOpusDecoder( void )
{
if( voice.decoder )
{
opus_decoder_destroy( voice.decoder );
opus_custom_decoder_destroy( voice.decoder );
voice.decoder = NULL;
}
}
@ -235,9 +159,15 @@ static void Voice_ShutdownOpusEncoder( void )
{
if( voice.encoder )
{
opus_encoder_destroy( voice.encoder );
opus_custom_encoder_destroy( voice.encoder );
voice.encoder = NULL;
}
if( voice.custom_mode )
{
opus_custom_mode_destroy( voice.custom_mode );
voice.custom_mode = NULL;
}
}
/*
@ -248,44 +178,72 @@ Voice_GetOpusCompressedData
*/
static uint Voice_GetOpusCompressedData( byte *out, uint maxsize, uint *frames )
{
uint ofs, size = 0;
uint ofs = 0, size = 0;
uint frame_size_bytes = voice.frame_size * voice.width;
if( input_file )
if( voice.input_file )
{
uint numbytes;
double updateInterval;
updateInterval = cl.mtime[0] - cl.mtime[1];
numbytes = updateInterval * voice.samplerate * voice.width * voice.channels;
numbytes = Q_min( numbytes, input_file->size - input_pos );
numbytes = updateInterval * voice.samplerate * voice.width * VOICE_PCM_CHANNELS;
numbytes = Q_min( numbytes, voice.input_file->size - voice.input_file_pos );
numbytes = Q_min( numbytes, sizeof( voice.input_buffer ) - voice.input_buffer_pos );
memcpy( voice.input_buffer + voice.input_buffer_pos, input_file->buffer + input_pos, numbytes );
memcpy( voice.input_buffer + voice.input_buffer_pos, voice.input_file->buffer + voice.input_file_pos, numbytes );
voice.input_buffer_pos += numbytes;
input_pos += numbytes;
voice.input_file_pos += numbytes;
}
for( ofs = 0; voice.input_buffer_pos - ofs >= voice.frame_size && ofs <= voice.input_buffer_pos; ofs += voice.frame_size )
if( !voice.input_file )
VoiceCapture_Lock( true );
for( ofs = 0; voice.input_buffer_pos - ofs >= frame_size_bytes && ofs <= voice.input_buffer_pos; ofs += frame_size_bytes )
{
int bytes;
if( !input_file )
#if 1
if( !voice.input_file )
{
// adjust gain before encoding, but only for input from voice
Voice_ApplyGainAdjust((opus_int16*)voice.input_buffer + ofs, voice.frame_size);
Voice_ApplyGainAdjust((opus_int16*)(voice.input_buffer + ofs), voice.frame_size);
}
#endif
bytes = opus_encode( voice.encoder, (const opus_int16*)(voice.input_buffer + ofs), voice.frame_size / voice.width, out + size, maxsize );
memmove( voice.input_buffer, voice.input_buffer + voice.frame_size, sizeof( voice.input_buffer ) - voice.frame_size );
voice.input_buffer_pos -= voice.frame_size;
bytes = opus_custom_encode( voice.encoder, (const opus_int16 *)( voice.input_buffer + ofs ),
voice.frame_size, out + size + sizeof( uint16_t ), maxsize );
if( bytes > 0 )
{
size += bytes;
// write compressed frame size
*((uint16_t *)&out[size]) = bytes;
size += bytes + sizeof( uint16_t );
maxsize -= bytes + sizeof( uint16_t );
(*frames)++;
}
else
{
Con_Printf( S_ERROR "%s: failed to encode frame: %s\n", __func__, opus_strerror( bytes ));
}
}
// did we compress anything? update counters
if( ofs )
{
fs_offset_t remaining = voice.input_buffer_pos - ofs;
// move remaining samples to the beginning of buffer
memmove( voice.input_buffer, voice.input_buffer + ofs, remaining );
voice.input_buffer_pos = remaining;
}
if( !voice.input_file )
VoiceCapture_Lock( false );
return size;
}
@ -297,6 +255,53 @@ static uint Voice_GetOpusCompressedData( byte *out, uint maxsize, uint *frames )
===============================================================================
*/
/*
=========================
Voice_ApplyGainAdjust
=========================
*/
/*
Applies automatic gain control in place to `count` 16-bit PCM samples.

The buffer is walked in chunks of at most voice.autogain.block_size samples.
Within a chunk the gain starts at autogain.current_gain and grows by
autogain.gain_multiplier per sample; every result is clamped to the int16 range.
After a chunk that started on a block boundary, its mean and peak magnitude are
used to pick the next gain targets (weighted by voice_avggain, capped by
voice_maxgain, scaled by voice_scale).
*/
static void Voice_ApplyGainAdjust( int16_t *samples, int count )
{
	float gain, modifiedMax, cappedGain;
	int average, adjustedSample, blockOffset = 0;

	for( ;; )
	{
		int i, localMax = 0, localSum = 0;
		// count - blockOffset is what is still unprocessed; subtracting block_size
		// as well would leave the final block of every buffer without gain applied
		int blockSize = Q_min( count - blockOffset, voice.autogain.block_size );

		if( blockSize < 1 )
			break;

		for( i = 0; i < blockSize; ++i )
		{
			int sample = samples[blockOffset + i];
			int absSample = abs( sample );

			if( absSample > localMax )
				localMax = absSample;

			localSum += absSample;

			gain = voice.autogain.current_gain + i * voice.autogain.gain_multiplier;
			adjustedSample = Q_min( SHRT_MAX, Q_max(( int )( sample * gain ), SHRT_MIN ));
			samples[blockOffset + i] = adjustedSample;
		}

		if( blockOffset % voice.autogain.block_size == 0 )
		{
			average = localSum / blockSize;
			modifiedMax = average + ( localMax - average ) * voice_avggain.value;

			// a silent block has no peak to normalise against: use the gain cap
			// directly instead of dividing by zero
			cappedGain = voice_maxgain.value;
			if( modifiedMax > 0.0f )
				cappedGain = Q_min( (float)SHRT_MAX / modifiedMax, voice_maxgain.value );

			voice.autogain.current_gain = voice.autogain.next_gain * voice_scale.value;
			voice.autogain.next_gain = cappedGain * voice_scale.value;
			voice.autogain.gain_multiplier = ( voice.autogain.next_gain - voice.autogain.current_gain ) / ( voice.autogain.block_size - 1 );
		}

		blockOffset += blockSize;
	}
}
/*
=========================
Voice_Status
@ -367,21 +372,19 @@ Voice_RecordStop
*/
void Voice_RecordStop( void )
{
if( input_file )
if( voice.input_file )
{
FS_FreeSound( input_file );
input_file = NULL;
FS_FreeSound( voice.input_file );
voice.input_file = NULL;
}
VoiceCapture_Activate( false );
voice.is_recording = false;
Voice_Status( VOICE_LOCALCLIENT_INDEX, false );
voice.input_buffer_pos = 0;
memset( voice.input_buffer, 0, sizeof( voice.input_buffer ));
if( Voice_IsRecording( ))
Voice_Status( VOICE_LOCALCLIENT_INDEX, false );
VoiceCapture_RecordStop();
voice.is_recording = false;
}
/*
@ -396,25 +399,25 @@ void Voice_RecordStart( void )
if( voice_inputfromfile.value )
{
input_file = FS_LoadSound( "voice_input.wav", NULL, 0 );
voice.input_file = FS_LoadSound( "voice_input.wav", NULL, 0 );
if( input_file )
if( voice.input_file )
{
Sound_Process( &input_file, voice.samplerate, voice.width, SOUND_RESAMPLE );
input_pos = 0;
Sound_Process( &voice.input_file, voice.samplerate, voice.width, SOUND_RESAMPLE );
voice.input_file_pos = 0;
voice.start_time = Sys_DoubleTime();
voice.is_recording = true;
}
else
{
FS_FreeSound( input_file );
input_file = NULL;
FS_FreeSound( voice.input_file );
voice.input_file = NULL;
}
}
if( !Voice_IsRecording() )
voice.is_recording = VoiceCapture_RecordStart();
voice.is_recording = VoiceCapture_Activate( true );
if( Voice_IsRecording() )
Voice_Status( VOICE_LOCALCLIENT_INDEX, true );
@ -460,7 +463,7 @@ Feed the decoded data to engine sound subsystem
static void Voice_StartChannel( uint samples, byte *data, int entnum )
{
SND_ForceInitMouth( entnum );
S_RawEntSamples( entnum, samples, voice.samplerate, voice.width, voice.channels, data, 255 );
S_RawEntSamples( entnum, samples, voice.samplerate, voice.width, VOICE_PCM_CHANNELS, data, 255 );
}
/*
@ -472,12 +475,35 @@ Received encoded voice data, decode it
*/
void Voice_AddIncomingData( int ent, const byte *data, uint size, uint frames )
{
int samples;
int samples = 0;
int ofs = 0;
if( !voice.decoder )
return;
samples = opus_decode( voice.decoder, data, size, (short *)voice.decompress_buffer, voice.frame_size / voice.width * frames, false );
// decode frame by frame
for( ;; )
{
int frame_samples;
uint16_t compressed_size;
// no compressed size mark
if( ofs + sizeof( uint16_t ) > size )
break;
compressed_size = *(const uint16_t *)(data + ofs);
ofs += sizeof( uint16_t );
// no frame data
if( ofs + compressed_size > size )
break;
frame_samples = opus_custom_decode( voice.decoder, data + ofs, compressed_size,
(opus_int16*)voice.decompress_buffer + samples, voice.frame_size );
ofs += compressed_size;
samples += frame_samples;
}
if( samples > 0 )
Voice_StartChannel( samples, voice.decompress_buffer, ent );
@ -566,7 +592,7 @@ void Voice_Idle( double frametime )
{
int i;
if( !voice_enable.value )
if( FBitSet( voice_enable.flags, FCVAR_CHANGED ) && !voice_enable.value )
{
Voice_Shutdown();
return;
@ -591,19 +617,16 @@ qboolean Voice_Init( const char *pszCodecName, int quality )
if( !voice_enable.value )
return false;
Voice_Shutdown();
if( Q_strcmp( pszCodecName, "opus" ))
if( Q_strcmp( pszCodecName, VOICE_OPUS_CUSTOM_CODEC ))
{
Con_Printf( S_ERROR "Server requested unsupported codec: %s", pszCodecName );
Con_Printf( S_ERROR "Server requested unsupported codec: %s\n", pszCodecName );
return false;
}
voice.initialized = false;
voice.channels = 1;
voice.width = 2;
voice.samplerate = SOUND_48k;
voice.frame_size = Voice_GetFrameSize( 40.0f );
// reinitialize only if codec parameters are different
if( Q_strcmp( voice.codec, pszCodecName ) && voice.quality != quality )
Voice_Shutdown();
voice.autogain.block_size = 128;
if( !Voice_InitOpusDecoder( ))
@ -617,6 +640,7 @@ qboolean Voice_Init( const char *pszCodecName, int quality )
// we can hear others players, so it's fine to fail now
voice.initialized = true;
Q_strncpy( voice.codec, pszCodecName, sizeof( voice.codec ));
if( !Voice_InitOpusEncoder( quality ))
{
@ -624,6 +648,8 @@ qboolean Voice_Init( const char *pszCodecName, int quality )
return false;
}
voice.quality = quality;
if( !VoiceCapture_Init( ))
Con_Printf( S_WARN "No microphone is available.\n" );

View File

@ -17,15 +17,29 @@ GNU General Public License for more details.
#ifndef VOICE_H
#define VOICE_H
#include "common.h"
#include "protocol.h" // MAX_CLIENTS
#include "sound.h"
typedef struct OpusDecoder OpusDecoder;
typedef struct OpusEncoder OpusEncoder;
typedef struct OpusCustomEncoder OpusCustomEncoder;
typedef struct OpusCustomDecoder OpusCustomDecoder;
typedef struct OpusCustomMode OpusCustomMode;
#define VOICE_LOOPBACK_INDEX (-2)
#define VOICE_LOCALCLIENT_INDEX (-1)
#define VOICE_PCM_CHANNELS 1 // always mono
// never change these parameters when using opuscustom
#define VOICE_OPUS_CUSTOM_SAMPLERATE SOUND_44k
// must follow opus custom requirements
// also be divisible with MAX_RAW_SAMPLES
#define VOICE_OPUS_CUSTOM_FRAME_SIZE 1024
#define VOICE_OPUS_CUSTOM_CODEC "opus_custom_44k_512"
// a1ba: do not change, we don't have any re-encoding support now
#define VOICE_DEFAULT_CODEC VOICE_OPUS_CUSTOM_CODEC
typedef struct voice_status_s
{
qboolean talking_ack;
@ -34,6 +48,9 @@ typedef struct voice_status_s
typedef struct voice_state_s
{
string codec;
int quality;
qboolean initialized;
qboolean is_recording;
double start_time;
@ -42,20 +59,24 @@ typedef struct voice_state_s
voice_status_t players_status[MAX_CLIENTS];
// opus stuff
OpusEncoder *encoder;
OpusDecoder *decoder;
OpusCustomMode *custom_mode;
OpusCustomEncoder *encoder;
OpusCustomDecoder *decoder;
// audio info
uint channels;
uint width;
uint samplerate;
uint frame_size;
uint frame_size; // in samples
// buffers
byte input_buffer[MAX_RAW_SAMPLES];
byte output_buffer[MAX_RAW_SAMPLES];
byte decompress_buffer[MAX_RAW_SAMPLES];
fs_offset_t input_buffer_pos;
fs_offset_t input_buffer_pos; // in bytes
// input from file
wavdata_t *input_file;
fs_offset_t input_file_pos; // in bytes
// automatic gain control
struct {