#include "AudioStreamingPart.h" #include "rtc_base/logging.h" #include "rtc_base/third_party/base64/base64.h" extern "C" { #include #include #include } #include #include #include #include namespace tgcalls { namespace { uint32_t stringToUInt32(std::string const &string) { std::stringstream stringStream(string); uint32_t value = 0; stringStream >> value; return value; } template void splitString(const std::string &s, char delim, Out result) { std::istringstream iss(s); std::string item; while (std::getline(iss, item, delim)) { *result++ = item; } } std::vector splitString(const std::string &s, char delim) { std::vector elems; splitString(s, delim, std::back_inserter(elems)); return elems; } static absl::optional readInt32(std::string const &data, int &offset) { if (offset + 4 > data.length()) { return absl::nullopt; } int32_t value = 0; memcpy(&value, data.data() + offset, 4); offset += 4; return value; } struct ChannelUpdate { int frameIndex = 0; int id = 0; uint32_t ssrc = 0; }; static std::vector parseChannelUpdates(std::string const &data, int &offset) { std::vector result; auto channels = readInt32(data, offset); if (!channels) { return {}; } auto count = readInt32(data, offset); if (!count) { return {}; } for (int i = 0; i < count.value(); i++) { auto frameIndex = readInt32(data, offset); if (!frameIndex) { return {}; } auto channelId = readInt32(data, offset); if (!channelId) { return {}; } auto ssrc = readInt32(data, offset); if (!ssrc) { return {}; } ChannelUpdate update; update.frameIndex = frameIndex.value(); update.id = channelId.value(); update.ssrc = ssrc.value(); result.push_back(update); } return result; } class AVIOContextImpl { public: AVIOContextImpl(std::vector &&fileData) : _fileData(std::move(fileData)) { _buffer.resize(4 * 1024); _context = avio_alloc_context(_buffer.data(), (int)_buffer.size(), 0, this, &AVIOContextImpl::read, NULL, &AVIOContextImpl::seek); } ~AVIOContextImpl() { av_free(_context); } static int read(void *opaque, unsigned char *buffer, int bufferSize) { AVIOContextImpl *instance = static_cast(opaque); int bytesToRead = std::min(bufferSize, ((int)instance->_fileData.size()) - instance->_fileReadPosition); if (bytesToRead < 0) { bytesToRead = 0; } if (bytesToRead > 0) { memcpy(buffer, instance->_fileData.data() + instance->_fileReadPosition, bytesToRead); instance->_fileReadPosition += bytesToRead; return bytesToRead; } else { return AVERROR_EOF; } } static int64_t seek(void *opaque, int64_t offset, int whence) { AVIOContextImpl *instance = static_cast(opaque); if (whence == 0x10000) { return (int64_t)instance->_fileData.size(); } else { int64_t seekOffset = std::min(offset, (int64_t)instance->_fileData.size()); if (seekOffset < 0) { seekOffset = 0; } instance->_fileReadPosition = (int)seekOffset; return seekOffset; } } AVIOContext *getContext() { return _context; } private: std::vector _fileData; int _fileReadPosition = 0; std::vector _buffer; AVIOContext *_context = nullptr; }; } struct ReadPcmResult { int numSamples = 0; int numChannels = 0; }; class AudioStreamingPartInternal { public: AudioStreamingPartInternal(std::vector &&fileData) : _avIoContext(std::move(fileData)) { int ret = 0; _frame = av_frame_alloc(); AVInputFormat *inputFormat = av_find_input_format("ogg"); if (!inputFormat) { _didReadToEnd = true; return; } _inputFormatContext = avformat_alloc_context(); if (!_inputFormatContext) { _didReadToEnd = true; return; } _inputFormatContext->pb = _avIoContext.getContext(); if ((ret = avformat_open_input(&_inputFormatContext, "", inputFormat, nullptr)) < 0) { _didReadToEnd = true; return; } if ((ret = avformat_find_stream_info(_inputFormatContext, nullptr)) < 0) { _didReadToEnd = true; avformat_close_input(&_inputFormatContext); _inputFormatContext = nullptr; return; } AVCodecParameters *audioCodecParameters = nullptr; AVStream *audioStream = nullptr; for (int i = 0; i < _inputFormatContext->nb_streams; i++) { AVStream *inStream = _inputFormatContext->streams[i]; AVCodecParameters *inCodecpar = inStream->codecpar; if (inCodecpar->codec_type != AVMEDIA_TYPE_AUDIO) { continue; } audioCodecParameters = inCodecpar; audioStream = inStream; _durationInMilliseconds = (int)((inStream->duration + inStream->first_dts) * 1000 / 48000); if (inStream->metadata) { AVDictionaryEntry *entry = av_dict_get(inStream->metadata, "TG_META", nullptr, 0); if (entry && entry->value) { std::string result; size_t data_used = 0; std::string sourceBase64 = (const char *)entry->value; rtc::Base64::Decode(sourceBase64, rtc::Base64::DO_LAX, &result, &data_used); if (result.size() != 0) { int offset = 0; _channelUpdates = parseChannelUpdates(result, offset); } } uint32_t videoChannelMask = 0; entry = av_dict_get(inStream->metadata, "ACTIVE_MASK", nullptr, 0); if (entry && entry->value) { std::string sourceString = (const char *)entry->value; videoChannelMask = stringToUInt32(sourceString); } std::vector endpointList; entry = av_dict_get(inStream->metadata, "ENDPOINTS", nullptr, 0); if (entry && entry->value) { std::string sourceString = (const char *)entry->value; endpointList = splitString(sourceString, ' '); } std::bitset<32> videoChannels(videoChannelMask); size_t endpointIndex = 0; if (videoChannels.count() == endpointList.size()) { for (size_t i = 0; i < videoChannels.size(); i++) { if (videoChannels[i]) { _endpointMapping.insert(std::make_pair(endpointList[endpointIndex], i)); endpointIndex++; } } } } break; } if (audioCodecParameters && audioStream) { AVCodec *codec = avcodec_find_decoder(audioCodecParameters->codec_id); if (codec) { _codecContext = avcodec_alloc_context3(codec); ret = avcodec_parameters_to_context(_codecContext, audioCodecParameters); if (ret < 0) { _didReadToEnd = true; avcodec_free_context(&_codecContext); _codecContext = nullptr; } else { _codecContext->pkt_timebase = audioStream->time_base; _channelCount = _codecContext->channels; ret = avcodec_open2(_codecContext, codec, nullptr); if (ret < 0) { _didReadToEnd = true; avcodec_free_context(&_codecContext); _codecContext = nullptr; } } } } } ~AudioStreamingPartInternal() { if (_frame) { av_frame_unref(_frame); } if (_codecContext) { avcodec_close(_codecContext); avcodec_free_context(&_codecContext); } if (_inputFormatContext) { avformat_close_input(&_inputFormatContext); } } ReadPcmResult readPcm(std::vector &outPcm) { int outPcmSampleOffset = 0; ReadPcmResult result; int readSamples = (int)outPcm.size() / _channelCount; result.numChannels = _channelCount; while (outPcmSampleOffset < readSamples) { if (_pcmBufferSampleOffset >= _pcmBufferSampleSize) { fillPcmBuffer(); if (_pcmBufferSampleOffset >= _pcmBufferSampleSize) { break; } } int readFromPcmBufferSamples = std::min(_pcmBufferSampleSize - _pcmBufferSampleOffset, readSamples - outPcmSampleOffset); if (readFromPcmBufferSamples != 0) { std::copy(_pcmBuffer.begin() + _pcmBufferSampleOffset * _channelCount, _pcmBuffer.begin() + _pcmBufferSampleOffset * _channelCount + readFromPcmBufferSamples * _channelCount, outPcm.begin() + outPcmSampleOffset * _channelCount); _pcmBufferSampleOffset += readFromPcmBufferSamples; outPcmSampleOffset += readFromPcmBufferSamples; result.numSamples += readFromPcmBufferSamples; } } return result; } int getDurationInMilliseconds() { return _durationInMilliseconds; } int getChannelCount() { return _channelCount; } std::vector const &getChannelUpdates() const { return _channelUpdates; } std::map getEndpointMapping() const { return _endpointMapping; } private: static int16_t sampleFloatToInt16(float sample) { return av_clip_int16 (static_cast(lrint(sample*32767))); } void fillPcmBuffer() { _pcmBufferSampleSize = 0; _pcmBufferSampleOffset = 0; if (_didReadToEnd) { return; } if (!_inputFormatContext) { _didReadToEnd = true; return; } if (!_codecContext) { _didReadToEnd = true; return; } int ret = 0; do { ret = av_read_frame(_inputFormatContext, &_packet); if (ret < 0) { _didReadToEnd = true; return; } ret = avcodec_send_packet(_codecContext, &_packet); if (ret < 0) { _didReadToEnd = true; return; } int bytesPerSample = av_get_bytes_per_sample(_codecContext->sample_fmt); if (bytesPerSample != 2 && bytesPerSample != 4) { _didReadToEnd = true; return; } ret = avcodec_receive_frame(_codecContext, _frame); } while (ret == AVERROR(EAGAIN)); if (ret != 0) { _didReadToEnd = true; return; } if (_frame->channels != _channelCount || _frame->channels > 8) { _didReadToEnd = true; return; } if (_pcmBuffer.size() < _frame->nb_samples * _frame->channels) { _pcmBuffer.resize(_frame->nb_samples * _frame->channels); } switch (_codecContext->sample_fmt) { case AV_SAMPLE_FMT_S16: { memcpy(_pcmBuffer.data(), _frame->data[0], _frame->nb_samples * 2 * _frame->channels); } break; case AV_SAMPLE_FMT_S16P: { int16_t *to = _pcmBuffer.data(); for (int sample = 0; sample < _frame->nb_samples; ++sample) { for (int channel = 0; channel < _frame->channels; ++channel) { int16_t *shortChannel = (int16_t*)_frame->data[channel]; *to++ = shortChannel[sample]; } } } break; case AV_SAMPLE_FMT_FLT: { float *floatData = (float *)&_frame->data[0]; for (int i = 0; i < _frame->nb_samples * _frame->channels; i++) { _pcmBuffer[i] = sampleFloatToInt16(floatData[i]); } } break; case AV_SAMPLE_FMT_FLTP: { int16_t *to = _pcmBuffer.data(); for (int sample = 0; sample < _frame->nb_samples; ++sample) { for (int channel = 0; channel < _frame->channels; ++channel) { float *floatChannel = (float*)_frame->data[channel]; *to++ = sampleFloatToInt16(floatChannel[sample]); } } } break; default: { //RTC_FATAL() << "Unexpected sample_fmt"; } break; } _pcmBufferSampleSize = _frame->nb_samples; _pcmBufferSampleOffset = 0; } private: AVIOContextImpl _avIoContext; AVFormatContext *_inputFormatContext = nullptr; AVPacket _packet; AVCodecContext *_codecContext = nullptr; AVFrame *_frame = nullptr; bool _didReadToEnd = false; int _durationInMilliseconds = 0; int _channelCount = 0; std::vector _channelUpdates; std::map _endpointMapping; std::vector _pcmBuffer; int _pcmBufferSampleOffset = 0; int _pcmBufferSampleSize = 0; }; class AudioStreamingPartState { struct ChannelMapping { uint32_t ssrc = 0; int channelIndex = 0; ChannelMapping(uint32_t ssrc_, int channelIndex_) : ssrc(ssrc_), channelIndex(channelIndex_) { } }; public: AudioStreamingPartState(std::vector &&data) : _parsedPart(std::move(data)) { if (_parsedPart.getChannelUpdates().size() == 0) { _didReadToEnd = true; return; } _remainingMilliseconds = _parsedPart.getDurationInMilliseconds(); _pcm10ms.resize(480 * _parsedPart.getChannelCount()); for (const auto &it : _parsedPart.getChannelUpdates()) { _allSsrcs.insert(it.ssrc); } } ~AudioStreamingPartState() { } std::map getEndpointMapping() const { return _parsedPart.getEndpointMapping(); } int getRemainingMilliseconds() const { return _remainingMilliseconds; } std::vector get10msPerChannel() { if (_didReadToEnd) { return {}; } for (const auto &update : _parsedPart.getChannelUpdates()) { if (update.frameIndex == _frameIndex) { updateCurrentMapping(update.ssrc, update.id); } } auto readResult = _parsedPart.readPcm(_pcm10ms); if (readResult.numSamples <= 0) { _didReadToEnd = true; return {}; } std::vector resultChannels; for (const auto ssrc : _allSsrcs) { AudioStreamingPart::StreamingPartChannel emptyPart; emptyPart.ssrc = ssrc; resultChannels.push_back(emptyPart); } for (auto &channel : resultChannels) { auto mappedChannelIndex = getCurrentMappedChannelIndex(channel.ssrc); if (mappedChannelIndex) { int sourceChannelIndex = mappedChannelIndex.value(); for (int j = 0; j < readResult.numSamples; j++) { channel.pcmData.push_back(_pcm10ms[sourceChannelIndex + j * readResult.numChannels]); } } else { for (int j = 0; j < readResult.numSamples; j++) { channel.pcmData.push_back(0); } } } _remainingMilliseconds -= 10; if (_remainingMilliseconds < 0) { _remainingMilliseconds = 0; } _frameIndex++; return resultChannels; } private: absl::optional getCurrentMappedChannelIndex(uint32_t ssrc) { for (const auto &it : _currentChannelMapping) { if (it.ssrc == ssrc) { return it.channelIndex; } } return absl::nullopt; } void updateCurrentMapping(uint32_t ssrc, int channelIndex) { for (int i = (int)_currentChannelMapping.size() - 1; i >= 0; i--) { const auto &entry = _currentChannelMapping[i]; if (entry.ssrc == ssrc && entry.channelIndex == channelIndex) { return; } else if (entry.ssrc == ssrc || entry.channelIndex == channelIndex) { _currentChannelMapping.erase(_currentChannelMapping.begin() + i); } } _currentChannelMapping.emplace_back(ssrc, channelIndex); } private: AudioStreamingPartInternal _parsedPart; std::set _allSsrcs; std::vector _pcm10ms; std::vector _currentChannelMapping; int _frameIndex = 0; int _remainingMilliseconds = 0; bool _didReadToEnd = false; }; AudioStreamingPart::AudioStreamingPart(std::vector &&data) { if (!data.empty()) { _state = new AudioStreamingPartState(std::move(data)); } } AudioStreamingPart::~AudioStreamingPart() { if (_state) { delete _state; } } std::map AudioStreamingPart::getEndpointMapping() const { return _state ? _state->getEndpointMapping() : std::map(); } int AudioStreamingPart::getRemainingMilliseconds() const { return _state ? _state->getRemainingMilliseconds() : 0; } std::vector AudioStreamingPart::get10msPerChannel() { return _state ? _state->get10msPerChannel() : std::vector(); } }