  • Mixing background (system) audio into a WebRTC stream

    Background

    On Windows, WebRTC is used for video capture and the live stream is pushed out over RTMP. By default WebRTC only captures the microphone; it does not capture the machine's background (system) audio, so capturing and mixing the background sound has to be implemented in code.

    Approach

    The Windows audio API (WASAPI) provides the capture methods we need, and Microsoft publishes a short sample for it, even though that sample does not run as-is :). So the plan is to capture PCM audio through the Windows API (loopback capture of the render endpoint) and then combine it with the microphone through WebRTC's conference (group-chat) mixing mechanism.
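
    The whole trick sits in two calls: request the default render endpoint (eRender) instead of a capture endpoint, and initialize the IAudioClient with AUDCLNT_STREAMFLAGS_LOOPBACK so that GetBuffer() returns whatever the machine is currently playing. Below is a stripped-down sketch of just that part (COM initialization and error handling omitted; OpenLoopbackClient is an illustrative helper name, the full routine follows under "Core code"):

    #include <mmdeviceapi.h>
    #include <audioclient.h>

    // Open a shared-mode loopback client on the default render device.
    // Error handling omitted for brevity.
    IAudioClient* OpenLoopbackClient()
    {
        IMMDeviceEnumerator* enumerator = NULL;
        IMMDevice* device = NULL;
        IAudioClient* client = NULL;
        WAVEFORMATEX* format = NULL;

        CoCreateInstance(__uuidof(MMDeviceEnumerator), NULL, CLSCTX_ALL,
            __uuidof(IMMDeviceEnumerator), (void**)&enumerator);
        enumerator->GetDefaultAudioEndpoint(eRender, eConsole, &device);    // render endpoint, not capture
        device->Activate(__uuidof(IAudioClient), CLSCTX_ALL, NULL, (void**)&client);
        client->GetMixFormat(&format);
        client->Initialize(AUDCLNT_SHAREMODE_SHARED,
            AUDCLNT_STREAMFLAGS_LOOPBACK,               // capture whatever the machine is playing
            10 * 1000 * 1000 /* 1 s in 100-ns units */,
            0, format, NULL);
        return client;
    }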

    Core code

     Audio capture

    DWORD AudioCaptureCore::DoCaptureThread()
    {
        keepRecording_ = true;
        HANDLE waitArray[2] = { _hShutdownCaptureEvent, _hCaptureSamplesReadyEvent };
        HRESULT hr = S_OK;
    
        LARGE_INTEGER t1;
        LARGE_INTEGER t2;
        int32_t time(0);
    
        BYTE* syncBuffer = NULL;
        UINT32 syncBufIndex = 0;
    
        _readSamples = 0;
    
        // Initialize COM as MTA in this thread.
        ScopedCOMInitializer comInit(ScopedCOMInitializer::kMTA);
        if (!comInit.succeeded()) {
            WEBRTC_TRACE(kTraceError, kTraceAudioDevice, _id,
                "failed to initialize COM in capture thread");
            return 1;
        }
    
        hr = InitCaptureThreadPriority();
        if (FAILED(hr))
        {
            return hr;
        }
    
        _Lock();
    
    
        REFERENCE_TIME hnsRequestedDuration = REFTIMES_PER_SEC;
        REFERENCE_TIME hnsActualDuration;
        UINT32 bufferLength;
        IMMDeviceEnumerator *pEnumerator = NULL;
        IMMDevice *pDevice = NULL;
        WAVEFORMATEX *pwfx = NULL;
    
    
        hr = CoCreateInstance(CLSID_MMDeviceEnumerator, NULL, CLSCTX_ALL,
            IID_IMMDeviceEnumerator, (void**)&pEnumerator);
        EXIT_ON_ERROR(hr);
    
        hr = pEnumerator->GetDefaultAudioEndpoint(eRender, eConsole, &pDevice);
        EXIT_ON_ERROR(hr);

        // Loopback capture is opened on the default *render* endpoint (eRender above).
        hr = pDevice->Activate(IID_IAudioClient, CLSCTX_ALL, NULL, (void**)&_ptrAudioClientIn);
        EXIT_ON_ERROR(hr);
    
        // Query the render endpoint's shared-mode mix format; only its sample rate is kept below.
        hr = _ptrAudioClientIn->GetMixFormat(&pwfx);
        EXIT_ON_ERROR(hr);
    
        // Overwrite the returned mix format with plain 16-bit stereo PCM at the
        // device's native sample rate (cbSize = 0 since there is no extension block).
        WAVEFORMATEX waveFormat;
        waveFormat.wFormatTag = WAVE_FORMAT_PCM;
        waveFormat.nChannels = 2;
        waveFormat.nSamplesPerSec = pwfx->nSamplesPerSec;
        waveFormat.nAvgBytesPerSec = pwfx->nSamplesPerSec * 4;
        waveFormat.wBitsPerSample = 16;
        waveFormat.nBlockAlign = 4;
        waveFormat.cbSize = 0;

        *pwfx = waveFormat;
    
        hr = _ptrAudioClientIn->Initialize(AUDCLNT_SHAREMODE_SHARED,
            AUDCLNT_STREAMFLAGS_LOOPBACK,
            hnsRequestedDuration,
            0,
            pwfx,
            NULL);
        EXIT_ON_ERROR(hr);
    
        // Set the VoE format equal to the AEC output format.
        _recAudioFrameSize = pwfx->nBlockAlign;
        _recSampleRate = pwfx->nSamplesPerSec;
        _recBlockSize = pwfx->nSamplesPerSec / 100;
        _recChannels = pwfx->nChannels;

        if (_ptrAudioBuffer)
        {
            // Update the audio buffer with the selected parameters.
            _ptrAudioBuffer->SetRecordingSampleRate(_recSampleRate);
            _ptrAudioBuffer->SetRecordingChannels((uint8_t)_recChannels);
        }
        else
        {
            // We can enter this state during CoreAudioIsSupported() when no AudioDeviceImplementation
            // has been created, hence the AudioDeviceBuffer does not exist.
            // It is OK to end up here since we don't initiate any media in CoreAudioIsSupported().
            WEBRTC_TRACE(kTraceInfo, kTraceAudioDevice, _id, "AudioDeviceBuffer must be attached before streaming can start");
        }

        // Get the size of the allocated buffer.
        hr = _ptrAudioClientIn->GetBufferSize(&bufferLength);
        EXIT_ON_ERROR(hr);

        hr = _ptrAudioClientIn->GetService(__uuidof(IAudioCaptureClient), (void**)&_ptrCaptureClient);
        EXIT_ON_ERROR(hr);

        // Notify the audio sink which format to use.
        // In the original sample the code below passes the negotiated format to a
        // user-defined sink class. Since the format was set manually above, no
        // notification is needed here.
        // hr = pMySink->SetFormat(pwfx);
        // EXIT_ON_ERROR(hr)

        // Calculate the actual duration of the allocated buffer.
        hnsActualDuration = (double)REFTIMES_PER_SEC * bufferLength / pwfx->nSamplesPerSec;

        //hr = _ptrAudioClientIn->Start();  // Start recording.
        //EXIT_ON_ERROR(hr);
    
    
        // Get size of capturing buffer (length is expressed as the number of audio frames the buffer can hold).
        // This value is fixed during the capturing session.
        //
        if (_ptrAudioClientIn == NULL)
        {
            WEBRTC_TRACE(kTraceError, kTraceAudioDevice, _id,
                "input state has been modified before capture loop starts.");
            return 1;
        }
        hr = _ptrAudioClientIn->GetBufferSize(&bufferLength);
        EXIT_ON_ERROR(hr);
        WEBRTC_TRACE(kTraceInfo, kTraceAudioDevice, _id, "[CAPT] size of buffer       : %u", bufferLength);
    
        // Allocate memory for sync buffer.
        // It is used for compensation between native 44.1 and internal 44.0 and
        // for cases when the capture buffer is larger than 10ms.
        //
        const UINT32 syncBufferSize = 2 * (bufferLength * _recAudioFrameSize);
        syncBuffer = new BYTE[syncBufferSize];
        if (syncBuffer == NULL)
        {
            return (DWORD)E_POINTER;
        }
        WEBRTC_TRACE(kTraceInfo, kTraceAudioDevice, _id, "[CAPT] size of sync buffer  : %u [bytes]", syncBufferSize);
    
        // Get maximum latency for the current stream (will not change for the lifetime of the IAudioClient object).
        //
        REFERENCE_TIME latency;
        _ptrAudioClientIn->GetStreamLatency(&latency);
        WEBRTC_TRACE(kTraceInfo, kTraceAudioDevice, _id, "[CAPT] max stream latency   : %u (%3.2f ms)",
            (DWORD)latency, (double)(latency / 10000.0));
    
        // Get the length of the periodic interval separating successive processing passes by
        // the audio engine on the data in the endpoint buffer.
        //
        REFERENCE_TIME devPeriod = 0;
        REFERENCE_TIME devPeriodMin = 0;
        _ptrAudioClientIn->GetDevicePeriod(&devPeriod, &devPeriodMin);
        WEBRTC_TRACE(kTraceInfo, kTraceAudioDevice, _id, "[CAPT] device period        : %u (%3.2f ms)",
            (DWORD)devPeriod, (double)(devPeriod / 10000.0));
    
        double extraDelayMS = (double)((latency + devPeriod) / 10000.0);
        WEBRTC_TRACE(kTraceInfo, kTraceAudioDevice, _id, "[CAPT] extraDelayMS         : %3.2f", extraDelayMS);
    
        double endpointBufferSizeMS = 10.0 * ((double)bufferLength / (double)_recBlockSize);
        WEBRTC_TRACE(kTraceInfo, kTraceAudioDevice, _id, "[CAPT] endpointBufferSizeMS : %3.2f", endpointBufferSizeMS);
    
        // Start up the capturing stream.
        //
        hr = _ptrAudioClientIn->Start();
        EXIT_ON_ERROR(hr);
    
        _UnLock();
    
        // Set event which will ensure that the calling thread modifies the recording state to true.
        //
        SetEvent(_hCaptureStartedEvent);
    
        // >> ---------------------------- THREAD LOOP ----------------------------
    
    
            while (keepRecording_)
            {
                BYTE *pData = 0;
                UINT32 framesAvailable = 0;
                DWORD flags = 0;
                UINT64 recTime = 0;
                UINT64 recPos = 0;
    
                std::cout << "bgm audio capturing" << std::endl;
    
                _Lock();
    
                // Sanity check to ensure that essential states are not modified
                // during the unlocked period.
                if (_ptrCaptureClient == NULL || _ptrAudioClientIn == NULL)
                {
                    _UnLock();
                    WEBRTC_TRACE(kTraceCritical, kTraceAudioDevice, _id,
                        "input state has been modified during unlocked period");
                    goto Exit;
                }
    
                //  Find out how much capture data is available
                //
                hr = _ptrCaptureClient->GetBuffer(&pData,           // packet which is ready to be read by user
                    &framesAvailable, // #frames in the captured packet (can be zero)
                    &flags,           // support flags (check)
                    &recPos,          // device position of first audio frame in data packet
                    &recTime);        // value of performance counter at the time of recording the first audio frame
    
                if (SUCCEEDED(hr))
                {
                    if (AUDCLNT_S_BUFFER_EMPTY == hr)
                    {
                        // Buffer was empty => start waiting for a new capture notification event
                        _UnLock();
                        continue;
                    }
    
                    if (flags & AUDCLNT_BUFFERFLAGS_SILENT)
                    {
                        // Treat all of the data in the packet as silence and ignore the actual data values.
                        WEBRTC_TRACE(kTraceWarning, kTraceAudioDevice, _id, "AUDCLNT_BUFFERFLAGS_SILENT");
                        pData = NULL;
                    }
    
                    assert(framesAvailable != 0);
    
                    if (pData)
                    {
                        CopyMemory(&syncBuffer[syncBufIndex*_recAudioFrameSize], pData, framesAvailable*_recAudioFrameSize);
                    }
                    else
                    {
                        ZeroMemory(&syncBuffer[syncBufIndex*_recAudioFrameSize], framesAvailable*_recAudioFrameSize);
                    }
                    assert(syncBufferSize >= (syncBufIndex*_recAudioFrameSize) + framesAvailable*_recAudioFrameSize);
    
                    // Release the capture buffer
                    //
                    hr = _ptrCaptureClient->ReleaseBuffer(framesAvailable);
                    EXIT_ON_ERROR(hr);
    
                    _readSamples += framesAvailable;
                    syncBufIndex += framesAvailable;
    
                    QueryPerformanceCounter(&t1);
    
                    // Get the current recording and playout delay.
                    uint32_t sndCardRecDelay = (uint32_t)
                        (((((UINT64)t1.QuadPart * _perfCounterFactor) - recTime)
                            / 10000) + (10 * syncBufIndex) / _recBlockSize - 10);
                    uint32_t sndCardPlayDelay =
                        static_cast<uint32_t>(_sndCardPlayDelay);
    
                    _sndCardRecDelay = sndCardRecDelay;
    
                    while (syncBufIndex >= _recBlockSize)
                    {
                        if (_ptrAudioBuffer)
                        {
                            _ptrAudioBuffer->SetRecordedBuffer((const int8_t*)syncBuffer, _recBlockSize);
                            _ptrAudioBuffer->SetVQEData(sndCardPlayDelay,
                                sndCardRecDelay,
                                0);
    
                            _ptrAudioBuffer->SetTypingStatus(KeyPressed());
    
                            QueryPerformanceCounter(&t1);    // measure time: START
    
                            _UnLock();  // release lock while making the callback
                            _ptrAudioBuffer->DeliverRecordedData();
                            _Lock();    // restore the lock
    
                            QueryPerformanceCounter(&t2);    // measure time: STOP
    
                            // Measure "average CPU load".
                            // Basically what we do here is to measure how many percent of our 10ms period
                            // is used for encoding and decoding. This value should be used as a warning indicator
                            // only and not seen as an absolute value. Running at ~100% will lead to bad QoS.
                            time = (int)(t2.QuadPart - t1.QuadPart);
                            _avgCPULoad = (float)(_avgCPULoad*.99 + (time + _playAcc) / (double)(_perfCounterFreq.QuadPart));
                            _playAcc = 0;
    
                            // Sanity check to ensure that essential states are not modified during the unlocked period
                            if (_ptrCaptureClient == NULL || _ptrAudioClientIn == NULL)
                            {
                                _UnLock();
                                WEBRTC_TRACE(kTraceCritical, kTraceAudioDevice, _id, "input state has been modified during unlocked period");
                                goto Exit;
                            }
                        }
    
                        // store remaining data which was not able to deliver as 10ms segment
                        MoveMemory(&syncBuffer[0], &syncBuffer[_recBlockSize*_recAudioFrameSize], (syncBufIndex - _recBlockSize)*_recAudioFrameSize);
                        syncBufIndex -= _recBlockSize;
                        sndCardRecDelay -= 10;
                    }
    
                    if (_AGC)
                    {
                        uint32_t newMicLevel = _ptrAudioBuffer->NewMicLevel();
                        if (newMicLevel != 0)
                        {
                            // The VQE will only deliver non-zero microphone levels when a change is needed.
                            // Set this new mic level (received from the observer as return value in the callback).
                            WEBRTC_TRACE(kTraceStream, kTraceAudioDevice, _id, "AGC change of volume: new=%u", newMicLevel);
                            // We store this outside of the audio buffer to avoid
                            // having it overwritten by the getter thread.
                            _newMicLevel = newMicLevel;
                            SetEvent(_hSetCaptureVolumeEvent);
                        }
                    }
                }
                else
                {
                    // If GetBuffer returns AUDCLNT_E_BUFFER_ERROR, the thread consuming the audio samples
                    // must wait for the next processing pass. The client might benefit from keeping a count
                    // of the failed GetBuffer calls. If GetBuffer returns this error repeatedly, the client
                    // can start a new processing loop after shutting down the current client by calling
                    // IAudioClient::Stop, IAudioClient::Reset, and releasing the audio client.
                    WEBRTC_TRACE(kTraceError, kTraceAudioDevice, _id,
                        "IAudioCaptureClient::GetBuffer returned AUDCLNT_E_BUFFER_ERROR, hr = 0x%08X", hr);
                    goto Exit;
                }
    
                _UnLock();
            }
    
        // ---------------------------- THREAD LOOP ---------------------------- <<
    
        if (_ptrAudioClientIn)
        {
            hr = _ptrAudioClientIn->Stop();
        }
    
    Exit:
        if (FAILED(hr))
        {
            if (_ptrAudioClientIn)
            {
                _ptrAudioClientIn->Stop();
            }
            _UnLock();
            _TraceCOMError(hr);
        }
    
        RevertCaptureThreadPriority();
    
        if (syncBuffer)
        {
            delete[] syncBuffer;
        }
    
        return (DWORD)hr;
    }
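
    How this routine is driven is not shown above; a plausible sketch follows. The static trampoline, the StartCapture()/StopCapture() helpers and the _hRecThread member are illustrative assumptions; only DoCaptureThread(), keepRecording_ and _hShutdownCaptureEvent come from the code above.

    DWORD WINAPI AudioCaptureCore::WSAPICaptureThread(LPVOID context)
    {
        // Trampoline into the member function above (assumed helper).
        return reinterpret_cast<AudioCaptureCore*>(context)->DoCaptureThread();
    }

    bool AudioCaptureCore::StartCapture()
    {
        _hRecThread = CreateThread(NULL, 0, WSAPICaptureThread, this, 0, NULL);
        return _hRecThread != NULL;
    }

    void AudioCaptureCore::StopCapture()
    {
        keepRecording_ = false;               // makes the capture loop fall through
        SetEvent(_hShutdownCaptureEvent);     // wakes the thread if it is waiting
        WaitForSingleObject(_hRecThread, 2000);
        CloseHandle(_hRecThread);
        _hRecThread = NULL;
    }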

    Audio mixing

    Create webrtc::AudioConferenceMixer *audio_mixer_ = nullptr; as a member; it performs the mixing whenever more than one audio source (microphone plus background sound) is active, as sketched below.
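
    A hedged sketch of the wiring, using the legacy modules/audio_conference_mixer interface (exact signatures and header paths differ between WebRTC revisions, and SetupBackgroundMixing, mixed_receiver_, mic_participant_ and bgm_participant_ are assumed names, not the original project's code): each source is wrapped in a MixerParticipant that supplies one 10 ms AudioFrame per Process() call, and the mixed result arrives in an AudioMixerOutputReceiver.

    // Illustrative only; see the note above. Header paths vary by WebRTC revision:
    // #include "webrtc/modules/audio_conference_mixer/include/audio_conference_mixer.h"
    // #include "webrtc/modules/audio_conference_mixer/include/audio_conference_mixer_defines.h"

    class PcmParticipant : public webrtc::MixerParticipant
    {
    public:
        // Called by the mixer during Process(); fill |frame| with the latest 10 ms of PCM
        // (sample rate, channels and samples_per_channel_ must be set on the frame).
        int32_t GetAudioFrame(int32_t id, webrtc::AudioFrame* frame) override
        {
            // copy pending samples from the capture path into |frame|
            return 0;
        }
        int32_t NeededFrequency(int32_t id) const override { return 48000; }
    };

    class MixedAudioReceiver : public webrtc::AudioMixerOutputReceiver
    {
    public:
        void NewMixedAudio(int32_t id, const webrtc::AudioFrame& mixed,
            const webrtc::AudioFrame** uniqueAudioFrames, uint32_t size) override
        {
            // hand the mixed 10 ms frame to the RTMP audio encoder
        }
    };

    // One-time setup, e.g. during AnyRtmpCore initialization (member names assumed):
    void AnyRtmpCore::SetupBackgroundMixing()
    {
        audio_mixer_ = webrtc::AudioConferenceMixer::Create(0 /*id*/);
        audio_mixer_->RegisterMixedStreamCallback(&mixed_receiver_);   // where the mixed audio comes out
        audio_mixer_->SetMixabilityStatus(&mic_participant_, true);    // microphone source
        audio_mixer_->SetMixabilityStatus(&bgm_participant_, true);    // loopback / background source
        // audio_mixer_->Process() is then driven every 10 ms from RecordedDataIsAvailable(), below.
    }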

    int32_t AnyRtmpCore::RecordedDataIsAvailable(const void* audioSamples, const size_t nSamples,
        const size_t nBytesPerSample, const size_t nChannels, const uint32_t samplesPerSec, const uint32_t totalDelayMS,
        const int32_t clockDrift, const uint32_t currentMicLevel, const bool keyPressed, uint32_t& newMicLevel)
    {
        std::cout << "[-----------] record data avaliable " << nSamples << nBytesPerSample << nChannels << samplesPerSec << std::endl;
        rtc::CritScope cs(&cs_audio_record_);
    
        if (microphone_enable_ && bgm_enable_) {
            audio_device_mixer_ptr_->RecordedDataIsAvailable(audioSamples, nSamples,
                nBytesPerSample, nChannels, samplesPerSec, totalDelayMS,
                clockDrift, currentMicLevel, keyPressed, newMicLevel);
            if (audio_mixer_) {
                audio_mixer_->Process();
            }        
        }
        else
        {
            // Only one source is active, so deliver it directly without mixing.
            if (audio_record_callback_) {
                if (audio_record_sample_hz_ != samplesPerSec || nChannels != audio_record_channels_) {
                    int16_t temp_output[kMaxDataSizeSamples];
                    int samples_per_channel_int = resampler_record_.Resample10Msec((int16_t*)audioSamples, samplesPerSec * nChannels,
                        audio_record_sample_hz_ * audio_record_channels_, 1, kMaxDataSizeSamples, temp_output);
                    audio_record_callback_->OnRecordAudio(temp_output, audio_record_sample_hz_ / 100, nBytesPerSample, audio_record_channels_, audio_record_sample_hz_, totalDelayMS);
                }
                else {
                    audio_record_callback_->OnRecordAudio(audioSamples, nSamples, nBytesPerSample, audio_record_channels_, samplesPerSec, totalDelayMS);
                }
            }
        }
            
        return 0;
    }
  • Original post: https://www.cnblogs.com/vectorli/p/12810196.html