zoukankan      html  css  js  c++  java
  • (转载)静音检测VAD算法

    转:https://segmentfault.com/a/1190000015432946

    最近把opus编码器里的VAD算法提取了出来,之前在网上没找到合适的开源VAD模块,就把代码放在这里吧,希望能帮助到人。
    下面是.h文件和.cpp文件,使用的时候,需要调用silk_VAD_Get()这个函数,每次输入一个帧(我默认了帧长是20ms,采样率16khz,可以自己在silk_VAD_Get里修改),返回0或者1,代表该帧是否为静音帧。
    .h文件代码:

    #include <stdlib.h>
    #include <malloc.h>
    #include <intrin.h>
    #include <string.h>
    
    int silk_VAD_Get(
        //int          state,                       /*  Encoder state                               */
        const short            pIn[]                           /* I    PCM input                                   */
    );
    
    #define TYPE_NO_VOICE_ACTIVITY                  0
    #define TYPE_UNVOICED                           1
    #define TYPE_VOICED                             2
    
    #define SPEECH_ACTIVITY_DTX_THRES                       0.05f
    #define SILK_FIX_CONST( C, Q )              ((int)((C) * ((long)1 << (Q)) + 0.5))
    #define silk_int16_MAX   0x7FFF                               /*  2^15 - 1 =  32767 */
    #define silk_int16_MIN   ((short)0x8000)                 /* -2^15     = -32768 */
    #define silk_int32_MAX   0x7FFFFFFF                           /*  2^31 - 1 =  2147483647 */
    #define silk_int32_MIN   ((int)0x80000000)             /* -2^31     = -2147483648 */
    #define silk_memset(dest, src, size)        memset((dest), (src), (size))
    
    #define VAD_NOISE_LEVEL_SMOOTH_COEF_Q16         1024    /* Must be <  4096 */
    #define VAD_NOISE_LEVELS_BIAS                   50
    
    /* Sigmoid settings */
    #define VAD_NEGATIVE_OFFSET_Q5                  128     /* sigmoid is 0 at -128 */
    #define VAD_SNR_FACTOR_Q16                      45000
    
    /* smoothing for SNR measurement */
    #define VAD_SNR_SMOOTH_COEF_Q18                 4096
    
    #define VAD_N_BANDS 4
    #define VAD_INTERNAL_SUBFRAMES_LOG2             2
    #define VAD_INTERNAL_SUBFRAMES                  ( 1 << VAD_INTERNAL_SUBFRAMES_LOG2 )
    #define silk_uint8_MAX   0xFF                                 /*  2^8 - 1 = 255 */
    
    #define VARDECL(type, var) type *var
    #define silk_RSHIFT32(a, shift)             ((a)>>(shift))
    #define silk_RSHIFT(a, shift)             ((a)>>(shift))
    #define silk_LSHIFT32(a, shift)             ((a)<<(shift))
    #define silk_LSHIFT(a, shift)             ((a)<<(shift))
    #define ALLOC(var, size, type) var = ((type*)alloca(sizeof(type)*(size)))
    #define silk_ADD16(a, b)                    ((a) + (b))
    #define silk_ADD32(a, b)                    ((a) + (b))
    #define silk_ADD64(a, b)                    ((a) + (b))
    
    #define silk_SUB16(a, b)                    ((a) - (b))
    #define silk_SUB32(a, b)                    ((a) - (b))
    #define silk_SUB64(a, b)                    ((a) - (b))
    #define silk_SMULWB(a32, b32)            ((((a32) >> 16) * (int)((short)(b32))) + ((((a32) & 0x0000FFFF) * (int)((short)(b32))) >> 16))
    #define silk_SMLAWB(a32, b32, c32)       ((a32) + ((((b32) >> 16) * (int)((short)(c32))) + ((((b32) & 0x0000FFFF) * (int)((short)(c32))) >> 16)))
    #define silk_SAT16(a)                       ((a) > silk_int16_MAX ? silk_int16_MAX :      
                                                ((a) < silk_int16_MIN ? silk_int16_MIN : (a)))
    #define silk_MLA(a32, b32, c32)             silk_ADD32((a32),((b32) * (c32)))
    #define silk_SMLABB(a32, b32, c32)       ((a32) + ((int)((short)(b32))) * (int)((short)(c32)))
    #define silk_ADD_POS_SAT32(a, b)            ((((unsigned int)(a)+(unsigned int)(b)) & 0x80000000) ? silk_int32_MAX : ((a)+(b)))
    #define silk_ADD_POS_SAT32(a, b)            ((((unsigned int)(a)+(unsigned int)(b)) & 0x80000000) ? silk_int32_MAX : ((a)+(b)))
    #define silk_DIV32_16(a32, b16)             ((int)((a32) / (b16)))
    #define silk_DIV32(a32, b32)                ((int)((a32) / (b32)))
    #define silk_RSHIFT_ROUND(a, shift)         ((shift) == 1 ? ((a) >> 1) + ((a) & 1) : (((a) >> ((shift) - 1)) + 1) >> 1)
    
    #define silk_SMULWW(a32, b32)            silk_MLA(silk_SMULWB((a32), (b32)), (a32), silk_RSHIFT_ROUND((b32), 16))
    #define silk_min(a, b)                      (((a) < (b)) ? (a) : (b))
    #define silk_max(a, b)                      (((a) > (b)) ? (a) : (b))
    #define silk_ADD_LSHIFT32(a, b, shift)      silk_ADD32((a), silk_LSHIFT32((b), (shift)))    /* shift >= 0 */
    #define silk_MUL(a32, b32)                  ((a32) * (b32))
    #define silk_SMULBB(a32, b32)            ((int)((short)(a32)) * (int)((short)(b32)))
    #define silk_LIMIT( a, limit1, limit2)      ((limit1) > (limit2) ? ((a) > (limit1) ? (limit1) : ((a) < (limit2) ? (limit2) : (a))) 
                                                                     : ((a) > (limit2) ? (limit2) : ((a) < (limit1) ? (limit1) : (a))))
    
    #define silk_LSHIFT_SAT32(a, shift)         (silk_LSHIFT32( silk_LIMIT( (a), silk_RSHIFT32( silk_int32_MIN, (shift) ), 
                                                        silk_RSHIFT32( silk_int32_MAX, (shift) ) ), (shift) ))
    
    
    
    
    
    
    
    static const int tiltWeights[VAD_N_BANDS] = { 30000, 6000, -12000, -12000 };
    static const int sigm_LUT_neg_Q15[6] = {
        16384, 8812, 3906, 1554, 589, 219
    };
    static const int sigm_LUT_slope_Q10[6] = {
        237, 153, 73, 30, 12, 7
    };
    static const int sigm_LUT_pos_Q15[6] = {
        16384, 23955, 28861, 31213, 32178, 32548
    };
    
    static __inline int ec_bsr(unsigned long _x) {
        unsigned long ret;
        _BitScanReverse(&ret, _x);
        return (int)ret;
    }
    # define EC_CLZ0    (1)
    # define EC_CLZ(_x) (-ec_bsr(_x))
    # define EC_ILOG(_x) (EC_CLZ0-EC_CLZ(_x))
    static int silk_min_int(int a, int b)
    {
        return (((a) < (b)) ? (a) : (b));
    }
    static int silk_max_int(int a, int b)
    {
        return (((a) > (b)) ? (a) : (b));
    }
    static int silk_max_32(int a, int b)
    {
        return (((a) > (b)) ? (a) : (b));
    }
    static  int silk_CLZ32(int in32)
    {
        return in32 ? 32 - EC_ILOG(in32) : 32;
    }
    static  int silk_ROR32(int a32, int rot)
    {
        unsigned int x = (unsigned int)a32;
        unsigned int r = (unsigned int)rot;
        unsigned int m = (unsigned int)-rot;
        if (rot == 0) {
            return a32;
        }
        else if (rot < 0) {
            return (int)((x << m) | (x >> (32 - m)));
        }
        else {
            return (int)((x << (32 - r)) | (x >> r));
        }
    }
    static  void silk_CLZ_FRAC(
        int in,            /* I  input                               */
        int *lz,           /* O  number of leading zeros             */
        int *frac_Q7       /* O  the 7 bits right after the leading one */
    )
    {
        int lzeros = silk_CLZ32(in);
    
        *lz = lzeros;
        *frac_Q7 = silk_ROR32(in, 24 - lzeros) & 0x7f;
    }
    
    
    /* Approximation of square root                                          */
    /* Accuracy: < +/- 10%  for output values > 15                           */
    /*           < +/- 2.5% for output values > 120                          */
    static  int silk_SQRT_APPROX(int x)
    {
        int y, lz, frac_Q7;
    
        if (x <= 0) {
            return 0;
        }
    
        silk_CLZ_FRAC(x, &lz, &frac_Q7);
    
        if (lz & 1) {
            y = 32768;
        }
        else {
            y = 46214;        /* 46214 = sqrt(2) * 32768 */
        }
    
        /* get scaling right */
        y >>= silk_RSHIFT(lz, 1);
    
        /* increment using fractional part of input */
        y = silk_SMLAWB(y, y, silk_SMULBB(213, frac_Q7));
    
        return y;
    }
    

      .cpp文件代码:

    #include "opusvad.h"
    #include <stdlib.h>
    
    static short A_fb1_20 = 5394 << 1;
    static short A_fb1_21 = -24290; /* (int16)(20623 << 1) */
    
    typedef struct {
        int                  AnaState[2];                  /* Analysis filterbank state: 0-8 kHz                                   */
        int                  AnaState1[2];                 /* Analysis filterbank state: 0-4 kHz                                   */
        int                  AnaState2[2];                 /* Analysis filterbank state: 0-2 kHz                                   */
        int                  XnrgSubfr[4];       /* Subframe energies                                                    */
        int                  NrgRatioSmth_Q8[VAD_N_BANDS]; /* Smoothed energy level in each band                                   */
        short                 HPstate;                        /* State of differentiator in the lowest band                           */
        int                  NL[VAD_N_BANDS];              /* Noise energy level in each band                                      */
        int                  inv_NL[VAD_N_BANDS];          /* Inverse noise energy level in each band                              */
        int                  NoiseLevelBias[VAD_N_BANDS];  /* Noise level estimator bias/offset                                    */
        int                  counter;                        /* Frame counter used in the initial phase                              */
    } VAD_state;
    
    /* Split signal into two decimated bands using first-order allpass filters */
    void silk_ana_filt_bank_1(
        const short            *in,                /* I    Input signal [N]                                            */
        int                  *S,                 /* I/O  State vector [2]                                            */
        short                  *outL,              /* O    Low band [N/2]                                              */
        short                  *outH,              /* O    High band [N/2]                                             */
        const int            N                   /* I    Number of input samples                                     */
    )
    {
        int      k, N2 = silk_RSHIFT(N, 1);
        int    in32, X, Y, out_1, out_2;
    
        /* Internal variables and state are in Q10 format */
        for (k = 0; k < N2; k++) {
            /* Convert to Q10 */
            in32 = silk_LSHIFT((int)in[2 * k], 10);
    
            /* All-pass section for even input sample */
            Y = silk_SUB32(in32, S[0]);
            X = silk_SMLAWB(Y, Y, A_fb1_21);
            out_1 = silk_ADD32(S[0], X);
            S[0] = silk_ADD32(in32, X);
    
            /* Convert to Q10 */
            in32 = silk_LSHIFT((int)in[2 * k + 1], 10);
    
            /* All-pass section for odd input sample, and add to output of previous section */
            Y = silk_SUB32(in32, S[1]);
            X = silk_SMULWB(Y, A_fb1_20);
            out_2 = silk_ADD32(S[1], X);
            S[1] = silk_ADD32(in32, X);
    
            /* Add/subtract, convert back to int16 and store to output */
            outL[k] = (short)silk_SAT16(silk_RSHIFT_ROUND(silk_ADD32(out_2, out_1), 11));
            outH[k] = (short)silk_SAT16(silk_RSHIFT_ROUND(silk_SUB32(out_2, out_1), 11));
        }
    }
    
    void silk_VAD_GetNoiseLevels(
        const int            pX[VAD_N_BANDS],  /* I    subband energies                            */
        VAD_state              *psSilk_VAD         /* I/O  Pointer to Silk VAD state                   */
    )
    {
        int   k;
        int nl, nrg, inv_nrg;
        int   coef, min_coef;
    
        /* Initially faster smoothing */
        if (psSilk_VAD->counter < 1000) { /* 1000 = 20 sec */
            min_coef = silk_DIV32_16(silk_int16_MAX, silk_RSHIFT(psSilk_VAD->counter, 4) + 1);
        }
        else {
            min_coef = 0;
        }
    
        for (k = 0; k < VAD_N_BANDS; k++) {
            /* Get old noise level estimate for current band */
            nl = psSilk_VAD->NL[k];
            //silk_assert(nl >= 0);
    
            /* Add bias */
            nrg = silk_ADD_POS_SAT32(pX[k], psSilk_VAD->NoiseLevelBias[k]);
            //silk_assert(nrg > 0);
    
            /* Invert energies */
            inv_nrg = silk_DIV32(silk_int32_MAX, nrg);
            //silk_assert(inv_nrg >= 0);
    
            /* Less update when subband energy is high */
            if (nrg > silk_LSHIFT(nl, 3)) {
                coef = VAD_NOISE_LEVEL_SMOOTH_COEF_Q16 >> 3;
            }
            else if (nrg < nl) {
                coef = VAD_NOISE_LEVEL_SMOOTH_COEF_Q16;
            }
            else {
                coef = silk_SMULWB(silk_SMULWW(inv_nrg, nl), VAD_NOISE_LEVEL_SMOOTH_COEF_Q16 << 1);
            }
    
            /* Initially faster smoothing */
            coef = silk_max_int(coef, min_coef);
    
            /* Smooth inverse energies */
            psSilk_VAD->inv_NL[k] = silk_SMLAWB(psSilk_VAD->inv_NL[k], inv_nrg - psSilk_VAD->inv_NL[k], coef);
            //silk_assert(psSilk_VAD->inv_NL[k] >= 0);
    
            /* Compute noise level by inverting again */
            nl = silk_DIV32(silk_int32_MAX, psSilk_VAD->inv_NL[k]);
            //silk_assert(nl >= 0);
    
            /* Limit noise levels (guarantee 7 bits of head room) */
            nl = silk_min(nl, 0x00FFFFFF);
    
            /* Store as part of state */
            psSilk_VAD->NL[k] = nl;
        }
    
        /* Increment frame counter */
        psSilk_VAD->counter++;
    }
    
    int silk_lin2log(
        const int            inLin               /* I  input in linear scale                                         */
    )
    {
        int lz, frac_Q7;
    
        silk_CLZ_FRAC(inLin, &lz, &frac_Q7);
    
        /* Piece-wise parabolic approximation */
        return silk_ADD_LSHIFT32(silk_SMLAWB(frac_Q7, silk_MUL(frac_Q7, 128 - frac_Q7), 179), 31 - lz, 7);
    }
    
    int silk_sigm_Q15(
        int                    in_Q5               /* I                                                                */
    )
    {
        int ind;
    
        if (in_Q5 < 0) {
            /* Negative input */
            in_Q5 = -in_Q5;
            if (in_Q5 >= 6 * 32) {
                return 0;        /* Clip */
            }
            else {
                /* Linear interpolation of look up table */
                ind = silk_RSHIFT(in_Q5, 5);
                return(sigm_LUT_neg_Q15[ind] - silk_SMULBB(sigm_LUT_slope_Q10[ind], in_Q5 & 0x1F));
            }
        }
        else {
            /* Positive input */
            if (in_Q5 >= 6 * 32) {
                return 32767;        /* clip */
            }
            else {
                /* Linear interpolation of look up table */
                ind = silk_RSHIFT(in_Q5, 5);
                return(sigm_LUT_pos_Q15[ind] + silk_SMULBB(sigm_LUT_slope_Q10[ind], in_Q5 & 0x1F));
            }
        }
    }
    int silk_VAD_Init(                                         /* O    Return value, 0 if success                  */
        VAD_state              *psSilk_VAD                     /* I/O  Pointer to Silk VAD state                   */
    )
    {
        int b, ret = 0;
    
        /* reset state memory */
        silk_memset(psSilk_VAD, 0, sizeof(VAD_state));
    
        /* init noise levels */
        /* Initialize array with approx pink noise levels (psd proportional to inverse of frequency) */
        for (b = 0; b < VAD_N_BANDS; b++) {
            psSilk_VAD->NoiseLevelBias[b] = silk_max_32(silk_DIV32_16(VAD_NOISE_LEVELS_BIAS, b + 1), 1);
        }
    
        /* Initialize state */
        for (b = 0; b < VAD_N_BANDS; b++) {
            psSilk_VAD->NL[b] = silk_MUL(100, psSilk_VAD->NoiseLevelBias[b]);
            psSilk_VAD->inv_NL[b] = silk_DIV32(silk_int32_MAX, psSilk_VAD->NL[b]);
        }
        psSilk_VAD->counter = 15;
    
        /* init smoothed energy-to-noise ratio*/
        for (b = 0; b < VAD_N_BANDS; b++) {
            psSilk_VAD->NrgRatioSmth_Q8[b] = 100 * 256;       /* 100 * 256 --> 20 dB SNR */
        }
    
        return(ret);
    }
    
    static int noSpeechCounter;
    
    int silk_VAD_Get(
        //int          state,                       /*  Encoder state                               */
        const short            pIn[]                           /* I    PCM input                                   */
    )
    {
        int   SA_Q15, pSNR_dB_Q7, input_tilt;
        int   decimated_framelength1, decimated_framelength2;
        int   decimated_framelength;
        int   dec_subframe_length, dec_subframe_offset, SNR_Q7, i, b, s;
        int sumSquared, smooth_coef_Q16;
        short HPstateTmp;
        VARDECL(short, X);
        int Xnrg[4];
        int NrgToNoiseRatio_Q8[4];
        int speech_nrg, x_tmp;
        int   X_offset[4];
        int   ret = 0;
        int frame_length = 20;//
        int fs_kHz = 16;
        int  input_quality_bands_Q15[VAD_N_BANDS];
        int signalType;
        int VAD_flag;
        /* Safety checks
        silk_assert(4 == 4);
        silk_assert(MAX_FRAME_LENGTH >= frame_length);
        silk_assert(frame_length <= 512);
        silk_assert(frame_length == 8 * silk_RSHIFT(frame_length, 3));
        */
        /***********************/
        /* Filter and Decimate */
        /***********************/
        decimated_framelength1 = silk_RSHIFT(frame_length, 1);
        decimated_framelength2 = silk_RSHIFT(frame_length, 2);
        decimated_framelength = silk_RSHIFT(frame_length, 3);
        /* Decimate into 4 bands:
        0       L      3L       L              3L                             5L
        -      --       -              --                             --
        8       8       2               4                              4
    
        [0-1 kHz| temp. |1-2 kHz|    2-4 kHz    |            4-8 kHz           |
    
        They're arranged to allow the minimal ( frame_length / 4 ) extra
        scratch space during the downsampling process */
        X_offset[0] = 0;
        X_offset[1] = decimated_framelength + decimated_framelength2;
        X_offset[2] = X_offset[1] + decimated_framelength;
        X_offset[3] = X_offset[2] + decimated_framelength2;
        ALLOC(X, X_offset[3] + decimated_framelength1, short);
        VAD_state *psSilk_VAD;
        psSilk_VAD = (VAD_state*)malloc(sizeof(VAD_state));
        int ret1 = silk_VAD_Init(psSilk_VAD);
    
    
    
        /* 0-8 kHz to 0-4 kHz and 4-8 kHz */
        silk_ana_filt_bank_1(pIn, &psSilk_VAD->AnaState[0],
            X, &X[X_offset[3]], frame_length);
    
        /* 0-4 kHz to 0-2 kHz and 2-4 kHz */
        silk_ana_filt_bank_1(X, &psSilk_VAD->AnaState1[0],
            X, &X[X_offset[2]], decimated_framelength1);
    
        /* 0-2 kHz to 0-1 kHz and 1-2 kHz */
        silk_ana_filt_bank_1(X, &psSilk_VAD->AnaState2[0],
            X, &X[X_offset[1]], decimated_framelength2);
    
        /*********************************************/
        /* HP filter on lowest band (differentiator) */
        /*********************************************/
        X[decimated_framelength - 1] = silk_RSHIFT(X[decimated_framelength - 1], 1);
        HPstateTmp = X[decimated_framelength - 1];
        for (i = decimated_framelength - 1; i > 0; i--) {
            X[i - 1] = silk_RSHIFT(X[i - 1], 1);
            X[i] -= X[i - 1];
        }
        X[0] -= psSilk_VAD->HPstate;
        psSilk_VAD->HPstate = HPstateTmp;
    
        /*************************************/
        /* Calculate the energy in each band */
        /*************************************/
        for (b = 0; b < 4; b++) {
            /* Find the decimated framelength in the non-uniformly divided bands */
            decimated_framelength = silk_RSHIFT(frame_length, silk_min_int(4 - b, 4 - 1));
    
            /* Split length into subframe lengths */
            dec_subframe_length = silk_RSHIFT(decimated_framelength, VAD_INTERNAL_SUBFRAMES_LOG2);
            dec_subframe_offset = 0;
    
            /* Compute energy per sub-frame */
            /* initialize with summed energy of last subframe */
            Xnrg[b] = psSilk_VAD->XnrgSubfr[b];
            for (s = 0; s < VAD_INTERNAL_SUBFRAMES; s++) {
                sumSquared = 0;
                for (i = 0; i < dec_subframe_length; i++) {
                    /* The energy will be less than dec_subframe_length * ( silk_short_MIN / 8 ) ^ 2.            */
                    /* Therefore we can accumulate with no risk of overflow (unless dec_subframe_length > 128)  */
                    x_tmp = silk_RSHIFT(
                        X[X_offset[b] + i + dec_subframe_offset], 3);
                    sumSquared = silk_SMLABB(sumSquared, x_tmp, x_tmp);
    
                    /* Safety check */
                    //silk_assert(sumSquared >= 0);
                }
    
                /* Add/saturate summed energy of current subframe */
                if (s < VAD_INTERNAL_SUBFRAMES - 1) {
                    Xnrg[b] = silk_ADD_POS_SAT32(Xnrg[b], sumSquared);
                }
                else {
                    /* Look-ahead subframe */
                    Xnrg[b] = silk_ADD_POS_SAT32(Xnrg[b], silk_RSHIFT(sumSquared, 1));
                }
    
                dec_subframe_offset += dec_subframe_length;
            }
            psSilk_VAD->XnrgSubfr[b] = sumSquared;
        }
    
        /********************/
        /* Noise estimation */
        /********************/
        silk_VAD_GetNoiseLevels(&Xnrg[0], psSilk_VAD);
    
        /***********************************************/
        /* Signal-plus-noise to noise ratio estimation */
        /***********************************************/
        sumSquared = 0;
        input_tilt = 0;
        for (b = 0; b < 4; b++) {
            speech_nrg = Xnrg[b] - psSilk_VAD->NL[b];
            if (speech_nrg > 0) {
                /* Divide, with sufficient resolution */
                if ((Xnrg[b] & 0xFF800000) == 0) {
                    NrgToNoiseRatio_Q8[b] = silk_DIV32(silk_LSHIFT(Xnrg[b], 8), psSilk_VAD->NL[b] + 1);
                }
                else {
                    NrgToNoiseRatio_Q8[b] = silk_DIV32(Xnrg[b], silk_RSHIFT(psSilk_VAD->NL[b], 8) + 1);
                }
    
                /* Convert to log domain */
                SNR_Q7 = silk_lin2log(NrgToNoiseRatio_Q8[b]) - 8 * 128;
    
                /* Sum-of-squares */
                sumSquared = silk_SMLABB(sumSquared, SNR_Q7, SNR_Q7);          /* Q14 */
    
                                                                               /* Tilt measure */
                if (speech_nrg < ((int)1 << 20)) {
                    /* Scale down SNR value for small subband speech energies */
                    SNR_Q7 = silk_SMULWB(silk_LSHIFT(silk_SQRT_APPROX(speech_nrg), 6), SNR_Q7);
                }
                input_tilt = silk_SMLAWB(input_tilt, tiltWeights[b], SNR_Q7);
            }
            else {
                NrgToNoiseRatio_Q8[b] = 256;
            }
        }
    
        /* Mean-of-squares */
        sumSquared = silk_DIV32_16(sumSquared, 4); /* Q14 */
    
                                                   /* Root-mean-square approximation, scale to dBs, and write to output pointer */
        pSNR_dB_Q7 = (short)(3 * silk_SQRT_APPROX(sumSquared)); /* Q7 */
    
                                                                /*********************************/
                                                                /* Speech Probability Estimation */
                                                                /*********************************/
        SA_Q15 = silk_sigm_Q15(silk_SMULWB(VAD_SNR_FACTOR_Q16, pSNR_dB_Q7) - VAD_NEGATIVE_OFFSET_Q5);
    
        /**************************/
        /* Frequency Tilt Measure */
        /**************************/
        int input_tilt_Q15 = silk_LSHIFT(silk_sigm_Q15(input_tilt) - 16384, 1);
    
        /**************************************************/
        /* Scale the sigmoid output based on power levels */
        /**************************************************/
        speech_nrg = 0;
        for (b = 0; b < 4; b++) {
            /* Accumulate signal-without-noise energies, higher frequency bands have more weight */
            speech_nrg += (b + 1) * silk_RSHIFT(Xnrg[b] - psSilk_VAD->NL[b], 4);
        }
    
        /* Power scaling */
        if (speech_nrg <= 0) {
            SA_Q15 = silk_RSHIFT(SA_Q15, 1);
        }
        else if (speech_nrg < 32768) {
            if (frame_length == 10 * fs_kHz) {
                speech_nrg = silk_LSHIFT_SAT32(speech_nrg, 16);
            }
            else {
                speech_nrg = silk_LSHIFT_SAT32(speech_nrg, 15);
            }
    
            /* square-root */
            speech_nrg = silk_SQRT_APPROX(speech_nrg);
            SA_Q15 = silk_SMULWB(32768 + speech_nrg, SA_Q15);
        }
    
        /* Copy the resulting speech activity in Q8 */
        int speech_activity_Q8 = silk_min_int(silk_RSHIFT(SA_Q15, 7), silk_uint8_MAX);
    
        /***********************************/
        /* Energy Level and SNR estimation */
        /***********************************/
        /* Smoothing coefficient */
        smooth_coef_Q16 = silk_SMULWB(VAD_SNR_SMOOTH_COEF_Q18, silk_SMULWB((int)SA_Q15, SA_Q15));
    
        if (frame_length == 10 * fs_kHz) {
            smooth_coef_Q16 >>= 1;
        }
    
        for (b = 0; b < 4; b++) {
            /* compute smoothed energy-to-noise ratio per band */
            psSilk_VAD->NrgRatioSmth_Q8[b] = silk_SMLAWB(psSilk_VAD->NrgRatioSmth_Q8[b],
                NrgToNoiseRatio_Q8[b] - psSilk_VAD->NrgRatioSmth_Q8[b], smooth_coef_Q16);
    
            /* signal to noise ratio in dB per band */
            SNR_Q7 = 3 * (silk_lin2log(psSilk_VAD->NrgRatioSmth_Q8[b]) - 8 * 128);
            /* quality = sigmoid( 0.25 * ( SNR_dB - 16 ) ); */
            input_quality_bands_Q15[b] = silk_sigm_Q15(silk_RSHIFT(SNR_Q7 - 16 * 128, 4));
        }
        //gap************************************************************//
        if (speech_activity_Q8 < SILK_FIX_CONST(SPEECH_ACTIVITY_DTX_THRES, 8)) {
            signalType = TYPE_NO_VOICE_ACTIVITY;
            //noSpeechCounter++;
            VAD_flag = 0;
        }
        else {
            signalType = TYPE_UNVOICED;
            VAD_flag = 1;
        }
        free(psSilk_VAD);
        return(VAD_flag);
    }
  • 相关阅读:
    linux加载和卸载模块
    java 面试题之银行业务系统
    java 面试题之交通灯管理系统
    java 实现类似spring的可配置的AOP框架
    分析JVM动态生成的类
    最长上升子序列(模板)
    项目管理模式
    让thinkphp 支持ftp上传到远程,ftp删除
    hdu 1280 前m大的数 哈希
    互联网+脑科学,中国脑计划的机会
  • 原文地址:https://www.cnblogs.com/cyblogs/p/11343353.html
Copyright © 2011-2022 走看看