zoukankan      html  css  js  c++  java
  • 替换unimrcp的VAD模块

    摘要:

           unimrcp 的 VAD(voice activity detector,语音活动检测)模块一直被认为比较粗暴,而且 unimrcp 的社区也很久没有更新了。使用原始 unimrcp 如果只是用来做 Demo 演示,通过手动调整参数,还是可以的。但是距离生产环境,还是有很远的一段路。

    这篇文章介绍如何使用webRtc vad模块替换原来的算法。

          【题外话:昨天开了题目,因为有事,没有更新,今天补上】

           unimrcp 的vad的模块,在libs/mpf/src/mpf_activity_detector.c 文件中,主要算法函数如下:

     1 static apr_size_t mpf_activity_detector_level_calculate(const mpf_frame_t *frame)
     2 {
     3     apr_size_t sum = 0;
     4     apr_size_t count = frame->codec_frame.size/2;
     5     const apr_int16_t *cur = frame->codec_frame.buffer;
     6     const apr_int16_t *end = cur + count;
     7 
     8     for(; cur < end; cur++) {
     9         if(*cur < 0) {
    10             sum -= *cur;
    11         }
    12         else {
    13             sum += *cur;
    14         }
    15     }
    16 
    17     return sum / count;
    18 }

          大家看这个算法,非常简单粗暴,累加求其平均值,如果大于阈值,表示有声音,如果不大于,表示静音。并没有噪音检测。所以基本上就是不可用。

          在上一篇文档介绍了WebRTC 的 VAD的算法,今天主要使用webRTC 的VAD的算法,替换该算法。步骤和上一篇介绍webRTC的是一致的。

        

     1 static apr_size_t mpf_activity_detector_level_calculate(const mpf_frame_t *frame)
     2 {
     3   //calculate samplesCount
     4   apr_size_t samplesCount = frame->codec_frame.size/2;
     5   //default 10
     6   int per_ms_frames = 10;
     7   //calculate samples
     8   apr_size_t sampleRate = 16000;
     9   //
    10   size_t samples = sampleRate * per_ms_frames / 1000;
    11   if (samples == 0) return -1;
    12   //
    13   size_t nTotal = (samplesCount / samples);
    14   //buffer
    15   int16_t *input = frame->codec_frame.buffer;
    16   //init vad
    17   VadInst * vadInst = WebRtcVad_Create();
    18   if (vadInst == NULL) {
    19     return -1;
    20   }
    21   int status = WebRtcVad_Init(vadInst);
    22   if (status != 0) {
    23     WebRtcVad_Free(vadInst);
    24     return -1;
    25   }
    26   //default 1
    27   int16_t vad_mode = 1;
    28   status = WebRtcVad_set_mode(vadInst, vad_mode);
    29   if (status != 0) {
    30     WebRtcVad_Free(vadInst);
    31     return -1;
    32   }
    33   int cnt = 0;
    34   int i  = 0;
    35   for (i = 0; i < nTotal; i++) {
    36     int keep_weight = 0;
    37     int nVadRet = WebRtcVad_Process(vadInst, sampleRate, input, samples, keep_weight);
    38     if (nVadRet == -1) {
    39       WebRtcVad_Free(vadInst);
    40       return -1;
    41     } else {
    42       if (nVadRet >= 1) {
    43         cnt++;
    44       }
    45       printf(" %d 	", nVadRet);
    46     }
    47     input += samples;
    48   }
    49   //if hunman voice < nTotal/10, as silent sample。maybe ...
    //FIXME
    50 if (cnt < nTotal/10) { 51 return 0; 52 } 53 else { 54 return 1; 55 } 56 }
     WebRtcVad_Free(vadInst)

       下面要更新主处理函数,保留它原有的 TRANSITION 中间状态逻辑:

     1 /** Process current frame */
     2 MPF_DECLARE(mpf_detector_event_e) mpf_activity_detector_process(mpf_activity_detector_t *detector, const mpf_frame_t *frame)
     3 {
     4     mpf_detector_event_e det_event = MPF_DETECTOR_EVENT_NONE;
     5     apr_size_t level = 0;
     6     if((frame->type & MEDIA_FRAME_TYPE_AUDIO) == MEDIA_FRAME_TYPE_AUDIO) {
     7         /* first, calculate current activity level of processed frame */
     8         level = mpf_activity_detector_level_calculate(frame);
     9 #if 0
    10         apt_log(APT_LOG_MARK,APT_PRIO_INFO,"Activity Detector --------------------- [%"APR_SIZE_T_FMT"]",level);
    11 #endif
    12     }
    13 
    14     if(detector->state == DETECTOR_STATE_INACTIVITY) {
    15         //if(level >= detector->level_threshold) {
    16         if(level >= 1) {
    17             /* start to detect activity */
    18             mpf_activity_detector_state_change(detector,DETECTOR_STATE_ACTIVITY_TRANSITION);
    19         }
    20         else {
    21             detector->duration += CODEC_FRAME_TIME_BASE;
    22             if(detector->duration >= detector->noinput_timeout) {
    23                 /* detected noinput */
    24                 det_event = MPF_DETECTOR_EVENT_NOINPUT;
    25             }
    26         }
    27     }
    28     else if(detector->state == DETECTOR_STATE_ACTIVITY_TRANSITION) {
    29         //if(level >= detector->level_threshold) {
    30         if(level >= 1) {
    31             detector->duration += CODEC_FRAME_TIME_BASE;
    32             if(detector->duration >= detector->speech_timeout) {
    33                 /* finally detected activity */
    34                 det_event = MPF_DETECTOR_EVENT_ACTIVITY;
    35                 mpf_activity_detector_state_change(detector,DETECTOR_STATE_ACTIVITY);
    36             }
    37         }
    38         else {
    39             /* fallback to inactivity */
    40             mpf_activity_detector_state_change(detector,DETECTOR_STATE_INACTIVITY);
    41         }
    42     }
    43     else if(detector->state == DETECTOR_STATE_ACTIVITY) {
    44         //if(level >= detector->level_threshold) {
    45         if(level >= 1) {
    46             detector->duration += CODEC_FRAME_TIME_BASE;
    47         }
    48         else {
    49             /* start to detect inactivity */
    50             mpf_activity_detector_state_change(detector,DETECTOR_STATE_INACTIVITY_TRANSITION);
    51         }
    52     }
    53     else if(detector->state == DETECTOR_STATE_INACTIVITY_TRANSITION) {
    54         //if(level >= detector->level_threshold) {
    55         if(level >= 1) {
    56             /* fallback to activity */
    57             mpf_activity_detector_state_change(detector,DETECTOR_STATE_ACTIVITY);
    58         }
    59         else {
    60             detector->duration += CODEC_FRAME_TIME_BASE;
    61             if(detector->duration >= detector->silence_timeout) {
    62                 /* detected inactivity */
    63                 det_event = MPF_DETECTOR_EVENT_INACTIVITY;
    64                 mpf_activity_detector_state_change(detector,DETECTOR_STATE_INACTIVITY);
    65             }
    66         }
    67     }
    68 
    69     return det_event;
    70 }

       如此替换后,就完成了算法的更新。当然还需要调整一下cmake的相关的文件配置,加载相应的webRTC的vad文件。

        

    /**
     * Classify one audio frame as speech or silence with the WebRTC VAD.
     *
     * The frame is cut into 10 ms windows; every window is passed to
     * WebRtcVad_Process() and the windows reported as voiced are counted.
     * A fresh VAD instance is created and freed on every call.
     * NOTE(review): the VAD therefore carries no state from one frame to the
     * next; keeping the instance in the detector would presumably work better.
     *
     * @param frame frame whose codec_frame.buffer is read as 16-bit samples
     * @return 1 when at least one window is voiced and the voiced count reaches
     *         a tenth of the windows; 0 otherwise, including on any VAD error
     *         and when the frame is shorter than one window
     */
    static apr_size_t mpf_activity_detector_level_calculate(const mpf_frame_t *frame)
    {
        /* NOTE(review): the rate is hard-coded; confirm it matches the stream's sampling rate */
        apr_size_t sample_rate = 16000;
        int window_ms = 10;
        int16_t vad_mode = 1;
        int keep_weight = 0;
        size_t window_samples = sample_rate * window_ms / 1000;
        size_t sample_count = frame->codec_frame.size / 2;
        int16_t *input = frame->codec_frame.buffer;
        size_t window_count;
        size_t voiced = 0;
        size_t i;
        apr_size_t level = 0;
        VadInst *vad;

        /*
         * Failures report 0 (silence) instead of -1: the return type is unsigned,
         * so -1 would turn into a huge level that a "level >= 1" check reads as speech.
         */
        if (input == NULL || window_samples == 0) {
            return 0;
        }
        window_count = sample_count / window_samples;
        if (window_count == 0) {
            /* not even one full window to analyse */
            return 0;
        }

        vad = WebRtcVad_Create();
        if (vad == NULL) {
            return 0;
        }
        if (WebRtcVad_Init(vad) != 0 || WebRtcVad_set_mode(vad, vad_mode) != 0) {
            goto cleanup;
        }

        for (i = 0; i < window_count; i++) {
            int vad_ret = WebRtcVad_Process(vad, sample_rate, input, window_samples, keep_weight);
            if (vad_ret == -1) {
                goto cleanup;
            }
            if (vad_ret >= 1) {
                voiced++;
            }
            input += window_samples;
        }

        /*
         * Fewer voiced windows than a tenth of the total counts as silence.
         * The explicit "voiced > 0" matters for frames with fewer than ten
         * windows, where window_count / 10 is 0 and would accept anything.
         */
        if (voiced > 0 && voiced >= window_count / 10) {
            level = 1;
        }

    cleanup:
        /* single exit so the VAD instance is released on every path */
        WebRtcVad_Free(vad);
        return level;
    }
  • 相关阅读:
    Git
    Git
    Git
    Git
    Docker
    Linux
    Linux
    Python
    Python
    SQL
  • 原文地址:https://www.cnblogs.com/damizhou/p/11323394.html
Copyright © 2011-2022 走看看