zoukankan      html  css  js  c++  java
  • 替换unimrcp的VAD模块

    摘要:

           unimrcp 的 VAD(voice activity detector)模块一直被认为比较粗暴,而且unimrcp的社区也很久没有更新了。使用原始unimrcp如果只是用来做Demo演示,通过手动调整参数,还是可以的。但是距离生产环境,还是有很远的一段路。

    这篇文章介绍如何使用webRtc vad模块替换原来的算法。

          【题外话:昨天开了题目,因为有事,没有更新,今天补上】

           unimrcp 的vad的模块,在libs/mpf/src/mpf_activity_detector.c 文件中,主要算法函数如下:

     1 static apr_size_t mpf_activity_detector_level_calculate(const mpf_frame_t *frame)
     2 {
     3     apr_size_t sum = 0;
     4     apr_size_t count = frame->codec_frame.size/2;
     5     const apr_int16_t *cur = frame->codec_frame.buffer;
     6     const apr_int16_t *end = cur + count;
     7 
     8     for(; cur < end; cur++) {
     9         if(*cur < 0) {
    10             sum -= *cur;
    11         }
    12         else {
    13             sum += *cur;
    14         }
    15     }
    16 
    17     return sum / count;
    18 }

          大家看这个算法,非常简单粗暴,累加求其平均值,如果大于阈值,表示有声音,如果不大于,表示静音。并没有噪音检测。所以基本上就是不可用。

          在上一篇文档介绍了WebRTC 的 VAD的算法,今天主要使用webRTC 的VAD的算法,替换该算法。步骤和上一篇介绍webRTC的是一致的。

        

     1 static apr_size_t mpf_activity_detector_level_calculate(const mpf_frame_t *frame)
     2 {
     3   //calculate samplesCount
     4   apr_size_t samplesCount = frame->codec_frame.size/2;
     5   //default 10
     6   int per_ms_frames = 10;
     7   //calculate samples
     8   apr_size_t sampleRate = 16000;
     9   //
    10   size_t samples = sampleRate * per_ms_frames / 1000;
    11   if (samples == 0) return -1;
    12   //
    13   size_t nTotal = (samplesCount / samples);
    14   //buffer
    15   int16_t *input = frame->codec_frame.buffer;
    16   //init vad
    17   VadInst * vadInst = WebRtcVad_Create();
    18   if (vadInst == NULL) {
    19     return -1;
    20   }
    21   int status = WebRtcVad_Init(vadInst);
    22   if (status != 0) {
    23     WebRtcVad_Free(vadInst);
    24     return -1;
    25   }
    26   //default 1
    27   int16_t vad_mode = 1;
    28   status = WebRtcVad_set_mode(vadInst, vad_mode);
    29   if (status != 0) {
    30     WebRtcVad_Free(vadInst);
    31     return -1;
    32   }
    33   int cnt = 0;
    34   int i  = 0;
    35   for (i = 0; i < nTotal; i++) {
    36     int keep_weight = 0;
    37     int nVadRet = WebRtcVad_Process(vadInst, sampleRate, input, samples, keep_weight);
    38     if (nVadRet == -1) {
    39       WebRtcVad_Free(vadInst);
    40       return -1;
    41     } else {
    42       if (nVadRet >= 1) {
    43         cnt++;
    44       }
    45       printf(" %d 	", nVadRet);
    46     }
    47     input += samples;
    48   }
    49   //if hunman voice < nTotal/10, as silent sample。maybe ...
    //FIXME
    50 if (cnt < nTotal/10) { 51 return 0; 52 } 53 else { 54 return 1; 55 } 56 }
     WebRtcVad_Free(vadInst)

       下面要更新主处理函数,保留它原有的TRANSITION中间状态逻辑:

     1 /** Process current frame */
     2 MPF_DECLARE(mpf_detector_event_e) mpf_activity_detector_process(mpf_activity_detector_t *detector, const mpf_frame_t *frame)
     3 {
     4     mpf_detector_event_e det_event = MPF_DETECTOR_EVENT_NONE;
     5     apr_size_t level = 0;
     6     if((frame->type & MEDIA_FRAME_TYPE_AUDIO) == MEDIA_FRAME_TYPE_AUDIO) {
     7         /* first, calculate current activity level of processed frame */
     8         level = mpf_activity_detector_level_calculate(frame);
     9 #if 0
    10         apt_log(APT_LOG_MARK,APT_PRIO_INFO,"Activity Detector --------------------- [%"APR_SIZE_T_FMT"]",level);
    11 #endif
    12     }
    13 
    14     if(detector->state == DETECTOR_STATE_INACTIVITY) {
    15         //if(level >= detector->level_threshold) {
    16         if(level >= 1) {
    17             /* start to detect activity */
    18             mpf_activity_detector_state_change(detector,DETECTOR_STATE_ACTIVITY_TRANSITION);
    19         }
    20         else {
    21             detector->duration += CODEC_FRAME_TIME_BASE;
    22             if(detector->duration >= detector->noinput_timeout) {
    23                 /* detected noinput */
    24                 det_event = MPF_DETECTOR_EVENT_NOINPUT;
    25             }
    26         }
    27     }
    28     else if(detector->state == DETECTOR_STATE_ACTIVITY_TRANSITION) {
    29         //if(level >= detector->level_threshold) {
    30         if(level >= 1) {
    31             detector->duration += CODEC_FRAME_TIME_BASE;
    32             if(detector->duration >= detector->speech_timeout) {
    33                 /* finally detected activity */
    34                 det_event = MPF_DETECTOR_EVENT_ACTIVITY;
    35                 mpf_activity_detector_state_change(detector,DETECTOR_STATE_ACTIVITY);
    36             }
    37         }
    38         else {
    39             /* fallback to inactivity */
    40             mpf_activity_detector_state_change(detector,DETECTOR_STATE_INACTIVITY);
    41         }
    42     }
    43     else if(detector->state == DETECTOR_STATE_ACTIVITY) {
    44         //if(level >= detector->level_threshold) {
    45         if(level >= 1) {
    46             detector->duration += CODEC_FRAME_TIME_BASE;
    47         }
    48         else {
    49             /* start to detect inactivity */
    50             mpf_activity_detector_state_change(detector,DETECTOR_STATE_INACTIVITY_TRANSITION);
    51         }
    52     }
    53     else if(detector->state == DETECTOR_STATE_INACTIVITY_TRANSITION) {
    54         //if(level >= detector->level_threshold) {
    55         if(level >= 1) {
    56             /* fallback to activity */
    57             mpf_activity_detector_state_change(detector,DETECTOR_STATE_ACTIVITY);
    58         }
    59         else {
    60             detector->duration += CODEC_FRAME_TIME_BASE;
    61             if(detector->duration >= detector->silence_timeout) {
    62                 /* detected inactivity */
    63                 det_event = MPF_DETECTOR_EVENT_INACTIVITY;
    64                 mpf_activity_detector_state_change(detector,DETECTOR_STATE_INACTIVITY);
    65             }
    66         }
    67     }
    68 
    69     return det_event;
    70 }

       如此替换后,就完成了算法的更新。当然还需要调整一下cmake的相关的文件配置,加载相应的webRTC的vad文件。

        

    static apr_size_t mpf_activity_detector_level_calculate(const mpf_frame_t *frame)
    {
      //calculate samplesCount
      apr_size_t samplesCount = frame->codec_frame.size/2;
      //default 10
      int per_ms_frames = 10;
      //calculate samples
      apr_size_t sampleRate = 16000;
      //
      size_t samples = sampleRate * per_ms_frames / 1000;
      if (samples == 0) return -1;
      //
      size_t nTotal = (samplesCount / samples);
      //buffer
      int16_t *input = frame->codec_frame.buffer;
      //init vad
      VadInst * vadInst = WebRtcVad_Create();
      if (vadInst == NULL) {
        return -1;
      }
      int status = WebRtcVad_Init(vadInst);
      if (status != 0) {
        WebRtcVad_Free(vadInst);
        return -1;
      }
      //default 1
      int16_t vad_mode = 1;
      status = WebRtcVad_set_mode(vadInst, vad_mode);
      if (status != 0) {
        WebRtcVad_Free(vadInst);
        return -1;
      }
      int cnt = 0;
      int i  = 0;
      for (i = 0; i < nTotal; i++) {
        int keep_weight = 0;
        int nVadRet = WebRtcVad_Process(vadInst, sampleRate, input, samples, keep_weight);
        if (nVadRet == -1) {
          WebRtcVad_Free(vadInst);
          return -1;
        } else {
          if (nVadRet >= 1) {
            cnt++;
          }
          printf(" %d 	", nVadRet);
        }
        input += samples;
      }
      //if hunman voice < nTotal/10, as silent sample
      if (cnt < nTotal/10) {
        return 0;
      }
      else {
        return 1;
      } 
  • 相关阅读:
    今天开始用 VSU 2010
    Visual Studio 2010 模型设计工具 基本应用
    Asp.Net访问Oracle 数据库 执行SQL语句和调用存储过程
    Enterprise Library 4.1 Security Block 快速使用图文笔记
    解决“System.Data.OracleClient 需要 Oracle 客户端软件 8.1.7 或更高版本。”(图)
    一个Oracle存储过程示例
    Enterprise Library 4.1 Application Settings 快速使用图文笔记
    Oracle 10g for Windows 简体中文版的安装过程
    Oracle 11g for Windows 简体中文版的安装过程
    Oracle 9i 数据库 创建数据库 Net 配置 创建表 SQL查询 创建存储过程 (图)
  • 原文地址:https://www.cnblogs.com/damizhou/p/11323394.html
Copyright © 2011-2022 走看看