zoukankan      html  css  js  c++  java
  • 替换unimrcp的VAD模块


           unimrcp 的 VAD(voice activity detector,语音活动检测)模块算法一直被认为比较粗暴,而且 unimrcp 社区也很久没有更新了。如果只是用来做 Demo 演示,使用原始 unimrcp 并手动调整参数还是可以的;但距离生产环境,还有很远的一段路。

    这篇文章介绍如何使用webRtc vad模块替换原来的算法。


           unimrcp 的vad的模块,在libs/mpf/src/mpf_activity_detector.c 文件中,主要算法函数如下:

     1 static apr_size_t mpf_activity_detector_level_calculate(const mpf_frame_t *frame)
     2 {
     3     apr_size_t sum = 0;
     4     apr_size_t count = frame->codec_frame.size/2;
     5     const apr_int16_t *cur = frame->codec_frame.buffer;
     6     const apr_int16_t *end = cur + count;
     8     for(; cur < end; cur++) {
     9         if(*cur < 0) {
    10             sum -= *cur;
    11         }
    12         else {
    13             sum += *cur;
    14         }
    15     }
    17     return sum / count;
    18 }


          上一篇文章介绍了 WebRTC 的 VAD 算法,本文使用 WebRTC 的 VAD 算法替换上面的平均幅值算法,步骤与上一篇介绍的一致。替换后的函数如下:


     1 static apr_size_t mpf_activity_detector_level_calculate(const mpf_frame_t *frame)
     2 {
     3   //calculate samplesCount
     4   apr_size_t samplesCount = frame->codec_frame.size/2;
     5   //default 10
     6   int per_ms_frames = 10;
     7   //calculate samples
     8   apr_size_t sampleRate = 16000;
     9   //
    10   size_t samples = sampleRate * per_ms_frames / 1000;
    11   if (samples == 0) return -1;
    12   //
    13   size_t nTotal = (samplesCount / samples);
    14   //buffer
    15   int16_t *input = frame->codec_frame.buffer;
    16   //init vad
    17   VadInst * vadInst = WebRtcVad_Create();
    18   if (vadInst == NULL) {
    19     return -1;
    20   }
    21   int status = WebRtcVad_Init(vadInst);
    22   if (status != 0) {
    23     WebRtcVad_Free(vadInst);
    24     return -1;
    25   }
    26   //default 1
    27   int16_t vad_mode = 1;
    28   status = WebRtcVad_set_mode(vadInst, vad_mode);
    29   if (status != 0) {
    30     WebRtcVad_Free(vadInst);
    31     return -1;
    32   }
    33   int cnt = 0;
    34   int i  = 0;
    35   for (i = 0; i < nTotal; i++) {
    36     int keep_weight = 0;
    37     int nVadRet = WebRtcVad_Process(vadInst, sampleRate, input, samples, keep_weight);
    38     if (nVadRet == -1) {
    39       WebRtcVad_Free(vadInst);
    40       return -1;
    41     } else {
    42       if (nVadRet >= 1) {
    43         cnt++;
    44       }
    45       printf(" %d 	", nVadRet);
    46     }
    47     input += samples;
    48   }
    49   //if hunman voice < nTotal/10, as silent sample。maybe ...
    50 if (cnt < nTotal/10) { 51 return 0; 52 } 53 else { 54 return 1; 55 } 56 }


     1 /** Process current frame */
     2 MPF_DECLARE(mpf_detector_event_e) mpf_activity_detector_process(mpf_activity_detector_t *detector, const mpf_frame_t *frame)
     3 {
     4     mpf_detector_event_e det_event = MPF_DETECTOR_EVENT_NONE;
     5     apr_size_t level = 0;
     6     if((frame->type & MEDIA_FRAME_TYPE_AUDIO) == MEDIA_FRAME_TYPE_AUDIO) {
     7         /* first, calculate current activity level of processed frame */
     8         level = mpf_activity_detector_level_calculate(frame);
     9 #if 0
    10         apt_log(APT_LOG_MARK,APT_PRIO_INFO,"Activity Detector --------------------- [%"APR_SIZE_T_FMT"]",level);
    11 #endif
    12     }
    14     if(detector->state == DETECTOR_STATE_INACTIVITY) {
    15         //if(level >= detector->level_threshold) {
    16         if(level >= 1) {
    17             /* start to detect activity */
    18             mpf_activity_detector_state_change(detector,DETECTOR_STATE_ACTIVITY_TRANSITION);
    19         }
    20         else {
    21             detector->duration += CODEC_FRAME_TIME_BASE;
    22             if(detector->duration >= detector->noinput_timeout) {
    23                 /* detected noinput */
    24                 det_event = MPF_DETECTOR_EVENT_NOINPUT;
    25             }
    26         }
    27     }
    28     else if(detector->state == DETECTOR_STATE_ACTIVITY_TRANSITION) {
    29         //if(level >= detector->level_threshold) {
    30         if(level >= 1) {
    31             detector->duration += CODEC_FRAME_TIME_BASE;
    32             if(detector->duration >= detector->speech_timeout) {
    33                 /* finally detected activity */
    34                 det_event = MPF_DETECTOR_EVENT_ACTIVITY;
    35                 mpf_activity_detector_state_change(detector,DETECTOR_STATE_ACTIVITY);
    36             }
    37         }
    38         else {
    39             /* fallback to inactivity */
    40             mpf_activity_detector_state_change(detector,DETECTOR_STATE_INACTIVITY);
    41         }
    42     }
    43     else if(detector->state == DETECTOR_STATE_ACTIVITY) {
    44         //if(level >= detector->level_threshold) {
    45         if(level >= 1) {
    46             detector->duration += CODEC_FRAME_TIME_BASE;
    47         }
    48         else {
    49             /* start to detect inactivity */
    50             mpf_activity_detector_state_change(detector,DETECTOR_STATE_INACTIVITY_TRANSITION);
    51         }
    52     }
    53     else if(detector->state == DETECTOR_STATE_INACTIVITY_TRANSITION) {
    54         //if(level >= detector->level_threshold) {
    55         if(level >= 1) {
    56             /* fallback to activity */
    57             mpf_activity_detector_state_change(detector,DETECTOR_STATE_ACTIVITY);
    58         }
    59         else {
    60             detector->duration += CODEC_FRAME_TIME_BASE;
    61             if(detector->duration >= detector->silence_timeout) {
    62                 /* detected inactivity */
    63                 det_event = MPF_DETECTOR_EVENT_INACTIVITY;
    64                 mpf_activity_detector_state_change(detector,DETECTOR_STATE_INACTIVITY);
    65             }
    66         }
    67     }
    69     return det_event;
    70 }



    static apr_size_t mpf_activity_detector_level_calculate(const mpf_frame_t *frame)
      //calculate samplesCount
      apr_size_t samplesCount = frame->codec_frame.size/2;
      //default 10
      int per_ms_frames = 10;
      //calculate samples
      apr_size_t sampleRate = 16000;
      size_t samples = sampleRate * per_ms_frames / 1000;
      if (samples == 0) return -1;
      size_t nTotal = (samplesCount / samples);
      int16_t *input = frame->codec_frame.buffer;
      //init vad
      VadInst * vadInst = WebRtcVad_Create();
      if (vadInst == NULL) {
        return -1;
      int status = WebRtcVad_Init(vadInst);
      if (status != 0) {
        return -1;
      //default 1
      int16_t vad_mode = 1;
      status = WebRtcVad_set_mode(vadInst, vad_mode);
      if (status != 0) {
        return -1;
      int cnt = 0;
      int i  = 0;
      for (i = 0; i < nTotal; i++) {
        int keep_weight = 0;
        int nVadRet = WebRtcVad_Process(vadInst, sampleRate, input, samples, keep_weight);
        if (nVadRet == -1) {
          return -1;
        } else {
          if (nVadRet >= 1) {
          printf(" %d 	", nVadRet);
        input += samples;
      //if hunman voice < nTotal/10, as silent sample
      if (cnt < nTotal/10) {
        return 0;
      else {
        return 1;
  • 相关阅读:
    今天开始用 VSU 2010
    Visual Studio 2010 模型设计工具 基本应用
    Asp.Net访问Oracle 数据库 执行SQL语句和调用存储过程
    Enterprise Library 4.1 Security Block 快速使用图文笔记
    解决“System.Data.OracleClient 需要 Oracle 客户端软件 8.1.7 或更高版本。”(图)
    Enterprise Library 4.1 Application Settings 快速使用图文笔记
    Oracle 10g for Windows 简体中文版的安装过程
    Oracle 11g for Windows 简体中文版的安装过程
    Oracle 9i 数据库 创建数据库 Net 配置 创建表 SQL查询 创建存储过程 (图)
  • 原文地址:https://www.cnblogs.com/damizhou/p/11323394.html
Copyright © 2011-2022 走看看