  • Problems encountered when integrating Baidu speech recognition

    The app must allow plain HTTP requests, since the SDK talks to Baidu's servers over HTTP (see the Info.plist sketch after this list)

    Configure the correct APP_ID and related credentials (API_KEY, SECRET_KEY)

    Import the SDK's ASR folder, including the .dat model files
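
    A minimal App Transport Security exception for Info.plist, as a sketch; the blanket NSAllowsArbitraryLoads flag shown here could be narrowed to per-domain exceptions for Baidu's hosts in a production app:

    <!-- Info.plist: allow the SDK's plain-HTTP requests -->
    <key>NSAppTransportSecurity</key>
    <dict>
        <key>NSAllowsArbitraryLoads</key>
        <true/>
    </dict>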

    The author's bridging code (real-time parsing):

    // SpeechStreamHelper.h
    #import <Foundation/Foundation.h>
    #import "AudioInputStream.h"
    #import "BDSASRDefines.h"
    #import "BDSASRParameters.h"
    #import "BDSWakeupDefines.h"
    #import "BDSWakeupParameters.h"
    #import "BDSEventManager.h"
    #import "BDVRSettings.h"
    
    @interface SpeechStreamHelper : NSObject
    <BDSClientASRDelegate>
    
    @property (nonatomic, copy) void (^haveRecognizerSpeakTextBlock)(NSString *text, NSTimeInterval startLocation, NSTimeInterval length);
    // Start streaming recognition
    - (void)startAudioStream;
    - (void)pauseAudioStream;
    - (void)stopAudioStream;
    // Feed raw PCM chunks (the SDK expects 16 kHz, 16-bit mono) into the recognizer
    - (void)haveHandlePCMData:(NSData *)data duration:(NSTimeInterval)duration volume:(float)volume;
    
    @end
    #import "SpeechStreamHelper.h"
    
    @interface SpeechStreamHelper()
    
    @property (strong, nonatomic) BDSEventManager *asrEventManager;
    @property (nonatomic,assign)BOOL canStartStream;
    @property (nonatomic,assign)BOOL shouldStartStream;
    @property(nonatomic,strong)AudioInputStream *audioStream;
    @property(nonatomic,assign)NSTimeInterval outsideDuration;
    @property(nonatomic,assign)NSTimeInterval insideDuration;
    
    @end
    
    @implementation SpeechStreamHelper
    
    -(instancetype)init
    {
        self = [super init];
        if (self) {
            self.canStartStream = YES;
            self.shouldStartStream = NO;
            [self configurationManager];
        }
        return self;
    }
    
    -(void)configurationManager
    {
        self.asrEventManager = [BDSEventManager createEventManagerWithName:BDS_ASR_NAME];
        [[BDVRSettings getInstance] configBDVRClient];
        [self configVoiceRecognitionClient];
        [self.asrEventManager setDelegate:self];
    }
    // Start streaming recognition
    - (void)startAudioStream
    {
        [self startAudioStream:0];
    }
    
    // Start streaming recognition from a caller-supplied start offset
    - (void)startAudioStream:(NSTimeInterval)duration
    {
        self.shouldStartStream = YES;
        if (self.audioStream == nil && self.canStartStream) {
            self.audioStream = [[AudioInputStream alloc] init];
            self.outsideDuration = duration;
            self.insideDuration = CFAbsoluteTimeGetCurrent();
            [self.asrEventManager setParameter:self.audioStream forKey:BDS_ASR_AUDIO_INPUT_STREAM];
            [self.asrEventManager setParameter:@"" forKey:BDS_ASR_AUDIO_FILE_PATH];
            [self.asrEventManager sendCommand:BDS_ASR_CMD_START];
        }
    }
    
    - (void)pauseAudioStream
    {
        self.shouldStartStream = NO;
        [self onRequestEnd];
    }
    
    -(void)stopAudioStream
    {
        self.canStartStream = NO;
        [self pauseAudioStream];
        [self.asrEventManager setDelegate:nil];
        self.asrEventManager = nil;
    }
    
    - (void)onRequestEnd
    {
        if (self.audioStream) {
            [self.audioStream close];
            self.audioStream = nil;
        }
        [self.asrEventManager sendCommand:BDS_ASR_CMD_STOP];
    }
    
    -(void)haveHandlePCMData:(NSData*)data duration:(NSTimeInterval)duration volume:(float)volume
    {
        if (self.shouldStartStream && self.audioStream != nil) {
            [self.audioStream haveHandlePCMData:data];
        }
        else if (self.shouldStartStream && volume > 26) {
            // No active stream yet: restart once a chunk is loud enough (empirical threshold).
            [self startAudioStream:duration];
        }
    }
    
    #pragma mark - BDSClientASRDelegate
    
    - (void)VoiceRecognitionClientWorkStatus:(int)workStatus obj:(id)aObj {
        switch (workStatus) {
            case EVoiceRecognitionClientWorkStatusFinish: {
                NSString *data = [self parseResultFromDic:aObj];
                if (data.length > 0) {
                    CFAbsoluteTime currentTime = CFAbsoluteTimeGetCurrent();
                    NSTimeInterval duration = currentTime - self.insideDuration;
                    if (self.haveRecognizerSpeakTextBlock) {
                        self.haveRecognizerSpeakTextBlock(data, self.outsideDuration, duration);
                    }
                }
    //            NSLog(@"Recognition result: %@   (%.2f -> %.2f)  %.2f", data, self.outsideDuration, self.outsideDuration + duration, duration);
                [self onRequestEnd];
                break;
            }
            case EVoiceRecognitionClientWorkStatusCancel:
            case EVoiceRecognitionClientWorkStatusError: {
                [self onRequestEnd];
                break;
            }
            case EVoiceRecognitionClientWorkStatusChunkNlu: {
                printf("Partial NLU result received\n");
                break;
            }
            case EVoiceRecognitionClientWorkStatusChunkEnd: {
                // The parsed partial result becomes available here.
                break;
            }
            // The remaining statuses need no handling in this helper.
            case EVoiceRecognitionClientWorkStatusNewRecordData:
            case EVoiceRecognitionClientWorkStatusStartWorkIng:
            case EVoiceRecognitionClientWorkStatusStart:
            case EVoiceRecognitionClientWorkStatusEnd:
            case EVoiceRecognitionClientWorkStatusFlushData:
            case EVoiceRecognitionClientWorkStatusMeterLevel:
            case EVoiceRecognitionClientWorkStatusLoaded:
            case EVoiceRecognitionClientWorkStatusUnLoaded:
            case EVoiceRecognitionClientWorkStatusChunkThirdData:
            case EVoiceRecognitionClientWorkStatusFeedback:
            case EVoiceRecognitionClientWorkStatusRecorderEnd:
            case EVoiceRecognitionClientWorkStatusLongSpeechEnd:
            default:
                break;
        }
    }
    
    
    
    // Baidu returns candidate transcripts under "results_recognition"; take the top one.
    - (NSString *)parseResultFromDic:(NSDictionary *)resultDict
    {
        NSArray *results_recognition = [resultDict valueForKey:@"results_recognition"];
        if (results_recognition && [results_recognition isKindOfClass:[NSArray class]] && results_recognition.count > 0) {
            return [NSString stringWithFormat:@"%@",results_recognition[0]];
        }
        return @"";
    }
    
    #pragma mark - Private: Configuration
    
    - (void)configVoiceRecognitionClient {
        // Set the DEBUG_LOG level
    //    [self.asrEventManager setParameter:@(EVRDebugLogLevelTrace) forKey:BDS_ASR_DEBUG_LOG_LEVEL];
        [self.asrEventManager setParameter:@(EVRDebugLogLevelOff) forKey:BDS_ASR_DEBUG_LOG_LEVEL];
        // Configure API_KEY, SECRET_KEY and APP_ID
        [self.asrEventManager setParameter:@[API_KEY, SECRET_KEY] forKey:BDS_ASR_API_SECRET_KEYS];
        [self.asrEventManager setParameter:APP_ID forKey:BDS_ASR_OFFLINE_APP_CODE];
        // Configure voice activity detection (pick one of the two)
        [self configModelVAD];
    //    [self configDNNMFE];
        
    //    [self.asrEventManager setParameter:@"15361" forKey:BDS_ASR_PRODUCT_ID];
        // ---- NLU and punctuation -----
        [self enableNLU];
    //    [self enablePunctuation];
        // ------------------------
    }
    
    - (void) enableNLU {
        // ---- Enable semantic understanding (NLU) -----
        [self.asrEventManager setParameter:@(YES) forKey:BDS_ASR_ENABLE_NLU];
        [self.asrEventManager setParameter:@"1536" forKey:BDS_ASR_PRODUCT_ID];
    }
    
    - (void) enablePunctuation {
        // ---- Enable punctuation output -----
        [self.asrEventManager setParameter:@(NO) forKey:BDS_ASR_DISABLE_PUNCTUATION];
        // Mandarin punctuation
        //    [self.asrEventManager setParameter:@"1537" forKey:BDS_ASR_PRODUCT_ID];
        // English punctuation
        [self.asrEventManager setParameter:@"1737" forKey:BDS_ASR_PRODUCT_ID];
        
    }
    
    
    - (void)configModelVAD {
        NSString *modelVAD_filepath = [[NSBundle mainBundle] pathForResource:@"bds_easr_basic_model" ofType:@"dat"];
        [self.asrEventManager setParameter:modelVAD_filepath forKey:BDS_ASR_MODEL_VAD_DAT_FILE];
        [self.asrEventManager setParameter:@(YES) forKey:BDS_ASR_ENABLE_MODEL_VAD];
    #pragma mark Not sure whether the two parameters below actually take effect in this mode
        [self.asrEventManager setParameter:@(0.1f) forKey:BDS_ASR_MFE_MAX_SPEECH_PAUSE];
        [self.asrEventManager setParameter:@(2.f) forKey:BDS_ASR_MFE_MAX_WAIT_DURATION];
    }
    
    - (void)configDNNMFE {
        NSString *mfe_dnn_filepath = [[NSBundle mainBundle] pathForResource:@"bds_easr_mfe_dnn" ofType:@"dat"];
        [self.asrEventManager setParameter:mfe_dnn_filepath forKey:BDS_ASR_MFE_DNN_DAT_FILE];
        NSString *cmvn_dnn_filepath = [[NSBundle mainBundle] pathForResource:@"bds_easr_mfe_cmvn" ofType:@"dat"];
        [self.asrEventManager setParameter:cmvn_dnn_filepath forKey:BDS_ASR_MFE_CMVN_DAT_FILE];
        
        [self.asrEventManager setParameter:@(NO) forKey:BDS_ASR_ENABLE_MODEL_VAD];
        // MFE supports custom silence durations
    //        [self.asrEventManager setParameter:@(200.f) forKey:BDS_ASR_MFE_MAX_SPEECH_PAUSE];
    //        [self.asrEventManager setParameter:@(400.f) forKey:BDS_ASR_MFE_MAX_WAIT_DURATION];
    }
    
    
    @end
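
    A minimal usage sketch for the helper above, assuming PCM chunks already arrive from some recorder elsewhere in the app (the chunk variables below are hypothetical):

    SpeechStreamHelper *helper = [[SpeechStreamHelper alloc] init];
    helper.haveRecognizerSpeakTextBlock = ^(NSString *text, NSTimeInterval startLocation, NSTimeInterval length) {
        NSLog(@"Recognized \"%@\" at %.2fs, lasting %.2fs", text, startLocation, length);
    };
    [helper startAudioStream];
    // In the recorder callback, forward each PCM chunk with its timestamp and volume:
    //     [helper haveHandlePCMData:pcmChunk duration:chunkTimestamp volume:chunkVolume];
    // And when recognition is no longer needed:
    //     [helper stopAudioStream];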

    The Baidu demo code, rewritten:

    // AudioInputStream.h
    #import <Foundation/Foundation.h>
    
    @interface AudioInputStream : NSInputStream
    
    -(void)haveHandlePCMData:(NSData*)data;
    
    @end
    #import "AudioInputStream.h"
    #import <AudioToolbox/AudioToolbox.h>
    #import <AVFoundation/AVAudioSession.h>
    #include "AudioDataQueue.hpp"
    
    @interface AudioInputStream ()
    {
        BOOL                        isRecording;
        AudioDataQueue              *audioData;
    }
    // The developer should set the stream status depending on the data flow.
    @property (nonatomic, assign) NSStreamStatus status;
    
    @end
    
    @implementation AudioInputStream
    
    @synthesize delegate;
    
    - (instancetype)init
    {
        if (self = [super init]) {
            _status = NSStreamStatusNotOpen;
            isRecording = false;
        }
        return self;
    }
    
    - (void)open
    {
        /*
         ** any operation to open data source, do it here.
         */
        [self startRecording];
    }
    
    - (void)close
    {
        /*
         ** clean up the data source.
         */
        [self stopRecorder];
    }
    
    #pragma mark - Custom
    
    - (BOOL)hasBytesAvailable
    {
        return YES;
    }
    
    - (NSStreamStatus)streamStatus
    {
        return self.status;
    }
    
    // The Baidu SDK pulls PCM out of the stream through this method.
    - (NSInteger)read:(uint8_t *)buffer maxLength:(NSUInteger)len
    {
        @synchronized (self) {
            if (audioData == NULL || !isRecording) {
                return 0;
            }
            else {
                int dataLength = audioData->dequeSamples(buffer, (int)len, true);
                return dataLength;
            }
        }
    }
    
    - (BOOL)getBuffer:(uint8_t * _Nullable *)buffer length:(NSUInteger *)len
    {
        return NO;
    }
    
    #pragma mark - Data Source
    
    - (void)stopRecorder
    {
        if (!isRecording) {
            return;
        }
        isRecording = false;
        
        @synchronized(self) {
            delete audioData;
            audioData = NULL; // clear the dangling pointer; -read:maxLength: checks for NULL
        }
    }
    
    - (void)startRecording
    {
        [self clearupRecording];
        isRecording = YES;
    }
    
    
    - (void)clearupRecording
    {
        @synchronized(self) {
            delete audioData; // deleting NULL is a no-op, so this also avoids leaking a previous queue
            audioData = new AudioDataQueue(16000*2*2); // 16 kHz * 2 bytes/sample * 2 s ring buffer
            audioData->reset();
        }
    }
    
    #pragma mark - Data input
    
    -(void)haveHandlePCMData:(NSData*)data
    {
        if (data.length > 0) {
            @synchronized (self) {
            if (isRecording && audioData != NULL) {
                audioData->queueAudio((const uint8_t *)data.bytes, (int)data.length);
            }
            }
        }
    }
    
    @end
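
    A self-contained sketch of the stream's push/pull flow, with a plain read standing in for the Baidu SDK as the consumer (the buffer sizes here are arbitrary):

    AudioInputStream *stream = [[AudioInputStream alloc] init];
    [stream open];                                   // allocates the ring buffer, starts accepting data

    int16_t samples[160] = {0};                      // 10 ms of silence at 16 kHz, 16-bit mono
    NSData *chunk = [NSData dataWithBytes:samples length:sizeof(samples)];
    [stream haveHandlePCMData:chunk];                // producer side: the app pushes PCM in

    uint8_t out[320];
    NSInteger got = [stream read:out maxLength:sizeof(out)];  // consumer side: the SDK pulls here
    NSLog(@"pulled %ld bytes", (long)got);

    [stream close];                                  // tears the ring buffer down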

    Baidu's data-caching queue (a fixed-capacity ring buffer; when it overflows, the oldest samples are overwritten):

    // AudioDataQueue.hpp
    #ifndef AudioDataQueue_hpp
    #define AudioDataQueue_hpp
    
    #include <stdint.h>   // uint8_t
    // Fixed-capacity circular byte queue; when full, new data overwrites the oldest.
    class AudioDataQueue
    {
    public:
        AudioDataQueue(int bufferCapacity = 0);
        // Append audio bytes; returns how many bytes are now buffered.
        int queueAudio(const uint8_t* audioData, int dataLength);
        // Copy up to bufferSize bytes out; returns the number copied
        // (0 unless enough data is buffered or dequeRemaining is set).
        int dequeSamples(uint8_t* dataBuffer, int bufferSize, bool dequeRemaining);
        bool haveData();
        void reset();
        ~AudioDataQueue();
        
    private:
        uint8_t* mData;
        int mDataLength;
        int mBufferCapacity;
        uint8_t* mLoopStart;
        uint8_t* mLoopEnd;
        uint8_t* mDataEnd;
    };
    
    #endif /* AudioDataQueue_hpp */
    // AudioDataQueue.cpp
    #include <stdlib.h>
    #include <string.h>
    #include <stdio.h>
    #include "AudioDataQueue.hpp"
    
    int AudioDataQueue::queueAudio(const uint8_t* audioData, int dataLength)
    {
        if(dataLength == 0)
            return mDataLength;
        
        // If the chunk is larger than the whole buffer, keep only its newest part.
        if (dataLength > mBufferCapacity) {
            audioData += (dataLength - mBufferCapacity);
            dataLength = mBufferCapacity;
        }
        long remainingLen = mDataEnd - mLoopEnd;
        long rightLen = remainingLen >= dataLength ? dataLength : remainingLen;
        memcpy(mLoopEnd, audioData, rightLen);
        mLoopEnd += rightLen;
        if (mLoopEnd == mDataEnd) {
            mLoopEnd = mData;
        }
        
        long leftLen = dataLength > rightLen ? dataLength - rightLen : 0;
        if (leftLen > 0) {
            memcpy(mLoopEnd, audioData + rightLen, leftLen);
            mLoopEnd += leftLen;
        }
        
        mDataLength += dataLength;
        if (mDataLength >= mBufferCapacity) {
            // Overflow: the oldest samples were just overwritten, so advance the read pointer.
            mDataLength = mBufferCapacity;
            mLoopStart = mLoopEnd;
        }
        
        return mDataLength;
    }
    
    int AudioDataQueue::dequeSamples(uint8_t* dataBuffer, int bufferSize, bool dequeRemaining)
    {
        if (mDataLength >= bufferSize || dequeRemaining) {
            long tmp = mDataEnd - mLoopStart;
            long dataRightLen = tmp >= mDataLength ? mDataLength : tmp;
            long rightLen = dataRightLen >= bufferSize ? bufferSize : dataRightLen;
            memcpy(dataBuffer, mLoopStart, rightLen);
            mLoopStart += rightLen;
            if (mLoopStart == mDataEnd) {
                mLoopStart = mData;
            }
            
            long leftLen = 0;
            long left = bufferSize - rightLen;
            if (left > 0) {
                long dataLeftLen = mDataLength > dataRightLen ? mDataLength - dataRightLen : 0;
                leftLen = dataLeftLen >= left ? left : dataLeftLen;
                memcpy(dataBuffer + rightLen, mLoopStart, leftLen);
                mLoopStart += leftLen;
            }
            
        // bufferSize may exceed what was actually copied (the dequeRemaining case); clamp below.
        mDataLength -= bufferSize;
        if (mDataLength <= 0) {
            mDataLength = 0;
            mLoopStart = mLoopEnd = mData;
        }
            
            return (int)(rightLen + leftLen);
        }
        
        return 0;
    }
    
    bool AudioDataQueue::haveData()
    {
        return (mDataLength > 0);
    }
    
    void AudioDataQueue::reset()
    {
        mDataLength = 0;
        mDataEnd = mData + mBufferCapacity;
        mLoopStart = mLoopEnd = mData;
    }
    
    AudioDataQueue::AudioDataQueue(int bufferCapacity)
    {
        mDataLength = 0;
        mBufferCapacity = bufferCapacity;
        
        mData = (uint8_t*)malloc(mBufferCapacity);
        mDataEnd = mData + mBufferCapacity;
        mLoopStart = mLoopEnd = mData;
    }
    
    AudioDataQueue::~AudioDataQueue()
    {
        if(mData)
        {
            free(mData);
            mData = NULL;
            mDataEnd = NULL;
            mLoopStart = NULL;
            mLoopEnd = NULL;
        }
        mDataLength = 0;
        mBufferCapacity = 0;
    }
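
    A quick standalone check of the queue's overflow behavior, as a sketch: with a 4-byte capacity, queuing 6 bytes should leave only the newest 4 available.

    #include <cstdio>
    #include "AudioDataQueue.hpp"

    int main()
    {
        AudioDataQueue q(4);                     // tiny capacity so the wraparound is visible
        const uint8_t in[6] = {1, 2, 3, 4, 5, 6};
        q.queueAudio(in, 6);                     // overflows: the two oldest bytes are dropped

        uint8_t out[4] = {0};
        int n = q.dequeSamples(out, 4, true);
        printf("%d bytes: %d %d %d %d\n", n, out[0], out[1], out[2], out[3]); // 4 bytes: 3 4 5 6
        return 0;
    }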