zoukankan      html  css  js  c++  java
  • 语音语音合成科大讯飞和Tizen-TTS语音合成引擎

    废话就不多说了,开始。。。

             最近在做一个文本转语音TTS(Text to Speech)的第三方软件封装,使用的是国内语音技术龙头安徽科大讯飞公司提供的离线引擎AiSound5.0,主要用于汽车导航用途。科大讯飞还提供了AiTalk用于语音识别,AiWrite用于手写识别服务等。另外还有针对6种平台的SDK和开发示例。

            一、科大讯飞语音平台

            科大讯飞目前有不少产品应用在移动终端上了,比如说用在手机上的讯飞语点,可方便语音拨打电话和发送短信,查天气、股票等信息。

            在上面讯飞语音云这个网站可以找到科大讯飞针对6种平台的SDK,http://open.voicecloud.cn/download.php,包括Android、Iphone、Windows、Linux、Java、Flash这些平台的语音合成、语音识别和语音听写的开发文档和SDK下载,不过使用之前需要申请AppID(每一个语音应用程序需要一个Appid来唯一标识,您需要通过注册帐号来获得自己应用的Appid,未注册的语音应用将无法正常获取语音服务。),通过审核后就可以下载相应的开发文档和SDK并用于自己的软件系统中了。

            二 、科大讯飞语音合成、识别-在线文档和应用程序示例网址

            从这里可以查看在线的开发文档http://open.voicecloud.cn/developer.php?column=aW50ZV9zZGs%3D

            Android平台的开发文档和应用程序示例:http://open.voicecloud.cn/developer.php?category=YW5kcm9pZA%3D%3D&column=ZG9jdW1lbnQ%3D&type=d2lkZ2V0#a38

            Windows和Linux平台的开发文档和程序示例:http://open.voicecloud.cn/developer.php?category=b3RoZXI%3D&column=ZG9jdW1lbnQ%3D&type=YXBp

         

           三、科大讯飞语音产品的移动应用

           这里有一个科大讯飞的在线语音合成系统演示程序:ViViVoice 2.1在线演示系统

            http://www.iflytek.com/TtsDemo/viviVoiceShow.aspx

            1、讯飞语点的应用,官网下载网址如下:http://yudian.voicecloud.cn/yudian.htm,支持Android和Iphone两大平台。

            2、讯飞输入法应用:http://ime.voicecloud.cn/index.html

            3、讯飞语音输入法应用:http://kouxun.voicecloud.cn/index.html

            4、开发者社区:http://open.voicecloud.cn/index.php

         

           四、官网的Windows平台-语音合成和识别示例代码

        网址如下:http://open.voicecloud.cn/developer.php?category=b3RoZXI%3D&column=c2FtcGxl&type=YXBp

        以Windows下的开发为例,给出了语音合成、语音识别和语音听写的编程示例。

        1、语音合成开发例程

         

    #include <stdio.h>
     #include <string.h>
     #include "qtts.h"
     #define END_SYNTH( reason ) 
     { 
     ret = QTTSSessionEnd( session_id, #reason ); 
     if( 0 != ret ) 
     { 
     printf("QTTSSessionEnd failed, error code is %d", ret ); 
     } 
     
     ret = QTTSFini(); 
     if( 0 != ret ) 
     { 
     printf("QTTSFini failed, error code is %d", ret ); 
     } 
     }
     int main()
     {
     const char* configs = NULL;
     const char* session_id = NULL;
     const char* synth_params = NULL;
     const char* synth_text = NULL;
     unsigned int text_len = 0;
     const char* synth_speech = NULL;
     unsigned int synth_speech_len = 0;
     FILE* f_speech = NULL;
     int synth_status = 0;
     int ret = 0;
     printf( "===================================================================
    "
     " Mobile Speech Platform 2.0 Client SDK Demo for TTS 
    "
     "===================================================================
    " );
    
     /* 初始化 */
    configs = "server_url=dev.voicecloud.cn/index.htm, timeout=10000, coding_libs=speex.dll";
     ret = QTTSInit( configs );
     if( 0 != ret )
     {
     printf( "QTTSInit failed, error code is %d", ret );
     return -1;
     }
     /* 开始一路会话,应用会话模式 */
    synth_params = "ssm=1, auf=audio/L16;rate=16000, aue=speex-wb;7, ent=intp65";
     session_id = QTTSSessionBegin( synth_params, &ret );
     if( 0 != ret )
     {
     printf( "QTTSSessionBegin failed, error code is %d", ret );
     return -1;
     }
     /* 写入合成文本 */
    synth_text = "讯飞语音云为您提供了最新最好的语音技术休会,"
     "我们在互联网上开放科大讯飞最新研发的各种语音技术,"
     "包含世界领先的语音合成技术、语音识别技术、声纹识别技术等。";
    text_len = strlen( synth_text );
     ret = QTTSTextPut( session_id, synth_text, text_len, NULL );
     if( 0 != ret )
     {
     printf( "QTTSTextPut failed, error code is %d", ret );
     END_SYNTH( QTTSTextPut failed! );
     return -1;
     }
     /* 获取合成音频 */
    f_speech = fopen( "synth_speech.pcm", "wb" );
     if( NULL == f_speech )
     {
     printf( "Can not open file "synth_speech.pcm"" );
     END_SYNTH( open file );
     return -1;
     }
     while( TTS_FLAG_DATA_END != synth_status )
     {
     synth_speech = QTTSAudioGet( session_id, &synth_speech_len, &synth_status, &ret );
     if( 0 != ret )
     {
     printf( "QTTSAudioGet failed, error code is: %d", ret );
     break;
     }
     printf( "QTTSAudioGet ok, speech length = %d
    ", synth_speech_len );
     if( NULL != synth_speech && 0 != synth_speech_len )
     {
     fwrite( synth_speech, 1, synth_speech_len, f_speech );
     }
     }
     fclose( f_speech );
     /* 结束会话,释放资源 */
    ret = QTTSSessionEnd( session_id, "normal end" );
     if( NULL == f_speech )
     {
     printf( "QTTSSessionEnd failed, error code is %d", ret );
     }
     session_id = NULL;
     ret = QTTSFini();
     if( 0 != ret )
     {
     printf( "QTTSFini failed, error code is %d", ret );
     }
     return 0;
     }
     

        2、语音识别开发例程

        每日一道理
    流逝的日子像一片片凋零的枯叶与花瓣,渐去渐远的是青春的纯情与浪漫。不记得曾有多少雨飘在胸前风响在耳畔,只知道沧桑早已漫进了我的心爬上了我的脸。当一个人与追求同行,便坎坷是伴,磨难也是伴。
    #include <stdio.h>
     #include <string.h>
     #include <Windows.h>
     #include "qisr.h"
     /*
      * END_RECOG( reason ): error-path cleanup for the recognition demo.
      *
      * Ends the session with the stringified `reason` (via #reason) and then
      * calls QISRFini(), printing a message for each call that returns non-zero.
      * The macro reads `session_id` and assigns `ret`, so both must be in scope
      * at the call site.
      *
      * Each line ends in a backslash so the whole body belongs to the macro, and
      * the body is wrapped in do { ... } while( 0 ) so that `END_RECOG( x );`
      * behaves as a single statement.
      */
     #define END_RECOG( reason ) \
     do { \
         ret = QISRSessionEnd( session_id, #reason ); \
         if( 0 != ret ) \
         { \
             printf( "QISRSessionEnd failed, error code is %d", ret ); \
         } \
         \
         ret = QISRFini(); \
         if( 0 != ret ) \
         { \
             printf( "QISRFini failed, error code is %d", ret ); \
         } \
     } while( 0 )
     /* Size in bytes of one audio chunk read from the input file; parenthesized
      * so the macro expands as a single value inside any expression. */
     #define BLOCK_LEN ( 5 * 1024 )
     int main()
     {
     const char* configs = NULL;
     const char* session_id = NULL;
     const char* recog_grammar = NULL;
     const char* recog_params = NULL;
     char recog_audio[ BLOCK_LEN ];
     FILE* f_speech = NULL;
     int audio_status = 0;
     int ep_status = 0;
     int rec_status = 0;
     int rslt_status = 0;
     const char* rec_result = NULL;
     unsigned int audio_len = 0;
     int ret = 0;
     printf( "===================================================================
    "
     " Mobile Speech Platform 2.0 Client SDK Demo for IAT 
    "
     "===================================================================
    " );
     /* 初始化 */
    configs = "server_url=dev.voicecloud.cn/index.htm, coding_libs=speex.dll, vad_enable=true";
     ret = QISRInit( configs );
     if( 0 != ret )
     {
     printf( "QISRInit failed, error code is %d
    ", ret );
     return -1;
     }
     /* 开始一路会话,应用会话模式,应用引擎内置的语法停止识别 */
    recog_grammar = "builtin:grammar/../search/location.abnf?language=zh-cn";
     recog_params = "ssm=1, aue=speex-wb;7, auf=audio/L16;rate=16000, “
     ”ent=map, vad_speech_tail=900";
     session_id = QISRSessionBegin( recog_grammar, recog_params, &ret );
     if( 0 != ret )
     {
     printf( "QISRSessionBegin failed, error code is %d
    ", ret );
     return -1;
     }
     /* 打开用来停止识别的语音文件,用户可以采取其他的获取音频的方式比如实时采集音频 */
    f_speech = fopen( "sxk_16k.pcm", "rb" );
     if( NULL == f_speech )
     {
     printf( "Can not open file "sxk_16k.pcm"
    " );
     END_RECOG( open file );
     return -1;
     }
     /* 发送音频数据,获取语音听写结果 */
    while( ISR_AUDIO_SAMPLE_LAST != audio_status )
     {
     audio_len = fread( recog_audio, 1, BLOCK_LEN, f_speech );
     audio_status = ( audio_len == BLOCK_LEN ) ? 
     ISR_AUDIO_SAMPLE_CONTINUE :ISR_AUDIO_SAMPLE_LAST;
     ret = QISRAudioWrite( session_id, recog_audio, audio_len
     , audio_status, &ep_status, &rslt_status );
     if( 0 != ret )
     {
     printf( "QISRSessionBegin failed, error code is %d
    ", ret );
     rslt_status = ISR_REC_STATUS_SPEECH_COMPLETE;
     break;
     }
     printf( "write audio data ok! len=%d, status=%d
    ", audio_len, audio_status );
     /* 已经有结果缓存在MSC中了,可以获取了 */
    if( ISR_REC_STATUS_SUCCESS == rslt_status )
     {
     rec_result = QISRGetResult( session_id, &rslt_status, 5000, &ret );
     if( 0 != ret )
     {
     printf( "QISRGetResult failed, error code is %d
    ", ret );
     rslt_status = ISR_REC_STATUS_SPEECH_COMPLETE;
     break;
     }
     if( NULL != rec_result )
     { printf( "got a result: %s
    ", rec_result );
     }
     /* 全部结果已经取完了 */
    if( ISR_REC_STATUS_SPEECH_COMPLETE == rslt_status )
     {
     printf( "the result has been got completely!
    " );
     break;
     }
     }
     /* 检测到音频后端点,结束音频发送 */
    if( ISR_EP_AFTER_SPEECH == ep_status )
     { printf( "end point of speech has been detected!
    " );
     break;
     }
     Sleep( 160 );
     }
     fclose( f_speech );
     /* 获取余下的识别结果 */
    while( ISR_REC_STATUS_SPEECH_COMPLETE != rslt_status )
     {
     rec_result = QISRGetResult( session_id, &rslt_status, 5000, &ret );
     if( 0 != ret )
     {
     printf( "QISRGetResult failed, error code is: %d
    ", ret );
     break;
     }
     if( NULL != rec_result )
     {
     printf( "got a result: %s
    ", rec_result );
     }
     /* sleep一下很有必要,防止MSC端无缓存的识别结果时浪费CPU资源 */
    Sleep( 200 );
     }
     /* 结束会话,释放资源 */
    ret = QISRSessionEnd( session_id, "normal end" );
     if( NULL == f_speech )
     {
     printf( "QISRSessionEnd failed, error code is %d
    ", ret );
     }
     session_id = NULL;
     ret = QISRFini();
     if( 0 != ret )
     {
     printf( "QISRFini failed, error code is %d
    ", ret );
     }
     return 0;
     }

        
     

        3、语音听写开发例程

    #include <stdio.h>
     #include <string.h>
     #include <Windows.h>
     #include "qisr.h"
     /*
      * END_RECOG( reason ): error-path cleanup for the dictation demo.
      *
      * Ends the session with the stringified `reason` (via #reason) and then
      * calls QISRFini(), printing a message for each call that returns non-zero.
      * The macro reads `session_id` and assigns `ret`, so both must be in scope
      * at the call site.
      *
      * Each line ends in a backslash so the whole body belongs to the macro, and
      * the body is wrapped in do { ... } while( 0 ) so that `END_RECOG( x );`
      * behaves as a single statement.
      */
     #define END_RECOG( reason ) \
     do { \
         ret = QISRSessionEnd( session_id, #reason ); \
         if( 0 != ret ) \
         { \
             printf( "QISRSessionEnd failed, error code is %d", ret ); \
         } \
         \
         ret = QISRFini(); \
         if( 0 != ret ) \
         { \
             printf( "QISRFini failed, error code is %d", ret ); \
         } \
     } while( 0 )
     /* Size in bytes of one audio chunk read from the input file; parenthesized
      * so the macro expands as a single value inside any expression. */
     #define BLOCK_LEN ( 5 * 1024 )
     int main()
     {
     const char* configs = NULL;
     const char* session_id = NULL;
     const char* recog_params = NULL;
     char recog_audio[ BLOCK_LEN ];
     FILE* f_speech = NULL;
     int audio_status = 0;
     int ep_status = 0;
     int rec_status = 0;
     int rslt_status = 0;
     const char* rec_result = NULL;
     unsigned int audio_len = 0;
     int ret = 0;
     printf( "===================================================================
    "
     " Mobile Speech Platform 2.0 Client SDK Demo for IAT 
    "
     "===================================================================
    " );
     /* 初始化 */
    configs = "server_url=dev.voicecloud.cn/index.htm, coding_libs=speex.dll, vad_enable=true";
     ret = QISRInit( configs );
     if( 0 != ret )
     {
     printf( "QISRInit failed, error code is %d
    ", ret );
     return -1;
     }
     /* 开始一路会话 */
    recog_params = "ssm=1, sub=iat, aue=speex-wb;7, auf=audio/L16;rate=16000, “
     “ent=sms16k, rst=plain, vad_speech_tail=1500";
     session_id = QISRSessionBegin( NULL, recog_params, &ret );
     if( 0 != ret )
     {
     printf( "QISRSessionBegin failed, error code is %d
    ", ret );
     return -1;
     }
     /* 打开用来停止识别的语音文件,用户可以采取其他的获取音频的方式比如实时采集音频 */
    f_speech = fopen( "IAT_16KPCM_10s_0.pcm", "rb" );
     if( NULL == f_speech )
     {
     printf( "Can not open file "IAT_16KPCM_10s_0.pcm"
    " );
     END_RECOG( open file );
     return -1;
     }
     /* 发送音频数据,获取语音听写结果 */
    while( ISR_AUDIO_SAMPLE_LAST != audio_status )
     {
     audio_len = fread( recog_audio, 1, BLOCK_LEN, f_speech );
     audio_status = ( audio_len == BLOCK_LEN ) ? 
     ISR_AUDIO_SAMPLE_CONTINUE :ISR_AUDIO_SAMPLE_LAST;
     ret = QISRAudioWrite( session_id, recog_audio, audio_len, audio_status, &ep_status, &rslt_status );
     if( 0 != ret )
     {
     printf( "QISRSessionBegin failed, error code is %d
    ", ret );
     rslt_status = ISR_REC_STATUS_SPEECH_COMPLETE;
     break;
     }
     printf( "write audio data ok! len=%d, status=%d
    ", audio_len, audio_status );
     /* 已经有结果缓存在MSC中了,可以获取了 */
    if( ISR_REC_STATUS_SUCCESS == rslt_status )
     { rec_result = QISRGetResult( session_id, &rslt_status, 5000, &ret );
     if( 0 != ret )
     {
     printf( "QISRGetResult failed, error code is %d
    ", ret );
     rslt_status = ISR_REC_STATUS_SPEECH_COMPLETE;
     break;
     }
     if( NULL != rec_result )
     {
     printf( "got a result: %s
    ", rec_result );
     }
     /* 全部结果已经取完了 */
    if( ISR_REC_STATUS_SPEECH_COMPLETE == rslt_status )
     {
     printf( "the result has been got completely!
    " );
     break;
     }
     }
     /* 检测到音频后端点,结束音频发送 */
    if( ISR_EP_AFTER_SPEECH == ep_status )
     {
     printf( "end point of speech has been detected!
    " );
     break;
     }
     Sleep( 160 );
     }
     fclose( f_speech );
     /* 获取余下的识别结果 */
    while( ISR_REC_STATUS_SPEECH_COMPLETE != rslt_status )
     {
     rec_result = QISRGetResult( session_id, &rslt_status, 5000, &ret );
     if( 0 != ret )
     {
     printf( "QISRGetResult failed, error code is: %d
    ", ret );
     break;
     }
     if( NULL != rec_result )
     {
     printf( "got a result: %s
    ", rec_result );
     }
     /* sleep一下很有必要,防止MSC端无缓存的识别结果时浪费CPU资源 */
    Sleep( 200 );
     }
     /* 结束会话,释放资源 */
    ret = QISRSessionEnd( session_id, "normal end" );
     if( NULL == f_speech )
     {
     printf( "QISRSessionEnd failed, error code is %d
    ", ret );
     }
     session_id = NULL;
     ret = QISRFini();
     if( 0 != ret )
     {
     printf( "QISRFini failed, error code is %d
    ", ret );
     }
     return 0;
     }

      

           五、Tizen(泰泽)提供的TTS和STT语音合成、语音识别接口

        泰泽是三星和英特尔合作开发的一款操作系统。

        1、Tizen SDK官网https://developer.tizen.org/downloads/tizen-sdk

        2、Tizen项目:https://review.tizen.org/git/

        3、tts-api:https://review.tizen.org/git/?p=framework/api/tts-api.git;a=summary

        语音和语音合成

        其对应的tts.h头文件api接口在线网址为:https://review.tizen.org/git/?p=framework/api/tts-api.git;a=blob;f=include/tts.h;h=636470d923555a30d164cc09ceff841b72187e20;hb=98aee0bab00a418af162a0314ef931f8fd620892

        4、stt-api:https://review.tizen.org/git/?p=framework/api/stt-api.git;a=summary
     

    文章结束给大家分享下程序员的一些笑话语录: 问路
    有一个驾驶热气球的人发现他迷路了。他降低了飞行的高度,并认出了地面 上的一个人。他继续下降高度并对着那个人大叫,“打扰一下,你能告诉我我 在哪吗?”
    下面那个人说:“是的。你在热气球里啊,盘旋在 30 英尺的空中”。
    热气球上的人说:“你一定是在 IT 部门做技术工作”。
    “没错”,地面上的人说到,“你是怎么知道的?”
    “呵呵”,热气球上的人说,“你告诉我的每件事在技术上都是对的,但对都没 有用”。
    地面上的人说,“你一定是管理层的人”。
    “没错”,热气球上的人说,“可是你是怎么知道的?”
    “呵呵”,地面上的那人说到,“你不知道你在哪里,你也不知道你要去哪,你 总希望我能帮你。你现在和我们刚见面时还在原来那个地方,但现在却是我 错了”。

    --------------------------------- 原创文章 By
    语音和语音合成
    ---------------------------------

  • 相关阅读:
    微信小程序——gulp处理文件
    小程序开发经验总结
    微信小程序入门之构建一个简单TODOS应用
    3元体验腾讯云小程序后端解决方案
    C++笔记:面向对象编程(Handle类)
    你真的知道你看到的UTF-8字符是什么吗?
    Unity3D游戏开发之在Unity3D中视频播放功能的实现
    vb.net机房收费系统——存储过程
    Oracle基础学习4--Oracle权限传递
    我与京东的那些事儿
  • 原文地址:https://www.cnblogs.com/jiangu66/p/3153318.html
Copyright © 2011-2022 走看看