zoukankan      html  css  js  c++  java
  • 微软牛津计划-语音转文本-文本转语音代码和实现

    官方演示地址:

    https://www.azure.cn/projectoxford/demo/speech#recognition

    参考资料:https://msdn.microsoft.com/en-us/library/mt422983.aspx

    1、需要先从官方申请订阅key,

    https://www.azure.cn/projectoxford/subscription

    注册后会得到主密钥和从密钥共 2 个密钥,都需要记下来。

    2、语音转文本核心代码rest api:https://oxfordportal.blob.core.windows.net/speech/doc/recognition/Program.cs

    /**
    Copyright (c) Microsoft Corporation
    All rights reserved. 
    MIT License
    Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the ""Software""), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
    The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
    THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
    **/
    using System;
    using System.Collections.Generic;
    using System.Linq;
    using System.Text;
    using System.Threading.Tasks;
    using System.Net;
    using System.IO;
    using System.Runtime.Serialization.Json;
    using System.Runtime.Serialization;
    using System.Web;
    using System.ServiceModel.Channels;
    using System.ServiceModel;
    using System.Threading;
    
    namespace SpeechSample
    {
        /*
         * Data contract describing the JSON object that the token request is
         * deserialized into. Property names deliberately mirror the JSON member
         * names; the wire name is also spelled out on each DataMember attribute.
         */
        [DataContract]
        public class AccessTokenInfo
        {
            // The token string; callers send it as "Bearer <access_token>".
            [DataMember(Name = "access_token")]
            public string access_token { get; set; }

            // Token type as reported by the service.
            [DataMember(Name = "token_type")]
            public string token_type { get; set; }

            // Lifetime as reported by the service (kept as a string; unit not
            // visible here - presumably seconds, TODO confirm).
            [DataMember(Name = "expires_in")]
            public string expires_in { get; set; }

            // Scope the token was issued for.
            [DataMember(Name = "scope")]
            public string scope { get; set; }
        }
    
        /*
         * This class demonstrates how to get a valid O-auth token.
         *
         * The constructor requests a token synchronously from AccessUri and then
         * arms a one-shot timer that re-requests the token every
         * RefreshTokenDuration minutes. Call Dispose() to stop the renewal timer.
         */
        public class Authentication : IDisposable
        {
            public static readonly string AccessUri = "https://oxford-speech.cloudapp.net/token/issueToken";
            private readonly string clientId;
            private readonly string clientSecret;
            private readonly string request;
            // volatile: written by the timer callback thread, read by GetAccessToken() callers.
            private volatile AccessTokenInfo token;
            private readonly Timer accessTokenRenewer;
            // Set by Dispose() so that a callback already in flight does not re-arm the timer.
            private volatile bool disposed;

            //Access token expires every 10 minutes. Renew it every 9 minutes only.
            private const int RefreshTokenDuration = 9;

            /*
             * Builds the form-encoded token request, fetches the first token
             * (blocking network call) and schedules the first renewal.
             *
             * clientId     - client id sent as client_id.
             * clientSecret - subscription key sent as client_secret.
             *
             * Any exception raised by the HTTP request propagates to the caller.
             */
            public Authentication(string clientId, string clientSecret)
            {
                this.clientId = clientId;
                this.clientSecret = clientSecret;

                /*
                 * If clientid or client secret has special characters, encode before sending request
                 */
                this.request = string.Format("grant_type=client_credentials&client_id={0}&client_secret={1}&scope={2}",
                                              HttpUtility.UrlEncode(clientId),
                                              HttpUtility.UrlEncode(clientSecret),
                                              HttpUtility.UrlEncode("https://speech.platform.bing.com"));

                this.token = HttpPost(AccessUri, this.request);

                // One-shot timer (period = -1 ms); the callback re-arms it after each renewal attempt.
                accessTokenRenewer = new Timer(new TimerCallback(OnTokenExpiredCallback),
                                               this,
                                               TimeSpan.FromMinutes(RefreshTokenDuration),
                                               TimeSpan.FromMilliseconds(-1));
            }

            /*
             * Returns the most recently obtained token.
             */
            public AccessTokenInfo GetAccessToken()
            {
                return this.token;
            }

            /*
             * Stops the background renewal timer. The last token stays readable
             * through GetAccessToken() but is no longer refreshed.
             */
            public void Dispose()
            {
                this.disposed = true;
                this.accessTokenRenewer.Dispose();
            }

            /*
             * Requests a fresh token and publishes it through the volatile field.
             */
            private void RenewAccessToken()
            {
                AccessTokenInfo newAccessToken = HttpPost(AccessUri, this.request);
                this.token = newAccessToken;
                // The token value itself is intentionally not logged: it is a bearer credential.
                Console.WriteLine(string.Format("Renewed token for user: {0}", this.clientId));
            }

            /*
             * Timer callback: attempts a renewal, reports failures on the console,
             * and re-arms the timer unless the instance has been disposed.
             */
            private void OnTokenExpiredCallback(object stateInfo)
            {
                if (this.disposed)
                {
                    return;
                }

                try
                {
                    RenewAccessToken();
                }
                catch (Exception ex)
                {
                    Console.WriteLine(string.Format("Failed renewing access token. Details: {0}", ex.Message));
                }
                finally
                {
                    if (!this.disposed)
                    {
                        try
                        {
                            accessTokenRenewer.Change(TimeSpan.FromMinutes(RefreshTokenDuration), TimeSpan.FromMilliseconds(-1));
                        }
                        catch (ObjectDisposedException)
                        {
                            // Dispose() raced with this callback; there is nothing left to reschedule.
                        }
                        catch (Exception ex)
                        {
                            Console.WriteLine(string.Format("Failed to reschedule the timer to renew access token. Details: {0}", ex.Message));
                        }
                    }
                }
            }

            /*
             * POSTs requestDetails as application/x-www-form-urlencoded to accessUri
             * and deserializes the JSON response into an AccessTokenInfo.
             */
            private AccessTokenInfo HttpPost(string accessUri, string requestDetails)
            {
                //Prepare OAuth request 
                WebRequest webRequest = WebRequest.Create(accessUri);
                webRequest.ContentType = "application/x-www-form-urlencoded";
                webRequest.Method = "POST";
                // The body is already URL-encoded by the constructor, so ASCII is sufficient.
                byte[] bytes = Encoding.ASCII.GetBytes(requestDetails);
                webRequest.ContentLength = bytes.Length;
                using (Stream outputStream = webRequest.GetRequestStream())
                {
                    outputStream.Write(bytes, 0, bytes.Length);
                }
                using (WebResponse webResponse = webRequest.GetResponse())
                using (Stream responseStream = webResponse.GetResponseStream())
                {
                    DataContractJsonSerializer serializer = new DataContractJsonSerializer(typeof(AccessTokenInfo));
                    //Get deserialized object from JSON stream
                    return (AccessTokenInfo)serializer.ReadObject(responseStream);
                }
            }
        }
    
        /*
         * This sample program shows how to send an speech recognition request to the 
         * Microsoft Speech service.      
         */
        class Program
        {
            /*
             * Entry point.
             *   args[0] - recognition endpoint, e.g. https://speech.platform.bing.com/recognize
             *   args[1] - path of the input wav file to upload
             * Prints the token, the request URI, the HTTP status code and the
             * response body to the console. Failures while sending the request
             * are caught and printed.
             */
            static void Main(string[] args)
            {
                // Note: this argument check can be commented out and the values assigned directly below.
                if ((args.Length < 2) || (string.IsNullOrWhiteSpace(args[0])))
                {
                    Console.WriteLine("Arg[0]: Specify the endpoint to hit https://speech.platform.bing.com/recognize");
                    Console.WriteLine("Arg[1]: Specify a valid input wav file.");
                    return;
                }

                // Note: Sign up at http://www.projectoxford.ai to get a subscription key. Search for Speech APIs from Azure Marketplace.
                // Use the subscription key as Client secret below.
                Authentication auth = new Authentication("Come up with a short ClientId", "Client Secret");

                // The endpoint URL can be hard-coded here instead of being read from args[0].
                string requestUri = args[0].Trim(new char[] { '/', '?' });

                /* URI Params. Refer to the README file for more information. */
                requestUri += @"?scenarios=smd";                                  // websearch is the other main option.
                requestUri += @"&appid=D4D52672-91D7-4C74-8AD8-42B1D98141A5";     // You must use this ID.
                requestUri += @"&locale=en-US";                                   // We support several other languages. Refer to README file.
                requestUri += @"&device.os=wp7";
                requestUri += @"&version=3.0";
                requestUri += @"&format=json";
                requestUri += @"&instanceid=565D69FF-E928-4B7E-87DA-9A750B96D9E3";
                requestUri += @"&requestid=" + Guid.NewGuid().ToString();

                string host = @"speech.platform.bing.com";
                string contentType = @"audio/wav; codec=""audio/pcm""; samplerate=16000";

                /*
                 * Input your own audio file or use read from a microphone stream directly.
                 */
                string audioFile = args[1]; // Set this to a speech audio file with the .wav extension.
                string responseString;

                try
                {
                    AccessTokenInfo token = auth.GetAccessToken();
                    Console.WriteLine("Token: {0} ", token.access_token);

                    /*
                     * Create a header with the access_token property of the returned token
                     */
                    string headerValue = "Bearer " + token.access_token;

                    Console.WriteLine("Request Uri: " + requestUri + Environment.NewLine);

                    HttpWebRequest request = (HttpWebRequest)HttpWebRequest.Create(requestUri);
                    request.SendChunked = true;
                    request.Accept = @"application/json;text/xml";
                    request.Method = "POST";
                    request.ProtocolVersion = HttpVersion.Version11;
                    request.Host = host;
                    request.ContentType = contentType;
                    request.Headers["Authorization"] = headerValue;

                    using (FileStream fs = new FileStream(audioFile, FileMode.Open, FileAccess.Read))
                    {
                        /*
                         * Open a request stream and write 1024 byte chunks in the stream one at a time.
                         */
                        using (Stream requestStream = request.GetRequestStream())
                        {
                            /*
                             * Read up to 1024 raw bytes at a time from the input audio file.
                             * A fixed-size buffer is used so that files larger than
                             * int.MaxValue bytes do not overflow a length cast.
                             */
                            byte[] buffer = new byte[1024];
                            int bytesRead;
                            while ((bytesRead = fs.Read(buffer, 0, buffer.Length)) != 0)
                            {
                                requestStream.Write(buffer, 0, bytesRead);
                            }

                            // Flush
                            // The official sample leaves the Flush call below enabled; the blog author
                            // reports that it raised an error and stopped the program from running,
                            // so it is kept commented out here.
                            // requestStream.Flush();
                        }

                        /*
                         * Get the response from the service.
                         */
                        Console.WriteLine("Response:");
                        using (WebResponse response = request.GetResponse())
                        {
                            Console.WriteLine(((HttpWebResponse)response).StatusCode);

                            using (StreamReader sr = new StreamReader(response.GetResponseStream()))
                            {
                                responseString = sr.ReadToEnd();
                            }

                            Console.WriteLine(responseString);
                        }
                    }
                }
                catch (Exception ex)
                {
                    Console.WriteLine(ex.ToString());
                    Console.WriteLine(ex.Message);
                }
            }
        }
    }

    3、文本转语音代码:先进行用户 token 认证(获取访问令牌)。

    //
    // Copyright (c) Microsoft. All rights reserved.
    // Licensed under the MIT license.
    //
    // Project Oxford: http://ProjectOxford.ai
    //
    // ProjectOxford SDK Github:
    // https://github.com/Microsoft/ProjectOxfordSDK-Windows
    //
    // Copyright (c) Microsoft Corporation
    // All rights reserved.
    //
    // MIT License:
    // Permission is hereby granted, free of charge, to any person obtaining
    // a copy of this software and associated documentation files (the
    // "Software"), to deal in the Software without restriction, including
    // without limitation the rights to use, copy, modify, merge, publish,
    // distribute, sublicense, and/or sell copies of the Software, and to
    // permit persons to whom the Software is furnished to do so, subject to
    // the following conditions:
    //
    // The above copyright notice and this permission notice shall be
    // included in all copies or substantial portions of the Software.
    //
    // THE SOFTWARE IS PROVIDED ""AS IS"", WITHOUT WARRANTY OF ANY KIND,
    // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
    // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
    // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
    // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
    // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
    // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
    //
    
    using System;
    using System.IO;
    using System.Collections.Generic;
    using System.Linq;
    using System.Text;
    using System.Threading;
    using System.Web;
    using System.Net;
    using System.Net.Http;
    using System.Media;
    using System.Threading.Tasks;
    using System.Runtime.Serialization;
    using System.Runtime.Serialization.Json;
    
    namespace QueuingMachine
    {
        /// <summary>
        /// Data contract that the JSON token response is deserialized into.
        /// Property names mirror the JSON member names; the wire name is also
        /// stated explicitly on each <see cref="DataMemberAttribute"/>.
        /// </summary>
        [DataContract]
        public class AccessTokenInfo
        {
            /// <summary>The token string, sent by callers as <c>Bearer &lt;access_token&gt;</c>.</summary>
            [DataMember(Name = "access_token")]
            public string access_token { get; set; }

            /// <summary>Token type as reported by the service.</summary>
            [DataMember(Name = "token_type")]
            public string token_type { get; set; }

            /// <summary>
            /// Lifetime as reported by the service, kept as a string
            /// (unit not visible here; presumably seconds, to be confirmed).
            /// </summary>
            [DataMember(Name = "expires_in")]
            public string expires_in { get; set; }

            /// <summary>Scope the token was issued for.</summary>
            [DataMember(Name = "scope")]
            public string scope { get; set; }
        }
    
        /// <summary>
        /// This class demonstrates how to get a valid O-auth token.
        /// The constructor requests a token synchronously from <see cref="AccessUri"/>
        /// and then arms a one-shot timer that re-requests the token every
        /// <c>RefreshTokenDuration</c> minutes. Call <see cref="Dispose"/> to stop
        /// the renewal timer.
        /// </summary>
        public class Authentication : IDisposable
        {
            public static readonly string AccessUri = "https://oxford-speech.cloudapp.net/token/issueToken";
            private readonly string clientId;
            private readonly string clientSecret;
            private readonly string requestBody;
            // volatile: written by the timer callback thread, read by GetAccessToken() callers.
            private volatile AccessTokenInfo token;
            private readonly Timer accessTokenRenewer;
            // Set by Dispose() so that a callback already in flight does not re-arm the timer.
            private volatile bool disposed;

            //Access token expires every 10 minutes. Renew it every 9 minutes only.
            private const int RefreshTokenDuration = 9;

            /// <summary>
            /// Builds the form-encoded token request, fetches the first token
            /// (blocking network call) and schedules the first renewal.
            /// Any exception raised by the HTTP request propagates to the caller.
            /// </summary>
            /// <param name="clientId">Client id sent as <c>client_id</c>.</param>
            /// <param name="clientSecret">Subscription key sent as <c>client_secret</c>.</param>
            public Authentication(string clientId, string clientSecret)
            {
                this.clientId = clientId;
                this.clientSecret = clientSecret;

                // If clientid or client secret has special characters, encode before sending request 
                this.requestBody = string.Format("grant_type=client_credentials&client_id={0}&client_secret={1}&scope={2}",
                                              HttpUtility.UrlEncode(clientId),
                                              HttpUtility.UrlEncode(clientSecret),
                                              HttpUtility.UrlEncode("https://speech.platform.bing.com"));

                this.token = PostHttp(AccessUri, this.requestBody);

                // One-shot timer (period = -1 ms); the callback re-arms it after each renewal attempt.
                accessTokenRenewer = new Timer(new TimerCallback(OnTokenExpiredCallback),
                                               this,
                                               TimeSpan.FromMinutes(RefreshTokenDuration),
                                               TimeSpan.FromMilliseconds(-1));
            }

            /// <summary>Returns the most recently obtained token.</summary>
            public AccessTokenInfo GetAccessToken()
            {
                return this.token;
            }

            /// <summary>
            /// Stops the background renewal timer. The last token stays readable through
            /// <see cref="GetAccessToken"/> but is no longer refreshed.
            /// </summary>
            public void Dispose()
            {
                this.disposed = true;
                this.accessTokenRenewer.Dispose();
            }

            /// <summary>Requests a fresh token and publishes it through the volatile field.</summary>
            private void RenewAccessToken()
            {
                AccessTokenInfo newAccessToken = PostHttp(AccessUri, this.requestBody);
                this.token = newAccessToken;
                // The token value itself is intentionally not logged: it is a bearer credential.
                Console.WriteLine(string.Format("Renewed token for user: {0}", this.clientId));
            }

            /// <summary>
            /// Timer callback: attempts a renewal, reports failures on the console,
            /// and re-arms the timer unless the instance has been disposed.
            /// </summary>
            private void OnTokenExpiredCallback(object stateInfo)
            {
                if (this.disposed)
                {
                    return;
                }

                try
                {
                    RenewAccessToken();
                }
                catch (Exception ex)
                {
                    Console.WriteLine(string.Format("Failed renewing access token. Details: {0}", ex.Message));
                }
                finally
                {
                    if (!this.disposed)
                    {
                        try
                        {
                            accessTokenRenewer.Change(TimeSpan.FromMinutes(RefreshTokenDuration), TimeSpan.FromMilliseconds(-1));
                        }
                        catch (ObjectDisposedException)
                        {
                            // Dispose() raced with this callback; there is nothing left to reschedule.
                        }
                        catch (Exception ex)
                        {
                            Console.WriteLine(string.Format("Failed to reschedule the timer to renew access token. Details: {0}", ex.Message));
                        }
                    }
                }
            }

            /// <summary>
            /// POSTs <paramref name="requestDetails"/> as <c>application/x-www-form-urlencoded</c>
            /// to <paramref name="accessUri"/> and deserializes the JSON response.
            /// </summary>
            private AccessTokenInfo PostHttp(string accessUri, string requestDetails)
            {
                //Prepare OAuth request 
                WebRequest webRequest = WebRequest.Create(accessUri);
                webRequest.ContentType = "application/x-www-form-urlencoded";
                webRequest.Method = "POST";
                // The body is already URL-encoded by the constructor, so ASCII is sufficient.
                byte[] bytes = Encoding.ASCII.GetBytes(requestDetails);
                webRequest.ContentLength = bytes.Length;
                using (Stream outputStream = webRequest.GetRequestStream())
                {
                    outputStream.Write(bytes, 0, bytes.Length);
                }
                using (WebResponse webResponse = webRequest.GetResponse())
                using (Stream responseStream = webResponse.GetResponseStream())
                {
                    DataContractJsonSerializer serializer = new DataContractJsonSerializer(typeof(AccessTokenInfo));
                    //Get deserialized object from JSON stream
                    return (AccessTokenInfo)serializer.ReadObject(responseStream);
                }
            }
        }
    }

    4、文本转语音核心代码(AudioFormat 枚举与 TtsService 类):

    //
    // Copyright (c) Microsoft. All rights reserved.
    // Licensed under the MIT license.
    //
    // Project Oxford: http://ProjectOxford.ai
    //
    // ProjectOxford SDK Github:
    // https://github.com/Microsoft/ProjectOxfordSDK-Windows
    //
    // Copyright (c) Microsoft Corporation
    // All rights reserved.
    //
    // MIT License:
    // Permission is hereby granted, free of charge, to any person obtaining
    // a copy of this software and associated documentation files (the
    // "Software"), to deal in the Software without restriction, including
    // without limitation the rights to use, copy, modify, merge, publish,
    // distribute, sublicense, and/or sell copies of the Software, and to
    // permit persons to whom the Software is furnished to do so, subject to
    // the following conditions:
    //
    // The above copyright notice and this permission notice shall be
    // included in all copies or substantial portions of the Software.
    //
    // THE SOFTWARE IS PROVIDED ""AS IS"", WITHOUT WARRANTY OF ANY KIND,
    // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
    // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
    // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
    // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
    // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
    // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
    //
    
    using System;
    using System.Collections.Generic;
    using System.Linq;
    using System.Text;
    using System.Threading.Tasks;
    using System.Net;
    using System.IO;
    
    namespace QueuingMachine
    {
        /// <summary>
        /// Audio output formats a caller can request from the TTS service wrapper.
        /// Numeric values are spelled out and match the implicit declaration order.
        /// </summary>
        public enum AudioFormat
        {
            /// <summary>PCM audio in a WAV container.</summary>
            Wave = 0,

            /// <summary>TTS SILK audio.</summary>
            Silk = 1,

            /// <summary>MP3 audio.</summary>
            Mp3 = 2
        }
    
        /// <summary>
        /// Minimal text-to-speech client: wraps the text in SSML, POSTs it to the
        /// synthesize endpoint with a bearer token, and returns the raw audio bytes.
        /// </summary>
        public class TtsService
        {

            // Note: Sign up at http://www.projectoxford.ai for the client credentials.
            // NOTE(review): these credentials are hard-coded in source; consider loading them from
            // configuration and rotating the keys. Also note that this static initializer performs
            // a network call the first time the type is used.
            private static Authentication auth = new Authentication("45be5416456e48f7bbdb3036a12173d4", "e7b3a193cbfc4724b113e1345b38cfdc");

            // Bounds applied to the caller-supplied prosody rate.
            private const float MinProsodyRate = 0.1f;
            private const float MaxProsodyRate = 2.0f;

            /// <summary>
            /// Synthesizes <paramref name="text"/> and returns the audio returned by the service.
            /// </summary>
            /// <param name="lang">Value written to the SSML <c>xml:lang</c> attribute.</param>
            /// <param name="voiceName">Value written to the SSML <c>voice name</c> attribute (inserted as-is, not escaped).</param>
            /// <param name="format">
            /// Requested output format. <see cref="AudioFormat.Silk"/> selects the SILK output;
            /// every other value selects 16 kHz 16-bit mono PCM in a RIFF container.
            /// </param>
            /// <param name="text">Plain text to speak; XML special characters are escaped.</param>
            /// <param name="prosodyRate">Speaking rate, clamped to the range 0.1 - 2.0.</param>
            /// <returns>The complete response body as a byte array.</returns>
            /// <exception cref="ArgumentNullException"><paramref name="text"/> is null.</exception>
            public static byte[] TtsAudioOutput(string lang, string voiceName, AudioFormat format, string text, float prosodyRate = 1.0f)
            {
                if (text == null)
                {
                    throw new ArgumentNullException("text");
                }

                AccessTokenInfo token = auth.GetAccessToken();
                string uri = "https://speech.platform.bing.com/synthesize";

                HttpWebRequest webRequest = (HttpWebRequest)WebRequest.Create(uri);

                webRequest.ContentType = "application/ssml+xml";
                webRequest.UserAgent = "QueuingMachine";
                // NOTE(review): AudioFormat.Mp3 has no mapping here and falls back to the PCM RIFF
                // format - confirm whether the service offers an MP3 output format.
                string formatName = (format == AudioFormat.Silk) ? "ssml-16khz-16bit-mono-silk" : "riff-16khz-16bit-mono-pcm";
                webRequest.Headers.Add("X-MICROSOFT-OutputFormat", formatName);
                webRequest.Headers.Add("X-Search-AppId", "07D3234E49CE426DAA29772419F436CA");
                webRequest.Headers.Add("X-Search-ClientID", "1ECFAE91408841A480F00935DC390960");

                webRequest.Headers.Add("Authorization", "Bearer " + token.access_token);
                webRequest.Method = "POST";

                string body = BuildSsml(lang, voiceName, text, prosodyRate);
                byte[] bytes = Encoding.UTF8.GetBytes(body);
                webRequest.ContentLength = bytes.Length;
                using (Stream outputStream = webRequest.GetRequestStream())
                {
                    outputStream.Write(bytes, 0, bytes.Length);
                }

                // Dispose the response as well as its stream so the connection is released.
                using (WebResponse webResponse = webRequest.GetResponse())
                using (Stream stream = webResponse.GetResponseStream())
                using (MemoryStream ms = new MemoryStream())
                {
                    stream.CopyTo(ms);
                    return ms.ToArray();
                }
            }

            /// <summary>
            /// Builds the SSML request body: escapes the text, clamps the rate and
            /// formats the template with the invariant culture.
            /// </summary>
            private static string BuildSsml(string lang, string voiceName, string text, float prosodyRate)
            {
                // {0} = language, {1} = opening voice tag, {2} = prosody rate, {3} = escaped text.
                const string bodyTemplate = "<speak version=\"1.0\" xmlns=\"http://www.w3.org/2001/10/synthesis\" xmlns:mstts=\"http://www.w3.org/2001/mstts\" xmlns:emo=\"http://www.w3.org/2009/10/emotionml\" xml:lang=\"{0}\">{1}<emo:emotion><emo:category name=\"CALM\" value=\"1.0\"/><prosody rate=\"{2:F1}\">{3}</prosody></emo:emotion></voice></speak>";
                string voiceTag = "<voice name=\"" + voiceName + "\">";

                // '&' must be replaced first so the entities produced below are not escaped again.
                string encodedXml = text.Replace("&", "&amp;")
                                        .Replace("<", "&lt;")
                                        .Replace(">", "&gt;")
                                        .Replace("\"", "&quot;")
                                        .Replace("'", "&apos;");

                float clampedRate = Math.Max(MinProsodyRate, Math.Min(MaxProsodyRate, prosodyRate));

                // Invariant culture keeps the decimal separator a '.' regardless of the machine locale.
                return string.Format(System.Globalization.CultureInfo.InvariantCulture,
                                     bodyTemplate, lang, voiceTag, clampedRate, encodedXml);
            }
        }
    }

    实例代码:

    语音转文本

    html5版本语音转文本

  • 相关阅读:
    做题经验
    4906 删数问题
    1225 八数码难题
    1005 生日礼物
    1004 四子连棋 未完成
    1008 选数 2002年NOIP全国联赛普及组
    1068 乌龟棋 2010年NOIP全国联赛提高组
    2292 图灵机游戏
    实战数据结构(9)_单链表实现多项式的相乘
    最近招两个兼职的活(PHP和JSP)
  • 原文地址:https://www.cnblogs.com/zangdalei/p/5329383.html
Copyright © 2011-2022 走看看