官方演示地址:
https://www.azure.cn/projectoxford/demo/speech#recognition
参考资料:https://msdn.microsoft.com/en-us/library/mt422983.aspx
1、需要先从官方申请订阅key,
https://www.azure.cn/projectoxford/subscription
注册后会得到主密钥和从密钥共 2 个密钥,都需要记下来。
2、语音转文本核心代码rest api:https://oxfordportal.blob.core.windows.net/speech/doc/recognition/Program.cs
/**
Copyright (c) Microsoft Corporation
All rights reserved.

MIT License

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
documentation files (the "Software"), to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
to permit persons to whom the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all copies or substantial portions of
the Software.

THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
**/
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.Net;
using System.IO;
using System.Runtime.Serialization.Json;
using System.Runtime.Serialization;
using System.Web;
using System.ServiceModel.Channels;
using System.ServiceModel;
using System.Threading;

namespace SpeechSample
{
    /// <summary>
    /// Data contract for the JSON document returned by the token endpoint.
    /// Property names are lower-case so they line up with the JSON member names.
    /// </summary>
    [DataContract]
    public class AccessTokenInfo
    {
        [DataMember]
        public string access_token { get; set; }

        [DataMember]
        public string token_type { get; set; }

        [DataMember]
        public string expires_in { get; set; }

        [DataMember]
        public string scope { get; set; }
    }

    /// <summary>
    /// This class demonstrates how to get a valid O-auth token. The token is requested once in the
    /// constructor and then re-requested by a one-shot timer that re-arms itself after every attempt.
    /// </summary>
    public class Authentication
    {
        public static readonly string AccessUri = "https://oxford-speech.cloudapp.net/token/issueToken";

        // Access token expires every 10 minutes. Renew it every 9 minutes only.
        private const int RefreshTokenDuration = 9;

        private string clientId;
        private string clientSecret;
        private string request;
        private AccessTokenInfo token;
        private Timer accessTokenRenewer;

        /// <summary>
        /// Builds the client-credentials request body, fetches the first token synchronously and
        /// schedules the first renewal.
        /// </summary>
        /// <param name="clientId">Client id sent as <c>client_id</c>.</param>
        /// <param name="clientSecret">Client secret sent as <c>client_secret</c>.</param>
        public Authentication(string clientId, string clientSecret)
        {
            this.clientId = clientId;
            this.clientSecret = clientSecret;

            // If clientid or client secret has special characters, encode before sending request.
            this.request = string.Format(
                "grant_type=client_credentials&client_id={0}&client_secret={1}&scope={2}",
                HttpUtility.UrlEncode(clientId),
                HttpUtility.UrlEncode(clientSecret),
                HttpUtility.UrlEncode("https://speech.platform.bing.com"));

            this.token = HttpPost(AccessUri, this.request);

            // Renew the token every specified minutes. The period of -1 ms disables periodic
            // signalling; the callback re-arms the timer itself.
            accessTokenRenewer = new Timer(
                new TimerCallback(OnTokenExpiredCallback),
                this,
                TimeSpan.FromMinutes(RefreshTokenDuration),
                TimeSpan.FromMilliseconds(-1));
        }

        /// <summary>Returns the most recently obtained token.</summary>
        public AccessTokenInfo GetAccessToken()
        {
            return this.token;
        }

        /// <summary>Requests a new token and replaces the stored one.</summary>
        private void RenewAccessToken()
        {
            AccessTokenInfo newAccessToken = HttpPost(AccessUri, this.request);

            // Swap the new token with the old one.
            // Note: the swap is thread unsafe.
            this.token = newAccessToken;
            Console.WriteLine(string.Format("Renewed token for user: {0} is: {1}", this.clientId, this.token.access_token));
        }

        /// <summary>
        /// Timer callback: attempts a renewal, logs any failure, and always re-arms the timer so
        /// that a failed attempt is retried after the next interval.
        /// </summary>
        private void OnTokenExpiredCallback(object stateInfo)
        {
            try
            {
                RenewAccessToken();
            }
            catch (Exception ex)
            {
                Console.WriteLine(string.Format("Failed renewing access token. Details: {0}", ex.Message));
            }
            finally
            {
                try
                {
                    accessTokenRenewer.Change(TimeSpan.FromMinutes(RefreshTokenDuration), TimeSpan.FromMilliseconds(-1));
                }
                catch (Exception ex)
                {
                    Console.WriteLine(string.Format("Failed to reschedule the timer to renew access token. Details: {0}", ex.Message));
                }
            }
        }

        /// <summary>
        /// POSTs the form-encoded <paramref name="requestDetails"/> to <paramref name="accessUri"/>
        /// and deserializes the JSON response into an <see cref="AccessTokenInfo"/>.
        /// </summary>
        private AccessTokenInfo HttpPost(string accessUri, string requestDetails)
        {
            // Prepare OAuth request.
            WebRequest webRequest = WebRequest.Create(accessUri);
            webRequest.ContentType = "application/x-www-form-urlencoded";
            webRequest.Method = "POST";

            byte[] bytes = Encoding.ASCII.GetBytes(requestDetails);
            webRequest.ContentLength = bytes.Length;

            using (Stream outputStream = webRequest.GetRequestStream())
            {
                outputStream.Write(bytes, 0, bytes.Length);
            }

            using (WebResponse webResponse = webRequest.GetResponse())
            {
                DataContractJsonSerializer serializer = new DataContractJsonSerializer(typeof(AccessTokenInfo));

                // Get deserialized object from JSON stream.
                AccessTokenInfo token = (AccessTokenInfo)serializer.ReadObject(webResponse.GetResponseStream());
                return token;
            }
        }
    }

    /// <summary>
    /// This sample program shows how to send a speech recognition request to the
    /// Microsoft Speech service.
    /// </summary>
    class Program
    {
        // Size of the chunks in which the audio file is copied into the request stream.
        private const int UploadChunkSize = 1024;

        /// <summary>
        /// Entry point. <c>args[0]</c> is the recognition endpoint and <c>args[1]</c> is the path of the
        /// wav file to upload. The status code and response body are written to the console.
        /// </summary>
        static void Main(string[] args)
        {
            // Note (from the article): this argument check can be commented out when the endpoint
            // and the audio file are assigned directly further down.
            if ((args.Length < 2) || string.IsNullOrWhiteSpace(args[0]))
            {
                Console.WriteLine("Arg[0]: Specify the endpoint to hit https://speech.platform.bing.com/recognize");
                Console.WriteLine("Arg[1]: Specify a valid input wav file.");
                return;
            }

            // Note: Sign up at http://www.projectoxford.ai to get a subscription key.
            // Search for Speech APIs from Azure Marketplace.
            // Use the subscription key as Client secret below.
            Authentication auth = new Authentication("Come up with a short ClientId", "Client Secret");

            // The endpoint URL is taken directly from the first argument.
            string requestUri = args[0].Trim(new char[] { '/', '?' });

            /* URI Params. Refer to the README file for more information. */
            requestUri += @"?scenarios=smd";                                  // websearch is the other main option.
            requestUri += @"&appid=D4D52672-91D7-4C74-8AD8-42B1D98141A5";     // You must use this ID.
            requestUri += @"&locale=en-US";                                   // We support several other languages. Refer to README file.
            requestUri += @"&device.os=wp7";
            requestUri += @"&version=3.0";
            requestUri += @"&format=json";
            requestUri += @"&instanceid=565D69FF-E928-4B7E-87DA-9A750B96D9E3";
            requestUri += @"&requestid=" + Guid.NewGuid().ToString();

            string host = @"speech.platform.bing.com";
            string contentType = @"audio/wav; codec=""audio/pcm""; samplerate=16000";

            // Input your own audio file or read from a microphone stream directly.
            // The file passed here should be a speech recording with the .wav extension.
            string audioFile = args[1];

            try
            {
                AccessTokenInfo token = auth.GetAccessToken();
                Console.WriteLine("Token: {0} ", token.access_token);

                // Create a header with the access_token property of the returned token.
                string headerValue = "Bearer " + token.access_token;

                Console.WriteLine("Request Uri: " + requestUri + Environment.NewLine);

                HttpWebRequest request = (HttpWebRequest)WebRequest.Create(requestUri);
                request.SendChunked = true;
                request.Accept = @"application/json;text/xml";
                request.Method = "POST";
                request.ProtocolVersion = HttpVersion.Version11;
                request.Host = host;
                request.ContentType = contentType;
                request.Headers["Authorization"] = headerValue;

                using (FileStream fs = new FileStream(audioFile, FileMode.Open, FileAccess.Read))
                {
                    // Open a request stream and write the audio into it 1024 bytes at a time.
                    // A fixed-size copy buffer is used instead of one derived from (int)fs.Length,
                    // which overflowed for files of 2 GiB or more.
                    using (Stream requestStream = request.GetRequestStream())
                    {
                        fs.CopyTo(requestStream, UploadChunkSize);

                        // The official sample calls requestStream.Flush() here. The article's author
                        // reports that the call fails at run time, so it is left out; the stream is
                        // closed by this using block before the response is requested.
                    }

                    // Get the response from the service.
                    Console.WriteLine("Response:");
                    using (WebResponse response = request.GetResponse())
                    {
                        Console.WriteLine(((HttpWebResponse)response).StatusCode);

                        string responseString;
                        using (StreamReader sr = new StreamReader(response.GetResponseStream()))
                        {
                            responseString = sr.ReadToEnd();
                        }

                        Console.WriteLine(responseString);
                    }
                }
            }
            catch (Exception ex)
            {
                Console.WriteLine(ex.ToString());
                Console.WriteLine(ex.Message);
            }
        }
    }
}
3、文本转语音代码:先进行用户 token 认证(Authentication 类):
//
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license.
//
// Project Oxford: http://ProjectOxford.ai
//
// ProjectOxford SDK Github:
// https://github.com/Microsoft/ProjectOxfordSDK-Windows
//
// Copyright (c) Microsoft Corporation
// All rights reserved.
//
// MIT License:
// Permission is hereby granted, free of charge, to any person obtaining
// a copy of this software and associated documentation files (the
// "Software"), to deal in the Software without restriction, including
// without limitation the rights to use, copy, modify, merge, publish,
// distribute, sublicense, and/or sell copies of the Software, and to
// permit persons to whom the Software is furnished to do so, subject to
// the following conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
//
using System;
using System.IO;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading;
using System.Web;
using System.Net;
using System.Net.Http;
using System.Media;
using System.Threading.Tasks;
using System.Runtime.Serialization;
using System.Runtime.Serialization.Json;

namespace QueuingMachine
{
    /// <summary>
    /// Shape of the JSON payload returned by the token endpoint; member names mirror the JSON keys.
    /// </summary>
    [DataContract]
    public class AccessTokenInfo
    {
        [DataMember]
        public string access_token { get; set; }

        [DataMember]
        public string token_type { get; set; }

        [DataMember]
        public string expires_in { get; set; }

        [DataMember]
        public string scope { get; set; }
    }

    /// <summary>
    /// This class demonstrates how to get a valid O-auth token. A token is fetched when the object
    /// is constructed and then refreshed from a self-rearming one-shot timer.
    /// </summary>
    public class Authentication
    {
        public static readonly string AccessUri = "https://oxford-speech.cloudapp.net/token/issueToken";

        // Access token expires every 10 minutes. Renew it every 9 minutes only.
        private const int RefreshTokenDuration = 9;

        private string clientId;
        private string clientSecret;
        private string requestBody;
        private AccessTokenInfo token;
        private Timer accessTokenRenewer;

        /// <summary>
        /// Stores the credentials, composes the form-encoded token request, obtains the first token
        /// and arms the renewal timer.
        /// </summary>
        /// <param name="clientId">Value sent as <c>client_id</c>.</param>
        /// <param name="clientSecret">Value sent as <c>client_secret</c>.</param>
        public Authentication(string clientId, string clientSecret)
        {
            this.clientId = clientId;
            this.clientSecret = clientSecret;

            // Every component is URL-encoded so that special characters in the id or secret survive.
            string encodedId = HttpUtility.UrlEncode(clientId);
            string encodedSecret = HttpUtility.UrlEncode(clientSecret);
            string encodedScope = HttpUtility.UrlEncode("https://speech.platform.bing.com");
            requestBody = string.Format(
                "grant_type=client_credentials&client_id={0}&client_secret={1}&scope={2}",
                encodedId,
                encodedSecret,
                encodedScope);

            token = PostHttp(AccessUri, requestBody);

            // One-shot timer: a period of -1 ms means it fires once and must be re-armed by the callback.
            TimeSpan firstRenewal = TimeSpan.FromMinutes(RefreshTokenDuration);
            TimeSpan noRepeat = TimeSpan.FromMilliseconds(-1);
            accessTokenRenewer = new Timer(OnTokenExpiredCallback, this, firstRenewal, noRepeat);
        }

        /// <summary>Gets the token that is currently held.</summary>
        public AccessTokenInfo GetAccessToken()
        {
            return token;
        }

        /// <summary>Fetches a fresh token, stores it and logs the renewal.</summary>
        private void RenewAccessToken()
        {
            // Note: this replacement of the stored token is not synchronized with readers.
            token = PostHttp(AccessUri, requestBody);
            Console.WriteLine("Renewed token for user: {0} is: {1}", clientId, token.access_token);
        }

        /// <summary>
        /// Timer callback. Renewal errors are logged rather than propagated, and the timer is re-armed
        /// whether or not the renewal succeeded.
        /// </summary>
        private void OnTokenExpiredCallback(object stateInfo)
        {
            try
            {
                RenewAccessToken();
            }
            catch (Exception renewError)
            {
                Console.WriteLine("Failed renewing access token. Details: {0}", renewError.Message);
            }
            finally
            {
                try
                {
                    TimeSpan nextRenewal = TimeSpan.FromMinutes(RefreshTokenDuration);
                    TimeSpan noRepeat = TimeSpan.FromMilliseconds(-1);
                    accessTokenRenewer.Change(nextRenewal, noRepeat);
                }
                catch (Exception rescheduleError)
                {
                    Console.WriteLine("Failed to reschedule the timer to renew access token. Details: {0}", rescheduleError.Message);
                }
            }
        }

        /// <summary>
        /// Sends <paramref name="requestDetails"/> as a form-encoded POST to <paramref name="accessUri"/>
        /// and returns the JSON response deserialized as <see cref="AccessTokenInfo"/>.
        /// </summary>
        private AccessTokenInfo PostHttp(string accessUri, string requestDetails)
        {
            WebRequest tokenRequest = WebRequest.Create(accessUri);
            tokenRequest.Method = "POST";
            tokenRequest.ContentType = "application/x-www-form-urlencoded";

            byte[] payload = Encoding.ASCII.GetBytes(requestDetails);
            tokenRequest.ContentLength = payload.Length;

            using (Stream upload = tokenRequest.GetRequestStream())
            {
                upload.Write(payload, 0, payload.Length);
            }

            using (WebResponse reply = tokenRequest.GetResponse())
            {
                DataContractJsonSerializer jsonReader = new DataContractJsonSerializer(typeof(AccessTokenInfo));
                return (AccessTokenInfo)jsonReader.ReadObject(reply.GetResponseStream());
            }
        }
    }
}
4、文本转语音核心代码(TtsService 类):
//
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license.
//
// Project Oxford: http://ProjectOxford.ai
//
// ProjectOxford SDK Github:
// https://github.com/Microsoft/ProjectOxfordSDK-Windows
//
// Copyright (c) Microsoft Corporation
// All rights reserved.
//
// MIT License:
// Permission is hereby granted, free of charge, to any person obtaining
// a copy of this software and associated documentation files (the
// "Software"), to deal in the Software without restriction, including
// without limitation the rights to use, copy, modify, merge, publish,
// distribute, sublicense, and/or sell copies of the Software, and to
// permit persons to whom the Software is furnished to do so, subject to
// the following conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
//
using System;
using System.Collections.Generic;
using System.Globalization;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.Net;
using System.IO;

namespace QueuingMachine
{
    public enum AudioFormat
    {
        /// <summary>
        /// pcm wav
        /// </summary>
        Wave,

        /// <summary>
        /// tts silk
        /// </summary>
        Silk,

        /// <summary>
        /// mp3 format
        /// </summary>
        Mp3
    };

    /// <summary>
    /// Sends text wrapped in SSML to the speech synthesis endpoint and returns the audio bytes.
    /// </summary>
    public class TtsService
    {
        private const string SynthesizeUri = "https://speech.platform.bing.com/synthesize";

        // Bounds applied to the prosody rate before it is written into the SSML.
        private const float MinProsodyRate = 0.1f;
        private const float MaxProsodyRate = 2.0f;

        // {0} = xml:lang, {1} = opening <voice> tag, {2} = prosody rate, {3} = XML-escaped text.
        private const string BodyTemplate =
            "<speak version=\"1.0\" xmlns=\"http://www.w3.org/2001/10/synthesis\"" +
            " xmlns:mstts=\"http://www.w3.org/2001/mstts\"" +
            " xmlns:emo=\"http://www.w3.org/2009/10/emotionml\" xml:lang=\"{0}\">" +
            "{1}<emo:emotion><emo:category name=\"CALM\" value=\"1.0\"/>" +
            "<prosody rate=\"{2:F1}\">{3}</prosody></emo:emotion></voice></speak>";

        // Note: Sign up at http://www.projectoxford.ai for the client credentials.
        // NOTE(review): these credentials are hard-coded exactly as published in the article. Secrets
        // should not live in source; replace them with your own keys loaded from configuration.
        private static Authentication auth = new Authentication("45be5416456e48f7bbdb3036a12173d4", "e7b3a193cbfc4724b113e1345b38cfdc");

        /// <summary>
        /// Synthesizes <paramref name="text"/> and returns the raw response body.
        /// </summary>
        /// <param name="lang">Value written to the <c>xml:lang</c> attribute of the SSML root.</param>
        /// <param name="voiceName">Value written to the <c>name</c> attribute of the <c>voice</c> element.</param>
        /// <param name="format">
        /// <see cref="AudioFormat.Silk"/> requests <c>ssml-16khz-16bit-mono-silk</c>; every other value
        /// (including <see cref="AudioFormat.Mp3"/>) requests <c>riff-16khz-16bit-mono-pcm</c>.
        /// </param>
        /// <param name="text">Plain text to speak; XML special characters are escaped before sending.</param>
        /// <param name="prosodyRate">Speaking rate, clamped to the range 0.1 - 2.0.</param>
        /// <returns>The bytes of the HTTP response body.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="text"/> is null.</exception>
        public static byte[] TtsAudioOutput(string lang, string voiceName, AudioFormat format, string text, float prosodyRate = 1.0f)
        {
            if (text == null)
            {
                throw new ArgumentNullException("text");
            }

            AccessTokenInfo token = auth.GetAccessToken();

            HttpWebRequest webRequest = (HttpWebRequest)WebRequest.Create(SynthesizeUri);
            webRequest.ContentType = "application/ssml+xml";
            webRequest.UserAgent = "QueuingMachine";

            string formatName = (format == AudioFormat.Silk)
                ? "ssml-16khz-16bit-mono-silk"
                : "riff-16khz-16bit-mono-pcm";
            webRequest.Headers.Add("X-MICROSOFT-OutputFormat", formatName);
            webRequest.Headers.Add("X-Search-AppId", "07D3234E49CE426DAA29772419F436CA");
            webRequest.Headers.Add("X-Search-ClientID", "1ECFAE91408841A480F00935DC390960");
            webRequest.Headers.Add("Authorization", "Bearer " + token.access_token);
            webRequest.Method = "POST";

            if (prosodyRate < MinProsodyRate)
            {
                prosodyRate = MinProsodyRate;
            }
            else if (prosodyRate > MaxProsodyRate)
            {
                prosodyRate = MaxProsodyRate;
            }

            string voiceTag = "<voice name=\"" + voiceName + "\">";

            // Invariant culture keeps the rate as "1.0" even where the current culture would print "1,0".
            string body = string.Format(
                CultureInfo.InvariantCulture,
                BodyTemplate,
                lang,
                voiceTag,
                prosodyRate,
                EscapeXml(text));

            byte[] bytes = Encoding.UTF8.GetBytes(body);
            webRequest.ContentLength = bytes.Length;
            using (Stream outputStream = webRequest.GetRequestStream())
            {
                outputStream.Write(bytes, 0, bytes.Length);
            }

            // Dispose the response as well as its stream so the connection is released.
            using (WebResponse webResponse = webRequest.GetResponse())
            using (Stream stream = webResponse.GetResponseStream())
            using (MemoryStream ms = new MemoryStream())
            {
                stream.CopyTo(ms);
                return ms.ToArray();
            }
        }

        /// <summary>Replaces the five XML special characters with their predefined entities.</summary>
        private static string EscapeXml(string value)
        {
            // '&' must be replaced first so the entities produced below are not escaped again.
            return value
                .Replace("&", "&amp;")
                .Replace("<", "&lt;")
                .Replace(">", "&gt;")
                .Replace("\"", "&quot;")
                .Replace("'", "&apos;");
        }
    }
}
实例代码: