zoukankan      html  css  js  c++  java
  • 科大讯飞语音转文字以及中文分词的Java测试代码

    我录了一段音存储在这个test.m4a文件里,语音内容为"测试一下Netweaver对于并发请求的响应性能"。

    使用如下Java代码进行测试:

    
    package com.iflytek.msp.lfasr;
    
    import java.util.HashMap;
    import org.apache.log4j.Logger;
    import com.alibaba.fastjson.JSON;
    import com.iflytek.msp.cpdb.lfasr.client.LfasrClientImp;
    import com.iflytek.msp.cpdb.lfasr.exception.LfasrException;
    import com.iflytek.msp.cpdb.lfasr.model.LfasrType;
    import com.iflytek.msp.cpdb.lfasr.model.Message;
    import com.iflytek.msp.cpdb.lfasr.model.ProgressStatus;
    
    // SDK document: http://www.xfyun.cn/doccenter/lfasr#go_sdk_doc_v2
    /**
     * Command-line smoke test for the iFlytek LFASR (long-form speech recognition) SDK.
     *
     * <p>The program uploads one local audio file with word segmentation
     * ({@code has_participle=true}) requested, polls the task progress until the
     * service reports completion, and prints the recognition result to stdout.
     *
     * <p>SDK document: http://www.xfyun.cn/doccenter/lfasr#go_sdk_doc_v2
     */
    public class TestLfasr 
    {
    	// Original media path. Backslashes must be escaped: an unescaped "\t" in a
    	// Java string literal is a TAB character, not a path separator followed by 't'.
    	private static final String LOCAL_FILE = "c:\\temp\\test.m4a";
    
    	private static final LfasrType TYPE = LfasrType.LFASR_STANDARD_RECORDED_AUDIO;
    	
    	// Pause between two progress polls, in seconds.
    	private static final int SLEEP_SECONDS = 20;
    	
    	// Progress status value that this program treats as "task completed".
    	private static final int STATUS_COMPLETED = 9;
    	
    	/**
    	 * Runs the upload / poll / fetch-result sequence once.
    	 *
    	 * <p>The program stops early (after printing the error) when the client cannot
    	 * be initialised, when the upload does not yield a task id, or when the
    	 * polling thread is interrupted; continuing in those cases would only
    	 * dereference a null client or poll for an empty task id.
    	 *
    	 * @param args ignored
    	 */
    	public static void main(String[] args) {
    		LfasrClientImp lc;
    		try {
    			lc = LfasrClientImp.initLfasrClient();
    		} catch (LfasrException e) {
    			printFailure(e);
    			return;
    		}
    		
    		String task_id = upload(lc);
    		if (task_id == null) {
    			return;
    		}
    		
    		if (!waitForCompletion(lc, task_id)) {
    			return;
    		}
    		
    		printResult(lc, task_id);
    	}
    	
    	/**
    	 * Uploads {@link #LOCAL_FILE} and returns the task id reported by the service.
    	 *
    	 * @param lc initialised LFASR client
    	 * @return the task id, or {@code null} when the upload failed (the error has
    	 *         already been printed)
    	 */
    	private static String upload(LfasrClientImp lc) {
    		HashMap<String, String> params = new HashMap<>();
    		params.put("has_participle", "true");
    		try {
    			Message uploadMsg = lc.lfasrUpload(LOCAL_FILE, TYPE, params);
    			if (uploadMsg.getOk() == 0) {
    				String task_id = uploadMsg.getData();
    				System.out.println("task_id=" + task_id);
    				return task_id;
    			}
    			printFailure(uploadMsg);
    		} catch (LfasrException e) {
    			printFailure(e);
    		}
    		return null;
    	}
    	
    	/**
    	 * Polls the task progress every {@link #SLEEP_SECONDS} seconds until the
    	 * service reports {@link #STATUS_COMPLETED}.
    	 *
    	 * <p>A failed or incomplete progress response is printed and polling
    	 * continues, exactly as before; only completion or a thread interrupt ends
    	 * the loop.
    	 *
    	 * @param lc initialised LFASR client
    	 * @param task_id id returned by the upload call
    	 * @return {@code true} when the task completed, {@code false} when the
    	 *         waiting thread was interrupted
    	 */
    	private static boolean waitForCompletion(LfasrClientImp lc, String task_id) {
    		while (true) {
    			try {
    				Thread.sleep(SLEEP_SECONDS * 1000L);
    				System.out.println("waiting ...");
    			} catch (InterruptedException e) {
    				// Restore the interrupt flag and stop polling instead of silently
    				// ignoring the request to stop.
    				Thread.currentThread().interrupt();
    				return false;
    			}
    			try {
    				Message progressMsg = lc.lfasrGetProgress(task_id);
    				if (progressMsg.getOk() != 0) {
    					System.out.println("task was fail. task_id:" + task_id);
    					printFailure(progressMsg);
    					continue;
    				}
    				ProgressStatus progressStatus = JSON.parseObject(progressMsg.getData(), ProgressStatus.class);
    				if (progressStatus.getStatus() == STATUS_COMPLETED) {
    					System.out.println("task was completed. task_id:" + task_id);
    					return true;
    				}
    				System.out.println("task was incomplete. task_id:" + task_id + ", status:" + progressStatus.getDesc());
    			} catch (LfasrException e) {
    				printFailure(e);
    			}
    		}
    	}
    	
    	/**
    	 * Fetches the recognition result of a completed task and prints it once.
    	 *
    	 * @param lc initialised LFASR client
    	 * @param task_id id of the completed task
    	 */
    	private static void printResult(LfasrClientImp lc, String task_id) {
    		try {
    			Message resultMsg = lc.lfasrGetResult(task_id);
    			if (resultMsg.getOk() == 0) {
    				System.out.println(resultMsg.getData());
    			} else {
    				printFailure(resultMsg);
    			}
    		} catch (LfasrException e) {
    			printFailure(e);
    		}
    	}
    	
    	/** Prints the error code and failure text carried by a service message. */
    	private static void printFailure(Message msg) {
    		System.out.println("ecode=" + msg.getErr_no());
    		System.out.println("failed=" + msg.getFailed());
    	}
    	
    	/**
    	 * Prints the error carried by an SDK exception. The exception message is
    	 * parsed as a JSON-encoded {@link Message}, as the original code did.
    	 */
    	private static void printFailure(LfasrException e) {
    		Message msg = JSON.parseObject(e.getMessage(), Message.class);
    		if (msg == null) {
    			// NOTE(review): presumably only reached when the exception has no
    			// (or an empty) message; fall back to the raw text rather than
    			// dereferencing null.
    			System.out.println("failed=" + e.getMessage());
    			return;
    		}
    		printFailure(msg);
    	}
    }
    
    

    测试结果

    (1) 所有中文均能成功转成文字；但英文单词 Netweaver 的语音被误识别成了 "Net ball"。

    (2) 智能分词也能按照期望工作,比如“测试一下”成功地分词成了“测试”和“一下”。

    完整的Java项目在我的github上:https://github.com/i042416/voice2text
    要获取更多Jerry的原创技术文章,请关注公众号"汪子熙"或者扫描下面二维码:

  • 相关阅读:
    表的创建与管理
    以传值和传引用的方式传递参数 IN OUT NOCOPY
    PLSQL中的三种参数模式IN、OUT、IN OUT
    用python写GPU上的并行计算程序,有什么库或者编译器?
    cupy中tensor数据类型与numpy以及pytorch中相互转换
    c++ string split
    Java 读取大文件
    Linux 使用系列
    安装以太坊环境
    服务器排查问题相关命令
  • 原文地址:https://www.cnblogs.com/sap-jerry/p/8734695.html
Copyright © 2011-2022 走看看