1、官网SDK地址
https://help.aliyun.com/document_detail/84430.html?spm=a2c4g.11186623.6.581.73f65edftbwk9R
阿里的SDK比腾讯的好一些,可以直接从中央仓库下载,并且demo写得比较详细。在开始对接之前,我们需要仔细阅读接口说明文档,这样可以减少对接过程中踩的坑。下面开始对接:
2、 先搞jar包
<dependency> <groupId>com.alibaba.nls</groupId> <artifactId>nls-sdk-transcriber</artifactId> <version>2.1.6</version> </dependency>
3、注意事项
NlsClient使用了Netty框架,NlsClient对象的创建会消耗一定时间和资源,一经创建可以重复使用。建议调用程序将NlsClient的创建和关闭与程序本身的生命周期相结合。
SpeechTranscriber对象不可重复使用,一个识别任务对应一个SpeechTranscriber对象。例如,N个音频文件要进行N次识别任务,创建N个SpeechTranscriber对象。
SpeechTranscriberListener对象和SpeechTranscriber对象是一一对应的,不能在不同SpeechTranscriber对象使用同一个SpeechTranscriberListener对象,否则不能将各识别任务区分开。
4、对接流程
4.1 NlsClient类
从上面的注意事项可以看出,NlsClient需要单独创建并在全局复用,所以我们在项目启动的时候直接把它创建出来:
package com.jack.chat.application.service; import com.alibaba.nls.client.AccessToken; import com.alibaba.nls.client.protocol.NlsClient; import org.apache.commons.collections4.Get; import java.io.IOException; /** * @author zhenghao * @description: * @date 2020/8/1419:16 */ public class NlsClientService { private static NlsClient client; public NlsClientService( String id, String secret, String url) { //TODO 重要提示 创建NlsClient实例,应用全局创建一个即可,生命周期可和整个应用保持一致,默认服务地址为阿里云线上服务地址 //TODO 这里简单演示了获取token 的代码,该token会过期,实际使用时注意在accessToken.getExpireTime()过期前再次获取token AccessToken accessToken = new AccessToken(id, secret); try { accessToken.apply(); System.out.println("get token: " + ", expire time: " + accessToken.getExpireTime()); // TODO 创建NlsClient实例,应用全局创建一个即可,用户指定服务地址 if (url.isEmpty()) { client = new NlsClient(accessToken.getToken()); } else { client = new NlsClient(url, accessToken.getToken()); } } catch (IOException e) { e.printStackTrace(); } } public static NlsClient getNlsClient() { return client; } }
package com.jack.chat.application.service; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.context.ApplicationListener; import org.springframework.context.event.ContextRefreshedEvent; import org.springframework.stereotype.Service; /** * @author zhenghao * @description: 伴随tomcat 启动 * @date 2020/7/211:41 */ @Service public class ApplicationService implements ApplicationListener<ContextRefreshedEvent> { private Logger log = LoggerFactory.getLogger(ApplicationService.class); public static ApplicationService application = null; String id = ""; String secret = ""; String url = ""; // 默认即可,默认值:wss://nls-gateway.cn-shanghai.aliyuncs.com/ws/v1 @Override public void onApplicationEvent(ContextRefreshedEvent contextRefreshedEvent) { //启动客户端 if (contextRefreshedEvent.getApplicationContext().getParent() == null) { log.info("客户端启动-------------------------->"); synchronized (this) { ApplicationService.application = this; new NlsClientService(id, secret, url); System.out.println("阿里云 nls 初始化完毕"); } } } }
4.2 核心类
package com.jack.chat.asrali.service; import com.alibaba.nls.client.protocol.InputFormatEnum; import com.alibaba.nls.client.protocol.NlsClient; import com.alibaba.nls.client.protocol.SampleRateEnum; import com.alibaba.nls.client.protocol.asr.SpeechTranscriber; import com.alibaba.nls.client.protocol.asr.SpeechTranscriberListener; import com.alibaba.nls.client.protocol.asr.SpeechTranscriberResponse; import com.jack.chat.asr.model.AsrResultModel; import com.jack.chat.fs.service.FsService; import com.jack.chat.socket.service.SocketServer; import com.jack.chat.socket.service.WebSocketMapUtil; import org.apache.commons.lang3.StringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.web.context.ContextLoader; import org.springframework.web.context.WebApplicationContext; import java.io.IOException; /** * @author zhenghao * @description: * @date 2020/8/1418:47 */ public class ALiAsrConnnection { private static final Logger logger = LoggerFactory.getLogger(SpeechTranscriberDemo.class); SpeechTranscriber transcriber = null; private FsService fsService; private String ptel; public void init(NlsClient client, String appKey, String tel) { try { ptel = tel; WebApplicationContext wac = ContextLoader.getCurrentWebApplicationContext(); fsService = (FsService) wac.getBean("fsService"); //创建实例,建立连接 transcriber = new SpeechTranscriber(client, getTranscriberListener()); transcriber.setAppKey(appKey); //输入音频编码方式 transcriber.setFormat(InputFormatEnum.PCM); //输入音频采样率 transcriber.setSampleRate(SampleRateEnum.SAMPLE_RATE_8K); //是否返回中间识别结果 transcriber.setEnableIntermediateResult(true); //是否生成并返回标点符号 transcriber.setEnablePunctuation(true); //是否将返回结果规整化,比如将一百返回为100 transcriber.setEnableITN(false); //设置vad断句参数,默认800ms,有效值[200, 2000] //transcriber.addCustomedParam("max_sentence_silence", 600); //设置是否语义断句 //transcriber.addCustomedParam("enable_semantic_sentence_detection",false); //是否开启顺滑 transcriber.addCustomedParam("disfluency", true); //设置是否开启词模式 
//transcriber.addCustomedParam("enable_words",true); //设置vad的模型 //transcriber.addCustomedParam("vad_model","farfield"); //设置vad噪音阈值参数,参数区间是-1到+1,比如-0.9, -0.8...0, 0.1, 0.2, 0.9, //方向是趋于-1的方向,判定为语音的概率越大,也就是说有可能更多噪声被当成语音被误识别; //越趋于+1的方向,判断成噪音的越多,也就是说有可能更多语音段被当成噪音拒绝掉, //该参数属高级参数,调整需慎重和重点测试。 //transcriber.addCustomedParam("speech_noise_threshold",0.3); //设置训练后的定制语言模型id //transcriber.addCustomedParam("customization_id","你的定制语言模型id"); //设置训练后的定制热词id //transcriber.addCustomedParam("vocabulary_id","你的定制热词id"); //设置是否忽略单句超时 transcriber.addCustomedParam("enable_ignore_sentence_timeout", false); //vad断句开启后处理 //transcriber.addCustomedParam("enable_vad_unify_post",false); //此方法将以上参数设置序列化为json发送给服务端,并等待服务端确认 transcriber.start(); // TODO 重要提示:这里是用读取本地文件的形式模拟实时获取语音流并发送的,因为read很快,所以这里需要sleep // TODO 如果是真正的实时获取语音,则无需sleep, 如果是8k采样率语音,第二个参数改为8000 // int deltaSleep = getSleepDelta(len, 8000); // Thread.sleep(deltaSleep); //通知服务端语音数据发送完毕,等待服务端处理完成 long now = System.currentTimeMillis(); logger.info("ASR wait for complete"); // transcriber.stop(); logger.info("ASR latency : " + (System.currentTimeMillis() - now) + " ms"); } catch (Exception e) { System.err.println(e.getMessage()); } finally { if (null != transcriber) { // transcriber.close(); } } } public void send(byte[] contentStream) { transcriber.send(contentStream); } public void close() { try { transcriber.stop(); } catch (Exception e) { e.printStackTrace(); } } private SpeechTranscriberListener getTranscriberListener() { SpeechTranscriberListener listener = new SpeechTranscriberListener() { //TODO 识别出中间结果.服务端识别出一个字或词时会返回此消息.仅当setEnableIntermediateResult(true)时,才会有此类消息返回 @Override public void onTranscriptionResultChange(SpeechTranscriberResponse response) { // System.out.println("task_id: " + response.getTaskId() + // ", name: " + response.getName() + // //状态码 20000000 表示正常识别 // ", status: " + response.getStatus() + // //句子编号,从1开始递增 // ", index: " + response.getTransSentenceIndex() + // //当前的识别结果 // ", result: " + 
response.getTransSentenceText() + // //当前已处理的音频时长,单位是毫秒 // ", time: " + response.getTransSentenceTime()); //组装内容 System.out.println("index:" + response.getTransSentenceIndex() + ":" + response.getTransSentenceText()); if (StringUtils.isNotEmpty(response.getTransSentenceText())) { sendMessage(response.getTransSentenceIndex(), response.getTransSentenceText()); } } @Override public void onTranscriberStart(SpeechTranscriberResponse response) { // TODO 重要提示: task_id很重要,是调用方和服务端通信的唯一ID标识,当遇到问题时,需要提供此task_id以便排查 // System.out.println("task_id: " + response.getTaskId() + ", name: " + response.getName() + ", status: " + response.getStatus()); } @Override public void onSentenceBegin(SpeechTranscriberResponse response) { // System.out.println("task_id: " + response.getTaskId() + ", name: " + response.getName() + ", status: " + response.getStatus()); } //识别出一句话.服务端会智能断句,当识别到一句话结束时会返回此消息 @Override public void onSentenceEnd(SpeechTranscriberResponse response) { // System.out.println("task_id: " + response.getTaskId() + // ", name: " + response.getName() + // //状态码 20000000 表示正常识别 // ", status: " + response.getStatus() + // //句子编号,从1开始递增 // ", index: " + response.getTransSentenceIndex() + // //当前的识别结果 // ", result: " + response.getTransSentenceText() + // //置信度 // ", confidence: " + response.getConfidence() + // //开始时间 // ", begin_time: " + response.getSentenceBeginTime() + // //当前已处理的音频时长,单位是毫秒 // ", time: " + response.getTransSentenceTime()); // content = content + response.getTransSentenceText(); // System.out.println("识别结果:"+ content); System.out.println("整句index:" + response.getTransSentenceIndex() + ":" + response.getTransSentenceText()); if (StringUtils.isNotEmpty(response.getTransSentenceText())) { sendMessage(response.getTransSentenceIndex(), response.getTransSentenceText()); } } //识别完毕 @Override public void onTranscriptionComplete(SpeechTranscriberResponse response) { System.out.println("task_id: " + response.getTaskId() + ", name: " + response.getName() + ", status: " 
+ response.getStatus()); } @Override public void onFail(SpeechTranscriberResponse response) { // TODO 重要提示: task_id很重要,是调用方和服务端通信的唯一ID标识,当遇到问题时,需要提供此task_id以便排查 System.out.println("task_id: " + response.getTaskId() + ", status: " + response.getStatus() + ", status_text: " + response.getStatusText()); } }; return listener; } public void sendMessage(Integer lineNo, String content) { try { AsrResultModel asrResultModel = new AsrResultModel(); asrResultModel.setLineNo(lineNo); asrResultModel.setResult(content); String telChannel = fsService.getTelChannel(ptel); String message = fsService.message(ptel, asrResultModel); System.out.println("ptel:" + ptel +"通道:" + telChannel + "阿里消息:" + message); SocketServer socketServer = WebSocketMapUtil.getUserWs(telChannel); socketServer.sendMessage(message); } catch (Exception e) { System.out.println("阿里发送消息失败:" + e.getMessage()); } } }
4.3 调用类
// Create a fresh ALiAsrConnnection and initialize it with the NlsClient held by
// NlsClientService, the app key and tel (presumably the caller's phone number - confirm at the call site).
aLiAsrConnnection = new ALiAsrConnnection(); aLiAsrConnnection.init(NlsClientService.getNlsClient(), appkey, tel);
在WebSocket(ws)连接创建完成以后,再 new 核心类 ALiAsrConnnection 并调用 init。