最近研究阿里云语音合成,录音文件识别,自然语言分析。
自然语言分析官网文档:
https://help.aliyun.com/document_detail/61378.html?spm=a2c4g.11186623.6.547.9q3U1C
智能语音官网文档:
https://help.aliyun.com/product/30413.html?spm=a2c4g.11186623.3.1.yS0DIK
决定采用阿里云提供的RESTful API接口,主要利用Python的urllib库来实现调用,整合了官方给出的demo,总结出调用代码。话不多说,贴出代码:
python环境
python3.5.1
http请求代理工具类
在工程目录中建立utils目录,在该目录中建立__init__.py文件,代码如下:
import base64
import datetime  # retained: part of the module's original import surface
import email.utils
import hashlib
import hmac
import logging
import os
import ssl
import urllib.parse
import urllib.request
import uuid
from urllib.error import HTTPError  # retained: part of the module's original import surface

logger = logging.getLogger(__name__)


class http_proxy:
    """HTTP helper that signs and sends requests to Alibaba Cloud services.

    Every request carries an ``Authorization`` header of the form
    ``<scheme> <ak_id>:<signature>`` where the signature is the Base64 encoded
    HMAC-SHA1 of a newline separated "string to sign", keyed with
    ``ak_secret``.

    Args:
        ak_id: Access key id placed in the ``Authorization`` header.
        ak_secret: Access key secret used as the HMAC key.
        verify_ssl: When true, HTTPS certificates and host names are
            verified. Defaults to ``False`` to stay compatible with the
            previous behaviour, which disabled verification.
            NOTE(review): pass ``True`` unless the environment genuinely
            cannot validate the service certificates.
    """

    def __init__(self, ak_id, ak_secret, verify_ssl=False):
        self.__ak_id = ak_id
        self.__ak_secret = ak_secret
        # Use a per-instance context instead of reassigning
        # ssl._create_default_https_context, which silently disabled
        # certificate checks for every HTTPS request in the process.
        self.__ssl_context = ssl.create_default_context()
        if not verify_ssl:
            # check_hostname must be cleared before verify_mode can be
            # lowered to CERT_NONE.
            self.__ssl_context.check_hostname = False
            self.__ssl_context.verify_mode = ssl.CERT_NONE

    def __current_gmt_time(self):
        """Return the current time as an HTTP date, e.g. 'Mon, 01 Jan 2024 00:00:00 GMT'."""
        # formatdate always emits English day/month names, unlike strftime,
        # whose %a/%b output follows the active locale.
        return email.utils.formatdate(usegmt=True)

    def __md5_base64(self, strbody):
        """Return the Base64 encoded MD5 digest of ``strbody`` (encoded as UTF-8)."""
        digest = hashlib.md5(strbody.encode('utf-8')).digest()
        return base64.b64encode(digest).decode('utf-8')

    def __sha1_base64(self, str_to_sign, secret):
        """Return the Base64 encoded HMAC-SHA1 of ``str_to_sign`` keyed with ``secret``."""
        mac = hmac.new(secret.encode('utf-8'), str_to_sign.encode('utf-8'), hashlib.sha1)
        return base64.b64encode(mac.digest()).decode('utf-8')

    @staticmethod
    def __string_to_sign(method, accept, body_md5, content_type, date):
        """Join the signed request fields with newlines, in the order the methods sign them."""
        return "\n".join((method, accept, body_md5, content_type, date))

    @staticmethod
    def __tts_url(params):
        """Return the speech synthesis endpoint with ``params`` as its query string."""
        return 'http://nlsapi.aliyun.com/speak?' + urllib.parse.urlencode(params)

    def __authorization(self, scheme, str_to_sign):
        """Sign ``str_to_sign`` and return the ``Authorization`` header value."""
        logger.debug("string to sign: %r", str_to_sign)
        signature = self.__sha1_base64(str_to_sign, self.__ak_secret)
        return scheme + " " + self.__ak_id + ":" + signature

    def __send(self, req, data=None):
        """Open ``req`` and return ``(status, body_bytes)``, closing the response."""
        with urllib.request.urlopen(req, data, context=self.__ssl_context) as resp:
            return resp.status, resp.read()

    def send_request(self, url, body):
        """POST a JSON ``body`` to ``url`` using the ``Dataplus`` scheme.

        Args:
            url: Target URL.
            body: Request body as a JSON string.

        Returns:
            The response body decoded as UTF-8.

        Raises:
            urllib.error.URLError: Propagated from ``urlopen`` (includes
                ``HTTPError`` for HTTP error statuses).
        """
        gmtnow = self.__current_gmt_time()
        body_md5 = self.__md5_base64(body)
        str_to_sign = self.__string_to_sign(
            "POST", "application/json", body_md5, "application/json", gmtnow)
        req = urllib.request.Request(url)
        req.add_header("Accept", "application/json")
        req.add_header("Content-Type", "application/json")
        req.add_header("Date", gmtnow)
        req.add_header("Authorization", self.__authorization("Dataplus", str_to_sign))
        _, payload = self.__send(req, body.encode('utf-8'))
        return payload.decode('utf-8')

    def send_get(self, url, task_id):
        """GET ``url + "/" + task_id`` using the ``Dataplus`` scheme.

        Args:
            url: Base URL.
            task_id: Identifier appended to ``url`` as the last path segment.

        Returns:
            The response body decoded as UTF-8.

        Raises:
            urllib.error.URLError: Propagated from ``urlopen`` (includes
                ``HTTPError`` for HTTP error statuses).
        """
        gmtnow = self.__current_gmt_time()
        accept = "application/json"
        content_type = "application/json"
        # A GET has no body, so the MD5 slot of the signed string stays empty.
        str_to_sign = self.__string_to_sign("GET", accept, "", content_type, gmtnow)
        req = urllib.request.Request(url + "/" + task_id)
        req.add_header("Accept", accept)
        req.add_header("Content-Type", content_type)
        req.add_header("Date", gmtnow)
        req.add_header("Authorization", self.__authorization("Dataplus", str_to_sign))
        _, payload = self.__send(req)
        return payload.decode('utf-8')

    def send_requestForNlp(self, path, content):
        """POST ``content`` to ``path`` on nlp.cn-shanghai.aliyuncs.com using the ``acs`` scheme.

        Besides the common fields, the signed string also covers the
        ``x-acs-signature-method`` and ``x-acs-signature-nonce`` headers and
        the request path.

        Args:
            path: Request path, appended to the host.
            content: Request body as a JSON string.

        Returns:
            The response body decoded as UTF-8.

        Raises:
            urllib.error.URLError: Propagated from ``urlopen`` (includes
                ``HTTPError`` for HTTP error statuses).
        """
        method = "POST"
        # NOTE(review): "chrset" looks like a misspelling of "charset". It is
        # kept byte-for-byte because the value is part of the signed string;
        # confirm against the service documentation before changing it.
        content_type = "application/json;chrset=utf-8"
        accept = "application/json"
        host = "nlp.cn-shanghai.aliyuncs.com"
        gmtnow = self.__current_gmt_time()
        body_md5 = self.__md5_base64(content)
        # A fresh nonce per request; it is both signed and sent as a header.
        nonce = uuid.uuid4().hex
        str_to_sign = (
            self.__string_to_sign(method, accept, body_md5, content_type, gmtnow)
            + "\nx-acs-signature-method:HMAC-SHA1\n"
            + "x-acs-signature-nonce:" + nonce + "\n"
            + path
        )
        req = urllib.request.Request("http://" + host + path)
        req.add_header("Accept", accept)
        req.add_header("Content-Type", content_type)
        req.add_header("Content-MD5", body_md5)
        req.add_header("Date", gmtnow)
        req.add_header("Host", host)
        req.add_header("x-acs-signature-nonce", nonce)
        req.add_header("x-acs-signature-method", "HMAC-SHA1")
        req.add_header("Authorization", self.__authorization("acs", str_to_sign))
        _, payload = self.__send(req, content.encode('utf-8'))
        return payload.decode('utf-8')

    def sendTtsPost(self, textData, ttsRequestParam, fileRootPath):
        """POST ``textData`` to the speech synthesis endpoint and save the returned audio.

        Args:
            textData: Text to synthesize; sent as the UTF-8 request body.
            ttsRequestParam: Mapping of query parameters. Must contain
                ``'encode_type'``, which also selects the ``Accept`` audio
                type and the suffix of the saved file.
            fileRootPath: Directory the audio file is written to. When empty
                or ``None`` the previous hard-coded ``'g:audio/'`` is used.

        Returns:
            The path of the written audio file, or ``None`` when the response
            status is not 200.

        Raises:
            KeyError: If ``ttsRequestParam`` has no ``'encode_type'`` entry.
            urllib.error.URLError: Propagated from ``urlopen`` (includes
                ``HTTPError`` for HTTP error statuses).
            OSError: If the output file cannot be written.
        """
        encode_type = ttsRequestParam['encode_type']
        # The query string is appended once; the old code used
        # ``url += url + ...`` and duplicated the endpoint.
        url = self.__tts_url(ttsRequestParam)
        method = "POST"
        content_type = "text/plain"
        accept = "audio/" + encode_type + ",application/json"
        gmtnow = self.__current_gmt_time()
        body_md5 = self.__md5_base64(textData)
        str_to_sign = self.__string_to_sign(method, accept, body_md5, content_type, gmtnow)
        req = urllib.request.Request(url)
        req.add_header("Accept", accept)
        req.add_header("Content-Type", content_type)
        req.add_header("Date", gmtnow)
        req.add_header("Authorization", self.__authorization("Dataplus", str_to_sign))
        # No explicit Content-Length: urllib derives it from the encoded
        # bytes. The old code sent len(textData), a character count that is
        # too small for non-ASCII text.
        status, audio = self.__send(req, textData.encode('utf-8'))
        if status != 200:
            print('失败!')
            return None
        root = fileRootPath or 'g:audio/'
        file = os.path.join(root, uuid.uuid4().hex + "." + encode_type)
        with open(file, 'wb') as out:
            out.write(audio)
        print("success"+file)
        return file
调用demo
在目录中建立demo.py ,注意和上面的utils目录同级
import sys
import utils
import json

# app_key values: audio format and target domain
#   nls-service-realtime-8k     8 kHz sample rate    customer-service scenarios such as phone support
#   nls-service-multi-domain    16 kHz sample rate   general Mandarin recognition
#   nls-service                 16 kHz sample rate   input methods, social chat
#   nls-service-tv              16 kHz sample rate   home entertainment
#   nls-service-shopping        16 kHz sample rate   e-commerce shopping
#   nls-realtime-fangyan        16 kHz sample rate   Northeastern, Henan, Sichuan and other dialects
#   nls-service-yue-streaming   16 kHz sample rate   Cantonese
#   nls-service-en              16 kHz sample rate   English

ak_id = ""  # accessId obtained from the DataPlus console
ak_secret = ""  # accessSecret obtained from the DataPlus console
url = "https://nlsapi.aliyun.com/transcriptions"


def request():
    """Submit a recorded-audio file for transcription.

    Returns whatever ``http_proxy.send_request`` returns for the JSON body
    built here.
    """
    payload = json.dumps({
        'app_key': 'nls-service-multi-domain',
        'oss_link': 'http://网址/audio/zl4.mp3',
    })
    client = utils.http_proxy(ak_id, ak_secret)
    return client.send_request(url, payload)


def query(id):
    """Query the transcription result of the task identified by ``id``.

    Returns whatever ``http_proxy.send_get`` returns.
    """
    return utils.http_proxy(ak_id, ak_secret).send_get(url, id)


def nlpTest():
    """Send a sample sentence to the general word-segmentation endpoint.

    Returns whatever ``http_proxy.send_requestForNlp`` returns.
    """
    endpoint = '/nlp/api/wordpos/general'
    sample = {
        'text': '为什么世界是这个样子,人们都不诚实,我要努力,获得成功,让别人尊敬',
        'lang': 'ZH',
    }
    client = utils.http_proxy(ak_id, ak_secret)
    return client.send_requestForNlp(endpoint, json.dumps(sample))


def ttsTest():
    """Synthesize a sample sentence and print what ``sendTtsPost`` returns."""
    sentence = '在打招呼的时候直视对方的人在交往中往往具有攻击性,'
    output_root = 'g:audio/'
    options = {
        # Encoding of the synthesized audio: pcm / wav / mp3 / alaw.
        'encode_type': 'wav',
        # xiaogang = male voice, xiaoyun = female voice.
        'voice_name': 'xiaogang',
        # 0 ~ 100.
        'volume': '50',
        # Sample rate: 8000 / 16000.
        'sample_rate': '16000',
        # Speech rate: -500 ~ 500.
        'speech_rate': '0',
        # Pitch: -500 ~ 500.
        'pitch_rate': '0',
        # 0 = parametric synthesis, 1 = concatenation of original recordings.
        'tts_nus': '1',
        # Optional background music during playback: 0 or 1.
        'background_music_id': '1',
        # Background music offset in milliseconds; used when music is enabled.
        'background_music_offset': '0',
        # Background music volume, 0 ~ 100; used when music is enabled.
        'background_music_volume': '100',
    }
    client = utils.http_proxy(ak_id, ak_secret)
    print(client.sendTtsPost(sentence, options, output_root))


if __name__ == '__main__':
    # Uncomment the call you want to try.
    # print(request())
    # print(query('2324ec1ed63549318b9477f1bf3eaf8a'))
    print(nlpTest())
    # print(ttsTest())