春色将阑,莺声渐老,红英落尽青梅小。画堂人静雨蒙蒙,屏山半掩余香袅。
密约沉沉,离情杳杳,菱花尘满慵将照。倚楼无语欲销魂,长空黯淡连芳草。
首先安装python包
pip3 install baidu-aip
再注册百度AI开放平台账号并创建应用,得到 APP_ID、API_KEY、SECRET_KEY 等相关参数
一、语音合成
from aip import AipSpeech
# Credentials of the Baidu AI application used by this demo.
# NOTE(review): the keys are hard-coded; consider loading them from the
# environment before publishing or committing this file.
APP_ID = '14446020'
API_KEY = 'GnaoLWrIiTKP10disiDHMiNZ'
SECRET_KEY = 'FYaMNBsH5NFsgWcRsyBfaHDV70MvvE6u'

# Instantiate the AipSpeech client.
client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)

# Call the speech-synthesis method.
# NOTE(review): per/spd/pit/vol presumably select voice, speed, pitch and
# volume -- confirm the valid ranges against the Baidu TTS documentation.
result = client.synthesis(
    "欢迎来到王者荣耀",
    options={
        "per": 1,
        "spd": 9,
        "pit": 9,
        "vol": 15,
    },
)

# Only a non-dict result is written out as audio; a dict is presumably the
# API's error payload, so report it instead of silently producing no file.
if not isinstance(result, dict):
    with open('auido.mp3', 'wb') as f:
        f.write(result)
else:
    print(f"speech synthesis failed: {result}")
二、语音识别
from aip import AipSpeech
import os
# Credentials of the Baidu AI application used by this demo.
# NOTE(review): the keys are hard-coded; consider loading them from the
# environment before publishing or committing this file.
APP_ID = '14446020'
API_KEY = 'GnaoLWrIiTKP10disiDHMiNZ'
SECRET_KEY = 'FYaMNBsH5NFsgWcRsyBfaHDV70MvvE6u'

client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)


def get_file_content(filePath):
    """Convert an audio file to raw PCM with ffmpeg and return the PCM bytes.

    The converted data is written next to the input as ``<filePath>.pcm``
    (16-bit little-endian, mono, 16 kHz, per the ffmpeg arguments below).

    Raises:
        FileNotFoundError: if the ``ffmpeg`` executable is not on PATH.
        subprocess.CalledProcessError: if ffmpeg exits with a non-zero status.
    """
    import subprocess

    pcm_path = f"{filePath}.pcm"
    # An argument list (no shell) keeps paths containing spaces or shell
    # metacharacters intact; check=True surfaces a failed conversion instead
    # of reading a missing or stale .pcm file.
    subprocess.run(
        [
            "ffmpeg", "-y", "-i", filePath,
            "-acodec", "pcm_s16le", "-f", "s16le",
            "-ac", "1", "-ar", "16000",
            pcm_path,
        ],
        check=True,
    )
    with open(pcm_path, 'rb') as fp:
        return fp.read()


res = client.asr(speech=get_file_content("auido.mp3"), options={
    "dev_pid": 1536,
})

# A response without a "result" list means recognition failed; print the
# whole response instead of raising KeyError.
if res.get("result"):
    print(res["result"][0])
else:
    print(f"speech recognition failed: {res}")
这里用到一个格式转换的软件,百度地址:
https://pan.baidu.com/s/1MadxSh-A0Pzo1Su_wKdktQ 提取码:x5xi
固定的格式转换命令:(需要先将ffmpeg的bin目录添加到系统环境变量PATH中,然后在cmd中执行)
ffmpeg -y -i filePath -acodec pcm_s16le -f s16le -ac 1 -ar 16000 filePath.pcm
三、短文本相似度
from aip import AipNlp
# Credentials of the Baidu AI application used by this demo.
APP_ID, API_KEY, SECRET_KEY = (
    '14446020',
    'GnaoLWrIiTKP10disiDHMiNZ',
    'FYaMNBsH5NFsgWcRsyBfaHDV70MvvE6u',
)

client = AipNlp(APP_ID, API_KEY, SECRET_KEY)

# Ask the short-text similarity endpoint to compare the two sentences and
# print the raw response.
ret = client.simnet(
    "你今年几岁了?",
    "多大年龄了?",
)
print(ret)
{'log_id': 4545309161914786697, 'texts': {'text_2': '多大年龄了?', 'text_1': '你今年几岁了?'}, 'score': 0.742316}
score 是两个短文本的相似度;一般认为 score 大于 0.72 时,两个短文本表达的意思相近。
四、代码实现对接图灵
import requests
def tuling_test(question):
    """Send ``question`` to the Tuling open API and return the decoded JSON reply.

    Raises:
        requests.RequestException: on network errors, timeouts, or a
            non-2xx HTTP status.
    """
    # NOTE(review): the endpoint is plain HTTP, so the apiKey travels
    # unencrypted -- check whether an HTTPS endpoint is available.
    url = "http://openapi.tuling123.com/openapi/api/v2"
    data = {
        "reqType": 0,
        "perception": {
            "inputText": {
                "text": question
            },
            "inputImage": {
            },
        },
        "userInfo": {
            "apiKey": "2f4e809b8b3049ce82a6b4787bad65bb",
            "userId": "wangjifei"
        }
    }
    # Without a timeout a stalled server would block the caller forever.
    response = requests.post(url=url, json=data, timeout=10)
    response.raise_for_status()
    return response.json()


ret = tuling_test("心情不好")

# "results" may be absent from the reply; print the raw reply in that case
# instead of failing on ``None[0]``.
results = ret.get("results")
if results:
    print(results[0]["values"]["text"])
else:
    print(f"tuling request failed: {ret}")
五、简单实现人机交流
- 基本步骤:
用户录制音频---传入函数---格式转换---语音识别---匹配答案---语音合成---语音文件流写入文件---os执行文件---删除文件
from aip import AipSpeech
from aip import AipNlp
from uuid import uuid4
import os
import requests
import time
# Credentials of the Baidu AI application; the speech client and the NLP
# client below are both built from the same three values.
APP_ID = '14446007'
API_KEY = 'QrQWLLg5a8qld7Qty7avqCGC'
SECRET_KEY = 'O5mE31LSl17hm8NRYyf9PwlE5Byqm0nr'

_credentials = (APP_ID, API_KEY, SECRET_KEY)
client = AipSpeech(*_credentials)    # speech recognition / synthesis
nlp_client = AipNlp(*_credentials)   # short-text similarity
def tuling_test(question):
    """Ask the Tuling open API for an answer to ``question``.

    Returns:
        The ``text`` value of the first entry in the reply's ``results`` list.

    Raises:
        requests.RequestException: on network errors, timeouts, or a
            non-2xx HTTP status.
        RuntimeError: if the reply contains no ``results``.
    """
    # NOTE(review): the endpoint is plain HTTP, so the apiKey travels
    # unencrypted -- check whether an HTTPS endpoint is available.
    url = "http://openapi.tuling123.com/openapi/api/v2"
    data = {
        "reqType": 0,
        "perception": {
            "inputText": {
                "text": question
            },
            "inputImage": {
            },
        },
        "userInfo": {
            "apiKey": "2f4e809b8b3049ce82a6b4787bad65bb",
            "userId": "wangjifei"
        }
    }
    # Without a timeout a stalled server would block the caller forever.
    response = requests.post(url=url, json=data, timeout=10)
    response.raise_for_status()
    ret = response.json()

    # Fail with a readable message rather than ``TypeError`` on ``None[0]``.
    results = ret.get("results")
    if not results:
        raise RuntimeError(f"tuling request returned no results: {ret}")
    return results[0]["values"]["text"]
def get_file_content(filePath):
    """Convert an audio file to raw PCM with ffmpeg and return the PCM bytes.

    The converted data is written next to the input as ``<filePath>.pcm``
    (16-bit little-endian, mono, 16 kHz, per the ffmpeg arguments below);
    removing that file is left to the caller.

    Raises:
        FileNotFoundError: if the ``ffmpeg`` executable is not on PATH.
        subprocess.CalledProcessError: if ffmpeg exits with a non-zero status.
    """
    import subprocess

    pcm_path = f"{filePath}.pcm"
    # An argument list (no shell) keeps paths containing spaces or shell
    # metacharacters intact; check=True surfaces a failed conversion instead
    # of reading a missing or stale .pcm file.
    subprocess.run(
        [
            "ffmpeg", "-y", "-i", filePath,
            "-acodec", "pcm_s16le", "-f", "s16le",
            "-ac", "1", "-ar", "16000",
            pcm_path,
        ],
        check=True,
    )
    with open(pcm_path, 'rb') as fp:
        return fp.read()
def custom_reply(text):
    """Return the answer for ``text``.

    A canned reply is used when ``text`` is similar enough to a known
    question (short-text similarity score of at least 0.72); every other
    question is forwarded to the Tuling question library.
    """
    score = nlp_client.simnet("你叫什么名字", text).get("score")
    # The response may carry no "score"; comparing None with a float would
    # raise TypeError, so treat a missing score as "not similar".
    if score is not None and score >= 0.72:
        return "我不能告诉你"
    return tuling_test(text)
def learn_say(file_name):
    """Recognize a recorded question, find an answer and play it back.

    Steps: convert and recognize the audio in ``file_name``, look up a reply
    with ``custom_reply``, synthesize the reply to a uniquely named mp3,
    hand that file to the operating system, then delete it.

    Raises:
        RuntimeError: if speech recognition returns no result or speech
            synthesis returns an error payload.
    """
    pcm_name = f"{file_name}.pcm"

    # Speech -> text.
    try:
        res = client.asr(speech=get_file_content(file_name), options={
            "dev_pid": 1536,
        })
    finally:
        # Remove the intermediate PCM file even when conversion or
        # recognition fails.
        if os.path.exists(pcm_name):
            os.remove(pcm_name)

    recognized = res.get("result")
    if not recognized:
        raise RuntimeError(f"speech recognition failed: {res}")
    text = recognized[0]

    # Find the answer for the recognized question.
    text1 = custom_reply(text)

    # Answer -> speech.
    res_audio = client.synthesis(text1, options={
        "vol": 8,
        "pit": 8,
        "spd": 5,
        "per": 4
    })
    # A dict result is not audio (the synthesis demo in this file only writes
    # non-dict results), so do not write it to the mp3 file.
    if isinstance(res_audio, dict):
        raise RuntimeError(f"speech synthesis failed: {res_audio}")

    # Use a uuid so every reply gets its own file name.
    ret_file_name = f"{uuid4()}.mp3"
    with open(ret_file_name, "wb") as f:
        f.write(res_audio)

    try:
        # Hand the file name to the shell.
        # NOTE(review): presumably this opens the default player on Windows;
        # verify the behavior on other platforms.
        os.system(ret_file_name)
        time.sleep(2)
    finally:
        os.remove(ret_file_name)


if __name__ == '__main__':
    learn_say("auido.m4a")
六、网页版智能机器人对话
flask_ws.py
from flask import Flask, request, render_template
from uuid import uuid4
from geventwebsocket.websocket import WebSocket
from gevent.pywsgi import WSGIServer
from geventwebsocket.handler import WebSocketHandler
from learn_say import learn_say
app = Flask(__name__)  # type:Flask


@app.route("/ws")
def ws():
    """Serve one WebSocket client: receive recorded audio, answer with a file name.

    Every binary message is stored as a uniquely named ``.wav`` file and
    passed to ``learn_say``; the name of the synthesized reply file is sent
    back over the socket.
    """
    user_socket = request.environ.get("wsgi.websocket")  # type:WebSocket
    # A plain HTTP request to this route carries no websocket object.
    if user_socket is None:
        return "WebSocket connection required", 400

    while True:
        msg = user_socket.receive()
        # receive() yields None once the client has gone away; stop instead
        # of failing on f.write(None).
        if msg is None:
            break
        # Only binary frames can be written to the audio file.
        if not isinstance(msg, (bytes, bytearray)):
            continue

        q_file_name = f"{uuid4()}.wav"
        with open(q_file_name, "wb") as f:
            f.write(msg)

        try:
            ret_file_name = learn_say(q_file_name)
        except Exception:
            # One failed question must not tear down the whole connection;
            # log it and wait for the next recording.
            app.logger.exception("failed to build a reply for %s", q_file_name)
            continue
        user_socket.send(ret_file_name)

    # Flask requires the view to return a response once the loop ends.
    return ""


if __name__ == '__main__':
    http_serv = WSGIServer(("127.0.0.1", 8006), app, handler_class=WebSocketHandler)
    http_serv.serve_forever()
flask_app.py
from flask import Flask, request, render_template, send_file
app = Flask(__name__)  # type:Flask


@app.route("/index")
def index():
    """Render the recorder page."""
    return render_template("index.html")


@app.route("/get_audio/<audio_name>")
def get_audio(audio_name):
    """Send the synthesized reply file called ``audio_name``.

    Only ``.mp3`` names are served: the replies are written as
    ``<uuid>.mp3``, and without this check any file in the application
    directory (including source files holding API keys) could be downloaded.
    """
    if not audio_name.endswith(".mp3"):
        return "", 404
    return send_file(audio_name)


if __name__ == '__main__':
    app.run("127.0.0.1", 8008, debug=True)
learn_say.py
from aip import AipSpeech
from aip import AipNlp
from uuid import uuid4
import os
import requests
import time
# Credentials of the Baidu AI application; the speech client and the NLP
# client below are both built from the same three values.
APP_ID = '14446007'
API_KEY = 'QrQWLLg5a8qld7Qty7avqCGC'
SECRET_KEY = 'O5mE31LSl17hm8NRYyf9PwlE5Byqm0nr'

_credentials = (APP_ID, API_KEY, SECRET_KEY)
client = AipSpeech(*_credentials)    # speech recognition / synthesis
nlp_client = AipNlp(*_credentials)   # short-text similarity
def tuling_test(question):
    """Ask the Tuling open API for an answer to ``question``.

    Returns:
        The ``text`` value of the first entry in the reply's ``results`` list.

    Raises:
        requests.RequestException: on network errors, timeouts, or a
            non-2xx HTTP status.
        RuntimeError: if the reply contains no ``results``.
    """
    # NOTE(review): the endpoint is plain HTTP, so the apiKey travels
    # unencrypted -- check whether an HTTPS endpoint is available.
    url = "http://openapi.tuling123.com/openapi/api/v2"
    data = {
        "reqType": 0,
        "perception": {
            "inputText": {
                "text": question
            },
            "inputImage": {
            },
        },
        "userInfo": {
            "apiKey": "2f4e809b8b3049ce82a6b4787bad65bb",
            "userId": "wangjifei"
        }
    }
    # Without a timeout a stalled server would block the caller forever.
    response = requests.post(url=url, json=data, timeout=10)
    response.raise_for_status()
    ret = response.json()

    # Fail with a readable message rather than ``TypeError`` on ``None[0]``.
    results = ret.get("results")
    if not results:
        raise RuntimeError(f"tuling request returned no results: {ret}")
    return results[0]["values"]["text"]
def get_file_content(filePath):
    """Convert an audio file to raw PCM with ffmpeg and return the PCM bytes.

    The converted data is written next to the input as ``<filePath>.pcm``
    (16-bit little-endian, mono, 16 kHz, per the ffmpeg arguments below);
    removing that file is left to the caller.

    Raises:
        FileNotFoundError: if the ``ffmpeg`` executable is not on PATH.
        subprocess.CalledProcessError: if ffmpeg exits with a non-zero status.
    """
    import subprocess

    pcm_path = f"{filePath}.pcm"
    # An argument list (no shell) keeps paths containing spaces or shell
    # metacharacters intact; check=True surfaces a failed conversion instead
    # of reading a missing or stale .pcm file.
    subprocess.run(
        [
            "ffmpeg", "-y", "-i", filePath,
            "-acodec", "pcm_s16le", "-f", "s16le",
            "-ac", "1", "-ar", "16000",
            pcm_path,
        ],
        check=True,
    )
    with open(pcm_path, 'rb') as fp:
        return fp.read()
def custom_reply(text):
    """Return the answer for ``text``.

    A canned reply is used when ``text`` is similar enough to a known
    question (short-text similarity score of at least 0.72); every other
    question is forwarded to the Tuling question library.
    """
    score = nlp_client.simnet("你叫什么名字", text).get("score")
    # The response may carry no "score"; comparing None with a float would
    # raise TypeError, so treat a missing score as "not similar".
    if score is not None and score >= 0.72:
        return "我不能告诉你"
    return tuling_test(text)
def learn_say(file_name):
    """Recognize a recorded question and synthesize the spoken answer.

    The input recording and its intermediate ``.pcm`` file are deleted once
    recognition has been attempted.

    Returns:
        The name of the newly written ``<uuid>.mp3`` file with the answer.

    Raises:
        RuntimeError: if speech recognition returns no result or speech
            synthesis returns an error payload.
    """
    pcm_name = f"{file_name}.pcm"

    # Speech -> text.
    try:
        res = client.asr(speech=get_file_content(file_name), options={
            "dev_pid": 1536,
        })
    finally:
        # Clean up both temporary files even when conversion or recognition
        # fails, so failed requests do not pile up files on disk.
        for leftover in (file_name, pcm_name):
            if os.path.exists(leftover):
                os.remove(leftover)

    recognized = res.get("result")
    if not recognized:
        raise RuntimeError(f"speech recognition failed: {res}")
    text = recognized[0]

    # Find the answer for the recognized question.
    text1 = custom_reply(text)

    # Answer -> speech.
    res_audio = client.synthesis(text1, options={
        "vol": 8,
        "pit": 8,
        "spd": 5,
        "per": 4
    })
    # A dict result is not audio (the synthesis demo in this file only writes
    # non-dict results), so do not write it to the mp3 file.
    if isinstance(res_audio, dict):
        raise RuntimeError(f"speech synthesis failed: {res_audio}")

    # Use a uuid so every reply gets its own file name.
    ret_file_name = f"{uuid4()}.mp3"
    with open(ret_file_name, "wb") as f:
        f.write(res_audio)
    return ret_file_name
index.html
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>Title</title>
</head>
<body>
<audio src="" autoplay controls id="player"></audio>
<button onclick="start_reco()">录制消息</button>
<br>
<button onclick="stop_reco()">发送语音消息</button>

<!-- Scripts live inside <body>; markup after </body> is not valid HTML. -->
<script src="/static/Recorder.js"></script>
<script type="application/javascript">
    var serv = "http://127.0.0.1:8008";
    var ws_serv = "ws://127.0.0.1:8006/ws";
    var get_music = serv + "/get_audio/";

    // The server answers every recording with the file name of the reply;
    // pointing the player at it starts playback (the element has autoplay).
    var ws = new WebSocket(ws_serv);
    ws.onmessage = function (data) {
        document.getElementById("player").src = get_music + data.data;
    };

    // Recorder instance; stays null until microphone access is granted.
    var reco = null;
    var audio_context = new (window.AudioContext || window.webkitAudioContext)();

    // Wrap the microphone stream in a Recorder.
    function create_stream(user_media) {
        var stream_input = audio_context.createMediaStreamSource(user_media);
        reco = new Recorder(stream_input);
    }

    function on_media_error(err) {
        console.log(err);
    }

    // Prefer the promise-based API; fall back to the deprecated prefixed
    // callbacks only when it is missing, and never call an undefined function.
    if (navigator.mediaDevices && navigator.mediaDevices.getUserMedia) {
        navigator.mediaDevices.getUserMedia({audio: true})
            .then(create_stream)
            .catch(on_media_error);
    } else {
        var legacy_get_user_media = (navigator.getUserMedia ||
            navigator.webkitGetUserMedia ||
            navigator.mozGetUserMedia ||
            navigator.msGetUserMedia);
        if (legacy_get_user_media) {
            legacy_get_user_media.call(navigator, {audio: true}, create_stream, on_media_error);
        } else {
            on_media_error(new Error("getUserMedia is not supported by this browser"));
        }
    }

    // Start recording a message.
    function start_reco() {
        if (!reco) {
            console.log("microphone is not ready yet");
            return;
        }
        // Browsers may keep an AudioContext created before any user gesture
        // suspended; resume it on the button click.
        if (audio_context.state === "suspended") {
            audio_context.resume();
        }
        reco.record();
    }

    // Stop recording first, then export and send the audio.
    function stop_reco() {
        if (!reco) {
            return;
        }
        reco.stop();
        get_audio();
        reco.clear();
    }

    // Export the recording as WAV and send it over the WebSocket.
    function get_audio() {
        reco.exportWAV(function (wav_file) {
            // wav_file is a Blob
            if (ws.readyState === WebSocket.OPEN) {
                ws.send(wav_file);
            } else {
                console.log("websocket is not open; recording was not sent");
            }
        });
    }
</script>
</body>
</html>