我们的项目想基于python实现语音提示的功能。
1.首先尝试了Python第三方库pyttsx3
直接通过pip安装即可使用
import pyttsx3

# Call the init method to obtain the speech engine object.
speaker = pyttsx3.init()
# Queue the phrase, then let the engine process the queued command.
speaker.say('hello world')
speaker.runAndWait()
得到的语音感觉有些生硬,然后尝试使用百度API
2.百度API语音合成
创建应用后领取额度,保存应用中自己的API相关参数API KEY 和SECRET KEY
参考官方文档,戳这跳转
写自己的代码
# -*- coding: utf-8 -*-
"""Synthesize one spoken prompt with the Baidu text-to-speech REST API.

The script fetches an OAuth access token with API_KEY / SECRET_KEY, posts
TEXT to TTS_URL and stores the returned audio as an MP3 file.  When the
service answers with an error, the response body is stored in ``error.txt``
instead.

Created on Mon Apr 19 15:08:18 2021

@author: ASUS
"""
import sys
import json

# Keep the script runnable on both Python 2 and Python 3.
IS_PY3 = sys.version_info.major == 3
if IS_PY3:
    from urllib.request import urlopen
    from urllib.request import Request
    from urllib.error import HTTPError
    from urllib.error import URLError
    from urllib.parse import urlencode
    from urllib.parse import quote_plus
else:
    import urllib2
    from urllib import quote_plus
    from urllib2 import urlopen
    from urllib2 import Request
    from urllib2 import HTTPError
    from urllib2 import URLError
    from urllib import urlencode

# Replace with your own API_KEY.
API_KEY = '111111111111111111111111'

# Replace with your own SECRET_KEY.
SECRET_KEY = '2222222222222222222222222222222'

# Text to synthesize.
TEXT = "请注意,前方限速!Attention, speed limit ahead"

TTS_URL = 'http://tsn.baidu.com/text2audio'

# ---- TOKEN start ----

# NOTE(review): SECRET_KEY is posted to this endpoint over plain HTTP;
# confirm whether the service accepts HTTPS and switch if it does.
TOKEN_URL = 'http://openapi.baidu.com/oauth/2.0/token'


def fetch_token():
    """Fetch an access token for the TTS service.

    Posts API_KEY and SECRET_KEY to TOKEN_URL using the
    ``client_credentials`` grant.

    Returns:
        The ``access_token`` value of the JSON response.

    Raises:
        SystemExit: if the server cannot be reached, the response is not
            JSON, the response lacks ``access_token`` / ``scope``, or the
            granted scope does not include ``audio_tts_post``.
    """
    params = {'grant_type': 'client_credentials',
              'client_id': API_KEY,
              'client_secret': SECRET_KEY}
    post_data = urlencode(params)
    if IS_PY3:
        post_data = post_data.encode('utf-8')
    req = Request(TOKEN_URL, post_data)
    try:
        f = urlopen(req, timeout=5)
        try:
            result_str = f.read()
        finally:
            f.close()
    except HTTPError as err:
        # Only HTTPError carries a status code and a readable body.
        print('token http response http code : ' + str(err.code))
        result_str = err.read()
    except URLError as err:
        # DNS / connection failures have a reason but no code and no body.
        print('token request failed : ' + str(err.reason))
        sys.exit(1)
    if IS_PY3:
        result_str = result_str.decode()
    try:
        result = json.loads(result_str)
    except ValueError:
        print('token response is not valid JSON : ' + result_str)
        sys.exit(1)
    if 'access_token' in result and 'scope' in result:
        if 'audio_tts_post' not in result['scope'].split(' '):
            print('please ensure has check the tts ability')
            sys.exit(1)
        return result['access_token']
    print('please overwrite the correct API_KEY and SECRET_KEY')
    sys.exit(1)

# ---- TOKEN end ----


def build_tts_request_body(token, text):
    """Return the url-encoded POST body (bytes) for one synthesis request.

    The text is URL-encoded twice: once here with ``quote_plus`` and once
    more by ``urlencode`` together with the other parameters.
    """
    # 'lan' and 'ctp' are fixed parameters.
    params = {'tok': token, 'tex': quote_plus(text), 'cuid': "quickstart",
              'lan': 'zh', 'ctp': 1}
    return urlencode(params).encode('utf-8')


def is_audio_response(header_items):
    """Return True when the ``(name, value)`` header pairs announce audio."""
    headers = dict((name.lower(), value) for name, value in header_items)
    return 'audio/' in headers.get('content-type', '')


def synthesize(token, text):
    """Post *text* to TTS_URL and return ``(ok, payload)``.

    ``ok`` is True when the response content type contains ``audio/``; then
    ``payload`` holds the audio bytes.  Otherwise ``payload`` holds the error
    body returned by the server, or the failure reason when the server could
    not be reached at all.
    """
    req = Request(TTS_URL, build_tts_request_body(token, text))
    try:
        f = urlopen(req)
        try:
            payload = f.read()
            header_items = f.headers.items()
        finally:
            f.close()
    except HTTPError as err:
        print('http response http code : ' + str(err.code))
        return False, err.read()
    except URLError as err:
        reason = str(err.reason)
        print('http request failed : ' + reason)
        if IS_PY3:
            reason = reason.encode('utf-8')
        return False, reason
    return is_audio_response(header_items), payload


def main():
    """Synthesize TEXT and save the result next to the script."""
    token = fetch_token()
    ok, payload = synthesize(token, TEXT)

    save_file = u'限速.mp3' if ok else "error.txt"
    with open(save_file, 'wb') as of:
        of.write(payload)

    if not ok:
        if IS_PY3:
            payload = payload.decode('utf-8', 'replace')
        print("tts api error:" + payload)

    print("file saved as : " + save_file)


if __name__ == '__main__':
    main()
生成了我们所需要的MP3文件
3.播放MP3音频文件
①使用playsound库
首先尝试了playsound库 但是一直报 UnicodeDecodeError异常,未能解决问题
排查过程中,还将上文代码中所有的'utf-8'改为了'gbk'。
②使用pygame库
from pygame import mixer
import time

# Play an MP3 file through pygame's mixer.
mixer.init()
mixer.music.load('xx.mp3')
mixer.music.play()
# Wait for the clip to finish instead of sleeping a fixed 5 seconds, so that
# longer clips are not cut off and shorter ones do not waste time.
while mixer.music.get_busy():
    time.sleep(0.1)
mixer.music.stop()
正常播放!
也可以尝试其他的库
55个类别的语音文件均已成功生成:
参考代码:
# -*- coding: utf-8 -*-
"""Synthesize one spoken prompt per traffic-sign class with Baidu TTS.

For every entry of ``classes`` the script requests speech from TTS_URL and
saves the audio as ``<classes_eng entry>.mp3``.  When a request fails, the
error body is saved as ``<classes_eng entry>.error.txt`` instead, so that no
``.mp3`` file ever contains an error message.

Created on Mon Apr 19 15:08:18 2021

@author: ASUS
"""
import sys
import json

# Keep the script runnable on both Python 2 and Python 3.
IS_PY3 = sys.version_info.major == 3
if IS_PY3:
    from urllib.request import urlopen
    from urllib.request import Request
    from urllib.error import HTTPError
    from urllib.error import URLError
    from urllib.parse import urlencode
    from urllib.parse import quote_plus
else:
    import urllib2
    from urllib import quote_plus
    from urllib2 import urlopen
    from urllib2 import Request
    from urllib2 import HTTPError
    from urllib2 import URLError
    from urllib import urlencode

# NOTE(review): these look like real credentials committed to a public post;
# rotate them and load the values from the environment or a private config.
# Replace with your own API_KEY.
API_KEY = 'yZGnzL2cx2AzBHGTFh5suOHt'

# Replace with your own SECRET_KEY.
SECRET_KEY = 'cTlsySaXhY90HhZVciBstcv7Cs1SnIFB'

# Prompt text for each class; classes[i] is spoken into the file named after
# classes_eng[i].
classes = [
    "请注意,前方消防通道,禁止停车!",
    "请注意,前方行人和非机动车禁止通行!",
    "请注意,前方禁止电动自行车通行!",
    "请注意,前方禁止通行!",
    "请注意,前方禁止三轮车机动车通行!",
    "请注意,前方禁止拖拉机驶入!",
    "请注意,前方禁止非机动车入内!",
    "请注意,前方禁止禁止驶入!",
    "请注意,前方禁止大型客车驶入!",
    "请注意,前方禁止畜力车进入!",
    "请注意,前方禁止机动车驶入!",
    "请注意,前方禁止小型客车驶入!",
    "请注意,前方禁止二轮摩托车驶入!",
    "请注意,前方禁止人力货运三轮车进入!",
    "请注意,前方禁止载货汽车驶入!",
    "请注意,前方禁止汽车拖、挂车驶入!",
    "请注意,前方禁止某两种车驶入!",
    "请注意,前方禁止人力客运三轮车进入!",
    "请注意,前方禁止人力车进入!",
    "请注意,前方禁止向左转弯!",
    "请注意,前方禁止直行和向左转弯!",
    "请注意,前方解除禁止超车!",
    "请注意,前方禁止向右转弯!",
    "请注意,前方禁止直行和向右转弯!",
    "请注意,前方禁止停车!",
    "请注意,前方禁止直行!",
    "请注意,前方禁止掉头!",
    "请注意,前方禁止长时停车!",
    "请注意,前方禁止行人进入!",
    "请注意,前方禁止向左向右转弯!",
    "请注意,前方禁止超车!",
    "请注意,前方禁止鸣喇叭!",
    "请注意,前方限制宽度!",
    "请注意,前方限制速度!",
    "请注意,前方减速让行!",
    "请注意,前方限制高度!",
    "请注意,前方解除限制速度!",
    "请注意,前方路段会车让行!",
    "请注意,前方路段限制质量!",
    "请注意,前方路段停车检查!",
    "请注意,前方路段限制轴重!",
    "请注意,前方停车让行!",
    "请注意,前方禁止运输危险物品车辆驶入!",
    "请注意,前方海关!",
    "请注意,前方区域禁止长时停车!",
    "请注意,前方解除禁止区域停车!",
    "请注意,前方区域禁止停车!",
    "请注意,前方区域禁止停车解除!",
    "请注意,前方区域限制速度!",
    "请注意,前方区域限制速度解除!",
    "请注意,前方禁止电动三轮车驶入!",
    "请注意,前方禁止小客车右转!",
    "请注意,前方禁止载货汽车左转!",
    "请注意,前方有警告标志!",
    "请注意,前方有指示标志!",
]

# English class names, used as output file names (same order as `classes`).
classes_eng = [
    'Fire Exit. No Parking',
    'Pedestrians and non-motor vehicles are prohibited',
    'Prohibition of electric bicycles',
    'No thoroughfare',
    'Prohibition of tricycles and motor vehicles',
    'Prohibit tractors from entering!',
    'Prohibition of non-motorized vehicles',
    'No Entry',
    'Ban large passenger vehicles from entering',
    'Ban animal-drawn vehicles entering',
    'Prohibit motorized vehicles',
    'Prohibition of entry of small passenger cars',
    'Two-wheeled motorcycles are prohibited',
    'Prohibition of human-powered cargo tricycles from entering',
    'Prohibit laden car into',
    'It is forbidden to tow or trailer vehicles',
    'Prohibit certain two types of vehicles from entering',
    'Human-powered passenger tricycles are prohibited from entering',
    'No rickshaws are allowed',
    'Prohibited left turn',
    'Prohibited left turn and straight',
    'Lifting the prohibition on overtaking',
    'Prohibited right turn',
    'Prohibited right turn and straight',
    'No parking',
    'Prohibition straight',
    'No U-turn',
    'No long-term parking',
    'Prohibit pedestrians entering',
    'No left or right turn',
    'No Overtaking',
    'Prohibit honking',
    'Limit width',
    'Limit speed',
    'Slow down and give way',
    'Limit height',
    'Speed restrictions lifted',
    'Give Way to Oncoming Vehicles',
    'Limit weight',
    'Parking inspection',
    'Limit axle load',
    'Park to give way',
    'Prohibit entry of vehicles transporting dangerous goods',
    'customs',
    'Long time parking is prohibited in the area',
    'Long-term parking prohibited in the area lifted',
    'Parking prohibited area',
    'Regional no parking lifted',
    'Regional speed limit',
    'Area speed limit lifted',
    'Prohibition of electric tricycles',
    'Prohibition of right turning of passenger cars',
    'No left turn for trucks',
    'Warning signs',
    'mandatory sign',
]

TTS_URL = 'http://tsn.baidu.com/text2audio'

# ---- TOKEN start ----

# NOTE(review): SECRET_KEY is posted to this endpoint over plain HTTP;
# confirm whether the service accepts HTTPS and switch if it does.
TOKEN_URL = 'http://openapi.baidu.com/oauth/2.0/token'


def fetch_token():
    """Fetch an access token for the TTS service.

    Posts API_KEY and SECRET_KEY to TOKEN_URL using the
    ``client_credentials`` grant.

    Returns:
        The ``access_token`` value of the JSON response.

    Raises:
        SystemExit: if the server cannot be reached, the response is not
            JSON, the response lacks ``access_token`` / ``scope``, or the
            granted scope does not include ``audio_tts_post``.
    """
    params = {'grant_type': 'client_credentials',
              'client_id': API_KEY,
              'client_secret': SECRET_KEY}
    post_data = urlencode(params)
    if IS_PY3:
        # The url-encoded body is pure ASCII, so the codec does not change
        # the bytes that are sent.
        post_data = post_data.encode('utf-8')
    req = Request(TOKEN_URL, post_data)
    try:
        f = urlopen(req, timeout=5)
        try:
            result_str = f.read()
        finally:
            f.close()
    except HTTPError as err:
        # Only HTTPError carries a status code and a readable body.
        print('token http response http code : ' + str(err.code))
        result_str = err.read()
    except URLError as err:
        # DNS / connection failures have a reason but no code and no body.
        print('token request failed : ' + str(err.reason))
        sys.exit(1)
    if IS_PY3:
        result_str = result_str.decode()
    try:
        result = json.loads(result_str)
    except ValueError:
        print('token response is not valid JSON : ' + result_str)
        sys.exit(1)
    if 'access_token' in result and 'scope' in result:
        if 'audio_tts_post' not in result['scope'].split(' '):
            print('please ensure has check the tts ability')
            sys.exit(1)
        return result['access_token']
    print('please overwrite the correct API_KEY and SECRET_KEY')
    sys.exit(1)

# ---- TOKEN end ----


def build_tts_request_body(token, text):
    """Return the url-encoded POST body (bytes) for one synthesis request.

    The text is URL-encoded twice: once here with ``quote_plus`` and once
    more by ``urlencode`` together with the other parameters.
    """
    # 'lan' and 'ctp' are fixed parameters.
    params = {'tok': token, 'tex': quote_plus(text), 'cuid': "quickstart",
              'lan': 'zh', 'ctp': 1}
    return urlencode(params).encode('utf-8')


def is_audio_response(header_items):
    """Return True when the ``(name, value)`` header pairs announce audio."""
    headers = dict((name.lower(), value) for name, value in header_items)
    return 'audio/' in headers.get('content-type', '')


def synthesize(token, text):
    """Post *text* to TTS_URL and return ``(ok, payload)``.

    ``ok`` is True when the response content type contains ``audio/``; then
    ``payload`` holds the audio bytes.  Otherwise ``payload`` holds the error
    body returned by the server, or the failure reason when the server could
    not be reached at all.
    """
    req = Request(TTS_URL, build_tts_request_body(token, text))
    try:
        f = urlopen(req)
        try:
            payload = f.read()
            header_items = f.headers.items()
        finally:
            f.close()
    except HTTPError as err:
        print('http response http code : ' + str(err.code))
        return False, err.read()
    except URLError as err:
        reason = str(err.reason)
        print('http request failed : ' + reason)
        if IS_PY3:
            reason = reason.encode('utf-8')
        return False, reason
    return is_audio_response(header_items), payload


def main():
    """Generate one audio file per class and report where each was saved."""
    # Hoisted out of the loop: the token does not depend on the text being
    # synthesized, so one request is enough for the whole batch.
    token = fetch_token()
    for index, (text, name) in enumerate(zip(classes, classes_eng)):
        print(index)
        print(text)
        ok, payload = synthesize(token, text)

        # Keep error bodies out of the .mp3 files so that a failed request
        # cannot be mistaken for playable audio.
        save_file = name + ('.mp3' if ok else '.error.txt')
        with open(save_file, 'wb') as of:
            of.write(payload)

        if not ok:
            if IS_PY3:
                payload = payload.decode('utf-8', 'replace')
            print("tts api error:" + payload)

        print("file saved as : " + save_file)


if __name__ == '__main__':
    main()