本文的实现基于博客《java实现微软文本转语音(TTS)经验总结_java tts_${简简单单}的博客-CSDN博客》,首先感谢原博客提供源码。本人在其基础上添加了一些详细的注释,方便大家更好地理解和使用——毕竟我照着原文调试了一下午才调通,所以把一些细节问题给大家标注出来,免得浪费大家的时间。下面直接开始代码吧!
首先大家需要去微软官网获取到密钥,方便调用时可以使用,大家注意看下图,我们一定要注意给我们分配到的区域,我这里是分配到eastus ,就是east us(美国东部)的意思,大家一定需要注意一下,后面会使用到的,然后终结点里面的地址就是我们获取token的地址
下面我们准备几个类,方便后面使用,大家把代码都复制到自己项目中,不要有遗漏:
package com.daoversal.util; public class ByteArray { private byte[] data; private int length; public ByteArray(){ length = 0; data = new byte[length]; } public ByteArray(byte[] ba){ data = ba; length = ba.length; } /** 合并数组 */ public void cat(byte[] second, int offset, int length){ if(this.length + length > data.length) { int allocatedLength = Math.max(data.length, length); byte[] allocated = new byte[allocatedLength << 1]; System.arraycopy(data, 0, allocated, 0, this.length); System.arraycopy(second, offset, allocated, this.length, length); data = allocated; }else { System.arraycopy(second, offset, data, this.length, length); } this.length += length; } public void cat(byte[] second){ cat(second, 0, second.length); } public byte[] getArray(){ if(length == data.length){ return data; } byte[] ba = new byte[length]; System.arraycopy(data, 0, ba, 0, this.length); data = ba; return ba; } public int getLength(){ return length; } }
package com.daoversal.util; import javax.net.ssl.HttpsURLConnection; import java.net.URL; public class HttpsConnection { public static HttpsURLConnection getHttpsConnection(String connectingUrl) throws Exception { URL url = new URL(connectingUrl); return (HttpsURLConnection) url.openConnection(); } }
package com.daoversal.util; import lombok.extern.slf4j.Slf4j; import org.w3c.dom.Document; import org.w3c.dom.Element; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; import javax.xml.transform.OutputKeys; import javax.xml.transform.Transformer; import javax.xml.transform.TransformerException; import javax.xml.transform.TransformerFactory; import javax.xml.transform.dom.DOMSource; import javax.xml.transform.stream.StreamResult; import java.io.StringWriter; @Slf4j public class XmlDom { public static String createDom(String locale, String genderName, String voiceName, String textToSynthesize){ Document doc = null; Element speak, voice; try { DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); DocumentBuilder builder = dbf.newDocumentBuilder(); doc = builder.newDocument(); if (doc != null){ speak = doc.createElement('speak'); speak.setAttribute('version', '1.0'); speak.setAttribute('xml:lang', 'en-US'); voice = doc.createElement('voice'); voice.setAttribute('xml:lang', locale); voice.setAttribute('xml:gender', genderName); voice.setAttribute('name', voiceName); voice.appendChild(doc.createTextNode(textToSynthesize)); speak.appendChild(voice); doc.appendChild(speak); } } catch (ParserConfigurationException e) { log.error('Create ssml document failed: {}',e.getMessage()); return null; } return transformDom(doc); } private static String transformDom(Document doc){ StringWriter writer = new StringWriter(); try { TransformerFactory tf = TransformerFactory.newInstance(); Transformer transformer; transformer = tf.newTransformer(); transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, 'yes'); transformer.transform(new DOMSource(doc), new StreamResult(writer)); } catch (TransformerException e) { log.error('Transform ssml document failed: {}',e.getMessage()); return null; } return writer.getBuffer().toString().replaceAll('\n|\r', ''); } }
下面这个类我给大家重点讲一下,大家去下面网址看看自己的参数Text to speech API reference (REST) - Speech service - Azure AI services | Microsoft Learn
AUDIO_24KHZ_48KBITRATE_MONO_MP3 :音频输出格式(注意不是语言类型),这个不太重要,哪个效果好就用哪个,可以去下图里找:
ACCESS_TOKEN_URI :就是本文章的第一张图里面,里面获取token的地址,直接将地址复制进来就好了。
API_KEY :自己的api key,就是密钥。
TTS_SERVICE_URI : 这个地址一定要对应分配的区域才行,不然会报权限错误
Synthesis tts speech failed Server returned HTTP response code: 401 for URL: https://.........
我这里是 east us(美国东部),所以就使用美国东部里面的地址即可。
package com.daoversal.util;

/**
 * Configuration constants for the text-to-speech integration.
 *
 * <p>{@link #ACCESS_TOKEN_URI} and {@link #API_KEY} are placeholders and must be
 * replaced with the values of your own speech resource before use.
 */
public class TtsConst {

    /**
     * Audio output format requested from the service.
     * The author found this format to give the best result; other formats are listed at
     * https://learn.microsoft.com/en-us/azure/ai-services/speech-service/rest-text-to-speech?tabs=streaming
     */
    public static final String AUDIO_24KHZ_48KBITRATE_MONO_MP3 = "audio-24khz-48kbitrate-mono-mp3";

    /**
     * Token endpoint: the "endpoint" address shown on the page where the key is
     * obtained. It is used to request the access token.
     */
    public static final String ACCESS_TOKEN_URI = "token获取地址";

    /**
     * Subscription key (API key) of the speech resource.
     * NOTE(review): a real key should not be committed to source control — consider
     * loading it from external configuration.
     */
    public static final String API_KEY = "自己的密钥";

    /**
     * Text-to-speech service URL. The host must match the region the speech
     * resource was assigned to (here: eastus).
     */
    public static final String TTS_SERVICE_URI = "https://eastus.tts.speech.microsoft.com/cognitiveservices/v1/";
}
下面参数给大家讲一下:
textToSynthesize : 传入的合成语音文本内容
locale:语言类型,大家可以参考,中文在嵌入式语音里面,大家可以在两个页面找到自己需要的语言。
Embedded Speech - Speech service - Azure AI services | Microsoft Learn
Language support - Speech service - Azure AI services | Microsoft Learn
gender:发声人性别,Male 表示男性,Female 表示女性
voiceName :发声者名称,大家可以去下图找出对应的,比如中文的话:
package com.daoversal.util; import lombok.extern.slf4j.Slf4j; import org.apache.commons.lang3.StringUtils; import org.springframework.stereotype.Component; import javax.annotation.Resource; import javax.net.ssl.HttpsURLConnection; import java.io.DataOutputStream; import java.io.InputStream; @Slf4j @Component public class TtsService { @Resource private Authentication authentication; /** * 合成音频 * @param textToSynthesize 传入需要翻译的文本 * @param locale 要合成的语言类型 * @param gender 性别 * @param voiceName 发音者名称 * @return */ public byte[] genAudioBytes(String textToSynthesize, String locale, String gender, String voiceName) { String accessToken = authentication.genAccessToken(); if (StringUtils.isEmpty(accessToken)) { return new byte[0]; } try { HttpsURLConnection webRequest = HttpsConnection.getHttpsConnection(TtsConst.TTS_SERVICE_URI); webRequest.setRequestProperty('Host', 'eastus.tts.speech.microsoft.com'); webRequest.setRequestProperty('Content-Type', 'application/ssml+xml'); webRequest.setRequestProperty('X-Microsoft-OutputFormat', TtsConst.AUDIO_24KHZ_48KBITRATE_MONO_MP3); webRequest.setRequestProperty('Authorization', 'Bearer ' + accessToken); webRequest.setRequestProperty('Ocp-Apim-Subscription-Key', TtsConst.API_KEY); webRequest.setRequestProperty('User-Agent', 'Mozilla/5.0'); webRequest.setRequestProperty('Accept', '*/*'); webRequest.setDoInput(true); webRequest.setDoOutput(true); webRequest.setConnectTimeout(5000); webRequest.setReadTimeout(300000); webRequest.setRequestMethod('POST'); String body = XmlDom.createDom(locale, gender, voiceName, textToSynthesize); if (StringUtils.isEmpty(body)) { return new byte[0]; } byte[] bytes = body.getBytes(); webRequest.setRequestProperty('content-length', String.valueOf(bytes.length)); webRequest.connect(); DataOutputStream dop = new DataOutputStream(webRequest.getOutputStream()); dop.write(bytes); dop.flush(); dop.close(); InputStream inSt = webRequest.getInputStream(); ByteArray ba = new ByteArray(); int rn2 = 0; int bufferLength 
= 4096; byte[] buf2 = new byte[bufferLength]; while ((rn2 = inSt.read(buf2, 0, bufferLength)) > 0) { ba.cat(buf2, 0, rn2); } inSt.close(); webRequest.disconnect(); return ba.getArray(); } catch (Exception e) { log.error('Synthesis tts speech failed {}', e.getMessage()); } return null; } }
最后就是调用了,大家可以测试了:
package com.daoversal.web; import com.daoversal.framework.http.Response; import com.daoversal.task.DvWeekCountTask; import com.daoversal.task.RechargeTask; import com.daoversal.task.UserGradeCountTask; import com.daoversal.task.WindControlMsgTask; import com.daoversal.util.TtsService; import io.swagger.annotations.Api; import io.swagger.annotations.ApiOperation; import okhttp3.*; import org.springframework.boot.configurationprocessor.json.JSONException; import org.springframework.boot.configurationprocessor.json.JSONObject; import org.springframework.web.bind.annotation.GetMapping; import org.springframework.web.bind.annotation.PostMapping; import org.springframework.web.bind.annotation.RequestMapping; import org.springframework.web.bind.annotation.RestController; import javax.annotation.Resource; import javax.servlet.http.HttpServletResponse; import java.io.*; /** * <p> * 套餐价值释放记录表 前端控制器 * </p> * * @author HayDen * @since 03 22 10:44:13 */ @RestController @RequestMapping('/test') @Api(value = 'test') public class TestController { @Resource private TtsService testService; @PostMapping('/ttsService') @ApiOperation(value = '获取ttsService', httpMethod = 'POST' ) public void ttsService(String text) { // byte[] bte = testService.genAudioBytes(res,'en-US','Male','en-US-JennyNeural'); byte[] bte = testService.genAudioBytes(text,'zh-CN','Male','zh-CN-YunxiNeural'); String value = 'hllo.mp3'; convertByteArrayToFile(bte,value); System.out.println('213213123'); } /** * 此文件是将byte[] 转换成文件存储到指定路径的 * @param arr * @param value */ public static void convertByteArrayToFile(byte[] arr,String value) { try ( BufferedInputStream bis = new BufferedInputStream(new ByteArrayInputStream(arr)); //这里是转换以后的文件存储的路径 FileOutputStream fileOutputStream = new FileOutputStream('/Users/recovery/Downloads/'+value); BufferedOutputStream bos = new BufferedOutputStream(fileOutputStream) ) { int data; while ((data = bis.read()) != -1) { bos.write(data); } bos.flush(); } catch (IOException e) { 
e.printStackTrace(); } } }
最后大家需要注意:如果你选的语言是英文 en-US,但输入的文本是中文,它是不会自动翻译的,所以一定要保证语言类型和输入文本一致,不要弄错了。如果有疑问可以留言哦,我看到后一定会毫无保留地给大家说明。
联系客服