文字转语音tts
本文使用阿里百炼平台的 TTS 服务,将长文本转换成语音。
<dependency><groupId>com.alibaba</groupId><artifactId>dashscope-sdk-java</artifactId>
<!-- <version>2.20.8</version>--><version>2.21.0</version></dependency>
配置 API-Key
spring:
  ai:
    dashscope:
      api-key: sk-bbf4567b2dff4a61aaba8eb033dcbxxx # 百炼
package com.gj.tools;import com.alibaba.dashscope.aigc.multimodalconversation.AudioParameters;
import com.alibaba.dashscope.aigc.multimodalconversation.MultiModalConversation;
import com.alibaba.dashscope.aigc.multimodalconversation.MultiModalConversationParam;
import com.alibaba.dashscope.aigc.multimodalconversation.MultiModalConversationResult;
import com.alibaba.dashscope.exception.ApiException;
import com.alibaba.dashscope.exception.NoApiKeyException;
import com.alibaba.dashscope.exception.UploadFileException;
import org.springframework.http.ResponseEntity;
import org.springframework.stereotype.Component;import javax.sound.sampled.*;
import java.io.*;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.nio.file.*;
import java.nio.file.attribute.BasicFileAttributes;
import java.util.*;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;//import java.util.Map;
//
///**
// * @author GJ
// * @date 2025/7/30 22:24
// */@Component
public class QwenTTS {public static ResponseEntity<Map<String, Object>> call(String context, String apiKey)throws Exception {ExecutorService executor = Executors.newFixedThreadPool(4); // 线程池加速下载// 分割长文本(按512字节分段)List<String> textSegments = splitTextByByteSize(context, 512);List<String> audioUrls = new ArrayList<>();// 遍历每个文本片段并调用APIfor (String segment : textSegments) {System.out.println("Processing segment: " + segment.substring(0, Math.min(20, segment.length())) + "...");MultiModalConversation conv = new MultiModalConversation();MultiModalConversationParam param = MultiModalConversationParam.builder().model("qwen-tts-latest").apiKey(apiKey).text(segment).voice(AudioParameters.Voice.DYLAN).build();MultiModalConversationResult result = conv.call(param);String url = result.getOutput().getAudio().getUrl();audioUrls.add(url);}// 2. 下载并合并所有音频片段String mergedAudioUrl = mergeAudioFiles(audioUrls, executor);// 构建返回结果Map<String, Object> responseMap = new HashMap<>();responseMap.put("urls", audioUrls); // 语音URL列表responseMap.put("segments", textSegments); // 分段文本列表responseMap.put("originalLength", context.length()); // 原始文本长度// 3. 清理临时文件executor.shutdown();// 4. 
返回合并后的URLreturn ResponseEntity.ok(Map.of("url", mergedAudioUrl,"segmentCount", audioUrls.size(),"originalLength", context.length()));}private static String mergeAudioFiles(List<String> audioUrls, ExecutorService executor)throws Exception {// 创建临时目录Path tempDir = Files.createTempDirectory("audio_merge");List<Path> tempFiles = Collections.synchronizedList(new ArrayList<>());List<Callable<Path>> downloadTasks = new ArrayList<>();// 创建下载任务for (int i = 0; i < audioUrls.size(); i++) {final int index = i+1;final String url = audioUrls.get(i);downloadTasks.add(() -> {Path tempFile = Files.createTempFile(tempDir, "" + index + "_", ".wav");try (InputStream in = new URL(url).openStream()) {Files.copy(in, tempFile, StandardCopyOption.REPLACE_EXISTING);}tempFiles.add(tempFile);return tempFile;});}// 并行下载所有片段executor.invokeAll(downloadTasks);// 合并音频文件Path mergedFile = Files.createTempFile(tempDir, "merged_", ".wav");mergeWavFiles(tempFiles, mergedFile);// 上传到永久存储(这里以本地存储为例,实际应使用云存储)
// return uploadToStorage(mergedFile);System.out.println("文件夹。。。。。。" + tempDir.toAbsolutePath().toString());
// return mergedFile.toAbsolutePath().toString();// 3. 清理临时文件,执行完毕后关闭线程池executor.shutdown();cleanupTempDir(tempDir);System.out.println("文件夹。。。。。。" + tempDir.toAbsolutePath().toString());return tempDir.toAbsolutePath().toString();}// 核心音频合并方法private static void mergeWavFiles(List<Path> inputFiles, Path outputFile) throws Exception {try (ByteArrayOutputStream baos = new ByteArrayOutputStream()) {// 跳过第一个文件的WAV头boolean firstFile = true;int dataOffset = 44; // 标准WAV头长度
// 将inputFiles进行更具文件名称进行排序Path path = inputFiles.get(0);System.out.println("文件名称22222。。。。"+path.getFileName());
// inputFiles,path.getFileName() 进行排序 10_2093930167529076216 这个是文件名格式,从1开始进行排序inputFiles.sort(Comparator.comparingInt(s -> Integer.parseInt(s.getFileName().toString().split("_")[0])));System.out.println("排完序列>>>>>"+inputFiles);for (Path file : inputFiles) {System.out.println("文件名称。。。。"+file);byte[] fileBytes = Files.readAllBytes(file);if (firstFile) {// 写入完整第一个文件baos.write(fileBytes);firstFile = false;} else {// 只写入音频数据(跳过头)if (fileBytes.length > dataOffset) {baos.write(fileBytes, dataOffset, fileBytes.length - dataOffset);} else {baos.write(fileBytes);}}}// 更新最终文件大小信息(关键步骤)byte[] mergedData = baos.toByteArray();updateWavHeader(mergedData);// 写入最终文件Files.write(outputFile, mergedData);//将合并好的文件复制到指定的文件夹Path targetDir = Paths.get("D:\\Projects-GJ\\spring-ai-deepseek\\spring-ai-deepseek\\src\\main\\resources\\video");Files.copy(outputFile,targetDir.resolve(outputFile.getFileName()), StandardCopyOption.REPLACE_EXISTING);}}// 更新WAV文件头信息private static void updateWavHeader(byte[] wavData) {// 文件大小(不包括前8字节)int fileSize = wavData.length - 8;writeLittleEndian(wavData, 4, fileSize);// 数据块大小int dataSize = wavData.length - 44;writeLittleEndian(wavData, 40, dataSize);}// 小端序写入private static void writeLittleEndian(byte[] array, int position, int value) {array[position] = (byte) (value & 0xFF);array[position + 1] = (byte) ((value >> 8) & 0xFF);array[position + 2] = (byte) ((value >> 16) & 0xFF);array[position + 3] = (byte) ((value >> 24) & 0xFF);}/*** 按字节大小分割文本(保证不截断UTF-8字符)*/private static List<String> splitTextByByteSize(String text, int maxBytesPerSegment) {List<String> segments = new ArrayList<>();byte[] textBytes = text.getBytes(StandardCharsets.UTF_8);int start = 0;while (start < textBytes.length) {int end = start + maxBytesPerSegment;// 防止数组越界if (end > textBytes.length) {end = textBytes.length;}// 回退到完整字符边界(确保end在数组范围内)if (end < textBytes.length) {while (end > start && (textBytes[end] & 0xC0) == 0x80) {end--;}}segments.add(new String(textBytes, start, end - start, 
StandardCharsets.UTF_8));start = end;}return segments;}private static void cleanupTempDir(Path tempDir) throws IOException {Files.walkFileTree(tempDir, new SimpleFileVisitor<Path>() {@Overridepublic FileVisitResult visitFile(Path file, BasicFileAttributes attrs)throws IOException {Files.delete(file);return FileVisitResult.CONTINUE;}@Overridepublic FileVisitResult postVisitDirectory(Path dir, IOException exc)throws IOException {Files.delete(dir);return FileVisitResult.CONTINUE;}});}
}
调用:
/**
 * Handles {@code POST /tts/PromptTemplate/chat}: reads the question from the request body and
 * passes it, together with {@code apiKey}, to {@code qwenTTS.call}.
 *
 * @param videoQuest request payload; its {@code getQuestion()} value is the text to synthesize
 * @return whatever {@code qwenTTS.call} returns
 * @throws RuntimeException wrapping any exception raised while handling the request
 */
@PostMapping("/tts/PromptTemplate/chat")
public ResponseEntity<Map<String, Object>> ttsPromptTemplateChat(
        @RequestBody VideoQuest videoQuest) {
    try {
        final String prompt = videoQuest.getQuestion();
        System.out.println("-------" + prompt);
        return qwenTTS.call(prompt, apiKey);
    } catch (Exception e) {
        // Every failure is rethrown unchecked with the original exception kept as the cause.
        throw new RuntimeException(e);
    }
}
结果:
学得越多,不知道的也越多
boot接入deepseek详细文章