使用FFmpeg进行视频抽取音频，之后进行语音识别转为文字-阿里云开发者社区

1、首先需要下载FFmpeg；

2、Gradle依赖

正在上传…
取消
/**
 * Pins a dependency to the given version when its group matches.
 *
 * @param details the dependency-resolve details under inspection
 * @param group   group id compared against details.requested.group
 * @param version version passed to details.useVersion on a match
 */
void forceVersion(details, group, version) {
    // Leave dependencies of any other group untouched.
    if (details.requested.group != group) {
        return
    }
    details.useVersion(version)
}

/**
 * Pins a single module to the given version when both group and name match.
 *
 * @param details the dependency-resolve details under inspection
 * @param group   group id compared against details.requested.group
 * @param name    module name compared against details.requested.name
 * @param version version passed to details.useVersion on a match
 */
void forceVersion(details, group, name, version) {
    def requested = details.requested
    def sameModule = requested.group == group && requested.name == name
    if (sameModule) {
        details.useVersion(version)
    }
}

// Configuration applied to the root project and to every subproject.
allprojects { p ->

    group = 'com.my.spider'
    version = '1.0.0'

    apply plugin: 'java'
    apply plugin: 'maven'
    apply plugin: 'maven-publish'

    // Compile both main and test sources as UTF-8.
    [compileJava, compileTestJava]*.options*.encoding = 'UTF-8'

    // Populate the jar manifest just before the jar task executes.
    jar.doFirst {
        manifest {
            // Start from the project's own META-INF/MANIFEST.MF when one exists.
            def manifestFile = "${projectDir}/META-INF/MANIFEST.MF"
            if (new File(manifestFile).exists())
                from (manifestFile)

            attributes 'Implementation-Title':p.name
            // Snapshot versions get the build timestamp appended so builds can be told apart.
            if (p.version.endsWith('-SNAPSHOT')) {
                attributes 'Implementation-Version': p.version + '-' + p.ext.Timestamp
            } else {
                attributes 'Implementation-Version': p.version
            }
            attributes 'Implementation-BuildDateTime':new Date()
        }
    }

    javadoc {
        options {
            encoding 'UTF-8'
            charSet 'UTF-8'
            author false
            version true
            // The link must be the API root directory (not index.html): javadoc looks up
            // the package list relative to this URL to resolve references to JDK classes.
            links 'https://docs.oracle.com/javase/8/docs/api/'
            memberLevel = org.gradle.external.javadoc.JavadocMemberLevel.PRIVATE
        }
    }

    // Publish as part of "build" only when the uploadArchives environment variable is set.
    if (System.env.uploadArchives) {
        build.dependsOn publish
    }

    buildscript {
        repositories {
            mavenCentral()
        }
        dependencies {classpath 'org.springframework.boot:spring-boot-gradle-plugin:1.5.14.RELEASE' }
    }

    afterEvaluate {Project  project -> 
        if (project.pluginManager.hasPlugin('java')) {
            configurations.all {
                // Force one version per group across all configurations.
                resolutionStrategy.eachDependency {DependencyResolveDetails details -> 
                    forceVersion details, 'org.springframework.boot', '1.4.1.RELEASE'
                    forceVersion details, 'org.slf4j', '1.7.21'
                    forceVersion details, 'org.springframework', '4.3.3.RELEASE'
                }

                // Keep the log4j binding and log4j itself off every classpath.
                exclude module:'slf4j-log4j12'
                exclude module:'log4j'
            }

            dependencies {testCompile 'junit:junit:4.12' }
        }
    }

    repositories {
        mavenCentral()
    }

    // Timestamp: year, month, day, hour, minute.
    p.ext.Timestamp = new Date().format('yyyyMMddHHmm')
    // Build number taken from the BUILD_NUMBER environment variable; 'x' when it is absent or empty.
    p.ext.BuildNumber = System.env.BUILD_NUMBER
    if (p.ext.BuildNumber == null || "" == p.ext.BuildNumber) {
        p.ext.BuildNumber = 'x'
    }

}

// Zips the project's source tree into "<name>-<version>-<timestamp>.<build>-sources.zip".
task zipSources(type: Zip) {

    description '压缩源代码'

    // The file name is also stored in project.ext.zipSourcesFile so other tasks can read it.
    def sourcesZipName = project.name + '-' + project.version + '-' + project.ext.Timestamp + '.' + project.ext.BuildNumber + '-sources.zip'
    project.ext.zipSourcesFile = sourcesZipName
    archiveName = sourcesZipName
    includeEmptyDirs = false

    from project.projectDir

    // Skip dot-files and the root build output.
    exclude '**/.*'
    exclude 'build/*'

    // Skip generated and runtime directories of every project.
    allprojects.each { member ->
        ['bin', 'build', 'data', 'work', 'logs'].each { dirName ->
            exclude '**/' + member.name + '/' + dirName + '/*'
        }
    }

}

/**
 * Describes the install layout of one application: its jar, example configs,
 * Windows start script and Unix conf file, all placed under one target directory.
 *
 * @param prj     project whose artifacts are collected
 * @param dstname target directory inside the archive; defaults to the project name
 * @return the copy spec for that layout
 */
def CopySpec appCopySpec(Project prj, dstname = null) {

    def targetDir = dstname ?: prj.name
    def projectRoot = prj.projectDir.toString()
    def versionedName = prj.name + '-' + project.version

    return copySpec {
        // Fat jar produced by the project's build
        from(prj.buildDir.toString() + '/libs/' + versionedName + '.jar') {
            into targetDir
        }

        // Example configuration files
        from(projectRoot + '/config/examples') {
            into targetDir + '/config'
        }

        // Windows start script
        from(projectRoot + '/' + prj.name + '.bat') {
            into targetDir
        }

        // Unix conf file, renamed so its name carries the version like the jar does
        from(projectRoot + '/' + prj.name + '.conf') {
            into targetDir
            rename prj.name, versionedName
        }
    }

}

// Builds the installation package "<name>-<version>-<timestamp>.<build>-setup.zip"
// after all subprojects have been built.
task zipSetup(type: Zip, dependsOn: subprojects.build) {

    description '制作安装包'

    // Compute the name once; it is also stored in project.ext.zipSetupFile for other tasks.
    def setupZipName = project.name + '-' + project.version + '-' + project.ext.Timestamp + '.' + project.ext.BuildNumber + '-setup.zip'
    project.ext.zipSetupFile = setupZipName
    archiveName = setupZipName

    with appCopySpec(project(':spider-demo'))

}

import java.security.MessageDigest

/**
 * Computes the MD5 checksum of a file.
 *
 * @param file the file to hash; it is read through withInputStream
 * @return the checksum as a 32-character lowercase hexadecimal string
 */
def generateMD5(final file) {

    MessageDigest digest = MessageDigest.getInstance("MD5")
    file.withInputStream { is ->
        byte[] buffer = new byte[8192]
        int read
        // read() signals end of stream with -1.
        while ((read = is.read(buffer)) != -1) {
            digest.update(buffer, 0, read)
        }
    }
    // Hex-encode the raw bytes directly: going through BigInteger.toString(16)
    // would drop leading zeros and yield checksums shorter than 32 digits.
    return digest.digest().encodeHex().toString()

}

// Prints the MD5 checksums of the setup and sources archives and writes them to
// "<name>-<version>-<timestamp>.<build>-md5.txt" next to the archives.
task md5(dependsOn: [zipSetup, zipSources]) {
    // doLast replaces the "<<" shortcut, which newer Gradle versions no longer accept.
    doLast {
        String distDir = "${projectDir}/build/distributions/"

        String md5_setup = generateMD5(file(distDir + project.ext.zipSetupFile))
        String md5_sources = generateMD5(file(distDir + project.ext.zipSourcesFile))
        println project.ext.zipSetupFile + '=' + md5_setup
        println project.ext.zipSourcesFile + '=' + md5_sources

        def newFile = new File(distDir
                + project.name + '-' + project.version + '-' + project.ext.Timestamp + '.' + project.ext.BuildNumber + '-md5.txt')
        // withPrintWriter flushes and closes the writer even when a write fails.
        newFile.withPrintWriter { printWriter ->
            printWriter.println(project.ext.zipSetupFile + '=' + md5_setup)
            printWriter.println(project.ext.zipSourcesFile + '=' + md5_sources)
        }
    }
}

// Running "build" on the root project also builds every subproject, creates both
// zip archives and writes their MD5 checksum file.
build.dependsOn subprojects.build, zipSetup, zipSources, md5
build.gradle
工程组件gradle依赖：语音识别使用百度api；需引入 compile 'com.baidu.aip:java-sdk:3.2.1'

// Build script of the application module: Spring Boot packaging plus the
// 'application' plugin's distribution tasks.
apply plugin: 'spring-boot'
apply plugin: 'application'

// The main distribution additionally ships the example configs under "config".
distributions {

main {
    contents {
        from ("${projectDir}/config/examples") {
            into "config"
        }
    }
}

}

// Turn off the tar distribution task.
distTar.enabled = false

springBoot {

executable = true
mainClass = 'com.my.ai.Application'

}

// NOTE(review): the versions below are fixed literals; the root script also forces
// versions for some groups, so the effective versions may differ. Confirm, and check
// these releases against current security advisories before reuse.
dependencies {

compile 'org.springframework.boot:spring-boot-starter-web:1.4.0.RELEASE'
compile 'dom4j:dom4j:1.6.1'
compile 'commons-httpclient:commons-httpclient:3.1'
compileOnly 'com.h2database:h2:1.4.191'
compile 'javax.cache:cache-api:1.0.0'
compile 'org.jboss.resteasy:resteasy-jaxrs:3.0.14.Final'
compile 'org.jboss.resteasy:resteasy-client:3.0.14.Final'
// Axis
compile 'axis:axis:1.4'

compile 'org.jsoup:jsoup:1.10.1'

compile 'com.alibaba:fastjson:1.2.21'

// Baidu AIP SDK, used by the speech-recognition service.
compile 'com.baidu.aip:java-sdk:3.2.1'

}
3、视频抽取音频服务：

正在上传…
取消
package com.my.ai.service;

import java.io.BufferedReader;
import java.io.File;
import java.io.InputStreamReader;
import java.util.List;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Service;

/**
 * Extracts the audio track of a video file by running an external ffmpeg executable.
 */
@Service
public class ExtractAudioService {

    public static Logger logger = LoggerFactory.getLogger(ExtractAudioService.class);

    public static void main(String[] args) {
        new ExtractAudioService().getAudioFromVideo("E:\\QLDownload\\氧化还原反应中电子转移的方向和数目的表示方法\\氧化还原反应中电子转移的方向和数目的表示方法.mp4",
                "D:\\ffmpeg4.2\\bin\\ffmpeg.exe");
    }

    /**
     * Extracts the audio of a video into a WAV file placed next to the video.
     *
     * @param videoPath  path of the source video
     * @param ffmpegPath path of the ffmpeg executable
     * @return path of the WAV file (the video path with its extension replaced by
     *         {@code .wav}), or {@code null} when {@code videoPath} is null or does
     *         not denote an existing regular file
     */
    public String getAudioFromVideo(String videoPath, String ffmpegPath) {
        if (videoPath == null) {
            return null;
        }
        File video = new File(videoPath);
        if (!video.exists() || !video.isFile()) {
            return null;
        }
        String format = "wav";
        String outPath = stripExtension(videoPath) + ".wav";
        processCmd(videoPath, ffmpegPath, format, outPath);
        return outPath;
    }

    /**
     * Removes the extension of the last path segment; a path without one is returned
     * unchanged instead of failing on {@code substring(0, -1)}.
     */
    private static String stripExtension(String path) {
        int dot = path.lastIndexOf('.');
        int separator = Math.max(path.lastIndexOf('/'), path.lastIndexOf('\\'));
        // A dot that sits in a directory name does not start an extension.
        return dot > separator ? path.substring(0, dot) : path;
    }

    /**
     * Runs {@code ffmpeg -i <input> -y -vn -f <format> <output>}, e.g.
     * {@code D:\ffmpeg4.2\bin\ffmpeg.exe -i in.mp4 -y -vn -f wav 3.wav}.
     *
     * @param inputPath  media file to read
     * @param ffmpegPath path of the ffmpeg executable
     * @param format     output container format passed to {@code -f}
     * @param outPath    file to write; an existing file is overwritten ({@code -y})
     * @return everything ffmpeg printed (stdout and stderr merged, line breaks removed),
     *         or {@code null} when the process could not be started or was interrupted
     */
    public String processCmd(String inputPath, String ffmpegPath, String format, String outPath) {
        List<String> command = new java.util.ArrayList<String>();
        command.add(ffmpegPath);
        command.add("-i");
        command.add(inputPath);
        command.add("-y");
        command.add("-vn");
        command.add("-f");
        command.add(format);
        command.add(outPath);

        ProcessBuilder builder = new ProcessBuilder(command);
        // Merge stderr into stdout so a single reader drains all of ffmpeg's output.
        builder.redirectErrorStream(true);
        try {
            Process p = builder.start();
            StringBuilder sb = new StringBuilder();
            try (BufferedReader buf = new BufferedReader(new InputStreamReader(p.getInputStream()))) {
                String line;
                while ((line = buf.readLine()) != null) {
                    System.out.println(line);
                    sb.append(line);
                }
            }
            // Block until the external conversion has finished.
            int exitCode = p.waitFor();
            if (exitCode != 0) {
                logger.warn("ffmpeg exited with code {} for {}", exitCode, inputPath);
            }
            return sb.toString();
        } catch (InterruptedException e) {
            // Restore the interrupt flag so callers can observe the interruption.
            Thread.currentThread().interrupt();
            logger.error("Interrupted while waiting for ffmpeg on {}", inputPath, e);
            return null;
        } catch (Exception e) {
            logger.error("Failed to run ffmpeg on {}", inputPath, e);
            return null;
        }
    }

}
ExtractAudioService
4、音频切段：

package com.my.ai.service;

import java.io.BufferedReader;
import java.io.File;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Service;

/**
 * Splits an audio file into segments of at most 59 seconds and converts WAV files to
 * raw PCM, delegating the media work to an external ffmpeg executable.
 */
@Service
public class CutService {

    public static Logger logger = LoggerFactory.getLogger(CutService.class);

    /**
     * Length of one segment in seconds.
     * NOTE(review): presumably chosen to stay under the speech API's per-request
     * audio length limit; confirm against the API documentation.
     */
    private static final int SEGMENT_SECONDS = 59;

    /**
     * Matches only the {@code HH:MM:SS[.fraction]} value after "Duration:". Anything
     * that follows it (", start: ...", ", bitrate: ...") is deliberately not captured,
     * so files whose header contains a start time are parsed correctly as well.
     */
    private static final Pattern DURATION_PATTERN =
            Pattern.compile("Duration:\\s*(\\d+:\\d{1,2}:\\d{1,2}(?:\\.\\d+)?)");

    /**
     * Cuts a media file into consecutive segments named {@code <index>-<filename>},
     * written to the same directory as the source.
     *
     * @param media_path  file to cut
     * @param ffmpeg_path path of the ffmpeg executable
     * @return paths of the segments in playback order; empty when the duration could
     *         not be determined
     */
    public List<String> cutFile(String media_path, String ffmpeg_path) {
        List<String> audios = new ArrayList<>();
        int mediaTime = getMediaTime(media_path, ffmpeg_path);
        int num = mediaTime / SEGMENT_SECONDS;
        int lastNum = mediaTime % SEGMENT_SECONDS;
        System.out.println(mediaTime + "|" + num + "|" + lastNum);

        File file = new File(media_path);
        String filename = file.getName();
        // getParent() is null for a bare file name; write next to it instead of into "null".
        String parent = file.getParent();
        String prefix = parent == null ? "" : parent + File.separator;

        for (int i = 0; i < num; i++) {
            String outputPath = prefix + i + "-" + filename;
            processCmd(media_path, ffmpeg_path, String.valueOf(SEGMENT_SECONDS * i),
                    String.valueOf(SEGMENT_SECONDS), outputPath);
            audios.add(outputPath);
        }
        // The remainder becomes one shorter final segment.
        if (lastNum > 0) {
            String outputPath = prefix + num + "-" + filename;
            processCmd(media_path, ffmpeg_path, String.valueOf(SEGMENT_SECONDS * num),
                    String.valueOf(lastNum), outputPath);
            audios.add(outputPath);
        }
        return audios;
    }

    /**
     * Reads the total duration of a media file from the header that {@code ffmpeg -i} prints.
     *
     * @param video_path  media file to inspect
     * @param ffmpeg_path path of the ffmpeg executable
     * @return duration in whole seconds, or 0 when ffmpeg could not be run or printed
     *         no parsable duration
     */
    public int getMediaTime(String video_path, String ffmpeg_path) {
        List<String> commands = new ArrayList<>();
        commands.add(ffmpeg_path);
        commands.add("-i");
        commands.add(video_path);

        // No output file is given, so a non-zero exit code is expected here.
        String output = runFfmpeg(commands, false);
        if (output == null) {
            return 0;
        }
        Matcher m = DURATION_PATTERN.matcher(output);
        if (!m.find()) {
            logger.warn("No duration found in ffmpeg output for {}", video_path);
            return 0;
        }
        int time = getTimelen(m.group(1));
        System.out.println(video_path + ",视频时长：" + time);
        return time;
    }

    /**
     * Converts a duration such as {@code "00:00:10.68"} to seconds.
     *
     * @param timelen colon-separated duration; the last field may carry a fraction,
     *                which is rounded to the nearest second
     * @return the duration in seconds
     * @throws NumberFormatException if a field is not numeric
     */
    public int getTimelen(String timelen) {
        String[] parts = timelen.trim().split(":");
        int seconds = 0;
        // Every field before the last is a whole number in base 60.
        for (int i = 0; i < parts.length - 1; i++) {
            seconds = seconds * 60 + Integer.parseInt(parts[i].trim());
        }
        return seconds * 60 + Math.round(Float.parseFloat(parts[parts.length - 1].trim()));
    }

    /**
     * Cuts one segment: {@code ffmpeg -y -i <input> -ss <start> -t <length> <output>}.
     *
     * @param inputPath  file to cut from
     * @param ffmpegPath path of the ffmpeg executable
     * @param startTime  start offset in seconds
     * @param length     segment length in seconds
     * @param outputPath file to write; an existing file is overwritten
     * @return everything ffmpeg printed (line breaks removed), or {@code null} when the
     *         process could not be run
     */
    public String processCmd(String inputPath, String ffmpegPath,
            String startTime, String length, String outputPath) {
        List<String> command = new ArrayList<>();
        command.add(ffmpegPath);
        // Without -y ffmpeg asks on stdin before overwriting and would block forever.
        command.add("-y");
        command.add("-i");
        command.add(inputPath);
        command.add("-ss");
        command.add(startTime);
        command.add("-t");
        command.add(length);
        command.add(outputPath);
        return runFfmpeg(command, true);
    }

    /**
     * Converts audio to 16 kHz mono signed 16-bit little-endian raw PCM:
     * {@code ffmpeg -y -i in.wav -acodec pcm_s16le -f s16le -ac 1 -ar 16000 out.pcm}.
     *
     * @param inputPath  audio file to convert
     * @param ffmpegPath path of the ffmpeg executable
     * @param outputPath PCM file to write; an existing file is overwritten
     * @return {@code outputPath}, or {@code null} when the process could not be run
     */
    public static String processWavToPcm(String inputPath, String ffmpegPath, String outputPath) {
        List<String> command = new ArrayList<>();
        command.add(ffmpegPath);
        // Without -y ffmpeg asks on stdin before overwriting and would block forever.
        command.add("-y");
        command.add("-i");
        command.add(inputPath);
        command.add("-acodec");
        command.add("pcm_s16le");
        command.add("-f");
        command.add("s16le");
        command.add("-ac");
        command.add("1");
        command.add("-ar");
        command.add("16000");
        command.add(outputPath);
        return runFfmpeg(command, true) == null ? null : outputPath;
    }

    /**
     * Runs a command, echoes its merged stdout/stderr to the console and waits for it to exit.
     *
     * @param command       executable followed by its arguments
     * @param warnOnFailure whether a non-zero exit code is logged as a warning
     * @return the collected output with line breaks removed, or {@code null} when the
     *         process could not be started or the wait was interrupted
     */
    private static String runFfmpeg(List<String> command, boolean warnOnFailure) {
        ProcessBuilder builder = new ProcessBuilder(command);
        // Merge stderr into stdout so a single reader drains all output.
        builder.redirectErrorStream(true);
        try {
            Process p = builder.start();
            StringBuilder sb = new StringBuilder();
            try (BufferedReader buf = new BufferedReader(new InputStreamReader(p.getInputStream()))) {
                String line;
                while ((line = buf.readLine()) != null) {
                    System.out.println(line);
                    sb.append(line);
                }
            }
            // Block until the external process has finished.
            int exitCode = p.waitFor();
            if (warnOnFailure && exitCode != 0) {
                logger.warn("ffmpeg exited with code {}: {}", exitCode, command);
            }
            return sb.toString();
        } catch (InterruptedException e) {
            // Restore the interrupt flag so callers can observe the interruption.
            Thread.currentThread().interrupt();
            logger.error("Interrupted while waiting for ffmpeg: {}", command, e);
            return null;
        } catch (Exception e) {
            logger.error("Failed to run ffmpeg: {}", command, e);
            return null;
        }
    }

    public static void main(String[] args) {
        List<String> audios = new CutService().cutFile(
                "E:\\QLDownload\\氧化还原反应中电子转移的方向和数目的表示方法\\氧化还原反应中电子转移的方向和数目的表示方法.wav",
                "D:\\ffmpeg4.2\\bin\\ffmpeg.exe");
        System.out.println(audios.size());

        for (String wavPath : audios) {
            String out = wavPath.substring(0, wavPath.lastIndexOf(".")) + ".pcm";
            processWavToPcm(wavPath, "D:\\ffmpeg4.2\\bin\\ffmpeg.exe", out);
        }
    }

}
5、音频格式转换（wav 转为 16k 采样率、单声道的 pcm），便于进行语音识别，代码见上面 CutService 中的 processWavToPcm 方法：

6、调用sdk，获取识别结果：

package com.my.ai.service;

import org.json.JSONObject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Service;

import com.baidu.aip.speech.AipSpeech;

/**
 * Sends PCM audio to the Baidu speech-recognition SDK and returns the recognized text.
 */
@Service
public class TokenService {

    public static Logger logger = LoggerFactory.getLogger(TokenService.class);

    // APPID / API key / secret key of the speech application (placeholders here).
    public static final String APP_ID = "***";
    public static final String API_KEY = "***";
    public static final String SECRET_KEY = "***";

    static AipSpeech client = null;
    static {
        client = new AipSpeech(APP_ID, API_KEY, SECRET_KEY);
        // Optional network settings; configured once instead of on every request.
        client.setConnectionTimeoutInMillis(2000);
        client.setSocketTimeoutInMillis(60000);
        // Optional proxy: use either client.setHttpProxy(host, port) or
        // client.setSocketProxy(host, port), or neither.
    }

    public static void main(String[] args) {
        getResult("E:\\QLDownload\\数目的表示方法.pcm");
    }

    /**
     * Recognizes the speech in a 16 kHz PCM file.
     *
     * @param file path of the PCM file
     * @return the {@code result} field of the response rendered as a string, or an
     *         empty string when the response contains no result, so that one failed
     *         segment does not abort a caller looping over many segments
     */
    public static String getResult(String file) {
        JSONObject res = client.asr(file, "pcm", 16000, null);
        // A response without "result" means recognition failed; get() would throw on it.
        if (res == null || !res.has("result")) {
            logger.error("Speech recognition returned no result for {}: {}", file, res);
            return "";
        }
        String text = res.get("result").toString();
        System.out.println(text);
        return text;
    }

}
　　

7、结果写入文件：

package com.my.ai.service;

import java.io.BufferedOutputStream;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.RandomAccessFile;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Service;

/**
 * Helpers that write or append text to files in several ways. All methods encode
 * text with the platform default charset.
 */
@Service
public class FileService {

    public static Logger logger = LoggerFactory.getLogger(FileService.class);

    /**
     * Overwrites a file using a plain {@link FileOutputStream} and prints the elapsed time.
     *
     * @param file    path of the file to write
     * @param content text to write
     * @throws IOException if the file cannot be opened or written
     */
    public static void writeFile1(String file, String content) throws IOException {
        long begin = System.currentTimeMillis();
        // try-with-resources closes the stream even when the write fails.
        try (FileOutputStream out = new FileOutputStream(new File(file))) {
            out.write(content.getBytes());
        }
        long end = System.currentTimeMillis();
        System.out.println("FileOutputStream执行耗时:" + (end - begin) + " 毫秒");
    }

    /**
     * Overwrites a file using a {@link FileWriter} and prints the elapsed time.
     *
     * @param file    path of the file to write
     * @param content text to write
     * @throws IOException if the file cannot be opened or written
     */
    public static void writeFile2(String file, String content) throws IOException {
        long begin3 = System.currentTimeMillis();
        try (FileWriter fw = new FileWriter(file)) {
            fw.write(content);
        }
        long end3 = System.currentTimeMillis();
        System.out.println("FileWriter执行耗时:" + (end3 - begin3) + " 毫秒");
    }

    /**
     * Overwrites a file using a {@link BufferedOutputStream} and prints the elapsed time.
     *
     * @param file    path of the file to write
     * @param content text to write
     * @throws IOException if the file cannot be opened or written
     */
    public static void writeFile3(String file, String content) throws IOException {
        long begin0 = System.currentTimeMillis();
        try (BufferedOutputStream buff = new BufferedOutputStream(new FileOutputStream(new File(file)))) {
            buff.write(content.getBytes());
            buff.flush();
        }
        long end0 = System.currentTimeMillis();
        System.out.println("BufferedOutputStream执行耗时:" + (end0 - begin0) + " 毫秒");
    }

    public static void main(String[] args) {
        for (int i = 0; i < 7; i++) {
            String result = TokenService.getResult("E:\\QLDownload\\氧化还原反应中电子转移的方向和数目的表示方法\\" + i + "-氧化还原反应中电子转移的方向和数目的表示方法.pcm");
            appendFile2("E:\\QLDownload\\氧化还原反应中电子转移的方向和数目的表示方法\\氧化还原反应中电子转移的方向和数目的表示方法.txt", result + "\r\n");
        }
    }

    /**
     * Appends text to a file through a {@link BufferedWriter}. Failures are logged, not thrown.
     *
     * @param file   path of the file to append to
     * @param conent text to append
     */
    public static void appendFile1(String file, String conent) {
        try (BufferedWriter out = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(file, true)))) {
            out.write(conent);
        } catch (Exception e) {
            logger.error("Failed to append to {}", file, e);
        }
    }

    /**
     * Appends text to a file through a {@link FileWriter}. Failures are logged, not thrown.
     *
     * @param fileName path of the file to append to
     * @param content  text to append
     */
    public static void appendFile2(String fileName, String content) {
        // The second constructor argument "true" opens the file in append mode.
        try (FileWriter writer = new FileWriter(fileName, true)) {
            writer.write(content);
        } catch (IOException e) {
            logger.error("Failed to append to {}", fileName, e);
        }
    }

    /**
     * Appends text to a file through a {@link RandomAccessFile}. Failures are logged, not thrown.
     *
     * @param fileName path of the file to append to
     * @param content  text to append
     */
    public static void appendFile3(String fileName, String content) {
        try (RandomAccessFile randomFile = new RandomAccessFile(fileName, "rw")) {
            // Move the write position to the end of the file.
            randomFile.seek(randomFile.length());
            // Write encoded bytes: writeBytes(String) keeps only the low byte of each
            // char and would corrupt any non-Latin-1 text such as Chinese.
            randomFile.write(content.getBytes());
        } catch (IOException e) {
            logger.error("Failed to append to {}", fileName, e);
        }
    }

}
　

8、测试：

package com.my.ai.test;

import java.util.List;

import com.my.ai.service.CutService;
import com.my.ai.service.ExtractAudioService;
import com.my.ai.service.FileService;
import com.my.ai.service.TokenService;

/**
 * End-to-end demo: extract the audio of a video, cut it into segments, convert each
 * segment to PCM, recognize it and append the text to a subtitle file.
 */
public class TestService {

    /** Location of the ffmpeg executable used by every step. */
    private static final String FFMPEG_PATH = "D:\\ffmpeg4.2\\bin\\ffmpeg.exe";

    public static void main(String[] args) {
        ExtractAudioService audioService = new ExtractAudioService();
        String outPath = audioService.getAudioFromVideo("G:\\Youku Files\\transcode\\反应_标清.mp4", FFMPEG_PATH);
        // getAudioFromVideo returns null when the video file does not exist.
        if (outPath == null) {
            System.err.println("Audio extraction failed: source video not found");
            return;
        }

        List<String> audios = new CutService().cutFile(outPath, FFMPEG_PATH);
        for (String wavPath : audios) {
            String out = wavPath.substring(0, wavPath.lastIndexOf(".")) + ".pcm";
            String outPcm = CutService.processWavToPcm(wavPath, FFMPEG_PATH, out);
            // processWavToPcm returns null when ffmpeg could not be run; skip that segment.
            if (outPcm == null) {
                System.err.println("PCM conversion failed for " + wavPath);
                continue;
            }
            String result = TokenService.getResult(outPcm);
            FileService.appendFile2("G:\\Youku Files\\transcode\\标清.mp4-字幕.txt", result + "\r\n");
        }
    }

}
　　

使用FFmpeg进行视频抽取音频，之后进行语音识别转为文字

热门文章

最新文章

相关课程

相关电子书

热门

活动广场

任务中心

开发者评测

高校计划

乘风者计划

训练营

阿里云MVP

话题

直播

下载

镜像站

技术资料

插件

使用FFmpeg进行视频抽取音频，之后进行语音识别转为文字

热门文章

最新文章

相关课程

相关电子书