FFmpeg库的使用与深度解析:解码音频流流程-阿里云开发者社区

FFmpeg库的使用与深度解析:解码音频流流程

2024-03-26 104

版权

本文内容由阿里云实名注册用户自发贡献，版权归原作者所有，阿里云开发者社区不拥有其著作权，亦不承担相应法律责任。具体规则请查看《阿里云开发者社区用户服务协议》和《阿里云开发者社区知识产权保护指引》。如果您发现本社区中有涉嫌抄袭的内容，填写侵权投诉表单进行举报，一经查实，本社区将立刻删除涉嫌侵权内容。

本文涉及的产品

公共DNS（含HTTPDNS解析），每月1000万次HTTP解析

云解析 DNS，旗舰版 1个月

全局流量管理 GTM，标准版 1个月

简介： FFmpeg库的使用与深度解析:解码音频流流程

解码音频流：FFmpeg库的使用与深度解析

1. 引言

在多媒体处理领域，FFmpeg是一个非常强大的库，它提供了多种工具和接口用于处理音频和视频数据。本文将深入探讨如何使用FFmpeg库进行音频流的解码和重采样。

“Simplicity is the ultimate sophistication.” — Leonardo da Vinci

这句话也适用于编程和数据处理。简单的代码和算法往往更容易维护和扩展。

2. 解封装流程

2.1 注册所有封装器和解封装器

使用av_register_all()函数进行注册。注意：该函数自FFmpeg 4.0起已被弃用（封装器和解封装器会自动注册，无需再调用），并在FFmpeg 5.0中被移除；只有在使用旧版本FFmpeg时才需要这一步。

// Register all muxers and demuxers before any input is opened.
av_register_all();

2.2 打开文件

使用avformat_open_input()函数打开一个文件或URL。

// Open "input.mp3"; the context pointer starts out null and is passed by
// address so the call can fill it in. The full example further down treats
// a non-zero return value as "could not open the file".
AVFormatContext* pFormatCtx = nullptr;
avformat_open_input(&pFormatCtx, "input.mp3", nullptr, nullptr);

2.3 查找流信息

使用avformat_find_stream_info()函数查找流信息。

// Read stream information for the opened input. The full example further
// down treats a negative return value as failure.
avformat_find_stream_info(pFormatCtx, nullptr);

2.4 获取音频流索引和解码器ID

// Locate the first audio stream; audioStream stays -1 when the input has none.
int audioStream = -1;
for (unsigned int i = 0; i < pFormatCtx->nb_streams; i++) {
    // codecpar is the per-stream parameter block also used by the full
    // example below (the older AVStream::codec field is no longer available
    // in current FFmpeg releases).
    if (pFormatCtx->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) {
        audioStream = (int)i;
        break;
    }
}
// Never index streams[] with -1: fall back to AV_CODEC_ID_NONE so that the
// decoder lookup in the next step fails cleanly instead of reading out of bounds.
AVCodecID codecID = (audioStream >= 0)
    ? pFormatCtx->streams[audioStream]->codecpar->codec_id
    : AV_CODEC_ID_NONE;

3. 解码流程

3.1 获取解码器

使用avcodec_find_decoder()函数获取解码器。

// Look up a decoder matching the audio stream's codec ID. The full example
// further down treats a null result as "decoder not found".
AVCodec* pCodec = avcodec_find_decoder(codecID);

3.2 打开解码器

使用avcodec_open2()函数打开解码器。

// Allocate a decoder context of our own and copy the stream parameters into
// it, the same way the full example below does (the older AVStream::codec
// context is no longer available in current FFmpeg releases).
// Each call reports failure (null pointer / negative value) and should be
// checked in real code.
AVCodecContext* pCodecCtx = avcodec_alloc_context3(pCodec);
avcodec_parameters_to_context(pCodecCtx, pFormatCtx->streams[audioStream]->codecpar);
avcodec_open2(pCodecCtx, pCodec, nullptr);

3.3 解码数据

AVPacket packet;
AVFrame* pFrame = av_frame_alloc();
while (av_read_frame(pFormatCtx, &packet) >= 0) {
    if (packet.stream_index == audioStream) {
        avcodec_send_packet(pCodecCtx, &packet);
        avcodec_receive_frame(pCodecCtx, pFrame);
    }
    av_packet_unref(&packet);
}

4. 重采样

4.1 创建SwrContext

// Allocate a resampler context; its options are set in the next step.
SwrContext* swrCtx = swr_alloc();

4.2 设置参数并初始化

// "..." is a placeholder for the output and input channel layout, sample
// format and sample rate (see the full example below, which also assigns the
// returned context pointer back to its variable).
swr_alloc_set_opts(swrCtx, ...);
// The context must be initialised after the options are set and before converting.
swr_init(swrCtx);

4.3 数据转换和内存释放

// "..." is a placeholder for the output buffer and its capacity followed by
// the input data and its sample count (see the full example below).
swr_convert(swrCtx, ...);
// Release the resampler context; it is passed by address.
swr_free(&swrCtx);

5. 代码示例

#include <iostream>
#include <cstdio>
#include <vdef.h>
using namespace std;
#define MAX_AUDIO_FRAME_SIZE 192000
// Playback buffer layout:
//|-----------|-------------|
//chunk-------pos---len-----|
static Uint8* audio_chunk;  // start of the PCM chunk currently handed to the audio callback
static int audio_len;  // number of bytes of the chunk still to be played
static Uint8* audio_pos;  // current playback position inside the chunk
// SDL 2.0 audio callback.
//   udata  - the user pointer registered with SDL (unused here)
//   stream - buffer that must be filled with audio data
//   len    - size of that buffer in bytes
// Mixes as much of the pending chunk (audio_pos / audio_len) into stream as
// fits and advances the playback position; the rest of stream stays silent.
void Fill_audio(void* udata,Uint8* stream,int len)
{
    (void)udata;
    cout << "Fill_audio len:"<<len<<endl;
    SDL_memset(stream,0,len);   // start from silence
    if(audio_len <= 0)          // nothing pending
        return ;
    // Never consume more than what is left in the chunk: taking the larger of
    // the two values would read past the end of the chunk and push audio_len
    // below zero.
    len = (len>audio_len?audio_len:len);
    SDL_MixAudio(stream,audio_pos,len,SDL_MIX_MAXVOLUME); // mixing volume required by the API; does not change the hardware volume
    audio_pos +=len;  // advance the playback position
    audio_len -=len;  // bytes still pending
}
int main()    //这里main 在SDL_main中被宏定义了用的时候不可以使用int main(省参)
{
 int  l_s32AStreamSubscript = -1;//音频流标志
  avformat_network_init();
  char fillename[] = "E:\\tt.mp3";//播放文件
// 1.Open the input file in the unpacked format
  l_pstFormatCtx = avformat_alloc_context();
  if(avformat_open_input(&l_pstFormatCtx,fillename,NULL,NULL)!=0)
  {
    cout << "[music_error]Could not open source file,exit work_" << fillename <<endl;
    return -1;
  }
  if(avformat_find_stream_info(l_pstFormatCtx,NULL)<0)
  {
    cout << "[music_error]couldn't find stream information" <<endl;
    return -1;
  }
//2. get the index position of the audio stream,
  if(l_pstFormatCtx!=nullptr)
  {
    for (unsigned int i = 0; i < l_pstFormatCtx->nb_streams; ++i)
    {
      if (l_pstFormatCtx->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_AUDIO)
        l_s32AStreamSubscript = i;//Audio stream sequence number
    }
  }
  if(l_s32AStreamSubscript == -1)
  {
    cout << "[music_error]Can't find audiostream" <<endl;
    return -1;
  }
//3.Find and open the audio decoder  Codec type or id mismatches
  l_pstAStream= l_pstFormatCtx->streams[l_s32AStreamSubscript];
  l_pstACodec =  avcodec_find_decoder(l_pstAStream->codecpar->codec_id);
  l_pstACodecCtx = avcodec_alloc_context3(l_pstACodec); //Allocation of AVCodecContext memory
  if(l_pstACodecCtx == nullptr || avcodec_parameters_to_context(l_pstACodecCtx, (const AVCodecParameters *)l_pstAStream->codecpar)<0)
  {
      cout << "[music_error]Codec ont find" <<endl;
      return -1;
  }
  if (avcodec_open2(l_pstACodecCtx, l_pstACodec, nullptr) < 0 || l_pstACodec == nullptr)
  {
      cout << "[music_error]Cannot find the corresponding decoder or the file is encrypted" <<SDL_GetError()<<endl;
      return -1;
  }
  if(SDL_Init(SDL_INIT_VIDEO|SDL_INIT_AUDIO|SDL_INIT_TIMER))
  {
    cout << "[music_error]Could not initialize SDL" <<SDL_GetError()<<endl;
    return -1;
  }
  uint64_t out_channel_layout  = AV_CH_LAYOUT_STEREO;  //声道格式
  AVSampleFormat out_sample_fmt = AV_SAMPLE_FMT_S16;   //采样格式
  int out_nb_samples=l_pstACodecCtx->frame_size;   //nb_samples: AAC-1024 MP3-1152  格式大小 /*有的是视频格式数据头为非标准格式，从frame_size中得不到正确的数据大小，只能解码一帧数据后才可以获得*/
  int out_sample_rate = 44100;//采样率 pCodecCtx->sample_rate
  int out_channels = av_get_channel_layout_nb_channels(out_channel_layout);  //根据声道格式返回声道个数
  int out_buffer_size = av_samples_get_buffer_size(NULL,out_channels,out_nb_samples,out_sample_fmt,1);//获取输出缓冲大小
  out_buffer = (uint8_t*)av_malloc(MAX_AUDIO_FRAME_SIZE);
  memset(out_buffer,0,MAX_AUDIO_FRAME_SIZE);
  wanted_spec.freq = out_sample_rate; //采样率
  wanted_spec.format = AUDIO_S16SYS;  //告诉SDL我们将要给的格式
  wanted_spec.channels = out_channels;   //声音的通道数
  wanted_spec.silence = 0;         //用来表示静音的值
  wanted_spec.samples = out_nb_samples;   //格式大小
  wanted_spec.callback = Fill_audio;    //回调函数
  //打开音频设备
  wanted_spec.userdata = l_pstACodecCtx;    //SDL供给回调函数运行的参数
  if (SDL_OpenAudio(&wanted_spec, NULL)<0)
  {
        printf("can't open audio.\n");
        return -1;
  }
   //根据声道数返回默认输入声道格式
  int64_t in_channel_layout = av_get_default_channel_layout(l_pstACodecCtx->channels);
   //音频格式转换准备
  au_convert_ctx = swr_alloc();//等同于au_convert_ctx  = NULL;
  //参数设置：输出格式PCM -- 输入格式  MP3
  au_convert_ctx = swr_alloc_set_opts(au_convert_ctx,out_channel_layout, out_sample_fmt, out_sample_rate,
        in_channel_layout,l_pstACodecCtx->sample_fmt , l_pstACodecCtx->sample_rate,0, NULL);
  swr_init(au_convert_ctx);//初始化
  int index =  0;
  packet = (AVPacket*)av_malloc(sizeof(AVPacket));
  av_init_packet(packet);
  pFrame = av_frame_alloc();
  //解析数据包
  while(av_read_frame(l_pstFormatCtx, packet)>=0)
  {
      if(packet->stream_index == l_s32AStreamSubscript)  //如果为音频标志
      {                 
          //解码一帧音频压缩数据，得到音频像素数据
          if ( avcodec_send_packet(l_pstACodecCtx, packet) != 0)
          {
             cout<<"[audio_decode_frame] avcodec_send_packet failed"<<endl;
          }
          else
          {
      //       cout<<"[audio_decode_frame] avcodec_send_packet successfully"<<endl;
          }
           一个avPacket可能包含多帧数据，所以需要使用while循环一直读取
          while( (avcodec_receive_frame(l_pstACodecCtx, pFrame) )>= 0)
          {
              //数据格式转换
              swr_convert(au_convert_ctx,&out_buffer,MAX_AUDIO_FRAME_SIZE,(const uint8_t**)pFrame->data,pFrame->nb_samples);
              //输出一帧包大小
              printf("index:%5d\t pts:%lld\t packet size:%d\n",index,packet->pts,packet->size);
              index++;
           }
          while(audio_len>0)
            SDL_Delay(1);//延时1ms
         //指向音频数据 (PCM data)
         audio_chunk = (Uint8 *) out_buffer;
         //音频长度
         audio_len =out_buffer_size;
         //当前播放位置
         audio_pos = audio_chunk;
         //开始播放
         SDL_PauseAudio(0);
      }
//      cout<<"[audio_decode_frame]Remove the reference to the previous frame"<<endl;
      av_packet_unref(packet);
      av_frame_unref(pFrame);
  }
     av_packet_free(&packet);
     //释放转换结构体
         swr_free(&au_convert_ctx);
#if USE_SDL
    SDL_CloseAudio();//Close SDL
    SDL_Quit();
#endif
#if WRITEPCM
    fclose(file);
#endif
    av_free(out_buffer);
//free_AVCodecCtx:
    avcodec_close(l_pstACodecCtx);
    // 关闭打开音频文件
    avformat_close_input(&l_pstFormatCtx);
    system("pause");
    return 0;
}

6. 总结

本文详细介绍了如何使用FFmpeg库进行音频流的解封装、解码和重采样。这些步骤虽然看似简单，但每一个函数和接口背后都有深刻的设计哲学。

“The most important property of a program is whether it accomplishes the intention of its user.” — C.A.R. Hoare

在我们的编程学习之旅中，理解是我们迈向更高层次的重要一步。然而，掌握新技能、新理念，始终需要时间和坚持。从心理学的角度看，学习往往伴随着不断的试错和调整，这就像是我们的大脑在逐渐优化其解决问题的“算法”。

这就是为什么当我们遇到错误，我们应该将其视为学习和进步的机会，而不仅仅是困扰。通过理解和解决这些问题，我们不仅可以修复当前的代码，更可以提升我们的编程能力，防止在未来的项目中犯相同的错误。

我鼓励大家积极参与进来，不断提升自己的编程技术。无论你是初学者还是有经验的开发者，我希望我的博客能对你的学习之路有所帮助。如果你觉得这篇文章有用，不妨点击收藏，或者留下你的评论分享你的见解和经验，也欢迎你对我博客的内容提出建议和问题。每一次的点赞、评论、分享和关注都是对我的最大支持，也是对我持续分享和创作的动力。

FFmpeg库的使用与深度解析:解码音频流流程

解码音频流：FFmpeg库的使用与深度解析

1. 引言

2. 解封装流程

2.1 注册所有封装器和解封装器

2.2 打开文件

2.3 查找流信息

2.4 获取音频流索引和解码器ID

3. 解码流程

3.1 获取解码器

3.2 打开解码器

3.3 解码数据

4. 重采样

4.1 创建SwrContext

4.2 设置参数并初始化

4.3 数据转换和内存释放

5. 代码示例

6. 总结

热门文章

最新文章

相关课程

相关电子书

相关实验场景

推荐镜像

热门

活动广场

任务中心

开发者评测

高校计划

乘风者计划

训练营

阿里云MVP

话题

直播

下载

镜像站

技术资料

插件

FFmpeg库的使用与深度解析:解码音频流流程

解码音频流：FFmpeg库的使用与深度解析

1. 引言

2. 解封装流程

2.1 注册所有封装器和解封装器

2.2 打开文件

2.3 查找流信息

2.4 获取音频流索引和解码器ID

3. 解码流程

3.1 获取解码器

3.2 打开解码器

3.3 解码数据

4. 重采样

4.1 创建SwrContext

4.2 设置参数并初始化

4.3 数据转换和内存释放

5. 代码示例

6. 总结

热门文章

最新文章

相关课程

相关电子书

相关实验场景

推荐镜像