/** * 参考于:http://blog.csdn.net/leixiaohua1020/article/details/46890259 */ #include <stdio.h> #include <string.h> extern "C" { #include "libavcodec/avcodec.h" #include "libavformat/avformat.h" #include "libswresample/swresample.h" }; #pragma comment(lib, "avcodec.lib") #pragma comment(lib, "avformat.lib") #pragma comment(lib, "swresample.lib") #pragma comment(lib, "avutil.lib") // 1 second of 48khz 32bit(4Byte) audio #define MAX_AUDIO_FRAME_SIZE 192000 int main(int argc, char* argv[]) { AVFormatContext *pFormatCtx = NULL; AVCodecContext *pCodecCtx = NULL; AVCodec *pCodec = NULL; AVPacket packet; AVFrame *pAudioFrame = NULL; uint8_t *buffer = NULL; struct SwrContext *audio_convert_ctx = NULL; int got_picture; int audioIndex; char filepath[1024] = ""; printf("Usage: program.exe *.mp3 "); if (argc == 2) { strcpy(filepath, argv[1]); } else { printf("Could not find a audio file "); return -1; } FILE *fp_pcm = fopen("output.pcm", "wb+"); if (fp_pcm == NULL) { printf("FILE open error"); return -1; } av_register_all(); if (avformat_open_input(&pFormatCtx, filepath, NULL, NULL) != 0) { printf("Couldn't open an input stream. "); return -1; } if (avformat_find_stream_info(pFormatCtx, NULL) < 0) { printf("Couldn't find stream information. "); return -1; } audioIndex = -1; for (int i = 0; i < pFormatCtx->nb_streams; i++) { if (pFormatCtx->streams[i]->codec->codec_type == AVMEDIA_TYPE_AUDIO) { audioIndex = i; break; } } if (audioIndex == -1) { printf("Couldn't find a audio stream. "); return -1; } pCodecCtx = pFormatCtx->streams[audioIndex]->codec; pCodec = avcodec_find_decoder(pCodecCtx->codec_id); if (pCodec == NULL) { printf("Codec not found. "); return -1; } if (avcodec_open2(pCodecCtx, pCodec, NULL) < 0) { printf("Could not open codec. 
"); return -1; } pAudioFrame = av_frame_alloc(); if (pAudioFrame == NULL) { printf("Could not alloc AVFrame "); return -1; } //音频输出参数 uint64_t out_channel_layout = AV_CH_LAYOUT_STEREO;//声道格式 AVSampleFormat out_sample_fmt = AV_SAMPLE_FMT_S16;//采样格式 int out_nb_samples = pCodecCtx->frame_size;//nb_samples: AAC-1024 MP3-1152 int out_sample_rate = 44100;//采样率 int out_nb_channels = av_get_channel_layout_nb_channels(out_channel_layout);//根据声道格式返回声道个数 int out_buffer_size = av_samples_get_buffer_size(NULL, out_nb_channels, out_nb_samples, out_sample_fmt, 1); buffer = (uint8_t *)av_malloc(MAX_AUDIO_FRAME_SIZE); /** * 函数声明:struct SwrContext *swr_alloc(void); * Allocate SwrContext. * * If you use this function you will need to set the parameters (manually or * with swr_alloc_set_opts()) before calling swr_init(). * * @see swr_alloc_set_opts(), swr_init(), swr_free() * @return NULL on error, allocated context otherwise */ audio_convert_ctx = swr_alloc(); if (audio_convert_ctx == NULL) { printf("Could not allocate SwrContext "); return -1; } /** * 函数声明:struct SwrContext *swr_alloc_set_opts( * struct SwrContext *s,int64_t out_ch_layout, enum AVSampleFormat out_sample_fmt, int out_sample_rate, * int64_t in_ch_layout, enum AVSampleFormat in_sample_fmt, int in_sample_rate, * int log_offset, void *log_ctx); * * Allocate SwrContext if needed and set/reset common parameters. * * This function does not require s to be allocated with swr_alloc(). On the * other hand, swr_alloc() can use swr_alloc_set_opts() to set the parameters * on the allocated context. * * @param s existing Swr context if available, or NULL if not * @param out_ch_layout output channel layout (AV_CH_LAYOUT_*) * @param out_sample_fmt output sample format (AV_SAMPLE_FMT_*). * @param out_sample_rate output sample rate (frequency in Hz) * @param in_ch_layout input channel layout (AV_CH_LAYOUT_*) * @param in_sample_fmt input sample format (AV_SAMPLE_FMT_*). 
* @param in_sample_rate input sample rate (frequency in Hz) * @param log_offset logging level offset * @param log_ctx parent logging context, can be NULL * * @see swr_init(), swr_free() * @return NULL on error, allocated context otherwise */ /* int64_t in_channel_layout = av_get_default_channel_layout(pCodecCtx->channels);//根据声道数返回默认输入声道格式 swr_alloc_set_opts(audio_convert_ctx, out_channel_layout, out_sample_fmt, out_sample_rate, in_channel_layout, pCodecCtx->sample_fmt, pCodecCtx->sample_rate, 0, NULL); */ swr_alloc_set_opts(audio_convert_ctx, out_channel_layout, out_sample_fmt,out_sample_rate, pCodecCtx->channel_layout, pCodecCtx->sample_fmt, pCodecCtx->sample_rate, 0, NULL); /** * 函数声明:int swr_init(struct SwrContext *s); * Initialize context after user parameters have been set. * @note The context must be configured using the AVOption API. * * @see av_opt_set_int() * @see av_opt_set_dict() * * @param[in,out] s Swr context to initialize * @return AVERROR error code in case of failure. */ swr_init(audio_convert_ctx); int index = 0;//计数器 while (av_read_frame(pFormatCtx, &packet) >= 0) { if (packet.stream_index == audioIndex) { if (avcodec_decode_audio4(pCodecCtx, pAudioFrame, &got_picture, &packet) < 0) { printf("Error in decoding audio frame. "); return -1; } if (got_picture) { /** Convert audio. * 函数声明:int swr_convert(struct SwrContext *s, uint8_t **out, int out_count, * const uint8_t **in, int in_count); * in and in_count can be set to 0 to flush the last few samples out at the * end. * * If more input is provided than output space, then the input will be buffered. * You can avoid this buffering by using swr_get_out_samples() to retrieve an * upper bound on the required number of output samples for the given number of * input samples. Conversion will run directly without copying whenever possible. 
* * @param s allocated Swr context, with parameters set * @param out output buffers, only the first one need be set in case of packed audio * @param out_count amount of space available for output in samples per channel * @param in input buffers, only the first one need to be set in case of packed audio * @param in_count number of input samples available in one channel * * @return number of samples output per channel, negative value on error */ swr_convert(audio_convert_ctx, &buffer, MAX_AUDIO_FRAME_SIZE, (const uint8_t **)pAudioFrame->data, pAudioFrame->nb_samples); printf("index:%5d pts:%lld packet size:%d ", index, packet.pts, packet.size); //Write PCM fwrite(buffer, 1, out_buffer_size, fp_pcm); index++; } } av_free_packet(&packet); } fclose(fp_pcm); swr_free(&audio_convert_ctx); av_free(buffer); av_frame_free(&pAudioFrame); avcodec_close(pCodecCtx); avformat_close_input(&pFormatCtx); return 0; }
与解析视频里的YUV/RGB(http://blog.csdn.net/x_iya/article/details/52248929)相同的是,解析出音频的AVFrame同样需要转换。
由于ffmpeg较新的版本(貌似从2.1开始)使用avcodec_decode_audio4函数来解码音频,解码得到的数据类型为float(4字节,即32bit),而播放器播放的格式一般为S16(signed 16bit),因此需要对解码得到的数据进行转换。好在ffmpeg已经帮我们做好了,只需调用API就可以了,这个函数就是:swr_convert
输出:
使用Audacity打开(注意参数)
问题:
1.有些格式的视频不符合标准,获得的pCodecCtx->frame_size为0
雷老师,我使用wmv格式的视频进行测试,结果不能得到正确的pcm文件,文件大小始终为0。发现是out_nb_samples = pCodecCtx->frame_size出了问题:其中pCodecCtx->frame_size为0,导致av_samples_get_buffer_size算出的大小是负数。问过有经验的组长,他告诉我有些格式的视频不符合标准,不能从文件头中获取到信息,要在读入一帧后获取,也就是在ret = avcodec_decode_audio4(pCodecCtx, pFrame, &got_picture, packet);之后,从pFrame中获取信息。
发现错误后重新修改程序,就能得到数据了。
2.对于采样率为48000Hz的视频,解析为采样率为44100Hz的pcm时出现杂音;将out_sample_rate设置为48000(即int out_sample_rate = 48000;)则没有问题。