在进行音视频编程过程中,音频处理是必须掌握的基本知识。本篇文章主要介绍音频处理过程中通常会遇到的相关处理,包括音频编码,音频解码,音频重采样和音频混合。
1 音频PCM采样数据编码为压缩码流
#include "stdafx.h"
extern "C"
{
#include "libavcodec\avcodec.h"
#include "libavformat\avformat.h"
#include "libswscale\swscale.h"
};
int _tmain(int argc, _TCHAR* argv[])
{
    /*
     * Encode raw PCM samples (S16 interleaved, 44.1 kHz stereo) into a
     * compressed audio file; the container/codec are guessed from the
     * output file name. Returns 0 on success, -1 on any error.
     */
    AVFormatContext* pFormatCtx;
    AVOutputFormat* fmt;
    AVStream* audio_st;
    AVCodecContext* pCodecCtx;
    AVCodec* pCodec;
    uint8_t* frame_buf;
    AVFrame* frame;
    int size;
    FILE *in_file = fopen("tdjm.pcm", "rb"); // raw PCM input samples
    int framenum = 1000;                     // max number of frames to encode
    const char* out_file = "tdjm.aac";       // output file path
    if (in_file == NULL)
    {
        // FIX: fopen() was never checked; a missing input crashed in fread().
        printf("Failed to open input PCM file!\n");
        return -1;
    }
    av_register_all();
    // Method 1: assemble the muxer context by hand.
    pFormatCtx = avformat_alloc_context();
    // Guess the container format from the output file name.
    fmt = av_guess_format(NULL, out_file, NULL);
    pFormatCtx->oformat = fmt;
    // Method 2 (more automatic):
    //avformat_alloc_output_context2(&pFormatCtx, NULL, NULL, out_file);
    //fmt = pFormatCtx->oformat;
    // Open the output file itself.
    if (avio_open(&pFormatCtx->pb, out_file, AVIO_FLAG_READ_WRITE) < 0)
    {
        printf("输出文件打开失败!\n");
        return -1;
    }
    // NOTE: av_new_stream() is deprecated; avformat_new_stream() is the modern API.
    audio_st = av_new_stream(pFormatCtx, 0);
    if (audio_st == NULL){
        return -1;
    }
    // Configure the encoder: S16, 44.1 kHz stereo, 16 kbit/s.
    pCodecCtx = audio_st->codec;
    pCodecCtx->codec_id = fmt->audio_codec;
    pCodecCtx->codec_type = AVMEDIA_TYPE_AUDIO;
    pCodecCtx->sample_fmt = AV_SAMPLE_FMT_S16;
    pCodecCtx->sample_rate = 44100;
    pCodecCtx->channel_layout = AV_CH_LAYOUT_STEREO;
    pCodecCtx->channels = av_get_channel_layout_nb_channels(pCodecCtx->channel_layout);
    pCodecCtx->bit_rate = 16000;
    // Print the negotiated output format.
    av_dump_format(pFormatCtx, 0, out_file, 1);
    pCodec = avcodec_find_encoder(pCodecCtx->codec_id);
    if (!pCodec)
    {
        printf("没有找到合适的编码器!\n");
        return -1;
    }
    if (avcodec_open2(pCodecCtx, pCodec, NULL) < 0)
    {
        printf("编码器打开失败!\n");
        return -1;
    }
    // One uncompressed frame holds frame_size samples per channel.
    frame = avcodec_alloc_frame();
    frame->nb_samples = pCodecCtx->frame_size;
    frame->format = pCodecCtx->sample_fmt;
    size = av_samples_get_buffer_size(NULL, pCodecCtx->channels, pCodecCtx->frame_size, pCodecCtx->sample_fmt, 1);
    frame_buf = (uint8_t *)av_malloc(size);
    avcodec_fill_audio_frame(frame, pCodecCtx->channels, pCodecCtx->sample_fmt, (const uint8_t*)frame_buf, size, 1);
    // Write the container header.
    avformat_write_header(pFormatCtx, NULL);
    AVPacket pkt;
    av_new_packet(&pkt, size);
    for (int i = 0; i < framenum; i++){
        // Read one frame worth of PCM.
        // FIX: fread() returns size_t (unsigned), so the original test
        // `fread(...) < 0` could never fire. Check the byte count instead;
        // this also keeps the final *complete* frame, which the original
        // dropped whenever EOF coincided with a full read.
        size_t bytes_read = fread(frame_buf, 1, size, in_file);
        if (bytes_read < (size_t)size)
        {
            if (ferror(in_file))
            {
                printf("文件读取错误!\n");
                return -1;
            }
            break; // EOF: discard any short tail frame
        }
        frame->data[0] = frame_buf; // interleaved sample data
        frame->pts = i * 100;
        int got_frame = 0;
        // Encode one AVFrame of PCM into an AVPacket of compressed data.
        int ret = avcodec_encode_audio2(pCodecCtx, &pkt, frame, &got_frame);
        if (ret < 0)
        {
            printf("编码错误!\n");
            return -1;
        }
        if (got_frame == 1)
        {
            printf("编码成功第%d帧!\n", i);
            pkt.stream_index = audio_st->index;
            ret = av_write_frame(pFormatCtx, &pkt);
            av_free_packet(&pkt);
        }
    }
    // Write the container trailer.
    av_write_trailer(pFormatCtx);
    // Cleanup.
    if (audio_st)
    {
        avcodec_close(audio_st->codec);
        av_free(frame);
        av_free(frame_buf);
    }
    avio_close(pFormatCtx->pb);
    avformat_free_context(pFormatCtx);
    fclose(in_file);
    return 0;
}
本程序实现了音频PCM采样数据编码为压缩码流(MP3,WMA,AAC等)。代码简洁明了,主要函数的详细描述如下:
av_register_all():注册FFmpeg所有编解码器。
avformat_alloc_output_context2():初始化输出码流的AVFormatContext。
avio_open():打开输出文件。
av_new_stream():创建输出码流的AVStream。
avcodec_find_encoder():查找编码器。
avcodec_open2():打开编码器。
avformat_write_header():写文件头(对于某些没有文件头的封装格式,不需要此函数。比如说MPEG2TS)。
avcodec_encode_audio2():编码音频。即将AVFrame(存储PCM采样数据)编码为AVPacket(存储AAC,MP3等格式的码流数据)。
av_write_frame():将编码后的音频码流写入文件。
av_write_trailer():写文件尾(对于某些没有文件头的封装格式,不需要此函数。比如说MPEG2TS)。
2 mp3解码为PCM
#include <stdio.h>
#define __STDC_CONSTANT_MACROS
extern "C"
{
#include "libavcodec/avcodec.h"
#include "libavformat/avformat.h"
#include "libswscale/swscale.h"
#include "libswresample/swresample.h"
};
// setup_array is adapted from the FFmpeg example code.
// Gather the frame's data-plane pointers into a flat array: one entry per
// channel for planar sample formats, a single entry for interleaved ones,
// so the array can be handed straight to swr_convert().
static void setup_array(uint8_t* out[SWR_CH_MAX], AVFrame* in_frame, int format, int samples)
{
    if (av_sample_fmt_is_planar((AVSampleFormat)format))
    {
        int i;
        // Bytes in one plane; computed for reference but not otherwise used.
        int plane_size = av_get_bytes_per_sample((AVSampleFormat)(format & 0xFF)) * samples;
        format &= 0xFF;
        // Planes coming out of the decoder are not guaranteed to be laid out
        // contiguously, so we must NOT write in_frame->data[0] + i*plane_size;
        // copy each plane pointer individually instead.
        for (i = 0; i < in_frame->channels; i++)
        {
            out[i] = in_frame->data[i];
        }
    }
    else
    {
        out[0] = in_frame->data[0];
    }
}
int main()
{
char filepath[] = "bird.mp3";
AVFormatContext *pFormatCtx;
int i,videoindex;
AVCodecContext *pCodecCtx;
AVCodec *pCodec;
AVFrame *pFrame, *pFrameYUV;
AVPacket *packet;
struct SwsContext *img_convert_ctx;
int ret, got_audio;
FILE *fp_pcm;
fp_pcm = fopen("output.pcm", "wb+");
av_register_all();
avformat_network_init();
pFormatCtx = avformat_alloc_context();
if (avformat_open_input(&pFormatCtx, filepath, NULL, NULL) != 0){
printf("Couldn't open input stream.\n");
return -1;
}
if (avformat_find_stream_info(pFormatCtx, NULL) < 0){
printf("Couldn't find stream information.\n");
return -1;
}
videoindex = -1;
for (i = 0; i < pFormatCtx->nb_streams; i++)
if (pFormatCtx->streams[i]->codec->codec_type == AVMEDIA_TYPE_AUDIO){
videoindex = i;
break;
}
if (videoindex == -1){
printf("Didn't find a video stream.\n");
return -1;
}
pCodecCtx = pFormatCtx->streams[videoindex]->codec;
pCodec = avcodec_find_decoder(pCodecCtx->codec_id);
if (pCodec == NULL){
printf("Codec not found.\n");
return -1;
}
if (avcodec_open2(pCodecCtx, pCodec, NULL) < 0){
printf("Could not open codec.\n");
return -1;
}
pFrame = av_frame_alloc();
//pFrameYUV = av_frame_alloc();
//uint8_t *out_buffer = (uint8_t *)av_malloc(avpicture_get_size(PIX_FMT_YUV420P, pCodecCtx->width, pCodecCtx->height));
//avpicture_fill((AVPicture *)pFrameYUV, out_buffer, PIX_FMT_YUV420P, pCodecCtx->width, pCodecCtx->height);
packet = (AVPacket *)av_malloc(sizeof(AVPacket));
//Output Information-----------------------------
printf("------------- File Information ------------------\n");
av_dump_format(pFormatCtx, 0, filepath, 0);
printf("-------------------------------------------------\n");
while (av_read_frame(pFormatCtx, packet) >= 0){
if (packet->stream_index == videoindex){
//Decode
ret = avcodec_decode_audio4(pCodecCtx, pFrame, &got_audio, packet);
if (got_audio){
//输入也可能是分平面的,所以要做如下处理
uint8_t* m_ain[SWR_CH_MAX];
setup_array(m_ain, pFrame, pCodecCtx->sample_fmt, pFrame->nb_samples);
fwrite(pFrame->data[0], 1, pFrame->linesize[0], fp_pcm);
}
}
}
av_free(pFrame);
avcodec_close(pCodecCtx);
avformat_close_input(&pFormatCtx);
return 0;
}
3 SwrContext音频重采样
#include "stdafx.h"
#ifdef __cplusplus
extern"C"
{
#endif
#include <libavformat/avformat.h>
#include "libavcodec/avcodec.h"
#include "libavfilter/avfiltergraph.h"
#include "libavfilter/buffersink.h"
#include "libavfilter/buffersrc.h"
#include "libavutil/avutil.h"
#include "libavutil/opt.h"
#include "libavutil/pixdesc.h"
#include "libswresample\swresample.h"
#include "libavutil\fifo.h"
#pragma comment(lib, "avcodec.lib")
#pragma comment(lib, "avformat.lib")
#pragma comment(lib, "avutil.lib")
//#pragma comment(lib, "avdevice.lib")
#pragma comment(lib, "avfilter.lib")
//#pragma comment(lib, "postproc.lib")
#pragma comment(lib, "swresample.lib")
//#pragma comment(lib, "swscale.lib")
#ifdef __cplusplus
};
#endif
#define BUF_SIZE_20K 2048000
#define BUF_SIZE_1K 1024000
SwrContext* pSwrCtx = NULL;
AVStream *out_stream = NULL;
AVFormatContext *in_fmt_ctx = NULL, *out_fmt_ctx = NULL;
int audio_index = -1;
void initSwr()
{
    /*
     * Lazily create and initialise the global resampler (pSwrCtx), but only
     * when the input and output streams differ in channel count, sample
     * rate, or sample format. No-op when the formats already match.
     */
    if (out_fmt_ctx->streams[0]->codec->channels != in_fmt_ctx->streams[audio_index]->codec->channels
        || out_fmt_ctx->streams[0]->codec->sample_rate != in_fmt_ctx->streams[audio_index]->codec->sample_rate
        || out_fmt_ctx->streams[0]->codec->sample_fmt != in_fmt_ctx->streams[audio_index]->codec->sample_fmt)
    {
        if ( NULL == pSwrCtx )
        {
            pSwrCtx = swr_alloc();
        }
#if LIBSWRESAMPLE_VERSION_MINOR >= 17 // pick the right API for the library version
        av_opt_set_int(pSwrCtx, "ich", in_fmt_ctx->streams[audio_index]->codec->channels, 0);
        av_opt_set_int(pSwrCtx, "och", out_fmt_ctx->streams[0]->codec->channels, 0);
        av_opt_set_int(pSwrCtx, "in_sample_rate", in_fmt_ctx->streams[audio_index]->codec->sample_rate, 0);
        av_opt_set_int(pSwrCtx, "out_sample_rate", out_fmt_ctx->streams[0]->codec->sample_rate, 0);
        av_opt_set_sample_fmt(pSwrCtx, "in_sample_fmt", in_fmt_ctx->streams[audio_index]->codec->sample_fmt, 0);
        av_opt_set_sample_fmt(pSwrCtx, "out_sample_fmt", out_fmt_ctx->streams[0]->codec->sample_fmt, 0);
#else
        // FIX: the original passed the *input* channel layout and sample rate
        // (and a hard-coded S16) as the output parameters, so no real
        // conversion was configured. swr_alloc_set_opts takes the output
        // triple first, then the input triple.
        pSwrCtx = swr_alloc_set_opts(pSwrCtx,
            out_fmt_ctx->streams[0]->codec->channel_layout,
            out_fmt_ctx->streams[0]->codec->sample_fmt,
            out_fmt_ctx->streams[0]->codec->sample_rate,
            in_fmt_ctx->streams[audio_index]->codec->channel_layout,
            in_fmt_ctx->streams[audio_index]->codec->sample_fmt,
            in_fmt_ctx->streams[audio_index]->codec->sample_rate,
            0, NULL);
#endif
        // FIX: swr_init() failure was silently ignored.
        if (swr_init(pSwrCtx) < 0)
        {
            printf("initSwr: swr_init failed!\n");
        }
    }
}
// setup_array is adapted from the FFmpeg example code.
// Collect the frame's data-plane pointers into a flat table for
// swr_convert(): interleaved formats use a single entry, planar formats one
// entry per channel. Decoder planes are not necessarily contiguous, so each
// data[] pointer must be copied rather than derived from data[0] + offset.
static void setup_array(uint8_t* out[SWR_CH_MAX], AVFrame* in_frame, int format, int samples)
{
    (void)samples; // sample count is not needed to build the pointer table
    if (!av_sample_fmt_is_planar((AVSampleFormat)format))
    {
        out[0] = in_frame->data[0];
        return;
    }
    for (int ch = 0; ch < in_frame->channels; ch++)
    {
        out[ch] = in_frame->data[ch];
    }
}
int TransSample(AVFrame *in_frame, AVFrame *out_frame)
{
    /*
     * Resample one decoded frame (in_frame) into the output stream's
     * channel/rate/format using the global pSwrCtx.
     * Returns 0 on success, -1 on error.
     * Ownership: out_frame->data[0] is allocated here via av_samples_alloc();
     * the caller must av_free() it after use.
     */
    int ret;
    int len;
    int64_t src_nb_samples = in_frame->nb_samples;
    int in_rate = in_fmt_ctx->streams[audio_index]->codec->sample_rate;
    int out_rate = out_fmt_ctx->streams[0]->codec->sample_rate;
    out_frame->pts = in_frame->pts;
    if (pSwrCtx == NULL)
    {
        printf("pSwrCtx with out init!\n");
        return -1;
    }
    // Size the output for rate conversion plus any samples buffered inside
    // the resampler.
    // FIX: the original rescaled with out_rate/out_rate (a no-op) and passed
    // the *output* rate to swr_get_delay(); the delay is measured at the
    // input rate and the count rescaled input-rate -> output-rate.
    out_frame->nb_samples = av_rescale_rnd(swr_get_delay(pSwrCtx, in_rate) + src_nb_samples,
        out_rate, in_rate, AV_ROUND_UP);
    ret = av_samples_alloc(out_frame->data, &out_frame->linesize[0],
        out_fmt_ctx->streams[0]->codec->channels, out_frame->nb_samples,
        out_fmt_ctx->streams[0]->codec->sample_fmt, 0);
    if (ret < 0)
    {
        av_log(NULL, AV_LOG_WARNING, "[%s.%d %s() Could not allocate samples Buffer\n", __FILE__, __LINE__, __FUNCTION__);
        return -1;
    }
    // The input may be planar, so gather the per-plane pointers first.
    uint8_t* m_ain[SWR_CH_MAX];
    setup_array(m_ain, in_frame, in_fmt_ctx->streams[audio_index]->codec->sample_fmt, src_nb_samples);
    // NOTE: swr_convert() counts are samples per channel, not bytes — do not
    // multiply by av_get_bytes_per_sample() here.
    len = swr_convert(pSwrCtx, out_frame->data, out_frame->nb_samples,
        (const uint8_t**)m_ain, src_nb_samples);
    if (len < 0)
    {
        char errmsg[BUF_SIZE_1K];
        av_strerror(len, errmsg, sizeof(errmsg));
        av_log(NULL, AV_LOG_WARNING, "[%s:%d] swr_convert!(%d)(%s)", __FILE__, __LINE__, len, errmsg);
        return -1;
    }
    return 0;
}
int flush_encoder(AVFormatContext *fmt_ctx, unsigned int stream_index)
{
    /*
     * Drain any frames the encoder has buffered internally (codecs with
     * CODEC_CAP_DELAY) by feeding it NULL frames, and mux the resulting
     * packets. Returns 0 when there is nothing to drain, otherwise the last
     * encode/write result (negative on failure).
     */
    int ret = 0;
    int got_frame;
    AVPacket enc_pkt;
    // Nothing to flush unless the codec buffers frames internally.
    if (!(fmt_ctx->streams[stream_index]->codec->codec->capabilities &
        CODEC_CAP_DELAY))
    {
        return 0;
    }
    int i = 0;
    while (1)
    {
        av_init_packet(&enc_pkt);
        enc_pkt.data = NULL; // let the encoder allocate the payload
        enc_pkt.size = 0;
        // FIX: the original checked fmt_ctx but encoded/muxed through the
        // global out_fmt_ctx; use the parameter consistently (they are the
        // same object at the current call site, so behavior is unchanged).
        ret = avcodec_encode_audio2(fmt_ctx->streams[stream_index]->codec, &enc_pkt,
            NULL, &got_frame);
        if (ret < 0)
            break;
        if (!got_frame)
            break; // encoder fully drained
        /* prepare packet for muxing: codec time base -> stream time base */
        enc_pkt.stream_index = stream_index;
        enc_pkt.dts = av_rescale_q_rnd(enc_pkt.dts,
            fmt_ctx->streams[stream_index]->codec->time_base,
            fmt_ctx->streams[stream_index]->time_base,
            (AVRounding)(AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX));
        enc_pkt.pts = av_rescale_q_rnd(enc_pkt.pts,
            fmt_ctx->streams[stream_index]->codec->time_base,
            fmt_ctx->streams[stream_index]->time_base,
            (AVRounding)(AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX));
        enc_pkt.duration = av_rescale_q(enc_pkt.duration,
            fmt_ctx->streams[stream_index]->codec->time_base,
            fmt_ctx->streams[stream_index]->time_base);
        /* mux encoded frame */
        ret = av_interleaved_write_frame(fmt_ctx, &enc_pkt);
        av_free_packet(&enc_pkt);
        if (ret < 0)
        {
            break;
        }
        i++;
    }
    printf("flusher write %d frame", i);
    return ret;
}
int _tmain(int argc, _TCHAR* argv[])
{
AVFrame *frame, *frame_out;
AVPacket pkt_in, pkt_out;
int ret;
if (argc < 3)
{
printf("error in input param");
getchar();
return -1;
}
av_register_all();
avfilter_register_all();
//input
if (avformat_open_input(&in_fmt_ctx, argv[1], NULL, NULL) < 0)
{
printf("can not open input file context");
goto end;
}
if (avformat_find_stream_info(in_fmt_ctx, NULL) < 0)
{
printf("can not find input stream info!\n");
goto end;
}
//output
avformat_alloc_output_context2(&out_fmt_ctx, NULL, NULL, argv[2]);
if (!out_fmt_ctx)
{
printf("can not alloc output context!\n");
goto end;
}
//open decoder & new out stream & open encoder
for (int i = 0; i < in_fmt_ctx->nb_streams; i++)
{
if (in_fmt_ctx->streams[i]->codec->codec_type == AVMEDIA_TYPE_AUDIO)
{
//open decoder
if(0 > avcodec_open2(in_fmt_ctx->streams[i]->codec, avcodec_find_decoder(in_fmt_ctx->streams[i]->codec->codec_id), NULL))
{
printf("can not find or open decoder!\n");
goto end;
}
audio_index = i;
//new stream
out_stream = avformat_new_stream(out_fmt_ctx, NULL);
if (!out_stream)
{
printf("can not new stream for output!\n");
goto end;
}
//set codec context param
//use default audio encoder
out_stream->codec->codec = avcodec_find_encoder(out_fmt_ctx->oformat->audio_codec);
//use the input audio encoder
//out_stream->codec->codec = avcodec_find_encoder(ifmt_ctx->streams[i]->codec->codec_id);
out_stream->codec->sample_rate = in_fmt_ctx->streams[i]->codec->sample_rate;
out_stream->codec->channel_layout = in_fmt_ctx->streams[i]->codec->channel_layout;
out_stream->codec->channels = av_get_channel_layout_nb_channels(out_stream->codec->channel_layout);
// take first format from list of supported formats
out_stream->codec->sample_fmt = out_stream->codec->codec->sample_fmts[0];
AVRational time_base={1, out_stream->codec->sample_rate};
out_stream->codec->time_base = time_base;
//open encoder
if (!out_stream->codec->codec)
{
printf("can not find the encoder!\n");
goto end;
}
if ((avcodec_open2(out_stream->codec, out_stream->codec->codec, NULL)) < 0)
{
printf("can not open the encoder\n");
goto end;
}
if (out_fmt_ctx->oformat->flags & AVFMT_GLOBALHEADER)
out_stream->codec->flags |= CODEC_FLAG_GLOBAL_HEADER;
break;
}
}
//dump input info
av_dump_format(in_fmt_ctx, 0, argv[1], 0);
//dump output info
av_dump_format(out_fmt_ctx, 0, argv[2], 1);
if (-1 == audio_index)
{
printf("found no audio stream in input file!\n");
goto end;
}
if (!(out_fmt_ctx->oformat->flags & AVFMT_NOFILE))
{
if(avio_open(&out_fmt_ctx->pb, argv[2], AVIO_FLAG_WRITE) < 0)
{
printf("can not open output file handle!\n");
goto end;
}
}
if(avformat_write_header(out_fmt_ctx, NULL) < 0)
{
printf("can not write the header of the output file!\n");
goto end;
}
//
initSwr();
int got_frame, got_picture;
int frame_index = 0;
int i = 0;
for(int i = 0; ;i++)
{
pkt_in.data = NULL;
pkt_in.size = 0;
got_frame = -1;
got_picture = -1;
if (av_read_frame(in_fmt_ctx, &pkt_in) < 0)
{
break;
}
if (pkt_in.stream_index != audio_index)
{
continue;
}
frame = av_frame_alloc();
if ((ret = avcodec_decode_audio4(in_fmt_ctx->streams[audio_index]->codec, frame, &got_frame, &pkt_in)) < 0)
{
av_frame_free(&frame);
printf("can not decoder a frame");
break;
}
av_free_packet(&pkt_in);
if (got_frame)
{
frame->pts = av_frame_get_best_effort_timestamp(frame);
frame_out = av_frame_alloc();
if (0 != TransSample(frame, frame_out))
{
printf("can not swr the audio data!\n");
break;
}
av_init_packet(&pkt_out);
int ret = avcodec_encode_audio2(out_fmt_ctx->streams[0]->codec, &pkt_out, frame_out, &got_picture);
av_free(frame_out->data[0]);
av_frame_free(&frame_out);
if (got_picture )
{
pkt_out.stream_index = out_stream->index;
pkt_out.dts = av_rescale_q_rnd(pkt_out.dts,
out_fmt_ctx->streams[out_stream->index]->codec->time_base,
out_fmt_ctx->streams[out_stream->index]->time_base,
(AVRounding)(AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX));
pkt_out.pts = av_rescale_q_rnd(pkt_out.pts,
out_fmt_ctx->streams[out_stream->index]->codec->time_base,
out_fmt_ctx->streams[out_stream->index]->time_base,
(AVRounding)(AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX));
pkt_out.duration = av_rescale_q(pkt_out.duration,
out_fmt_ctx->streams[out_stream->index]->codec->time_base,
out_fmt_ctx->streams[out_stream->index]->time_base);
av_log(NULL, AV_LOG_DEBUG, "Muxing frame\n");
/* mux encoded frame */
ret = av_interleaved_write_frame(out_fmt_ctx, &pkt_out);
av_free_packet(&pkt_out);
if (ret < 0)
{
printf("write a frame failed!\n");
break;
}
printf("success write a frame:index %d\n", frame_index++);
}
}
}
ret = flush_encoder(out_fmt_ctx, out_stream->index);
if (ret < 0)
{
printf("Flushing encoder failed");
return -1;
}
//write file trailer
av_write_trailer(out_fmt_ctx);
//clean
avcodec_close(out_stream->codec);
avcodec_close(in_fmt_ctx->streams[audio_index]->codec);
end:
avformat_close_input(&in_fmt_ctx);
if (out_fmt_ctx && !(out_fmt_ctx->oformat->flags & AVFMT_NOFILE))
{
avio_close(out_fmt_ctx->pb);
}
avformat_free_context(out_fmt_ctx);
getchar();
return 0;
}
4 利用FFmpeg的filter混音
采集线程1采集到数据->解码->写入fifo1 |
采集线程2采集到数据->解码->写入fifo2 | ->主线程编码循环读取2个fifo->把数据压人filter->从filter中读取混音后数据->编码->写入文件
extern "C"
{
#include "libavcodec/avcodec.h"
#include "libavformat/avformat.h"
#include "libavdevice/avdevice.h"
#include "libavfilter/avfilter.h"
#include "libavfilter/avfiltergraph.h"
#include "libavfilter/buffersink.h"
#include "libavfilter/buffersrc.h"
#include "libavutil/audio_fifo.h"
#include "libavutil/avutil.h"
#include "libavutil/fifo.h"
}
#pragma comment(lib, "avcodec.lib")
#pragma comment(lib, "avformat.lib")
#pragma comment(lib, "avutil.lib")
#pragma comment(lib, "avdevice.lib")
#pragma comment(lib, "avfilter.lib")
//#pragma comment(lib, "avfilter.lib")
//#pragma comment(lib, "postproc.lib")
//#pragma comment(lib, "swresample.lib")
#pragma comment(lib, "swscale.lib")
#include <windows.h>
#include <conio.h>
#include <time.h>
// Lifecycle of the capture session; shared (via the global _state) between
// the two capture threads and the main mixing loop.
enum CaptureState
{
    PREPARED,   // setup done, capture not yet started
    RUNNING,    // capture threads are decoding and queueing samples
    STOPPED,    // stop requested; threads should drain and exit
    FINISHED    // all processing complete
};
// NOTE(review): this is a copy of FFmpeg's *private* BufferSourceContext
// (libavfilter/buffersrc.c), duplicated here only so the main loop can peek
// at _filter_ctx_src_spk->priv and compare the negotiated audio parameters.
// The field layout must match the exact FFmpeg build being linked, or those
// reads are undefined — verify against the linked FFmpeg version before use.
// Do not reorder or resize any field.
typedef struct BufferSourceContext {
    const AVClass *bscclass;
    AVFifoBuffer *fifo;
    AVRational time_base; ///< time_base to set in the output link
    AVRational frame_rate; ///< frame_rate to set in the output link
    unsigned nb_failed_requests;
    unsigned warning_limit;
    /* video only */
    int w, h;
    enum AVPixelFormat pix_fmt;
    AVRational pixel_aspect;
    char *sws_param;
    AVBufferRef *hw_frames_ctx;
    /* audio only */
    int sample_rate;
    enum AVSampleFormat sample_fmt;
    int channels;
    uint64_t channel_layout;
    char *channel_layout_str;
    int got_format_from_params;
    int eof;
} BufferSourceContext;
AVFormatContext* _fmt_ctx_spk = NULL;
AVFormatContext* _fmt_ctx_mic = NULL;
AVFormatContext* _fmt_ctx_out = NULL;
int _index_spk = -1;
int _index_mic = -1;
int _index_a_out = -1;
AVFilterGraph* _filter_graph = NULL;
AVFilterContext* _filter_ctx_src_spk = NULL;
AVFilterContext* _filter_ctx_src_mic = NULL;
AVFilterContext* _filter_ctx_sink = NULL;
CaptureState _state = CaptureState::PREPARED;
CRITICAL_SECTION _section_spk;
CRITICAL_SECTION _section_mic;
AVAudioFifo* _fifo_spk = NULL;
AVAudioFifo* _fifo_mic = NULL;
// One-time FFmpeg global initialisation: register all muxers/codecs,
// capture devices (dshow etc.), and filters. Must run before any other
// FFmpeg call in this program.
void InitRecorder()
{
    av_register_all();
    avdevice_register_all();
    avfilter_register_all();
}
int OpenSpeakerInput(char* inputForamt, char* url)
{
AVInputFormat* ifmt = av_find_input_format(inputForamt);
AVDictionary* opt1 = NULL;
av_dict_set(&opt1, "rtbufsize", "10M", 0);
int ret = 0;
ret = avformat_open_input(&_fmt_ctx_spk, url, ifmt, &opt1);
if (ret < 0)
{
printf("Speaker: failed to call avformat_open_input\n");
return -1;
}
ret = avformat_find_stream_info(_fmt_ctx_spk, NULL);
if (ret < 0)
{
printf("Speaker: failed to call avformat_find_stream_info\n");
return -1;
}
for (int i = 0; i < _fmt_ctx_spk->nb_streams; i++)
{
if (_fmt_ctx_spk->streams[i]->codec->codec_type == AVMEDIA_TYPE_AUDIO)
{
_index_spk = i;
break;
}
}
if (_index_spk < 0)
{
printf("Speaker: negative audio index\n");
return -1;
}
AVCodecContext* codec_ctx = _fmt_ctx_spk->streams[_index_spk]->codec;
AVCodec* codec = avcodec_find_decoder(codec_ctx->codec_id);
if (codec == NULL)
{
printf("Speaker: null audio decoder\n");
return -1;
}
ret = avcodec_open2(codec_ctx, codec, NULL);
if (ret < 0)
{
printf("Speaker: failed to call avcodec_open2\n");
return -1;
}
av_dump_format(_fmt_ctx_spk, _index_spk, url, 0);
return 0;
}
int OpenMicrophoneInput(char* inputForamt, char* url)
{
AVInputFormat* ifmt = av_find_input_format(inputForamt);
AVDictionary* opt1 = NULL;
av_dict_set(&opt1, "rtbufsize", "10M", 0);
int ret = 0;
ret = avformat_open_input(&_fmt_ctx_mic, url, ifmt, &opt1);
if (ret < 0)
{
printf("Microphone: failed to call avformat_open_input\n");
return -1;
}
ret = avformat_find_stream_info(_fmt_ctx_mic, NULL);
if (ret < 0)
{
printf("Microphone: failed to call avformat_find_stream_info\n");
return -1;
}
for (int i = 0; i < _fmt_ctx_mic->nb_streams; i++)
{
if (_fmt_ctx_mic->streams[i]->codec->codec_type == AVMEDIA_TYPE_AUDIO)
{
_index_mic = i;
break;
}
}
if (_index_mic < 0)
{
printf("Microphone: negative audio index\n");
return -1;
}
AVCodecContext* codec_ctx = _fmt_ctx_mic->streams[_index_mic]->codec;
AVCodec* codec = avcodec_find_decoder(codec_ctx->codec_id);
if (codec == NULL)
{
printf("Microphone: null audio decoder\n");
return -1;
}
ret = avcodec_open2(codec_ctx, codec, NULL);
if (ret < 0)
{
printf("Microphone: failed to call avcodec_open2\n");
return -1;
}
av_dump_format(_fmt_ctx_mic, _index_mic, url, 0);
return 0;
}
int OpenFileOutput(char* fileName)
{
    /*
     * Create the output file: one mono 16 kHz MP3 stream, write the
     * container header, and allocate the two capture FIFOs (sized for ~30
     * encoder frames each). Requires both inputs to be open already.
     * Returns 0 on success, -1 on error.
     */
    int ret = 0;
    ret = avformat_alloc_output_context2(&_fmt_ctx_out, NULL, NULL, fileName);
    if (ret < 0)
    {
        printf("Mixer: failed to call avformat_alloc_output_context2\n");
        return -1;
    }
    AVStream* stream_a = NULL;
    stream_a = avformat_new_stream(_fmt_ctx_out, NULL);
    if (stream_a == NULL)
    {
        printf("Mixer: failed to call avformat_new_stream\n");
        return -1;
    }
    _index_a_out = 0;
    // Configure the MP3 encoder: mono, 16 kHz, 16 kbit/s.
    stream_a->codec->codec_type = AVMEDIA_TYPE_AUDIO;
    AVCodec* codec_mp3 = avcodec_find_encoder(AV_CODEC_ID_MP3);
    stream_a->codec->codec = codec_mp3;
    stream_a->codec->sample_rate = 16000;
    stream_a->codec->channels = 1;
    stream_a->codec->channel_layout = av_get_default_channel_layout(1);
    stream_a->codec->sample_fmt = codec_mp3->sample_fmts[0]; // encoder's preferred format
    stream_a->codec->bit_rate = 16000;
    stream_a->codec->time_base.num = 1;
    stream_a->codec->time_base.den = stream_a->codec->sample_rate;
    stream_a->codec->codec_tag = 0;
    if (_fmt_ctx_out->oformat->flags & AVFMT_GLOBALHEADER)
        stream_a->codec->flags |= CODEC_FLAG_GLOBAL_HEADER;
    if (avcodec_open2(stream_a->codec, stream_a->codec->codec, NULL) < 0)
    {
        printf("Mixer: failed to call avcodec_open2\n");
        return -1;
    }
    if (!(_fmt_ctx_out->oformat->flags & AVFMT_NOFILE))
    {
        if (avio_open(&_fmt_ctx_out->pb, fileName, AVIO_FLAG_WRITE) < 0)
        {
            printf("Mixer: failed to call avio_open\n");
            return -1;
        }
    }
    if (avformat_write_header(_fmt_ctx_out, NULL) < 0)
    {
        printf("Mixer: failed to call avformat_write_header\n");
        return -1;
    }
    av_dump_format(_fmt_ctx_out, _index_a_out, fileName, 1);
    // FIFOs between the capture threads and the mixing loop, sized for
    // roughly 30 encoder frames.
    _fifo_spk = av_audio_fifo_alloc(_fmt_ctx_spk->streams[_index_spk]->codec->sample_fmt, _fmt_ctx_spk->streams[_index_spk]->codec->channels, 30*_fmt_ctx_spk->streams[_index_spk]->codec->frame_size);
    // FIX: the mic FIFO was sized with the *speaker* stream's frame_size
    // (copy-paste: _index_spk instead of _index_mic).
    _fifo_mic = av_audio_fifo_alloc(_fmt_ctx_mic->streams[_index_mic]->codec->sample_fmt, _fmt_ctx_mic->streams[_index_mic]->codec->channels, 30*_fmt_ctx_mic->streams[_index_mic]->codec->frame_size);
    if (_fifo_spk == NULL || _fifo_mic == NULL)
    {
        // FIX: allocation results were never checked.
        printf("Mixer: failed to allocate audio fifo\n");
        return -1;
    }
    return 0;
}
int InitFilter(char* filter_desc)
{
    /*
     * Build the mixing graph: two "abuffer" sources (speaker pad "in0",
     * microphone pad "in1") feeding filter_desc (an amix graph), terminated
     * by an "abuffersink" constrained to the output encoder's sample
     * format, channel layout and sample rate. Returns 0 on success, -1 on error.
     */
    char args_spk[512];
    char* pad_name_spk = "in0";
    char args_mic[512];
    char* pad_name_mic = "in1";
    AVFilter* filter_src_spk = avfilter_get_by_name("abuffer");
    AVFilter* filter_src_mic = avfilter_get_by_name("abuffer");
    AVFilter* filter_sink = avfilter_get_by_name("abuffersink");
    AVFilterInOut* filter_output_spk = avfilter_inout_alloc();
    AVFilterInOut* filter_output_mic = avfilter_inout_alloc();
    AVFilterInOut* filter_input = avfilter_inout_alloc();
    _filter_graph = avfilter_graph_alloc();
    // Describe each source buffer with its stream's audio parameters.
    sprintf_s(args_spk, sizeof(args_spk), "time_base=%d/%d:sample_rate=%d:sample_fmt=%s:channel_layout=0x%I64x",
        _fmt_ctx_spk->streams[_index_spk]->codec->time_base.num,
        _fmt_ctx_spk->streams[_index_spk]->codec->time_base.den,
        _fmt_ctx_spk->streams[_index_spk]->codec->sample_rate,
        av_get_sample_fmt_name(_fmt_ctx_spk->streams[_index_spk]->codec->sample_fmt),
        _fmt_ctx_spk->streams[_index_spk]->codec->channel_layout);
    sprintf_s(args_mic, sizeof(args_mic), "time_base=%d/%d:sample_rate=%d:sample_fmt=%s:channel_layout=0x%I64x",
        _fmt_ctx_mic->streams[_index_mic]->codec->time_base.num,
        _fmt_ctx_mic->streams[_index_mic]->codec->time_base.den,
        _fmt_ctx_mic->streams[_index_mic]->codec->sample_rate,
        av_get_sample_fmt_name(_fmt_ctx_mic->streams[_index_mic]->codec->sample_fmt),
        _fmt_ctx_mic->streams[_index_mic]->codec->channel_layout);
    int ret = 0;
    ret = avfilter_graph_create_filter(&_filter_ctx_src_spk, filter_src_spk, pad_name_spk, args_spk, NULL, _filter_graph);
    if (ret < 0)
    {
        printf("Filter: failed to call avfilter_graph_create_filter -- src spk\n");
        return -1;
    }
    ret = avfilter_graph_create_filter(&_filter_ctx_src_mic, filter_src_mic, pad_name_mic, args_mic, NULL, _filter_graph);
    if (ret < 0)
    {
        printf("Filter: failed to call avfilter_graph_create_filter -- src mic\n");
        return -1;
    }
    ret = avfilter_graph_create_filter(&_filter_ctx_sink, filter_sink, "out", NULL, NULL, _filter_graph);
    if (ret < 0)
    {
        printf("Filter: failed to call avfilter_graph_create_filter -- sink\n");
        return -1;
    }
    // Constrain the sink to exactly what the output encoder expects.
    AVCodecContext* encodec_ctx = _fmt_ctx_out->streams[_index_a_out]->codec;
    ret = av_opt_set_bin(_filter_ctx_sink, "sample_fmts", (uint8_t*)&encodec_ctx->sample_fmt, sizeof(encodec_ctx->sample_fmt), AV_OPT_SEARCH_CHILDREN);
    if (ret < 0)
    {
        printf("Filter: failed to call av_opt_set_bin -- sample_fmts\n");
        return -1;
    }
    ret = av_opt_set_bin(_filter_ctx_sink, "channel_layouts", (uint8_t*)&encodec_ctx->channel_layout, sizeof(encodec_ctx->channel_layout), AV_OPT_SEARCH_CHILDREN);
    if (ret < 0)
    {
        printf("Filter: failed to call av_opt_set_bin -- channel_layouts\n");
        return -1;
    }
    ret = av_opt_set_bin(_filter_ctx_sink, "sample_rates", (uint8_t*)&encodec_ctx->sample_rate, sizeof(encodec_ctx->sample_rate), AV_OPT_SEARCH_CHILDREN);
    if (ret < 0)
    {
        printf("Filter: failed to call av_opt_set_bin -- sample_rates\n");
        return -1;
    }
    // Wire the graph endpoints: the two sources are the graph's "outputs"
    // (they feed data in), the sink is its "input" (it accepts the result).
    filter_output_spk->name = av_strdup(pad_name_spk);
    filter_output_spk->filter_ctx = _filter_ctx_src_spk;
    filter_output_spk->pad_idx = 0;
    filter_output_spk->next = filter_output_mic;
    filter_output_mic->name = av_strdup(pad_name_mic);
    filter_output_mic->filter_ctx = _filter_ctx_src_mic;
    filter_output_mic->pad_idx = 0;
    filter_output_mic->next = NULL;
    filter_input->name = av_strdup("out");
    filter_input->filter_ctx = _filter_ctx_sink;
    filter_input->pad_idx = 0;
    filter_input->next = NULL;
    AVFilterInOut* filter_outputs[2];
    filter_outputs[0] = filter_output_spk;
    filter_outputs[1] = filter_output_mic;
    ret = avfilter_graph_parse_ptr(_filter_graph, filter_desc, &filter_input, filter_outputs, NULL);
    if (ret < 0)
    {
        printf("Filter: failed to call avfilter_graph_parse_ptr\n");
        return -1;
    }
    ret = avfilter_graph_config(_filter_graph, NULL);
    if (ret < 0)
    {
        printf("Filter: failed to call avfilter_graph_config\n");
        return -1;
    }
    avfilter_inout_free(&filter_input);
    // FIX: the original av_free()'d filter_src_spk/filter_src_mic, but
    // avfilter_get_by_name() returns static registry descriptors that must
    // never be freed.
    avfilter_inout_free(filter_outputs);
    char* temp = avfilter_graph_dump(_filter_graph, NULL);
    printf("%s\n", temp);
    av_free(temp); // FIX: the dump string is heap-allocated by FFmpeg
    return 0;
}
DWORD WINAPI SpeakerCapThreadProc(LPVOID lpParam)
{
    /*
     * Speaker capture thread: read packets from _fmt_ctx_spk, decode them,
     * and append the decoded samples to _fifo_spk (guarded by _section_spk)
     * for as long as _state stays RUNNING.
     */
    AVFrame* pFrame = av_frame_alloc();
    AVPacket packet;
    av_init_packet(&packet);
    int got_sound;
    while (_state == CaptureState::RUNNING)
    {
        packet.data = NULL;
        packet.size = 0;
        if (av_read_frame(_fmt_ctx_spk, &packet) < 0)
        {
            continue; // transient read failure; retry
        }
        if (packet.stream_index != _index_spk)
        {
            av_free_packet(&packet); // FIX: foreign-stream packets were leaked
            continue;
        }
        got_sound = 0;
        int dec = avcodec_decode_audio4(_fmt_ctx_spk->streams[_index_spk]->codec, pFrame, &got_sound, &packet);
        av_free_packet(&packet); // FIX: also freed on the decode-error path now
        if (dec < 0)
        {
            break;
        }
        if (!got_sound)
        {
            continue;
        }
        // Back off while the FIFO has no room for this frame.
        int fifo_spk_space = av_audio_fifo_space(_fifo_spk);
        while (fifo_spk_space < pFrame->nb_samples && _state == CaptureState::RUNNING)
        {
            Sleep(10);
            printf("_fifo_spk full !\n");
            fifo_spk_space = av_audio_fifo_space(_fifo_spk);
        }
        if (fifo_spk_space >= pFrame->nb_samples)
        {
            EnterCriticalSection(&_section_spk);
            av_audio_fifo_write(_fifo_spk, (void**)pFrame->data, pFrame->nb_samples);
            LeaveCriticalSection(&_section_spk);
        }
    }
    av_frame_free(&pFrame);
    return 0;
}
DWORD WINAPI MicrophoneCapThreadProc(LPVOID lpParam)
{
    /*
     * Microphone capture thread: wait for the session to start, then read
     * packets from _fmt_ctx_mic, decode them, and append the samples to
     * _fifo_mic (guarded by _section_mic) while _state stays RUNNING.
     */
    AVFrame* pFrame = av_frame_alloc();
    AVPacket packet;
    av_init_packet(&packet);
    int got_sound;
    // Wait for the main thread to switch the state to RUNNING.
    // NOTE(review): _state is neither volatile nor atomic, so this spin
    // relies on the compiler re-loading it each iteration — confirm, or
    // make the flag atomic.
    while (_state == CaptureState::PREPARED)
    {
        Sleep(1); // FIX: the original spun on an empty loop, burning a core
    }
    while (_state == CaptureState::RUNNING)
    {
        packet.data = NULL;
        packet.size = 0;
        if (av_read_frame(_fmt_ctx_mic, &packet) < 0)
        {
            continue; // transient read failure; retry
        }
        if (packet.stream_index != _index_mic)
        {
            av_free_packet(&packet); // FIX: foreign-stream packets were leaked
            continue;
        }
        got_sound = 0;
        int dec = avcodec_decode_audio4(_fmt_ctx_mic->streams[_index_mic]->codec, pFrame, &got_sound, &packet);
        av_free_packet(&packet); // FIX: also freed on the decode-error path now
        if (dec < 0)
        {
            break;
        }
        if (!got_sound)
        {
            continue;
        }
        // Back off while the FIFO has no room for this frame.
        int fifo_mic_space = av_audio_fifo_space(_fifo_mic);
        while (fifo_mic_space < pFrame->nb_samples && _state == CaptureState::RUNNING)
        {
            Sleep(10);
            printf("_fifo_mic full !\n");
            fifo_mic_space = av_audio_fifo_space(_fifo_mic);
        }
        if (fifo_mic_space >= pFrame->nb_samples)
        {
            EnterCriticalSection(&_section_mic);
            av_audio_fifo_write(_fifo_mic, (void**)pFrame->data, pFrame->nb_samples);
            LeaveCriticalSection(&_section_mic);
        }
    }
    av_frame_free(&pFrame);
    return 0;
}
int main()
{
    /*
     * Mix two audio inputs into one MP3 file. Flow:
     *   open speaker + microphone inputs -> open the MP3 output -> build the
     *   amix filter graph -> start two capture threads that fill
     *   _fifo_spk/_fifo_mic -> this loop drains both FIFOs, pushes one frame
     *   from each through the graph, encodes the mixed result and muxes it.
     * Any keypress stops the recording.
     */
    int ret = 0;
    InitRecorder();
    // Output file name derived from the local time, e.g. "2024_5_1_12_0_0.mp3".
    char fileName[128];
    char* outFileType = ".mp3";
    time_t rawtime;
    tm* timeInfo;
    time(&rawtime);
    timeInfo = localtime(&rawtime);
    sprintf_s(fileName, sizeof(fileName), "%d_%d_%d_%d_%d_%d%s",
        timeInfo->tm_year + 1900, timeInfo->tm_mon + 1, timeInfo->tm_mday,
        timeInfo->tm_hour, timeInfo->tm_min, timeInfo->tm_sec, outFileType);
    // Two labeled inputs mixed into one output by the amix filter.
    char* filter_desc = "[in0][in1]amix=inputs=2[out]";
    // File-based inputs for testing; the dshow device lines are kept for reference.
    //ret = OpenSpeakerInput("dshow", "audio=virtual-audio-capturer");
    ret = OpenSpeakerInput(NULL, "aa.mp3");
    if (ret < 0)
    {
        goto Release;
    }
    //ret = OpenMicrophoneInput("dshow", "audio=External Microphone (Conexant S");
    ret = OpenMicrophoneInput(NULL, "bb.mp3");
    if (ret < 0)
    {
        goto Release;
    }
    ret = OpenFileOutput(fileName);
    if (ret < 0)
    {
        goto Release;
    }
    ret = InitFilter(filter_desc);
    if (ret < 0)
    {
        goto Release;
    }
    // Start capture; the worker threads only run while _state == RUNNING.
    // NOTE(review): _state is a plain enum shared across threads without
    // atomics/volatile — confirm visibility guarantees on the target compiler.
    _state = CaptureState::RUNNING;
    InitializeCriticalSection(&_section_spk);
    InitializeCriticalSection(&_section_mic);
    CreateThread(NULL, 0, SpeakerCapThreadProc, 0, 0, NULL);
    CreateThread(NULL, 0, MicrophoneCapThreadProc, 0, 0, NULL);
    int tmpFifoFailed = 0;  // consecutive under-run count; aborts after ~6 s
    int64_t frame_count = 0;
    while (_state != CaptureState::FINISHED)
    {
        // Any keypress stops the recording.
        if (_kbhit())
        {
            _state = CaptureState::STOPPED;
            break;
        }
        else
        {
            int ret = 0;
            AVFrame* pFrame_spk = av_frame_alloc();
            AVFrame* pFrame_mic = av_frame_alloc();
            AVPacket packet_out;
            int got_packet_ptr = 0;
            // Proceed only when both FIFOs hold at least one encoder frame.
            int fifo_spk_size = av_audio_fifo_size(_fifo_spk);
            int fifo_mic_size = av_audio_fifo_size(_fifo_mic);
            int frame_spk_min_size = _fmt_ctx_spk->streams[_index_spk]->codec->frame_size;
            int frame_mic_min_size = _fmt_ctx_mic->streams[_index_mic]->codec->frame_size;
            if (fifo_spk_size >= frame_spk_min_size && fifo_mic_size >= frame_mic_min_size)
            {
                tmpFifoFailed = 0;
                // Describe and allocate one input frame per source, then pull
                // the samples out of the FIFOs under their respective locks.
                pFrame_spk->nb_samples = frame_spk_min_size;
                pFrame_spk->channel_layout = _fmt_ctx_spk->streams[_index_spk]->codec->channel_layout;
                pFrame_spk->format = _fmt_ctx_spk->streams[_index_spk]->codec->sample_fmt;
                pFrame_spk->sample_rate = _fmt_ctx_spk->streams[_index_spk]->codec->sample_rate;
                av_frame_get_buffer(pFrame_spk, 0);
                pFrame_mic->nb_samples = frame_mic_min_size;
                pFrame_mic->channel_layout = _fmt_ctx_mic->streams[_index_mic]->codec->channel_layout;
                pFrame_mic->format = _fmt_ctx_mic->streams[_index_mic]->codec->sample_fmt;
                pFrame_mic->sample_rate = _fmt_ctx_mic->streams[_index_mic]->codec->sample_rate;
                av_frame_get_buffer(pFrame_mic, 0);
                EnterCriticalSection(&_section_spk);
                ret = av_audio_fifo_read(_fifo_spk, (void**)pFrame_spk->data, frame_spk_min_size);
                LeaveCriticalSection(&_section_spk);
                EnterCriticalSection(&_section_mic);
                ret = av_audio_fifo_read(_fifo_mic, (void**)pFrame_mic->data, frame_mic_min_size);
                LeaveCriticalSection(&_section_mic);
                pFrame_spk->pts = av_frame_get_best_effort_timestamp(pFrame_spk);
                pFrame_mic->pts = av_frame_get_best_effort_timestamp(pFrame_mic);
                // Debug aid: peek at the buffersrc's private context to compare
                // its negotiated parameters with the frame; results unused.
                BufferSourceContext* s = (BufferSourceContext*)_filter_ctx_src_spk->priv;
                bool b1 = (s->sample_fmt != pFrame_spk->format);
                bool b2 = (s->sample_rate != pFrame_spk->sample_rate);
                bool b3 = (s->channel_layout != pFrame_spk->channel_layout);
                bool b4 = (s->channels != pFrame_spk->channels);
                // Push one frame from each source into the filter graph.
                ret = av_buffersrc_add_frame(_filter_ctx_src_spk, pFrame_spk);
                if (ret < 0)
                {
                    printf("Mixer: failed to call av_buffersrc_add_frame (speaker)\n");
                    break;
                }
                ret = av_buffersrc_add_frame(_filter_ctx_src_mic, pFrame_mic);
                if (ret < 0)
                {
                    printf("Mixer: failed to call av_buffersrc_add_frame (microphone)\n");
                    break;
                }
                // Drain all mixed frames the sink can currently produce,
                // encoding and muxing each one.
                while (1)
                {
                    AVFrame* pFrame_out = av_frame_alloc();
                    ret = av_buffersink_get_frame_flags(_filter_ctx_sink, pFrame_out, 0);
                    if (ret < 0)
                    {
                        // Normal exit: sink needs more input (or EOF).
                        printf("Mixer: failed to call av_buffersink_get_frame_flags\n");
                        break;
                    }
                    if (pFrame_out->data[0] != NULL)
                    {
                        av_init_packet(&packet_out);
                        packet_out.data = NULL;  // encoder allocates the payload
                        packet_out.size = 0;
                        ret = avcodec_encode_audio2(_fmt_ctx_out->streams[_index_a_out]->codec, &packet_out, pFrame_out, &got_packet_ptr);
                        if (ret < 0)
                        {
                            printf("Mixer: failed to call avcodec_decode_audio4\n");
                            break;
                        }
                        if (got_packet_ptr)
                        {
                            // Timestamps counted in encoded frames, then
                            // rescaled from codec to stream time base.
                            packet_out.stream_index = _index_a_out;
                            packet_out.pts = frame_count * _fmt_ctx_out->streams[_index_a_out]->codec->frame_size;
                            packet_out.dts = packet_out.pts;
                            packet_out.duration = _fmt_ctx_out->streams[_index_a_out]->codec->frame_size;
                            packet_out.pts = av_rescale_q_rnd(packet_out.pts,
                                _fmt_ctx_out->streams[_index_a_out]->codec->time_base,
                                _fmt_ctx_out->streams[_index_a_out]->time_base,
                                (AVRounding)(AV_ROUND_NEAR_INF | AV_ROUND_PASS_MINMAX));
                            packet_out.dts = packet_out.pts;
                            packet_out.duration = av_rescale_q_rnd(packet_out.duration,
                                _fmt_ctx_out->streams[_index_a_out]->codec->time_base,
                                _fmt_ctx_out->streams[_index_a_out]->time_base,
                                (AVRounding)(AV_ROUND_NEAR_INF | AV_ROUND_PASS_MINMAX));
                            frame_count++;
                            ret = av_interleaved_write_frame(_fmt_ctx_out, &packet_out);
                            if (ret < 0)
                            {
                                printf("Mixer: failed to call av_interleaved_write_frame\n");
                            }
                            printf("Mixer: write frame to file\n");
                        }
                        av_free_packet(&packet_out);
                    }
                    av_frame_free(&pFrame_out);
                }
            }
            else
            {
                // FIFO under-run: back off, give up after ~300 misses (~6 s).
                tmpFifoFailed++;
                Sleep(20);
                if (tmpFifoFailed > 300)
                {
                    _state = CaptureState::STOPPED;
                    Sleep(30);
                    break;
                }
            }
            av_frame_free(&pFrame_spk);
            av_frame_free(&pFrame_mic);
        }
    }
    // Finalize the container.
    av_write_trailer(_fmt_ctx_out);
Release:
    av_audio_fifo_free(_fifo_spk);
    av_audio_fifo_free(_fifo_mic);
    avfilter_free(_filter_ctx_src_spk);
    avfilter_free(_filter_ctx_src_mic);
    avfilter_free(_filter_ctx_sink);
    avfilter_graph_free(&_filter_graph);
    if (_fmt_ctx_out)
    {
        avio_close(_fmt_ctx_out->pb);
    }
    avformat_close_input(&_fmt_ctx_spk);
    avformat_close_input(&_fmt_ctx_mic);
    avformat_free_context(_fmt_ctx_out);
    return ret;
}
http://blog.csdn.net/leixiaohua1020/article/details/25430449
http://blog.csdn.net/dancing_night/article/details/45642107