在进行音视频编程过程中,音频处理是必须掌握的基本知识。本篇文章主要介绍音频处理过程中通常会遇到的相关处理,包括音频编码,音频解码,音频重采样和音频混合。
1 音频PCM采样数据编码为压缩码流
#include "stdafx.h"
extern "C"
{
#include "libavcodec\avcodec.h"
#include "libavformat\avformat.h"
#include "libswscale\swscale.h"
};
int _tmain(int argc, _TCHAR* argv[])
{
    /*
     * Encode raw PCM samples (S16 interleaved, 44.1 kHz stereo) into a
     * compressed audio file; the container/codec are guessed from the
     * output file name. Returns 0 on success, -1 on any error.
     */
    AVFormatContext* pFormatCtx;
    AVOutputFormat* fmt;
    AVStream* audio_st;
    AVCodecContext* pCodecCtx;
    AVCodec* pCodec;
    uint8_t* frame_buf;
    AVFrame* frame;
    int size;
    FILE *in_file = fopen("tdjm.pcm", "rb"); // raw PCM input samples
    int framenum = 1000;                     // max number of frames to encode
    const char* out_file = "tdjm.aac";       // output file path
    if (in_file == NULL)
    {
        // FIX: fopen() was never checked; a missing input crashed in fread().
        printf("Failed to open input PCM file!\n");
        return -1;
    }
    av_register_all();
    // Method 1: assemble the muxer context by hand.
    pFormatCtx = avformat_alloc_context();
    // Guess the container format from the output file name.
    fmt = av_guess_format(NULL, out_file, NULL);
    pFormatCtx->oformat = fmt;
    // Method 2 (more automatic):
    //avformat_alloc_output_context2(&pFormatCtx, NULL, NULL, out_file);
    //fmt = pFormatCtx->oformat;
    // Open the output file itself.
    if (avio_open(&pFormatCtx->pb, out_file, AVIO_FLAG_READ_WRITE) < 0)
    {
        printf("输出文件打开失败!\n");
        return -1;
    }
    // NOTE: av_new_stream() is deprecated; avformat_new_stream() is the modern API.
    audio_st = av_new_stream(pFormatCtx, 0);
    if (audio_st == NULL){
        return -1;
    }
    // Configure the encoder: S16, 44.1 kHz stereo, 16 kbit/s.
    pCodecCtx = audio_st->codec;
    pCodecCtx->codec_id = fmt->audio_codec;
    pCodecCtx->codec_type = AVMEDIA_TYPE_AUDIO;
    pCodecCtx->sample_fmt = AV_SAMPLE_FMT_S16;
    pCodecCtx->sample_rate = 44100;
    pCodecCtx->channel_layout = AV_CH_LAYOUT_STEREO;
    pCodecCtx->channels = av_get_channel_layout_nb_channels(pCodecCtx->channel_layout);
    pCodecCtx->bit_rate = 16000;
    // Print the negotiated output format.
    av_dump_format(pFormatCtx, 0, out_file, 1);
    pCodec = avcodec_find_encoder(pCodecCtx->codec_id);
    if (!pCodec)
    {
        printf("没有找到合适的编码器!\n");
        return -1;
    }
    if (avcodec_open2(pCodecCtx, pCodec, NULL) < 0)
    {
        printf("编码器打开失败!\n");
        return -1;
    }
    // One uncompressed frame holds frame_size samples per channel.
    frame = avcodec_alloc_frame();
    frame->nb_samples = pCodecCtx->frame_size;
    frame->format = pCodecCtx->sample_fmt;
    size = av_samples_get_buffer_size(NULL, pCodecCtx->channels, pCodecCtx->frame_size, pCodecCtx->sample_fmt, 1);
    frame_buf = (uint8_t *)av_malloc(size);
    avcodec_fill_audio_frame(frame, pCodecCtx->channels, pCodecCtx->sample_fmt, (const uint8_t*)frame_buf, size, 1);
    // Write the container header.
    avformat_write_header(pFormatCtx, NULL);
    AVPacket pkt;
    av_new_packet(&pkt, size);
    for (int i = 0; i < framenum; i++){
        // Read one frame worth of PCM.
        // FIX: fread() returns size_t (unsigned), so the original test
        // `fread(...) < 0` could never fire. Check the byte count instead;
        // this also keeps the final *complete* frame, which the original
        // dropped whenever EOF coincided with a full read.
        size_t bytes_read = fread(frame_buf, 1, size, in_file);
        if (bytes_read < (size_t)size)
        {
            if (ferror(in_file))
            {
                printf("文件读取错误!\n");
                return -1;
            }
            break; // EOF: discard any short tail frame
        }
        frame->data[0] = frame_buf; // interleaved sample data
        frame->pts = i * 100;
        int got_frame = 0;
        // Encode one AVFrame of PCM into an AVPacket of compressed data.
        int ret = avcodec_encode_audio2(pCodecCtx, &pkt, frame, &got_frame);
        if (ret < 0)
        {
            printf("编码错误!\n");
            return -1;
        }
        if (got_frame == 1)
        {
            printf("编码成功第%d帧!\n", i);
            pkt.stream_index = audio_st->index;
            ret = av_write_frame(pFormatCtx, &pkt);
            av_free_packet(&pkt);
        }
    }
    // Write the container trailer.
    av_write_trailer(pFormatCtx);
    // Cleanup.
    if (audio_st)
    {
        avcodec_close(audio_st->codec);
        av_free(frame);
        av_free(frame_buf);
    }
    avio_close(pFormatCtx->pb);
    avformat_free_context(pFormatCtx);
    fclose(in_file);
    return 0;
}
本程序实现了音频PCM采样数据编码为压缩码流(MP3,WMA,AAC等)。代码简洁明了,主要函数的详细描述如下:
av_register_all():注册FFmpeg所有编解码器。
avformat_alloc_output_context2():初始化输出码流的AVFormatContext。
avio_open():打开输出文件。
av_new_stream():创建输出码流的AVStream。
avcodec_find_encoder():查找编码器。
avcodec_open2():打开编码器。
avformat_write_header():写文件头(对于某些没有文件头的封装格式,不需要此函数。比如说MPEG2TS)。
avcodec_encode_audio2():编码音频。即将AVFrame(存储PCM采样数据)编码为AVPacket(存储AAC,MP3等格式的码流数据)。
av_write_frame():将编码后的音频码流写入文件。
av_write_trailer():写文件尾(对于某些没有文件头的封装格式,不需要此函数。比如说MPEG2TS)。
2 mp3解码为PCM
#include <stdio.h>
#define __STDC_CONSTANT_MACROS
extern "C"
{
#include "libavcodec/avcodec.h"
#include "libavformat/avformat.h"
#include "libswscale/swscale.h"
#include "libswresample/swresample.h"
};
// setup_array is adapted from the FFmpeg example code.
// Gather the frame's data-plane pointers into a flat array: one entry per
// channel for planar sample formats, a single entry for interleaved ones,
// so the array can be handed straight to swr_convert().
static void setup_array(uint8_t* out[SWR_CH_MAX], AVFrame* in_frame, int format, int samples)
{
    if (av_sample_fmt_is_planar((AVSampleFormat)format))
    {
        int i;
        // Bytes in one plane; computed for reference but not otherwise used.
        int plane_size = av_get_bytes_per_sample((AVSampleFormat)(format & 0xFF)) * samples;
        format &= 0xFF;
        // Planes coming out of the decoder are not guaranteed to be laid out
        // contiguously, so we must NOT write in_frame->data[0] + i*plane_size;
        // copy each plane pointer individually instead.
        for (i = 0; i < in_frame->channels; i++)
        {
            out[i] = in_frame->data[i];
        }
    }
    else
    {
        out[0] = in_frame->data[0];
    }
}
int main()
{
char filepath[] = "bird.mp3";
AVFormatContext *pFormatCtx;
int i,videoindex;
AVCodecContext *pCodecCtx;
AVCodec *pCodec;
AVFrame *pFrame, *pFrameYUV;
AVPacket *packet;
struct SwsContext *img_convert_ctx;
int ret, got_audio;
FILE *fp_pcm;
fp_pcm = fopen("output.pcm", "wb+");
av_register_all();
avformat_network_init();
pFormatCtx = avformat_alloc_context();
if (avformat_open_input(&pFormatCtx, filepath, NULL, NULL) != 0){
printf("Couldn't open input stream.\n");
return -1;
}
if (avformat_find_stream_info(pFormatCtx, NULL) < 0){
printf("Couldn't find stream information.\n");
return -1;
}
videoindex = -1;
for (i = 0; i < pFormatCtx->nb_streams; i++)
if (pFormatCtx->streams[i]->codec->codec_type == AVMEDIA_TYPE_AUDIO){
videoindex = i;
break;
}
if (videoindex == -1){
printf("Didn't find a video stream.\n");
return -1;
}
pCodecCtx = pFormatCtx->streams[videoindex]->codec;
pCodec = avcodec_find_decoder(pCodecCtx->codec_id);
if (pCodec == NULL){
printf("Codec not found.\n");
return -1;
}
if (avcodec_open2(pCodecCtx, pCodec, NULL) < 0){
printf("Could not open codec.\n");
return -1;
}
pFrame = av_frame_alloc();
//pFrameYUV = av_frame_alloc();
//uint8_t *out_buffer = (uint8_t *)av_malloc(avpicture_get_size(PIX_FMT_YUV420P, pCodecCtx->width, pCodecCtx->height));
//avpicture_fill((AVPicture *)pFrameYUV, out_buffer, PIX_FMT_YUV420P, pCodecCtx->width, pCodecCtx->height);
packet = (AVPacket *)av_malloc(sizeof(AVPacket));
//Output Information-----------------------------
printf("------------- File Information ------------------\n");
av_dump_format(pFormatCtx, 0, filepath, 0);
printf("-------------------------------------------------\n");
while (av_read_frame(pFormatCtx, packet) >= 0){
if (packet->stream_index == videoindex){
//Decode
ret = avcodec_decode_audio4(pCodecCtx, pFrame, &got_audio, packet);
if (got_audio){
//输入也可能是分平面的,所以要做如下处理
uint8_t* m_ain[SWR_CH_MAX];
setup_array(m_ain, pFrame, pCodecCtx->sample_fmt, pFrame->nb_samples);
fwrite(pFrame->data[0], 1, pFrame->linesize[0], fp_pcm);
}
}
}
av_free(pFrame);
avcodec_close(pCodecCtx);
avformat_close_input(&pFormatCtx);
return 0;
}
3 SwrContext音频重采样
#include "stdafx.h"
#ifdef __cplusplus
extern"C"
{
#endif
#include <libavformat/avformat.h>
#include "libavcodec/avcodec.h"
#include "libavfilter/avfiltergraph.h"
#include "libavfilter/buffersink.h"
#include "libavfilter/buffersrc.h"
#include "libavutil/avutil.h"
#include "libavutil/opt.h"
#include "libavutil/pixdesc.h"
#include "libswresample\swresample.h"
#include "libavutil\fifo.h"
#pragma comment(lib, "avcodec.lib")
#pragma comment(lib, "avformat.lib")
#pragma comment(lib, "avutil.lib")
//#pragma comment(lib, "avdevice.lib")
#pragma comment(lib, "avfilter.lib")
//#pragma comment(lib, "postproc.lib")
#pragma comment(lib, "swresample.lib")
//#pragma comment(lib, "swscale.lib")
#ifdef __cplusplus
};
#endif
#define BUF_SIZE_20K 2048000
#define BUF_SIZE_1K 1024000
SwrContext* pSwrCtx = NULL;
AVStream *out_stream = NULL;
AVFormatContext *in_fmt_ctx = NULL, *out_fmt_ctx = NULL;
int audio_index = -1;
void initSwr()
{
    /*
     * Lazily create and initialise the global resampler (pSwrCtx), but only
     * when the input and output streams differ in channel count, sample
     * rate, or sample format. No-op when the formats already match.
     */
    if (out_fmt_ctx->streams[0]->codec->channels != in_fmt_ctx->streams[audio_index]->codec->channels
        || out_fmt_ctx->streams[0]->codec->sample_rate != in_fmt_ctx->streams[audio_index]->codec->sample_rate
        || out_fmt_ctx->streams[0]->codec->sample_fmt != in_fmt_ctx->streams[audio_index]->codec->sample_fmt)
    {
        if ( NULL == pSwrCtx )
        {
            pSwrCtx = swr_alloc();
        }
#if LIBSWRESAMPLE_VERSION_MINOR >= 17 // pick the right API for the library version
        av_opt_set_int(pSwrCtx, "ich", in_fmt_ctx->streams[audio_index]->codec->channels, 0);
        av_opt_set_int(pSwrCtx, "och", out_fmt_ctx->streams[0]->codec->channels, 0);
        av_opt_set_int(pSwrCtx, "in_sample_rate", in_fmt_ctx->streams[audio_index]->codec->sample_rate, 0);
        av_opt_set_int(pSwrCtx, "out_sample_rate", out_fmt_ctx->streams[0]->codec->sample_rate, 0);
        av_opt_set_sample_fmt(pSwrCtx, "in_sample_fmt", in_fmt_ctx->streams[audio_index]->codec->sample_fmt, 0);
        av_opt_set_sample_fmt(pSwrCtx, "out_sample_fmt", out_fmt_ctx->streams[0]->codec->sample_fmt, 0);
#else
        // FIX: the original passed the *input* channel layout and sample rate
        // (and a hard-coded S16) as the output parameters, so no real
        // conversion was configured. swr_alloc_set_opts takes the output
        // triple first, then the input triple.
        pSwrCtx = swr_alloc_set_opts(pSwrCtx,
            out_fmt_ctx->streams[0]->codec->channel_layout,
            out_fmt_ctx->streams[0]->codec->sample_fmt,
            out_fmt_ctx->streams[0]->codec->sample_rate,
            in_fmt_ctx->streams[audio_index]->codec->channel_layout,
            in_fmt_ctx->streams[audio_index]->codec->sample_fmt,
            in_fmt_ctx->streams[audio_index]->codec->sample_rate,
            0, NULL);
#endif
        // FIX: swr_init() failure was silently ignored.
        if (swr_init(pSwrCtx) < 0)
        {
            printf("initSwr: swr_init failed!\n");
        }
    }
}
// setup_array is adapted from the FFmpeg example code.
// Collect the frame's data-plane pointers into a flat table for
// swr_convert(): interleaved formats use a single entry, planar formats one
// entry per channel. Decoder planes are not necessarily contiguous, so each
// data[] pointer must be copied rather than derived from data[0] + offset.
static void setup_array(uint8_t* out[SWR_CH_MAX], AVFrame* in_frame, int format, int samples)
{
    (void)samples; // sample count is not needed to build the pointer table
    if (!av_sample_fmt_is_planar((AVSampleFormat)format))
    {
        out[0] = in_frame->data[0];
        return;
    }
    for (int ch = 0; ch < in_frame->channels; ch++)
    {
        out[ch] = in_frame->data[ch];
    }
}
int TransSample(AVFrame *in_frame, AVFrame *out_frame)
{
    /*
     * Resample one decoded frame (in_frame) into the output stream's
     * channel/rate/format using the global pSwrCtx.
     * Returns 0 on success, -1 on error.
     * Ownership: out_frame->data[0] is allocated here via av_samples_alloc();
     * the caller must av_free() it after use.
     */
    int ret;
    int len;
    int64_t src_nb_samples = in_frame->nb_samples;
    int in_rate = in_fmt_ctx->streams[audio_index]->codec->sample_rate;
    int out_rate = out_fmt_ctx->streams[0]->codec->sample_rate;
    out_frame->pts = in_frame->pts;
    if (pSwrCtx == NULL)
    {
        printf("pSwrCtx with out init!\n");
        return -1;
    }
    // Size the output for rate conversion plus any samples buffered inside
    // the resampler.
    // FIX: the original rescaled with out_rate/out_rate (a no-op) and passed
    // the *output* rate to swr_get_delay(); the delay is measured at the
    // input rate and the count rescaled input-rate -> output-rate.
    out_frame->nb_samples = av_rescale_rnd(swr_get_delay(pSwrCtx, in_rate) + src_nb_samples,
        out_rate, in_rate, AV_ROUND_UP);
    ret = av_samples_alloc(out_frame->data, &out_frame->linesize[0],
        out_fmt_ctx->streams[0]->codec->channels, out_frame->nb_samples,
        out_fmt_ctx->streams[0]->codec->sample_fmt, 0);
    if (ret < 0)
    {
        av_log(NULL, AV_LOG_WARNING, "[%s.%d %s() Could not allocate samples Buffer\n", __FILE__, __LINE__, __FUNCTION__);
        return -1;
    }
    // The input may be planar, so gather the per-plane pointers first.
    uint8_t* m_ain[SWR_CH_MAX];
    setup_array(m_ain, in_frame, in_fmt_ctx->streams[audio_index]->codec->sample_fmt, src_nb_samples);
    // NOTE: swr_convert() counts are samples per channel, not bytes — do not
    // multiply by av_get_bytes_per_sample() here.
    len = swr_convert(pSwrCtx, out_frame->data, out_frame->nb_samples,
        (const uint8_t**)m_ain, src_nb_samples);
    if (len < 0)
    {
        char errmsg[BUF_SIZE_1K];
        av_strerror(len, errmsg, sizeof(errmsg));
        av_log(NULL, AV_LOG_WARNING, "[%s:%d] swr_convert!(%d)(%s)", __FILE__, __LINE__, len, errmsg);
        return -1;
    }
    return 0;
}
int flush_encoder(AVFormatContext *fmt_ctx, unsigned int stream_index)
{
    /*
     * Drain any frames the encoder has buffered internally (codecs with
     * CODEC_CAP_DELAY) by feeding it NULL frames, and mux the resulting
     * packets. Returns 0 when there is nothing to drain, otherwise the last
     * encode/write result (negative on failure).
     */
    int ret = 0;
    int got_frame;
    AVPacket enc_pkt;
    // Nothing to flush unless the codec buffers frames internally.
    if (!(fmt_ctx->streams[stream_index]->codec->codec->capabilities &
        CODEC_CAP_DELAY))
    {
        return 0;
    }
    int i = 0;
    while (1)
    {
        av_init_packet(&enc_pkt);
        enc_pkt.data = NULL; // let the encoder allocate the payload
        enc_pkt.size = 0;
        // FIX: the original checked fmt_ctx but encoded/muxed through the
        // global out_fmt_ctx; use the parameter consistently (they are the
        // same object at the current call site, so behavior is unchanged).
        ret = avcodec_encode_audio2(fmt_ctx->streams[stream_index]->codec, &enc_pkt,
            NULL, &got_frame);
        if (ret < 0)
            break;
        if (!got_frame)
            break; // encoder fully drained
        /* prepare packet for muxing: codec time base -> stream time base */
        enc_pkt.stream_index = stream_index;
        enc_pkt.dts = av_rescale_q_rnd(enc_pkt.dts,
            fmt_ctx->streams[stream_index]->codec->time_base,
            fmt_ctx->streams[stream_index]->time_base,
            (AVRounding)(AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX));
        enc_pkt.pts = av_rescale_q_rnd(enc_pkt.pts,
            fmt_ctx->streams[stream_index]->codec->time_base,
            fmt_ctx->streams[stream_index]->time_base,
            (AVRounding)(AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX));
        enc_pkt.duration = av_rescale_q(enc_pkt.duration,
            fmt_ctx->streams[stream_index]->codec->time_base,
            fmt_ctx->streams[stream_index]->time_base);
        /* mux encoded frame */
        ret = av_interleaved_write_frame(fmt_ctx, &enc_pkt);
        av_free_packet(&enc_pkt);
        if (ret < 0)
        {
            break;
        }
        i++;
    }
    printf("flusher write %d frame", i);
    return ret;
}
int _tmain(int argc, _TCHAR* argv[])
{
AVFrame *frame, *frame_out;
AVPacket pkt_in, pkt_out;
int ret;
if (argc < 3)
{
printf("error in input param");
getchar();
return -1;
}
av_register_all();
avfilter_register_all();
//input
if (avformat_open_input(&in_fmt_ctx, argv[1], NULL, NULL) < 0)
{
printf("can not open input file context");
goto end;
}
if (avformat_find_stream_info(in_fmt_ctx, NULL) < 0)
{
printf("can not find input stream info!\n");
goto end;
}
//output
avformat_alloc_output_context2(&out_fmt_ctx, NULL, NULL, argv[2]);
if (!out_fmt_ctx)
{
printf("can not alloc output context!\n");
goto end;
}
//open decoder & new out stream & open encoder
for (int i = 0; i < in_fmt_ctx->nb_streams; i++)
{
if (in_fmt_ctx->streams[i]->codec->codec_type == AVMEDIA_TYPE_AUDIO)
{
//open decoder
if(0 > avcodec_open2(in_fmt_ctx->streams[i]->codec, avcodec_find_decoder(in_fmt_ctx->streams[i]->codec->codec_id), NULL))
{
printf("can not find or open decoder!\n");
goto end;
}
audio_index = i;
//new stream
out_stream = avformat_new_stream(out_fmt_ctx, NULL);
if (!out_stream)
{
printf("can not new stream for output!\n");
goto end;
}
//set codec context param
//use default audio encoder
out_stream->codec->codec = avcodec_find_encoder(out_fmt_ctx->oformat->audio_codec);
//use the input audio encoder
//out_stream->codec->codec = avcodec_find_encoder(ifmt_ctx->streams[i]->codec->codec_id);
out_stream->codec->sample_rate = in_fmt_ctx->streams[i]->codec->sample_rate;
out_stream->codec->channel_layout = in_fmt_ctx->streams[i]->codec->channel_layout;
out_stream->codec->channels = av_get_channel_layout_nb_channels(out_stream->codec->channel_layout);
// take first format from list of supported formats
out_stream->codec->sample_fmt = out_stream->codec->codec->sample_fmts[0];
AVRational time_base={1, out_stream->codec->sample_rate};
out_stream->codec->time_base = time_base;
//open encoder
if (!out_stream->codec->codec)
{
printf("can not find the encoder!\n");
goto end;
}
if ((avcodec_open2(out_stream->codec, out_stream->codec->codec, NULL)) < 0)
{
printf("can not open the encoder\n");
goto end;
}
if (out_fmt_ctx->oformat->flags & AVFMT_GLOBALHEADER)
out_stream->codec->flags |= CODEC_FLAG_GLOBAL_HEADER;
break;
}
}
//dump input info
av_dump_format(in_fmt_ctx, 0, argv[1], 0);
//dump output info
av_dump_format(out_fmt_ctx, 0, argv[2], 1);
if (-1 == audio_index)
{
printf("found no audio stream in input file!\n");
goto end;
}
if (!(out_fmt_ctx->oformat->flags & AVFMT_NOFILE))
{
if(avio_open(&out_fmt_ctx->pb, argv[2], AVIO_FLAG_WRITE) < 0)
{
printf("can not open output file handle!\n");
goto end;
}
}
if(avformat_write_header(out_fmt_ctx, NULL) < 0)
{
printf("can not write the header of the output file!\n");
goto end;
}
//
initSwr();
int got_frame, got_picture;
int frame_index = 0;
int i = 0;
for(int i = 0; ;i++)
{
pkt_in.data = NULL;
pkt_in.size = 0;
got_frame = -1;
got_picture = -1;
if (av_read_frame(in_fmt_ctx, &pkt_in) < 0)
{
break;
}
if (pkt_in.stream_index != audio_index)
{
continue;
}
frame = av_frame_alloc();
if ((ret = avcodec_decode_audio4(in_fmt_ctx->streams[audio_index]->codec, frame, &got_frame, &pkt_in)) < 0)
{
av_frame_free(&frame);
printf("can not decoder a frame");
break;
}
av_free_packet(&pkt_in);
if (got_frame)
{
frame->pts = av_frame_get_best_effort_timestamp(frame);
frame_out = av_frame_alloc();
if (0 != TransSample(frame, frame_out))
{
printf("can not swr the audio data!\n");
break;
}
av_init_packet(&pkt_out);
int ret = avcodec_encode_audio2(out_fmt_ctx->streams[0]->codec, &pkt_out, frame_out, &got_picture);
av_free(frame_out->data[0]);
av_frame_free(&frame_out);
if (got_picture )
{
pkt_out.stream_index = out_stream->index;
pkt_out.dts = av_rescale_q_rnd(pkt_out.dts,
out_fmt_ctx->streams[out_stream->index]->codec->time_base,
out_fmt_ctx->streams[out_stream->index]->time_base,
(AVRounding)(AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX));
pkt_out.pts = av_rescale_q_rnd(pkt_out.pts,
out_fmt_ctx->streams[out_stream->index]->codec->time_base,
out_fmt_ctx->streams[out_stream->index]->time_base,
(AVRounding)(AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX));
pkt_out.duration = av_rescale_q(pkt_out.duration,
out_fmt_ctx->streams[out_stream->index]->codec->time_base,
out_fmt_ctx->streams[out_stream->index]->time_base);
av_log(NULL, AV_LOG_DEBUG, "Muxing frame\n");
/* mux encoded frame */
ret = av_interleaved_write_frame(out_fmt_ctx, &pkt_out);
av_free_packet(&pkt_out);
if (ret < 0)
{
printf("write a frame failed!\n");
break;
}
printf("success write a frame:index %d\n", frame_index++);
}
}
}
ret = flush_encoder(out_fmt_ctx, out_stream->index);
if (ret < 0)
{
printf("Flushing encoder failed");
return -1;
}
//write file trailer
av_write_trailer(out_fmt_ctx);
//clean
avcodec_close(out_stream->codec);
avcodec_close(in_fmt_ctx->streams[audio_index]->codec);
end:
avformat_close_input(&in_fmt_ctx);
if (out_fmt_ctx && !(out_fmt_ctx->oformat->flags & AVFMT_NOFILE))
{
avio_close(out_fmt_ctx->pb);
}
avformat_free_context(out_fmt_ctx);
getchar();
return 0;
}
4 利用FFmpeg的filter混音
采集线程1采集到数据->解码->写入fifo1 |
采集线程2采集到数据->解码->写入fifo2 | ->主线程编码循环读取2个fifo->把数据压人filter->从filter中读取混音后数据->编码->写入文件
extern "C"
{
#include "libavcodec/avcodec.h"
#include "libavformat/avformat.h"
#include "libavdevice/avdevice.h"
#include "libavfilter/avfilter.h"
#include "libavfilter/avfiltergraph.h"
#include "libavfilter/buffersink.h"
#include "libavfilter/buffersrc.h"
#include "libavutil/audio_fifo.h"
#include "libavutil/avutil.h"
#include "libavutil/fifo.h"
}
#pragma comment(lib, "avcodec.lib")
#pragma comment(lib, "avformat.lib")
#pragma comment(lib, "avutil.lib")
#pragma comment(lib, "avdevice.lib")
#pragma comment(lib, "avfilter.lib")
//#pragma comment(lib, "avfilter.lib")
//#pragma comment(lib, "postproc.lib")
//#pragma comment(lib, "swresample.lib")
#pragma comment(lib, "swscale.lib")
#include <windows.h>
#include <conio.h>
#include <time.h>
// Lifecycle of the capture session; shared (via the global _state) between
// the two capture threads and the main mixing loop.
enum CaptureState
{
    PREPARED,   // setup done, capture not yet started
    RUNNING,    // capture threads are decoding and queueing samples
    STOPPED,    // stop requested; threads should drain and exit
    FINISHED    // all processing complete
};
// NOTE(review): this is a copy of FFmpeg's *private* BufferSourceContext
// (libavfilter/buffersrc.c), duplicated here only so the main loop can peek
// at _filter_ctx_src_spk->priv and compare the negotiated audio parameters.
// The field layout must match the exact FFmpeg build being linked, or those
// reads are undefined — verify against the linked FFmpeg version before use.
// Do not reorder or resize any field.
typedef struct BufferSourceContext {
    const AVClass *bscclass;
    AVFifoBuffer *fifo;
    AVRational time_base; ///< time_base to set in the output link
    AVRational frame_rate; ///< frame_rate to set in the output link
    unsigned nb_failed_requests;
    unsigned warning_limit;
    /* video only */
    int w, h;
    enum AVPixelFormat pix_fmt;
    AVRational pixel_aspect;
    char *sws_param;
    AVBufferRef *hw_frames_ctx;
    /* audio only */
    int sample_rate;
    enum AVSampleFormat sample_fmt;
    int channels;
    uint64_t channel_layout;
    char *channel_layout_str;
    int got_format_from_params;
    int eof;
} BufferSourceContext;
AVFormatContext* _fmt_ctx_spk = NULL;
AVFormatContext* _fmt_ctx_mic = NULL;
AVFormatContext* _fmt_ctx_out = NULL;
int _index_spk = -1;
int _index_mic = -1;
int _index_a_out = -1;
AVFilterGraph* _filter_graph = NULL;
AVFilterContext* _filter_ctx_src_spk = NULL;
AVFilterContext* _filter_ctx_src_mic = NULL;
AVFilterContext* _filter_ctx_sink = NULL;
CaptureState _state = CaptureState::PREPARED;
CRITICAL_SECTION _section_spk;
CRITICAL_SECTION _section_mic;
AVAudioFifo* _fifo_spk = NULL;
AVAudioFifo* _fifo_mic = NULL;
// One-time FFmpeg global initialisation: register all muxers/codecs,
// capture devices (dshow etc.), and filters. Must run before any other
// FFmpeg call in this program.
void InitRecorder()
{
    av_register_all();
    avdevice_register_all();
    avfilter_register_all();
}
int OpenSpeakerInput(char* inputForamt, char* url)
{
AVInputFormat* ifmt = av_find_input_format(inputForamt);
AVDictionary* opt1 = NULL;
av_dict_set(&opt1, "rtbufsize", "10M", 0);
int ret = 0;
ret = avformat_open_input(&_fmt_ctx_spk, url, ifmt, &opt1);
if (ret < 0)
{
printf("Speaker: failed to call avformat_open_input\n");
return -1;
}
ret = avformat_find_stream_info(_fmt_ctx_spk, NULL);
if (ret < 0)
{
printf("Speaker: failed to call avformat_find_stream_info\n");
return -1;
}
for (int i = 0; i < _fmt_ctx_spk->nb_streams; i++)
{
if (_fmt_ctx_spk->streams[i]->codec->codec_type == AVMEDIA_TYPE_AUDIO)
{
_index_spk = i;
break;
}
}
if (_index_spk < 0)
{
printf("Speaker: negative audio index\n");
return -1;
}
AVCodecContext* codec_ctx = _fmt_ctx_spk->streams[_index_spk]->codec;
AVCodec* codec = avcodec_find_decoder(codec_ctx->codec_id);
if (codec == NULL)
{
printf("Speaker: null audio decoder\n");
return -1;
}
ret = avcodec_open2(codec_ctx, codec, NULL);
if (ret < 0)
{
printf("Speaker: failed to call avcodec_open2\n");
return -1;
}
av_dump_format(_fmt_ctx_spk, _index_spk, url, 0);
return 0;
}
int OpenMicrophoneInput(char* inputForamt, char* url)
{
AVInputFormat* ifmt = av_find_input_format(inputForamt);
AVDictionary* opt1 = NULL;
av_dict_set(&opt1, "rtbufsize", "10M", 0);
int ret = 0;
ret = avformat_open_input(&_fmt_ctx_mic, url, ifmt, &opt1);
if (ret < 0)
{
printf("Microphone: failed to call avformat_open_input\n");
return -1;
}
ret = avformat_find_stream_info(_fmt_ctx_mic, NULL);
if (ret < 0)
{
printf("Microphone: failed to call avformat_find_stream_info\n");
return -1;
}
for (int i = 0; i < _fmt_ctx_mic->nb_streams; i++)
{
if (_fmt_ctx_mic->streams[i]->codec->codec_type == AVMEDIA_TYPE_AUDIO)
{
_index_mic = i;
break;
}
}
if (_index_mic < 0)
{
printf("Microphone: negative audio index\n");
return -1;
}
AVCodecContext* codec_ctx = _fmt_ctx_mic->streams[_index_mic]->codec;
AVCodec* codec = avcodec_find_decoder(codec_ctx->codec_id);
if (codec == NULL)
{
printf("Microphone: null audio decoder\n");
return -1;
}
ret = avcodec_open2(codec_ctx, codec, NULL);
if (ret < 0)
{
printf("Microphone: failed to call avcodec_open2\n");
return -1;
}
av_dump_format(_fmt_ctx_mic, _index_mic, url, 0);
return 0;
}
int OpenFileOutput(char* fileName)
{
    /*
     * Create the output file: one mono 16 kHz MP3 stream, write the
     * container header, and allocate the two capture FIFOs (sized for ~30
     * encoder frames each). Requires both inputs to be open already.
     * Returns 0 on success, -1 on error.
     */
    int ret = 0;
    ret = avformat_alloc_output_context2(&_fmt_ctx_out, NULL, NULL, fileName);
    if (ret < 0)
    {
        printf("Mixer: failed to call avformat_alloc_output_context2\n");
        return -1;
    }
    AVStream* stream_a = NULL;
    stream_a = avformat_new_stream(_fmt_ctx_out, NULL);
    if (stream_a == NULL)
    {
        printf("Mixer: failed to call avformat_new_stream\n");
        return -1;
    }
    _index_a_out = 0;
    // Configure the MP3 encoder: mono, 16 kHz, 16 kbit/s.
    stream_a->codec->codec_type = AVMEDIA_TYPE_AUDIO;
    AVCodec* codec_mp3 = avcodec_find_encoder(AV_CODEC_ID_MP3);
    stream_a->codec->codec = codec_mp3;
    stream_a->codec->sample_rate = 16000;
    stream_a->codec->channels = 1;
    stream_a->codec->channel_layout = av_get_default_channel_layout(1);
    stream_a->codec->sample_fmt = codec_mp3->sample_fmts[0]; // encoder's preferred format
    stream_a->codec->bit_rate = 16000;
    stream_a->codec->time_base.num = 1;
    stream_a->codec->time_base.den = stream_a->codec->sample_rate;
    stream_a->codec->codec_tag = 0;
    if (_fmt_ctx_out->oformat->flags & AVFMT_GLOBALHEADER)
        stream_a->codec->flags |= CODEC_FLAG_GLOBAL_HEADER;
    if (avcodec_open2(stream_a->codec, stream_a->codec->codec, NULL) < 0)
    {
        printf("Mixer: failed to call avcodec_open2\n");
        return -1;
    }
    if (!(_fmt_ctx_out->oformat->flags & AVFMT_NOFILE))
    {
        if (avio_open(&_fmt_ctx_out->pb, fileName, AVIO_FLAG_WRITE) < 0)
        {
            printf("Mixer: failed to call avio_open\n");
            return -1;
        }
    }
    if (avformat_write_header(_fmt_ctx_out, NULL) < 0)
    {
        printf("Mixer: failed to call avformat_write_header\n");
        return -1;
    }
    av_dump_format(_fmt_ctx_out, _index_a_out, fileName, 1);
    // FIFOs between the capture threads and the mixing loop, sized for
    // roughly 30 encoder frames.
    _fifo_spk = av_audio_fifo_alloc(_fmt_ctx_spk->streams[_index_spk]->codec->sample_fmt, _fmt_ctx_spk->streams[_index_spk]->codec->channels, 30*_fmt_ctx_spk->streams[_index_spk]->codec->frame_size);
    // FIX: the mic FIFO was sized with the *speaker* stream's frame_size
    // (copy-paste: _index_spk instead of _index_mic).
    _fifo_mic = av_audio_fifo_alloc(_fmt_ctx_mic->streams[_index_mic]->codec->sample_fmt, _fmt_ctx_mic->streams[_index_mic]->codec->channels, 30*_fmt_ctx_mic->streams[_index_mic]->codec->frame_size);
    if (_fifo_spk == NULL || _fifo_mic == NULL)
    {
        // FIX: allocation results were never checked.
        printf("Mixer: failed to allocate audio fifo\n");
        return -1;
    }
    return 0;
}
int InitFilter(char* filter_desc)
{
    /*
     * Build the mixing graph: two "abuffer" sources (speaker pad "in0",
     * microphone pad "in1") feeding filter_desc (an amix graph), terminated
     * by an "abuffersink" constrained to the output encoder's sample
     * format, channel layout and sample rate. Returns 0 on success, -1 on error.
     */
    char args_spk[512];
    char* pad_name_spk = "in0";
    char args_mic[512];
    char* pad_name_mic = "in1";
    AVFilter* filter_src_spk = avfilter_get_by_name("abuffer");
    AVFilter* filter_src_mic = avfilter_get_by_name("abuffer");
    AVFilter* filter_sink = avfilter_get_by_name("abuffersink");
    AVFilterInOut* filter_output_spk = avfilter_inout_alloc();
    AVFilterInOut* filter_output_mic = avfilter_inout_alloc();
    AVFilterInOut* filter_input = avfilter_inout_alloc();
    _filter_graph = avfilter_graph_alloc();
    // Describe each source buffer with its stream's audio parameters.
    sprintf_s(args_spk, sizeof(args_spk), "time_base=%d/%d:sample_rate=%d:sample_fmt=%s:channel_layout=0x%I64x",
        _fmt_ctx_spk->streams[_index_spk]->codec->time_base.num,
        _fmt_ctx_spk->streams[_index_spk]->codec->time_base.den,
        _fmt_ctx_spk->streams[_index_spk]->codec->sample_rate,
        av_get_sample_fmt_name(_fmt_ctx_spk->streams[_index_spk]->codec->sample_fmt),
        _fmt_ctx_spk->streams[_index_spk]->codec->channel_layout);
    sprintf_s(args_mic, sizeof(args_mic), "time_base=%d/%d:sample_rate=%d:sample_fmt=%s:channel_layout=0x%I64x",
        _fmt_ctx_mic->streams[_index_mic]->codec->time_base.num,
        _fmt_ctx_mic->streams[_index_mic]->codec->time_base.den,
        _fmt_ctx_mic->streams[_index_mic]->codec->sample_rate,
        av_get_sample_fmt_name(_fmt_ctx_mic->streams[_index_mic]->codec->sample_fmt),
        _fmt_ctx_mic->streams[_index_mic]->codec->channel_layout);
    int ret = 0;
    ret = avfilter_graph_create_filter(&_filter_ctx_src_spk, filter_src_spk, pad_name_spk, args_spk, NULL, _filter_graph);
    if (ret < 0)
    {
        printf("Filter: failed to call avfilter_graph_create_filter -- src spk\n");
        return -1;
    }
    ret = avfilter_graph_create_filter(&_filter_ctx_src_mic, filter_src_mic, pad_name_mic, args_mic, NULL, _filter_graph);
    if (ret < 0)
    {
        printf("Filter: failed to call avfilter_graph_create_filter -- src mic\n");
        return -1;
    }
    ret = avfilter_graph_create_filter(&_filter_ctx_sink, filter_sink, "out", NULL, NULL, _filter_graph);
    if (ret < 0)
    {
        printf("Filter: failed to call avfilter_graph_create_filter -- sink\n");
        return -1;
    }
    // Constrain the sink to exactly what the output encoder expects.
    AVCodecContext* encodec_ctx = _fmt_ctx_out->streams[_index_a_out]->codec;
    ret = av_opt_set_bin(_filter_ctx_sink, "sample_fmts", (uint8_t*)&encodec_ctx->sample_fmt, sizeof(encodec_ctx->sample_fmt), AV_OPT_SEARCH_CHILDREN);
    if (ret < 0)
    {
        printf("Filter: failed to call av_opt_set_bin -- sample_fmts\n");
        return -1;
    }
    ret = av_opt_set_bin(_filter_ctx_sink, "channel_layouts", (uint8_t*)&encodec_ctx->channel_layout, sizeof(encodec_ctx->channel_layout), AV_OPT_SEARCH_CHILDREN);
    if (ret < 0)
    {
        printf("Filter: failed to call av_opt_set_bin -- channel_layouts\n");
        return -1;
    }
    ret = av_opt_set_bin(_filter_ctx_sink, "sample_rates", (uint8_t*)&encodec_ctx->sample_rate, sizeof(encodec_ctx->sample_rate), AV_OPT_SEARCH_CHILDREN);
    if (ret < 0)
    {
        printf("Filter: failed to call av_opt_set_bin -- sample_rates\n");
        return -1;
    }
    // Wire the graph endpoints: the two sources are the graph's "outputs"
    // (they feed data in), the sink is its "input" (it accepts the result).
    filter_output_spk->name = av_strdup(pad_name_spk);
    filter_output_spk->filter_ctx = _filter_ctx_src_spk;
    filter_output_spk->pad_idx = 0;
    filter_output_spk->next = filter_output_mic;
    filter_output_mic->name = av_strdup(pad_name_mic);
    filter_output_mic->filter_ctx = _filter_ctx_src_mic;
    filter_output_mic->pad_idx = 0;
    filter_output_mic->next = NULL;
    filter_input->name = av_strdup("out");
    filter_input->filter_ctx = _filter_ctx_sink;
    filter_input->pad_idx = 0;
    filter_input->next = NULL;
    AVFilterInOut* filter_outputs[2];
    filter_outputs[0] = filter_output_spk;
    filter_outputs[1] = filter_output_mic;
    ret = avfilter_graph_parse_ptr(_filter_graph, filter_desc, &filter_input, filter_outputs, NULL);
    if (ret < 0)
    {
        printf("Filter: failed to call avfilter_graph_parse_ptr\n");
        return -1;
    }
    ret = avfilter_graph_config(_filter_graph, NULL);
    if (ret < 0)
    {
        printf("Filter: failed to call avfilter_graph_config\n");
        return -1;
    }
    avfilter_inout_free(&filter_input);
    // FIX: the original av_free()'d filter_src_spk/filter_src_mic, but
    // avfilter_get_by_name() returns static registry descriptors that must
    // never be freed.
    avfilter_inout_free(filter_outputs);
    char* temp = avfilter_graph_dump(_filter_graph, NULL);
    printf("%s\n", temp);
    av_free(temp); // FIX: the dump string is heap-allocated by FFmpeg
    return 0;
}
DWORD WINAPI SpeakerCapThreadProc(LPVOID lpParam)
{
    /*
     * Speaker capture thread: read packets from _fmt_ctx_spk, decode them,
     * and append the decoded samples to _fifo_spk (guarded by _section_spk)
     * for as long as _state stays RUNNING.
     */
    AVFrame* pFrame = av_frame_alloc();
    AVPacket packet;
    av_init_packet(&packet);
    int got_sound;
    while (_state == CaptureState::RUNNING)
    {
        packet.data = NULL;
        packet.size = 0;
        if (av_read_frame(_fmt_ctx_spk, &packet) < 0)
        {
            continue; // transient read failure; retry
        }
        if (packet.stream_index != _index_spk)
        {
            av_free_packet(&packet); // FIX: foreign-stream packets were leaked
            continue;
        }
        got_sound = 0;
        int dec = avcodec_decode_audio4(_fmt_ctx_spk->streams[_index_spk]->codec, pFrame, &got_sound, &packet);
        av_free_packet(&packet); // FIX: also freed on the decode-error path now
        if (dec < 0)
        {
            break;
        }
        if (!got_sound)
        {
            continue;
        }
        // Back off while the FIFO has no room for this frame.
        int fifo_spk_space = av_audio_fifo_space(_fifo_spk);
        while (fifo_spk_space < pFrame->nb_samples && _state == CaptureState::RUNNING)
        {
            Sleep(10);
            printf("_fifo_spk full !\n");
            fifo_spk_space = av_audio_fifo_space(_fifo_spk);
        }
        if (fifo_spk_space >= pFrame->nb_samples)
        {
            EnterCriticalSection(&_section_spk);
            av_audio_fifo_write(_fifo_spk, (void**)pFrame->data, pFrame->nb_samples);
            LeaveCriticalSection(&_section_spk);
        }
    }
    av_frame_free(&pFrame);
    return 0;
}
DWORD WINAPI MicrophoneCapThreadProc(LPVOID lpParam)
{
    /*
     * Microphone capture thread: wait for the session to start, then read
     * packets from _fmt_ctx_mic, decode them, and append the samples to
     * _fifo_mic (guarded by _section_mic) while _state stays RUNNING.
     */
    AVFrame* pFrame = av_frame_alloc();
    AVPacket packet;
    av_init_packet(&packet);
    int got_sound;
    // Wait for the main thread to switch the state to RUNNING.
    // NOTE(review): _state is neither volatile nor atomic, so this spin
    // relies on the compiler re-loading it each iteration — confirm, or
    // make the flag atomic.
    while (_state == CaptureState::PREPARED)
    {
        Sleep(1); // FIX: the original spun on an empty loop, burning a core
    }
    while (_state == CaptureState::RUNNING)
    {
        packet.data = NULL;
        packet.size = 0;
        if (av_read_frame(_fmt_ctx_mic, &packet) < 0)
        {
            continue; // transient read failure; retry
        }
        if (packet.stream_index != _index_mic)
        {
            av_free_packet(&packet); // FIX: foreign-stream packets were leaked
            continue;
        }
        got_sound = 0;
        int dec = avcodec_decode_audio4(_fmt_ctx_mic->streams[_index_mic]->codec, pFrame, &got_sound, &packet);
        av_free_packet(&packet); // FIX: also freed on the decode-error path now
        if (dec < 0)
        {
            break;
        }
        if (!got_sound)
        {
            continue;
        }
        // Back off while the FIFO has no room for this frame.
        int fifo_mic_space = av_audio_fifo_space(_fifo_mic);
        while (fifo_mic_space < pFrame->nb_samples && _state == CaptureState::RUNNING)
        {
            Sleep(10);
            printf("_fifo_mic full !\n");
            fifo_mic_space = av_audio_fifo_space(_fifo_mic);
        }
        if (fifo_mic_space >= pFrame->nb_samples)
        {
            EnterCriticalSection(&_section_mic);
            av_audio_fifo_write(_fifo_mic, (void**)pFrame->data, pFrame->nb_samples);
            LeaveCriticalSection(&_section_mic);
        }
    }
    av_frame_free(&pFrame);
    return 0;
}
int main()
{
    /*
     * Mix two audio inputs into one MP3 file. Flow:
     *   open speaker + microphone inputs -> open the MP3 output -> build the
     *   amix filter graph -> start two capture threads that fill
     *   _fifo_spk/_fifo_mic -> this loop drains both FIFOs, pushes one frame
     *   from each through the graph, encodes the mixed result and muxes it.
     * Any keypress stops the recording.
     */
    int ret = 0;
    InitRecorder();
    // Output file name derived from the local time, e.g. "2024_5_1_12_0_0.mp3".
    char fileName[128];
    char* outFileType = ".mp3";
    time_t rawtime;
    tm* timeInfo;
    time(&rawtime);
    timeInfo = localtime(&rawtime);
    sprintf_s(fileName, sizeof(fileName), "%d_%d_%d_%d_%d_%d%s",
        timeInfo->tm_year + 1900, timeInfo->tm_mon + 1, timeInfo->tm_mday,
        timeInfo->tm_hour, timeInfo->tm_min, timeInfo->tm_sec, outFileType);
    // Two labeled inputs mixed into one output by the amix filter.
    char* filter_desc = "[in0][in1]amix=inputs=2[out]";
    // File-based inputs for testing; the dshow device lines are kept for reference.
    //ret = OpenSpeakerInput("dshow", "audio=virtual-audio-capturer");
    ret = OpenSpeakerInput(NULL, "aa.mp3");
    if (ret < 0)
    {
        goto Release;
    }
    //ret = OpenMicrophoneInput("dshow", "audio=External Microphone (Conexant S");
    ret = OpenMicrophoneInput(NULL, "bb.mp3");
    if (ret < 0)
    {
        goto Release;
    }
    ret = OpenFileOutput(fileName);
    if (ret < 0)
    {
        goto Release;
    }
    ret = InitFilter(filter_desc);
    if (ret < 0)
    {
        goto Release;
    }
    // Start capture; the worker threads only run while _state == RUNNING.
    // NOTE(review): _state is a plain enum shared across threads without
    // atomics/volatile — confirm visibility guarantees on the target compiler.
    _state = CaptureState::RUNNING;
    InitializeCriticalSection(&_section_spk);
    InitializeCriticalSection(&_section_mic);
    CreateThread(NULL, 0, SpeakerCapThreadProc, 0, 0, NULL);
    CreateThread(NULL, 0, MicrophoneCapThreadProc, 0, 0, NULL);
    int tmpFifoFailed = 0;  // consecutive under-run count; aborts after ~6 s
    int64_t frame_count = 0;
    while (_state != CaptureState::FINISHED)
    {
        // Any keypress stops the recording.
        if (_kbhit())
        {
            _state = CaptureState::STOPPED;
            break;
        }
        else
        {
            int ret = 0;
            AVFrame* pFrame_spk = av_frame_alloc();
            AVFrame* pFrame_mic = av_frame_alloc();
            AVPacket packet_out;
            int got_packet_ptr = 0;
            // Proceed only when both FIFOs hold at least one encoder frame.
            int fifo_spk_size = av_audio_fifo_size(_fifo_spk);
            int fifo_mic_size = av_audio_fifo_size(_fifo_mic);
            int frame_spk_min_size = _fmt_ctx_spk->streams[_index_spk]->codec->frame_size;
            int frame_mic_min_size = _fmt_ctx_mic->streams[_index_mic]->codec->frame_size;
            if (fifo_spk_size >= frame_spk_min_size && fifo_mic_size >= frame_mic_min_size)
            {
                tmpFifoFailed = 0;
                // Describe and allocate one input frame per source, then pull
                // the samples out of the FIFOs under their respective locks.
                pFrame_spk->nb_samples = frame_spk_min_size;
                pFrame_spk->channel_layout = _fmt_ctx_spk->streams[_index_spk]->codec->channel_layout;
                pFrame_spk->format = _fmt_ctx_spk->streams[_index_spk]->codec->sample_fmt;
                pFrame_spk->sample_rate = _fmt_ctx_spk->streams[_index_spk]->codec->sample_rate;
                av_frame_get_buffer(pFrame_spk, 0);
                pFrame_mic->nb_samples = frame_mic_min_size;
                pFrame_mic->channel_layout = _fmt_ctx_mic->streams[_index_mic]->codec->channel_layout;
                pFrame_mic->format = _fmt_ctx_mic->streams[_index_mic]->codec->sample_fmt;
                pFrame_mic->sample_rate = _fmt_ctx_mic->streams[_index_mic]->codec->sample_rate;
                av_frame_get_buffer(pFrame_mic, 0);
                EnterCriticalSection(&_section_spk);
                ret = av_audio_fifo_read(_fifo_spk, (void**)pFrame_spk->data, frame_spk_min_size);
                LeaveCriticalSection(&_section_spk);
                EnterCriticalSection(&_section_mic);
                ret = av_audio_fifo_read(_fifo_mic, (void**)pFrame_mic->data, frame_mic_min_size);
                LeaveCriticalSection(&_section_mic);
                pFrame_spk->pts = av_frame_get_best_effort_timestamp(pFrame_spk);
                pFrame_mic->pts = av_frame_get_best_effort_timestamp(pFrame_mic);
                // Debug aid: peek at the buffersrc's private context to compare
                // its negotiated parameters with the frame; results unused.
                BufferSourceContext* s = (BufferSourceContext*)_filter_ctx_src_spk->priv;
                bool b1 = (s->sample_fmt != pFrame_spk->format);
                bool b2 = (s->sample_rate != pFrame_spk->sample_rate);
                bool b3 = (s->channel_layout != pFrame_spk->channel_layout);
                bool b4 = (s->channels != pFrame_spk->channels);
                // Push one frame from each source into the filter graph.
                ret = av_buffersrc_add_frame(_filter_ctx_src_spk, pFrame_spk);
                if (ret < 0)
                {
                    printf("Mixer: failed to call av_buffersrc_add_frame (speaker)\n");
                    break;
                }
                ret = av_buffersrc_add_frame(_filter_ctx_src_mic, pFrame_mic);
                if (ret < 0)
                {
                    printf("Mixer: failed to call av_buffersrc_add_frame (microphone)\n");
                    break;
                }
                // Drain all mixed frames the sink can currently produce,
                // encoding and muxing each one.
                while (1)
                {
                    AVFrame* pFrame_out = av_frame_alloc();
                    ret = av_buffersink_get_frame_flags(_filter_ctx_sink, pFrame_out, 0);
                    if (ret < 0)
                    {
                        // Normal exit: sink needs more input (or EOF).
                        printf("Mixer: failed to call av_buffersink_get_frame_flags\n");
                        break;
                    }
                    if (pFrame_out->data[0] != NULL)
                    {
                        av_init_packet(&packet_out);
                        packet_out.data = NULL;  // encoder allocates the payload
                        packet_out.size = 0;
                        ret = avcodec_encode_audio2(_fmt_ctx_out->streams[_index_a_out]->codec, &packet_out, pFrame_out, &got_packet_ptr);
                        if (ret < 0)
                        {
                            printf("Mixer: failed to call avcodec_decode_audio4\n");
                            break;
                        }
                        if (got_packet_ptr)
                        {
                            // Timestamps counted in encoded frames, then
                            // rescaled from codec to stream time base.
                            packet_out.stream_index = _index_a_out;
                            packet_out.pts = frame_count * _fmt_ctx_out->streams[_index_a_out]->codec->frame_size;
                            packet_out.dts = packet_out.pts;
                            packet_out.duration = _fmt_ctx_out->streams[_index_a_out]->codec->frame_size;
                            packet_out.pts = av_rescale_q_rnd(packet_out.pts,
                                _fmt_ctx_out->streams[_index_a_out]->codec->time_base,
                                _fmt_ctx_out->streams[_index_a_out]->time_base,
                                (AVRounding)(AV_ROUND_NEAR_INF | AV_ROUND_PASS_MINMAX));
                            packet_out.dts = packet_out.pts;
                            packet_out.duration = av_rescale_q_rnd(packet_out.duration,
                                _fmt_ctx_out->streams[_index_a_out]->codec->time_base,
                                _fmt_ctx_out->streams[_index_a_out]->time_base,
                                (AVRounding)(AV_ROUND_NEAR_INF | AV_ROUND_PASS_MINMAX));
                            frame_count++;
                            ret = av_interleaved_write_frame(_fmt_ctx_out, &packet_out);
                            if (ret < 0)
                            {
                                printf("Mixer: failed to call av_interleaved_write_frame\n");
                            }
                            printf("Mixer: write frame to file\n");
                        }
                        av_free_packet(&packet_out);
                    }
                    av_frame_free(&pFrame_out);
                }
            }
            else
            {
                // FIFO under-run: back off, give up after ~300 misses (~6 s).
                tmpFifoFailed++;
                Sleep(20);
                if (tmpFifoFailed > 300)
                {
                    _state = CaptureState::STOPPED;
                    Sleep(30);
                    break;
                }
            }
            av_frame_free(&pFrame_spk);
            av_frame_free(&pFrame_mic);
        }
    }
    // Finalize the container.
    av_write_trailer(_fmt_ctx_out);
Release:
    av_audio_fifo_free(_fifo_spk);
    av_audio_fifo_free(_fifo_mic);
    avfilter_free(_filter_ctx_src_spk);
    avfilter_free(_filter_ctx_src_mic);
    avfilter_free(_filter_ctx_sink);
    avfilter_graph_free(&_filter_graph);
    if (_fmt_ctx_out)
    {
        avio_close(_fmt_ctx_out->pb);
    }
    avformat_close_input(&_fmt_ctx_spk);
    avformat_close_input(&_fmt_ctx_mic);
    avformat_free_context(_fmt_ctx_out);
    return ret;
}
http://blog.csdn.net/leixiaohua1020/article/details/25430449
http://blog.csdn.net/dancing_night/article/details/45642107