最近测试提出了一个bug：ijk获取到的aac文件的duration不准。文件发来一看，确实不准：在AE或者系统mediaplayer中得到的都是3m48s（准确时间是MMParserExtractor: ADTS: duration = 228010580us，如下图），ijk得到的是2m54s，播放到2m54s的时候流就结束了；放到自己编译的ffmpeg中，Duration居然是00:03:13.07，而VLC是3m53s，这个文件也是奇葩了！其他播放器暂时不去讨论，现在只希望做到MMParserExtractor与IJKPlayer获得的时长一致即可！

1、分析问题

下面开始分析这个问题，命令行看下这个文件，ffmpeg中获取到的确实是3m13s

仔细看下红色箭头所指，这个意思是获取到的duration是根据比特率计算的，可能不准确。这种获取音视频info有问题的我们一般可以从avformat_find_stream_info函数开始分析。

这里直接从log开始看，warning出现在utils.c/libavformat下

/**
 * Estimate missing stream durations from the file size and the total bit rate.
 *
 * If ic->bit_rate is not set, it is first derived by summing the per-stream
 * bit rates. Then, when the container duration is still unknown and a bit
 * rate is available, every stream without a duration gets
 *     duration = 8 * payload_bytes / bit_rate
 * rescaled into the stream time base. A warning is logged whenever such an
 * estimate is applied.
 *
 * The "-->" log lines are debugging traces; they print int64_t values and
 * therefore use PRId64 rather than %lld.
 *
 * @param ic format context whose bit_rate and stream durations may be updated
 */
static void estimate_timings_from_bit_rate(AVFormatContext *ic)
{
    int64_t filesize, duration;
    int i, show_warning = 0;
    AVStream *st;

    av_log(ic, AV_LOG_WARNING, "-->ic->bit_rate:%" PRId64 "\n", (int64_t)ic->bit_rate);
    // For the problem file the log shows that no bit rate was obtained either:
    // bitrate = 0
    /* if bit_rate is already set, we believe it */
    if (ic->bit_rate <= 0) {
        int64_t bit_rate = 0;
        for (i = 0; i < ic->nb_streams; i++) {
            st = ic->streams[i];

            if (st->codecpar->bit_rate <= 0 && st->internal->avctx->bit_rate > 0)
                st->codecpar->bit_rate = st->internal->avctx->bit_rate;
            if (st->codecpar->bit_rate > 0) {
                /* the sum would overflow int64_t: treat it as unknown */
                if (INT64_MAX - st->codecpar->bit_rate < bit_rate) {
                    bit_rate = 0;
                    break;
                }
                bit_rate += st->codecpar->bit_rate;
            } else if (st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO && st->codec_info_nb_frames > 1) {
                // If we have a videostream with packets but without a bitrate
                // then consider the sum not known
                bit_rate = 0;
                break;
            }
        }
        // A total bit rate has been computed at this point
        ic->bit_rate = bit_rate;
        av_log(ic, AV_LOG_WARNING, "-->ic->bit_rate:%" PRId64 "\n", (int64_t)ic->bit_rate);
    }
    // For the problem file the log shows that the duration is not known here either

    /* if duration is already set, we believe it */
    av_log(ic, AV_LOG_WARNING,"-->ic->duration:%" PRId64 "\n", (int64_t)ic->duration);
    if (ic->duration == AV_NOPTS_VALUE &&
        ic->bit_rate != 0) {
        filesize = ic->pb ? avio_size(ic->pb) : 0;
        av_log(ic, AV_LOG_WARNING,"-->ic->filesize:%" PRId64 "\n", filesize);
        if (filesize > ic->internal->data_offset) {
            /* only the payload after the container header counts */
            filesize -= ic->internal->data_offset;
            for (i = 0; i < ic->nb_streams; i++) {
                st      = ic->streams[i];
                if (   st->time_base.num <= INT64_MAX / ic->bit_rate
                    && st->duration == AV_NOPTS_VALUE) {
                    // duration = file bytes * 8 / bit rate. This works for
                    // CBR content, but for VBR (varying bit rate) the
                    // result is wrong.
                    duration = av_rescale(8 * filesize, st->time_base.den,
                                          ic->bit_rate *
                                          (int64_t) st->time_base.num);
                    // ... so the duration stored here is inaccurate
                    st->duration = duration;
                    show_warning = 1;
                }
            }
        }
    }
    if (show_warning)
        av_log(ic, AV_LOG_WARNING,
               "Estimating duration from bitrate, this may be inaccurate\n");
}

调用上面这个函数的地方是utils.c/libavformat：

/**
 * Choose and run a duration estimation strategy for the whole context.
 *
 * Order of preference, as implemented below:
 *   1. "mpeg"/"mpegts" on a seekable, non-empty file: estimate from PTS values;
 *   2. has_duration(ic) is true: propagate the known timings to all streams;
 *   3. otherwise: fall back to the (less precise) bit-rate based estimate.
 * The chosen method is recorded in ic->duration_estimation_method.
 *
 * @param ic         format context to update
 * @param old_offset passed through to estimate_timings_from_pts()
 */
static void estimate_timings(AVFormatContext *ic, int64_t old_offset)
{
    int64_t file_size;

    /* get the file size, if possible */
    if (ic->iformat->flags & AVFMT_NOFILE) {
        file_size = 0;
    } else {
        file_size = avio_size(ic->pb);
        file_size = FFMAX(0, file_size);
    }
    av_log(ic, AV_LOG_WARNING, "->ic->iformat->name:%s\n", ic->iformat->name);
    av_log(ic, AV_LOG_WARNING, "->file_size:%" PRId64 "\n", file_size);
    /* ic->pb may be NULL (e.g. AVFMT_NOFILE formats); the condition below only
     * touches it after checking file_size, so the debug trace must not
     * dereference it unconditionally either. */
    av_log(ic, AV_LOG_WARNING, "->ic->pb->seekable:%d\n",
           ic->pb ? ic->pb->seekable : 0);

    if ((!strcmp(ic->iformat->name, "mpeg") ||
         !strcmp(ic->iformat->name, "mpegts")) &&
        file_size && (ic->pb->seekable & AVIO_SEEKABLE_NORMAL)) {
        /* get accurate estimate from the PTSes */
        estimate_timings_from_pts(ic, old_offset);
        ic->duration_estimation_method = AVFMT_DURATION_FROM_PTS;
    } else if (has_duration(ic)) {
        // The demuxer already obtained a duration
        /* at least one component has timings - we use them for all
         * the components */
        fill_all_stream_timings(ic);
        ic->duration_estimation_method = AVFMT_DURATION_FROM_STREAM;
    } else {
        // The problem file has no duration from the demuxer, so it ends up here
        /* less precise: use bitrate info */
        estimate_timings_from_bit_rate(ic);
        ic->duration_estimation_method = AVFMT_DURATION_FROM_BITRATE;
    }
    update_stream_timings(ic);

    {
        int i;
        AVStream av_unused *st;
        for (i = 0; i < ic->nb_streams; i++) {
            st = ic->streams[i];
            av_log(ic, AV_LOG_TRACE, "stream %d: start_time: %0.3f duration: %0.3f\n", i,
                   (double) st->start_time * av_q2d(st->time_base),
                   (double) st->duration   * av_q2d(st->time_base));
        }
        av_log(ic, AV_LOG_TRACE,
                "format: start_time: %0.3f duration: %0.3f bitrate=%" PRId64 " kb/s\n",
                (double) ic->start_time / AV_TIME_BASE,
                (double) ic->duration   / AV_TIME_BASE,
                (int64_t)ic->bit_rate / 1000);
    }
}

调用上面这个方法是在avformat_find_stream_info/utils.c/libavformat函数中。

2、解决方案探究

原因已经知道了，那么可以如何解决这个问题呢？

aac的duration可以如何获取呢？

我们看下android系统中libstagefright框架中AACExtractor的实现

// Constructs an extractor for an ADTS AAC data source.
//
// If no sniffing result (_meta) is supplied, the source is sniffed here.
// The constructor then parses profile, sampling-frequency index and channel
// configuration from the header bytes at "offset" + 2, builds the codec
// specific data and, when the source size is known, walks every ADTS frame to
// record its offset and to compute the total duration.
//
// mInitCheck stays NO_INIT on every early return and is set to OK only when
// the end of the constructor is reached.
AACExtractor::AACExtractor(
        const sp<DataSource> &source, const sp<AMessage> &_meta)
    : mDataSource(source),
      mInitCheck(NO_INIT),
      mFrameDurationUs(0) {
    sp<AMessage> meta = _meta;

    if (meta == NULL) {
        String8 mimeType;
        float confidence;

        // SniffAAC fills in `meta` (including the "offset" entry read below).
        if (!SniffAAC(mDataSource, &mimeType, &confidence, &meta)) {
            return;
        }
    }

    int64_t offset;
    CHECK(meta->findInt64("offset", &offset));

    uint8_t profile, sf_index, channel, header[2];
    if (mDataSource->readAt(offset + 2, &header, 2) < 2) {
        return;
    }
    // Profile: top two bits of the first byte read.
    profile = (header[0] >> 6) & 0x3;
    // Sampling-frequency index: the next four bits.
    sf_index = (header[0] >> 2) & 0xf;
    // Translate the index into a sample rate; 0 means the index is unknown.
    uint32_t sr = get_sample_rate(sf_index);
    if (sr == 0) {
        return;
    }
    // Channel configuration: one bit from the first byte, two from the second.
    channel = (header[0] & 0x1) << 2 | (header[1] >> 6);

    mMeta = MakeAACCodecSpecificData(profile, sf_index, channel);

    off64_t streamSize, numFrames = 0;
    size_t frameSize = 0;
    int64_t duration = 0;
    // The frame walk is only possible when the total size is known.
    if (mDataSource->getSize(&streamSize) == OK) {
         while (offset < streamSize) {
            // Length of the ADTS frame at `offset`; 0 means it could not be
            // parsed, which aborts construction.
            if ((frameSize = getAdtsFrameLength(source, offset, NULL)) == 0) {
                return;
            }

            mOffsetVector.push(offset);

            offset += frameSize;  // advance to the next frame
            numFrames ++;         // count frames
        }

        // Round up and get the duration
        // Per-frame duration in microseconds, assuming 1024 samples per frame.
        mFrameDurationUs = (1024 * 1000000ll + (sr - 1)) / sr;
        // Total duration = number of frames x duration of one frame.
        duration = numFrames * mFrameDurationUs;
        mMeta->setInt64(kKeyDuration, duration);
    }

    mInitCheck = OK;
}

我们再看下getAdtsFrameLength/AACExtractor.cpp/libstagefright函数，这个函数其实就是根据adts头来计算出每一个frame的大小的

static size_t getAdtsFrameLength(const sp<DataSource> &source, off64_t offset, size_t* headerSize) {
//CRC
    const size_t kAdtsHeaderLengthNoCrc = 7;
    const size_t kAdtsHeaderLengthWithCrc = 9;

    size_t frameSize = 0;
//同步字
    uint8_t syncword[2];
    if (source->readAt(offset, &syncword, 2) != 2) {
        return 0;
    }
    if ((syncword[0] != 0xff) || ((syncword[1] & 0xf6) != 0xf0)) {
        return 0;
    }
//0没有crc，1有crc
    uint8_t protectionAbsent;
	
    if (source->readAt(offset + 1, &protectionAbsent, 1) < 1) {
        return 0;
    }
    protectionAbsent &= 0x1;

    uint8_t header[3];
    if (source->readAt(offset + 3, &header, 3) < 3) {
        return 0;
    }
//获取framesize的大小
    frameSize = (header[0] & 0x3) << 11 | header[1] << 3 | header[2] >> 5;

    // protectionAbsent is 0 if there is CRC
    size_t headSize = protectionAbsent ? kAdtsHeaderLengthNoCrc : kAdtsHeaderLengthWithCrc;
    if (headSize > frameSize) {
        return 0;
    }
    if (headerSize != NULL) {
        *headerSize = headSize;
    }

    return frameSize;
}

上面的实现原理就是根据一个AAC原始帧包含一段时间内1024个采样及相关数据。一个AAC音频帧的播放时间=一个AAC帧对应的采样样本的个数/采样率。所以aac音频文件总时间t=总帧数x一个AAC音频帧的播放时间。

下面看一下aac的demuxer，在aacdec.c/libavformat下，发现里面连对ADIF头的处理都没有，这个先不管了。

AAC格式介绍

首先需要了解的是AAC文件格式有ADIF和ADTS两种，其中ADIF（Audio Data Interchange Format 音频数据交换格式）的特征是解码必须在明确定义的开始处进行，不能从数据流中间开始；而ADTS（Audio Data Transport Stream 音频数据传输流）则相反，这种格式的特征是有同步字，解码可以在这个流中任何位置开始，正如它的名字一样，这是一种和TS流类似的格式。

ADTS格式中每一帧都有头信息，具备流特征，适合于网络传输与处理，而ADIF只有一个统一的头，并且这两种格式的header格式也是不同的。目前主流使用的都是ADTS格式。

ADTS AAC文件格式如下

ADTS_header

AAC ES

ADTS_header

AAC ES

…

ADTS_header

AAC ES

详细的AAC格式参考下这篇文章吧

AAC文件格式与音频文件时长计算

获取每帧时长：ffmpeg能正确读到每帧的nb_samples和总体的sample_rate，那么两者相除就是每帧的时长了。

AAC：帧大小1024个sample，采样率为44100Hz，帧播放时长：aac dur = 1024/44100 = 0.02322s = 23.22ms

那么如何才能获取准确的时长呢？应该是通过adts frame header取总帧数*每帧时长的值作为duration。

3、解决问题

下面我们看下ffmpeg中这个格式的demuxer，这个文件封装格式raw ADTS AAC，下面我们看下aacdec.c/libavformat

/**
 * Get the length of the ADTS frame that starts at an absolute file position.
 *
 * The first six header bytes are fetched with a single seek and a single
 * read; nothing else is queried per frame (in particular the file size is
 * not re-read on every call).
 *
 * @param s          format context whose s->pb is read; its position is moved
 *                   and NOT restored, the caller must seek afterwards
 * @param offset     absolute byte position of the candidate frame
 * @param headerSize if not NULL, receives the header length of a valid frame
 *                   (7 bytes, or 9 when a CRC is present)
 * @return the frame length in bytes (header included) taken from the header,
 *         or 0 if the seek/read fails, the sync pattern does not match, or
 *         the encoded length is smaller than the header
 */
static int getAdtsFrameLength(AVFormatContext *s,int64_t offset,int* headerSize)
{
    const int kAdtsHeaderLengthNoCrc = 7;
    const int kAdtsHeaderLengthWithCrc = 9;
    uint8_t hdr[6];
    int protectionAbsent;
    int frameSize;
    int headSize;

    if (avio_seek(s->pb, offset, SEEK_SET) < 0) {
        return 0;
    }
    /* A short read (truncated file, trailing garbage) cannot be a frame. */
    if (avio_read(s->pb, hdr, (int)sizeof(hdr)) != (int)sizeof(hdr)) {
        return 0;
    }

    /* Sync pattern: 0xff followed by a byte matching 1111 0xx0 under 0xf6. */
    if ((hdr[0] != 0xff) || ((hdr[1] & 0xf6) != 0xf0)) {
        return 0;
    }

    /* protectionAbsent is 0 if there is CRC */
    protectionAbsent = hdr[1] & 0x1;

    /* The frame length is spread over header bytes 3..5. */
    frameSize = (hdr[3] & 0x3) << 11 | hdr[4] << 3 | hdr[5] >> 5;

    headSize = protectionAbsent ? kAdtsHeaderLengthNoCrc : kAdtsHeaderLengthWithCrc;
    if (headSize > frameSize) {
        return 0;
    }
    if (headerSize != NULL) {
        *headerSize = headSize;
    }
    return frameSize;
}
/**
 * Map an ADTS/MPEG-4 sampling-frequency index to a sample rate in Hz.
 *
 * The table covers indices 0..12 of the MPEG-4 Audio sampling frequency
 * index table, including index 12 (7350 Hz).
 *
 * @param sf_index sampling-frequency index taken from the ADTS header
 * @return the sample rate in Hz, or 0 for an index outside the table
 */
static uint32_t get_sample_rate(const uint8_t sf_index)
{
    static const uint32_t sample_rates[] =
    {
        96000, 88200, 64000, 48000, 44100, 32000,
        24000, 22050, 16000, 12000, 11025, 8000,
        7350
    };

    if (sf_index < sizeof(sample_rates) / sizeof(sample_rates[0])) {
        return sample_rates[sf_index];
    }

    return 0;
}

//add end

修改adts_aac_read_header函数

/**
 * Read the header of a raw ADTS AAC file.
 *
 * Creates the audio stream, reads ID3v1/APE tags, skips data until the first
 * ADTS sync word and then, for seekable input, derives an exact duration by
 * walking every ADTS frame header:
 *     duration = number_of_frames * duration_of_one_1024_sample_frame
 *
 * Everything is measured from the first ADTS frame found by the sync scan,
 * not from file offset 0, so data in front of the first frame does not break
 * the header parse or the frame walk. The I/O position is always put back to
 * that first frame before returning successfully.
 *
 * When the input is not seekable, or the first header cannot be read or has
 * an unknown sample-rate index, the function falls back to the generic time
 * base 1/28224000 and sets no duration.
 *
 * Note: the frame walk performs one seek and one read per ADTS frame.
 *
 * @param s format context being opened
 * @return 0 on success, AVERROR(ENOMEM) if the stream cannot be allocated,
 *         AVERROR_INVALIDDATA if no ADTS sync word is found, or the negative
 *         error from avio_seek() if the position cannot be restored
 */
static int adts_aac_read_header(AVFormatContext *s)
{
    AVStream *st;
    uint16_t state;
    uint8_t header[2];
    uint8_t profile, sf_index, channel;
    uint32_t sr;
    int frameSize;
    int64_t first_frame, offset, streamSize, ret;
    int64_t numFrames = 0, mFrameDurationUs, duration;

    st = avformat_new_stream(s, NULL);
    if (!st)
        return AVERROR(ENOMEM);

    st->codecpar->codec_type = AVMEDIA_TYPE_AUDIO;
    st->codecpar->codec_id   = s->iformat->raw_codec_id;
    st->need_parsing         = AVSTREAM_PARSE_FULL_RAW;

    ff_id3v1_read(s);
    if ((s->pb->seekable & AVIO_SEEKABLE_NORMAL) &&
        !av_dict_get(s->metadata, "", NULL, AV_DICT_IGNORE_SUFFIX)) {
        int64_t cur = avio_tell(s->pb);
        ff_ape_parse_tag(s);
        avio_seek(s->pb, cur, SEEK_SET);
    }

    // skip data until the first ADTS frame is found
    state = avio_r8(s->pb);
    while (!avio_feof(s->pb) && avio_tell(s->pb) < s->probesize) {
        state = (state << 8) | avio_r8(s->pb);
        if ((state >> 4) != 0xFFF)
            continue;
        avio_seek(s->pb, -2, SEEK_CUR);
        break;
    }
    if ((state >> 4) != 0xFFF)
        return AVERROR_INVALIDDATA;

    /* The scan above rewound onto the sync word: this is the first frame. */
    first_frame = avio_tell(s->pb);

    /* The frame walk needs random access to the whole file. */
    if (!(s->pb->seekable & AVIO_SEEKABLE_NORMAL))
        goto fallback;

    /* Bytes 2..3 of the first frame carry profile, rate index and channels. */
    if (avio_seek(s->pb, first_frame + 2, SEEK_SET) < 0 ||
        avio_read(s->pb, header, 2) < 2) {
        av_log(s, AV_LOG_ERROR, "avio_read header error!\n");
        goto fallback;
    }

    profile  = (header[0] >> 6) & 0x3;
    sf_index = (header[0] >> 2) & 0xf;
    channel  = (header[0] & 0x1) << 2 | (header[1] >> 6);
    st->codecpar->profile = profile;

    sr = get_sample_rate(sf_index);
    if (sr == 0) {
        av_log(s, AV_LOG_ERROR, "adts_aac_read_header read sampletare error!\n");
        goto fallback;
    }

    /* Channel configuration 0 means the layout is not signalled in the ADTS
     * header, so the channel count is left unset instead of failing.
     * Configuration 7 denotes 8 channels; 1..6 equal the channel count. */
    if (channel != 0)
        st->codecpar->channels = channel == 7 ? 8 : channel;
    st->codecpar->sample_rate = sr;
    avpriv_set_pts_info(st, 64, 1, st->codecpar->sample_rate);

    streamSize = avio_size(s->pb);
    if (streamSize > 0) {
        /* Count frames until the end of file or the first position that does
         * not parse as an ADTS frame. getAdtsFrameLength() never returns a
         * non-zero value below the header size, so the loop always advances. */
        for (offset = first_frame; offset < streamSize; offset += frameSize) {
            frameSize = getAdtsFrameLength(s, offset, NULL);
            if (frameSize == 0)
                break;
            numFrames++;
        }
        av_log(s, AV_LOG_WARNING, "---streamSize:%" PRId64 ",numFrames:%" PRId64 "!---\n",
               streamSize, numFrames);

        // Round up and get the duration of one 1024-sample frame in microseconds
        mFrameDurationUs = (1024 * 1000000ll + (sr - 1)) / sr;
        av_log(s, AV_LOG_WARNING, "---mFrameDurationUs:%" PRId64 "!---\n", mFrameDurationUs);

        /* Leave the duration unknown if not a single frame could be counted. */
        if (numFrames > 0) {
            duration = numFrames * mFrameDurationUs; /* microseconds */
            duration = av_rescale_q(duration, AV_TIME_BASE_Q, st->time_base);
            st->duration = duration;
            av_log(s, AV_LOG_WARNING, "-------duration:%" PRId64 "------!\n", duration);
        }
    }

    /* Put the I/O position back on the first frame for packet reading. */
    ret = avio_seek(s->pb, first_frame, SEEK_SET);
    if (ret < 0)
        return (int)ret;
    return 0;

fallback:
    // LCM of all possible ADTS sample rates
    avpriv_set_pts_info(st, 64, 1, 28224000);
    if (avio_tell(s->pb) != first_frame) {
        ret = avio_seek(s->pb, first_frame, SEEK_SET);
        if (ret < 0)
            return (int)ret;
    }
    return 0;
}

本来参照一朵桃花压海棠的博客，这里是return 0的，经测试，部分aac文件无法播放，后来改成上文中的goto了

if ((frameSize = getAdtsFrameLength(s, offset, NULL)) == 0) {
				return 0;
			}

目前测试没有问题，能够正常seek与播放！

参考链接：ffmpeg系列-解决ffmpeg获取aac音频文件duration不准_一朵桃花压海棠的博客-CSDN博客_ffmpeg 音频duration

解决ffmpeg获取AAC音频文件duration不准

1、分析问题

2、解决方案探究

3、解决问题

猜你喜欢