FFMpeg-8、音频编码及重采样

音频编码与视频编码类似，但涉及的细节比较多，之后遇到再持续补充。注意：1、pCodecCtx->frame_size 表示编码器每一帧、每个声道需要的样本数（不是字节数），由编码器在 avcodec_open2 之后自动设置；2、flush_encoder 用于在最后把编码器缓冲中剩余的数据编码输出；3、8K 与 16K 采样率之间的互转可以自己写函数，也可以调用重采样 API（swr_convert）。

卖酒的小码农

917人浏览 · 2021-05-30 21:17:13

卖酒的小码农 · 2021-05-30 21:17:13 发布

一、音频编码

音频编码与视频编码类似的，但是涉及的细节东西还是比较多，之后遇到再持续补充。
注意：
1、pCodecCtx->frame_size 表示编码器每一帧、每个声道需要的样本数（不是字节数），由编码器在 avcodec_open2 之后自动设置。
2、flush_encoder 用于在最后把编码器缓冲中剩余的数据编码输出。
3、8K 与 16K 采样率之间的互转（16位样本）可以自己写函数，也可以调用重采样 API。

API重采样
// Allocate the buffer that will receive the resampled output (50 MiB).
unsigned char *pcm = new unsigned char[1024*1024*50];
 // Configure the resampler context; the call returns the context to use.
 actx = swr_alloc_set_opts(
    actx,
     av_get_default_channel_layout(2),// output channel layout: 2 channels
     (AVSampleFormat)outFormat,// output sample format (original note: 16-bit, two bytes per sample)
     para->sample_rate, // output sample rate (same as the input rate)
     av_get_default_channel_layout(para->channels),// input channel layout
     (AVSampleFormat)para->format, // input sample format
     para->sample_rate, // input sample rate
     0,0
 );

// The context must be initialized before it can be used for conversion.
int re = swr_init(actx);
if (re != 0)
{
    // Turn the FFmpeg error code into readable text and report it.
    char buf[1024] = { 0 };
    av_strerror(re, buf, sizeof(buf) - 1);
    cout << "swr_init  failed! :" << buf << endl;
    return false;
}

// Converts one decoded audio frame with the shared resampler context (actx)
// and stores the converted samples in d.
//
// indata: frame to convert. It is always released with av_frame_free()
//         before this function returns (the pointer is passed by value, so
//         the caller's own pointer is not reset).
// d:      destination buffer, used as output plane 0.
//
// Returns the number of bytes written to d, 0 when indata or d is null, or
// the non-positive value returned by swr_convert().
int XResample::Resample(AVFrame *indata, unsigned char *d)
{
    if(!indata) return 0;
    if(!d)
    {
        av_frame_free(&indata);
        return 0;
    }

    // NOTE(review): only plane 0 is provided, which presumably relies on
    // outFormat being a packed (interleaved) sample format.
    uint8_t *data[2] = {0};
    data[0] = (uint8_t *)d;

    // swr_convert() returns the number of samples produced per channel.
    int len = swr_convert(
        actx,
        data,indata->nb_samples,                          // output planes, capacity in samples
        (const uint8_t**)indata->data,indata->nb_samples  // input planes, sample count
    );

    if(len <= 0)
    {
        av_frame_free(&indata);
        return len;
    }

    // The converter is configured with a 2-channel output layout, so the
    // byte count must use the OUTPUT channel count. Using indata->channels
    // under-reports the size for mono input and over-reports it for
    // multichannel input.
    const int outChannels = 2;
    int outsize = len * outChannels * av_get_bytes_per_sample((AVSampleFormat)outFormat);
    av_frame_free(&indata);
    return outsize;
}



//自定义重采样
// Naive 2x upsampler for 16-bit PCM (presumably 8 kHz -> 16 kHz, judging by
// the name): every 16-bit sample of the source is written twice.
//
// pSrc:       source bytes, read as consecutive 16-bit samples
// src_len:    source length in bytes; a trailing odd byte is ignored
// pDst:       destination, must hold at least (src_len / 2) * 4 bytes
// target_len: receives the number of bytes actually written to pDst
//             (0 for null pointers or when there is no complete sample)
void Resample8KTo16(unsigned char* pSrc, int src_len, unsigned char* pDst, int& target_len)
{
	target_len = 0;
	if (!pSrc || !pDst || src_len < 2)
		return;

	const int sample_count = src_len / 2;
	// Copy byte-wise instead of through short*: the buffers are byte
	// buffers and are not guaranteed to be aligned for 16-bit access.
	for (int i = 0; i < sample_count; i++)
	{
		const unsigned char b0 = pSrc[2 * i];
		const unsigned char b1 = pSrc[2 * i + 1];
		unsigned char* out = pDst + 4 * i;
		out[0] = b0;
		out[1] = b1;
		out[2] = b0;
		out[3] = b1;
	}
	// Report what was written; src_len * 2 would be too large for odd src_len.
	target_len = sample_count * 4;
}
// Naive 2x downsampler for 16-bit PCM (presumably 16 kHz -> 8 kHz, judging
// by the name): keeps every other 16-bit sample, starting with the first.
// No filtering is applied.
//
// pSrc:       source bytes, read as consecutive 16-bit samples
// src_len:    source length in bytes; a trailing odd byte is ignored
// pDst:       destination, must hold at least ((src_len / 2 + 1) / 2) * 2 bytes
// target_len: receives the number of bytes actually written to pDst
//             (0 for null pointers or when there is no complete sample)
void Resample16KTo8(unsigned char* pSrc, int src_len, unsigned char* pDst, int& target_len)
{
	target_len = 0;
	if (!pSrc || !pDst || src_len < 2)
		return;

	const int in_samples = src_len / 2;
	// Samples 0, 2, 4, ... are kept, i.e. ceil(in_samples / 2) of them.
	const int out_samples = (in_samples + 1) / 2;
	// Copy byte-wise instead of through short*: the buffers are byte
	// buffers and are not guaranteed to be aligned for 16-bit access.
	for (int k = 0; k < out_samples; k++)
	{
		pDst[2 * k] = pSrc[4 * k];
		pDst[2 * k + 1] = pSrc[4 * k + 1];
	}
	// Report what was written; src_len / 2 is wrong for an odd sample count.
	target_len = out_samples * 2;
}

4、生成、播放PCM音频以及剪切视频的几条相关命令

将MP4转换为PCM
ffmpeg -i 1.mp4 -vn -ar 44100 -ac 1 -f s16le out.pcm  
播放PCM
ffplay -ar 44100 -ac 1 -f s16le -i out.pcm 
剪切mp4视频长度
ffmpeg  -i ./SRS1.mp4 -vcodec copy -acodec copy -ss 00:10:0 -to 00:20:00 ./1.mp4 -y

音频编码的代码

/*
 * Drains the packets that are still buffered inside the encoder of one
 * output stream and writes them to the muxer.
 *
 * fmt_ctx:      output format context that owns the stream
 * stream_index: index of the stream whose encoder is flushed
 *
 * Returns 0 when the encoder has no delay or was drained completely, or the
 * negative error code of the failing encode/write call.
 */
int flush_encoder(AVFormatContext *fmt_ctx, unsigned int stream_index)
{
	int ret;
	int got_frame;
	AVPacket enc_pkt;
	AVCodecContext *enc_ctx = fmt_ctx->streams[stream_index]->codec;

	/* Encoders without delay hold nothing back, so there is nothing to do. */
	if (!(enc_ctx->codec->capabilities & AV_CODEC_CAP_DELAY))
		return 0;
	while (1) {
		/* Empty packet: the encoder allocates the payload itself. */
		av_init_packet(&enc_pkt);
		enc_pkt.data = NULL;
		enc_pkt.size = 0;
		got_frame = 0;
		/* A NULL frame asks the encoder to emit its buffered data. */
		ret = avcodec_encode_audio2(enc_ctx, &enc_pkt, NULL, &got_frame);
		if (ret < 0)
			break;
		if (!got_frame) {
			ret = 0;
			break;
		}
		printf("Flush Encoder: Succeed to encode 1 frame!\tsize:%5d\n", enc_pkt.size);
		/* Route the packet to the stream that was actually flushed. */
		enc_pkt.stream_index = (int)stream_index;
		/* mux encoded frame */
		ret = av_write_frame(fmt_ctx, &enc_pkt);
		/* av_write_frame() does not take ownership, so release the packet. */
		av_free_packet(&enc_pkt);
		if (ret < 0)
			break;
	}
	return ret;
}
/*
 * Example: encodes raw PCM read from "D://out.pcm" (treated as S16, mono,
 * 44100 Hz) with the default audio encoder of the container guessed from
 * "test.mp3", and writes the result to that file.
 *
 * At most `framenum` encoder frames are read. Reaching the end of the input
 * is the normal way to finish; a trailing partial frame is dropped.
 *
 * Returns 0 on success and -1 on any failure. All resources acquired up to
 * the point of failure are released.
 */
int testAudio()
{
	AVFormatContext* pFormatCtx = NULL;
	AVOutputFormat* fmt = NULL;
	AVStream* audio_st = NULL;
	AVCodecContext* pCodecCtx = NULL;
	AVCodec* pCodec = NULL;

	uint8_t* frame_buf = NULL;
	AVFrame* pFrame = NULL;
	AVPacket pkt;

	int got_frame = 0;
	int ret = 0;
	int size = 0;
	int result = -1;                              //Value returned to the caller
	int codec_opened = 0;                         //Set once avcodec_open2() succeeded
	size_t nread = 0;

	FILE *in_file = NULL;	                        //Raw PCM data
	int framenum = 8000;                          //Audio frame number
	const char* out_file = "test.mp3";          //Output URL
	int i;

	in_file = fopen("D://out.pcm", "rb");
	if (!in_file) {
		printf("Failed to open input file!\n");
		return -1;
	}

	av_register_all();

	//Method 1.
	pFormatCtx = avformat_alloc_context();
	if (!pFormatCtx) {
		printf("Failed to allocate format context!\n");
		goto cleanup;
	}
	fmt = av_guess_format(NULL, out_file, NULL);
	if (!fmt) {
		printf("Failed to guess output format!\n");
		goto cleanup;
	}
	pFormatCtx->oformat = fmt;

	//Method 2.
	//avformat_alloc_output_context2(&pFormatCtx, NULL, NULL, out_file);
	//fmt = pFormatCtx->oformat;

	//Open output URL
	if (avio_open(&pFormatCtx->pb, out_file, AVIO_FLAG_READ_WRITE) < 0) {
		printf("Failed to open output file!\n");
		goto cleanup;
	}

	audio_st = avformat_new_stream(pFormatCtx, 0);
	if (audio_st == NULL) {
		goto cleanup;
	}

	//Describe the PCM that will be fed to the encoder
	pCodecCtx = audio_st->codec;
	pCodecCtx->codec_id = fmt->audio_codec;
	pCodecCtx->codec_type = AVMEDIA_TYPE_AUDIO;
	pCodecCtx->sample_fmt = AV_SAMPLE_FMT_S16;
	pCodecCtx->sample_rate = 44100;
	pCodecCtx->channel_layout = av_get_default_channel_layout(1);
	pCodecCtx->channels = av_get_channel_layout_nb_channels(pCodecCtx->channel_layout);
	pCodecCtx->bit_rate = 64000;

	pCodec = avcodec_find_encoder(pCodecCtx->codec_id);
	if (!pCodec) {
		printf("Can not find encoder!\n");
		goto cleanup;
	}
	if (avcodec_open2(pCodecCtx, pCodec, NULL) < 0) {
		printf("Failed to open encoder!\n");
		goto cleanup;
	}
	codec_opened = 1;

	//frame_size (samples per channel per frame) is filled in by avcodec_open2()
	pFrame = av_frame_alloc();
	if (!pFrame) {
		printf("Failed to allocate frame!\n");
		goto cleanup;
	}
	pFrame->nb_samples = pCodecCtx->frame_size;
	pFrame->format = pCodecCtx->sample_fmt;

	size = av_samples_get_buffer_size(NULL, pCodecCtx->channels, pCodecCtx->frame_size, pCodecCtx->sample_fmt, 1);
	if (size <= 0) {
		printf("Failed to compute frame buffer size!\n");
		goto cleanup;
	}
	frame_buf = (uint8_t *)av_malloc(size);
	if (!frame_buf) {
		printf("Failed to allocate frame buffer!\n");
		goto cleanup;
	}
	if (avcodec_fill_audio_frame(pFrame, pCodecCtx->channels, pCodecCtx->sample_fmt, (const uint8_t*)frame_buf, size, 1) < 0) {
		printf("Failed to set up frame!\n");
		goto cleanup;
	}

	//Write Header
	if (avformat_write_header(pFormatCtx, NULL) < 0) {
		printf("Failed to write header!\n");
		goto cleanup;
	}

	for (i = 0; i<framenum; i++) {
		//Read PCM: one full encoder frame per iteration
		nread = fread(frame_buf, 1, (size_t)size, in_file);
		if (nread < (size_t)size) {
			if (ferror(in_file)) {
				printf("Failed to read raw data! \n");
				goto cleanup;
			}
			//End of input is not an error; the partial tail is dropped
			break;
		}
		pFrame->data[0] = frame_buf;  //PCM Data

		pFrame->pts = i * 100;        //Timestamp scheme kept from the original example
		got_frame = 0;

		//Empty packet: the encoder allocates the payload itself
		av_init_packet(&pkt);
		pkt.data = NULL;
		pkt.size = 0;

		//Encode
		ret = avcodec_encode_audio2(pCodecCtx, &pkt, pFrame, &got_frame);
		if (ret < 0) {
			printf("Failed to encode!\n");
			goto cleanup;
		}
		if (got_frame == 1) {
			printf("Succeed to encode 1 frame! \tsize:%5d\n", pkt.size);
			pkt.stream_index = audio_st->index;
			ret = av_write_frame(pFormatCtx, &pkt);
			av_free_packet(&pkt);
			if (ret < 0) {
				printf("Failed to write frame!\n");
				goto cleanup;
			}
		}
	}

	//Flush Encoder
	ret = flush_encoder(pFormatCtx, audio_st->index);
	if (ret < 0) {
		printf("Flushing encoder failed\n");
		goto cleanup;
	}

	//Write Trailer
	if (av_write_trailer(pFormatCtx) < 0) {
		printf("Failed to write trailer!\n");
		goto cleanup;
	}

	result = 0;

cleanup:
	//Clean: every step tolerates resources that were never acquired
	if (codec_opened) {
		avcodec_close(pCodecCtx);
	}
	av_frame_free(&pFrame);
	av_free(frame_buf);
	if (pFormatCtx) {
		if (pFormatCtx->pb) {
			avio_close(pFormatCtx->pb);
		}
		avformat_free_context(pFormatCtx);
	}
	fclose(in_file);

	return result;
}

/*
 * Program entry point: runs the audio encoding example once.
 * The command line arguments are not used.
 * Returns 0 when the example succeeds and 1 when it reports a failure, so
 * the failure is visible in the process exit status.
 */
int main(int argc, char* argv[])
{
	(void)argc;
	(void)argv;
	return testAudio() < 0 ? 1 : 0;
}

二、音频重采样转换

首先了解重采样的作用：类似于视频的像素格式、尺寸转换，音频重采样主要是采样率、通道数channels以及样本格式等参数的转换。
音频重采样主要用在对PCM数据进行编码的场景：采集到的PCM通常是S16等格式，而有些编码器（例如FFmpeg自带的AAC编码器）只支持AV_SAMPLE_FMT_FLTP浮点平面格式，这时就必须先做格式转换。

下面的案例是以Linux下alsa库对pcm音频数据获取之后的格式为S16、双通道、44100的采样率进行编码。

要注意的点是；
1、要根据你对已知数据PCM的样式定义帧以及内存，便于后期将数据赋值给帧。从而达到将数据抽象为帧的关键操作。

定义模板帧
// Template frame that describes the raw PCM input (S16, stereo, 44100 Hz).
AVFrame *input_frame = av_frame_alloc();
if (!input_frame)
{
	// NOTE(review): only records the error; the lines below still
	// dereference input_frame, so the caller must bail out here.
	ret = AVERROR(ENOMEM);
}

// 1024 samples per channel in each frame.
input_frame->nb_samples = 1024;
input_frame->channel_layout = AV_CH_LAYOUT_STEREO;
input_frame->format = AV_SAMPLE_FMT_S16;
input_frame->sample_rate = 44100;
input_frame->channels = 2;

// Size in bytes of one frame of input PCM, then a buffer of that size which
// is attached to the frame's data pointers.
int sizeIN = av_samples_get_buffer_size(NULL, input_frame->channels, input_frame->nb_samples, AV_SAMPLE_FMT_S16, 1);
uint8_t * frame_bufIN = (uint8_t *)av_malloc(sizeIN);
avcodec_fill_audio_frame(input_frame, input_frame->channels, AV_SAMPLE_FMT_S16, (const uint8_t*)frame_bufIN, sizeIN, 1);

将PCM数据与重采样输入模板帧关联
// Read up to one frame of PCM straight into the buffer attached to the
// template frame; readlen is the number of bytes actually read.
int readlen = fread(frame_bufIN, 1, sizeIN, in_file);

2、定义编码器上下文以及重采样的参数要一致，

// Encoder parameters: FLTP samples, 44100 Hz, stereo, 64 kbit/s.
pCodecCtx = audio_st->codec;
pCodecCtx->codec_id = fmt->audio_codec;
pCodecCtx->codec_type = AVMEDIA_TYPE_AUDIO;
pCodecCtx->sample_fmt = AV_SAMPLE_FMT_FLTP;
pCodecCtx->sample_rate= 44100;
pCodecCtx->channel_layout= AV_CH_LAYOUT_STEREO;
pCodecCtx->channels = 2;
pCodecCtx->bit_rate = 64000;

// The conversion target (format, channel count, sample rate) repeats the
// encoder parameters set above.
ret = AudioConvert(input_frame, AV_SAMPLE_FMT_FLTP, 2, 44100, &pOutFrame);

3、注意一些匹配关系

pCodecCtx->channels = 2;
pCodecCtx->channel_layout= AV_CH_LAYOUT_STEREO; // channels 与 channel_layout 必须匹配；或者像下面这样使用API，由声道布局推导出通道数
pCodecCtx->channel_layout = av_get_default_channel_layout(1);
pCodecCtx->channels = av_get_channel_layout_nb_channels(pCodecCtx->channel_layout);

重采样编码代码
16位 44100采样率 AV_CH_LAYOUT_STEREO
转换为
AV_SAMPLE_FMT_FLTP 44100采样率 AV_CH_LAYOUT_STEREO

#include <stdio.h>

#define __STDC_CONSTANT_MACROS

#ifdef _WIN32
//Windows
extern "C"
{
#include "libavcodec/avcodec.h"
#include "libavformat/avformat.h"
#include "libswresample\swresample.h"
#include "libavutil\opt.h"
};
#else
//Linux...
#ifdef __cplusplus
extern "C"
{
#endif
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#ifdef __cplusplus
};
#endif
#endif


/*
 * Drains the packets that are still buffered inside the encoder of one
 * output stream and writes them to the muxer.
 *
 * fmt_ctx:      output format context that owns the stream
 * stream_index: index of the stream whose encoder is flushed
 *
 * Returns 0 when the encoder has no delay or was drained completely, or the
 * negative error code of the failing encode/write call.
 */
int flush_encoder1(AVFormatContext *fmt_ctx,unsigned int stream_index){
	int ret;
	int got_frame;
	AVPacket enc_pkt;
	AVCodecContext *enc_ctx = fmt_ctx->streams[stream_index]->codec;

	/* Encoders without delay hold nothing back, so there is nothing to do. */
	if (!(enc_ctx->codec->capabilities & CODEC_CAP_DELAY))
		return 0;
	while (1) {
		/* Empty packet: the encoder allocates the payload itself. */
		av_init_packet(&enc_pkt);
		enc_pkt.data = NULL;
		enc_pkt.size = 0;
		got_frame = 0;
		/* A NULL frame asks the encoder to emit its buffered data. */
		ret = avcodec_encode_audio2(enc_ctx, &enc_pkt, NULL, &got_frame);
		if (ret < 0)
			break;
		if (!got_frame){
			ret=0;
			break;
		}
		printf("Flush Encoder: Succeed to encode 1 frame!\tsize:%5d\n",enc_pkt.size);

		/* Route the packet to the stream that was actually flushed. */
		enc_pkt.stream_index = (int)stream_index;
		ret = av_write_frame(fmt_ctx, &enc_pkt);
		/* av_write_frame() does not take ownership, so release the packet. */
		av_free_packet(&enc_pkt);
		if (ret < 0)
			break;
	}
	return ret;
}

int32_t AudioConvert(
	const AVFrame* pInFrame,      // 输入音频帧
	AVSampleFormat eOutSmplFmt,   // 输出音频格式
	int32_t        nOutChannels,  // 输出音频通道数 
	int32_t        nOutSmplRate,  // 输出音频采样率
	AVFrame**      ppOutFrame)    // 输出视频帧
{
	struct SwrContext* pSwrCtx = nullptr;
	AVFrame*           pOutFrame = nullptr;


	// 创建格式转换器,
	int64_t nInChnlLayout = av_get_default_channel_layout(pInFrame->channels);
	int64_t nOutChnlLayout = (nOutChannels == 1) ? AV_CH_LAYOUT_MONO : AV_CH_LAYOUT_STEREO;

	pSwrCtx = swr_alloc();
	if (pSwrCtx == nullptr)
	{
		return -1;
	}
	swr_alloc_set_opts(pSwrCtx,
		nOutChnlLayout, eOutSmplFmt, nOutSmplRate, nInChnlLayout,
		(enum AVSampleFormat)(pInFrame->format), pInFrame->sample_rate,
		0, nullptr);

	swr_init(pSwrCtx);


	// 计算重采样转换后的样本数量,从而分配缓冲区大小
	int64_t nCvtBufSamples = av_rescale_rnd(pInFrame->nb_samples, nOutSmplRate, pInFrame->sample_rate, AV_ROUND_UP);

	// 创建输出音频帧
	pOutFrame = av_frame_alloc();
	pOutFrame->format = eOutSmplFmt;
	pOutFrame->nb_samples = (int)nCvtBufSamples;
	pOutFrame->channel_layout = (uint64_t)nOutChnlLayout;
	int res = av_frame_get_buffer(pOutFrame, 0); // 分配缓冲区
	if (res < 0)
	{
		swr_free(&pSwrCtx);
		av_frame_free(&pOutFrame);
		return -2;
	}

	// 进行重采样转换处理,返回转换后的样本数量
	int nCvtedSamples = swr_convert(pSwrCtx,
		const_cast<uint8_t**>(pOutFrame->data),
		(int)nCvtBufSamples,
		const_cast<const uint8_t**>(pInFrame->data),
		pInFrame->nb_samples);
	if (nCvtedSamples <= 0)
	{
		swr_free(&pSwrCtx);
		av_frame_free(&pOutFrame);
		return -3;
	}
	pOutFrame->nb_samples = nCvtedSamples;
	pOutFrame->pts = pInFrame->pts;      // pts等时间戳沿用
	pOutFrame->pkt_pts = pInFrame->pkt_pts;

	(*ppOutFrame) = pOutFrame;
	swr_free(&pSwrCtx); // 释放转换器
	return 0;
}

int testAudioEncode()
{
	AVFormatContext* pFormatCtx;
	AVOutputFormat* fmt;
	AVStream* audio_st;
	AVCodecContext* pCodecCtx;
	AVCodec* pCodec;

	AVPacket pkt;

	int got_frame=0;
	int ret=0;
	int size=0;

	FILE *in_file=NULL;	                        //Raw PCM data
	int framenum=1000;                          //Audio frame number
	const char* out_file = "tdjm.aac";          //Output URL
	int i;

	//in_file= fopen("tdjm.pcm", "rb");
	in_file = fopen("d:\\record_dump.raw", "rb");
	//in_file = fopen("d:\\SaveLocalAudio1.pcm", "rb");
	

	av_register_all();

	AVFrame *input_frame = av_frame_alloc();
	if (!input_frame)
	{
		ret = AVERROR(ENOMEM);
	}

	input_frame->nb_samples = 1024;
	input_frame->channel_layout = AV_CH_LAYOUT_STEREO;
	input_frame->format = AV_SAMPLE_FMT_S16;
	input_frame->sample_rate = 44100;
	input_frame->channels = 2;

	int sizeIN = av_samples_get_buffer_size(NULL, input_frame->channels, input_frame->nb_samples, AV_SAMPLE_FMT_S16, 1);
	uint8_t * frame_bufIN = (uint8_t *)av_malloc(sizeIN);
	avcodec_fill_audio_frame(input_frame, input_frame->channels, AV_SAMPLE_FMT_S16, (const uint8_t*)frame_bufIN, sizeIN, 1);

	//Method 1.
	pFormatCtx = avformat_alloc_context();
	fmt = av_guess_format(NULL, out_file, NULL);
	pFormatCtx->oformat = fmt;

	//Method 2.
	//avformat_alloc_output_context2(&pFormatCtx, NULL, NULL, out_file);
	//fmt = pFormatCtx->oformat;

	//Open output URL
	if (avio_open(&pFormatCtx->pb,out_file, AVIO_FLAG_READ_WRITE) < 0){
		printf("Failed to open output file!\n");
		return -1;
	}

	audio_st = avformat_new_stream(pFormatCtx, 0);
	if (audio_st==NULL){
		return -1;
	}

	pCodecCtx = audio_st->codec;
	pCodecCtx->codec_id = fmt->audio_codec;
	pCodecCtx->codec_type = AVMEDIA_TYPE_AUDIO;
	pCodecCtx->sample_fmt = AV_SAMPLE_FMT_FLTP;
	pCodecCtx->sample_rate= 44100;
	pCodecCtx->channel_layout= AV_CH_LAYOUT_STEREO;
	pCodecCtx->channels = 2;
	pCodecCtx->bit_rate = 64000;

	//Show some information
	av_dump_format(pFormatCtx, 0, out_file, 1);

	pCodec = avcodec_find_encoder(pCodecCtx->codec_id);
	if (!pCodec){
		printf("Can not find encoder!\n");
		return -1;
	}
	if (avcodec_open2(pCodecCtx, pCodec,NULL) < 0){
		printf("Failed to open encoder!\n");
		return -1;
	}

	size = av_samples_get_buffer_size(NULL, pCodecCtx->channels,pCodecCtx->frame_size,pCodecCtx->sample_fmt, 1);

	//Write Header
	avformat_write_header(pFormatCtx,NULL);

	av_new_packet(&pkt,size);
	int ncount = 0;

	while (1)
	{
		if (feof(in_file))
			break;

		int readlen = fread(frame_bufIN, 1, sizeIN, in_file);
		//input_frame->data[0] = frame_buf;
		input_frame->pts = ncount * 60;

		AVFrame *pOutFrame = NULL;
		ret = AudioConvert(input_frame, AV_SAMPLE_FMT_FLTP, 2, 44100, &pOutFrame);

		ret = avcodec_encode_audio2(pCodecCtx, &pkt, pOutFrame, &got_frame);

		if (got_frame == 1) {
			printf("Succeed to encode 1 frame! \tsize:%5d\n", pkt.size);
			pkt.stream_index = audio_st->index;
			ret = av_write_frame(pFormatCtx, &pkt);
			av_free_packet(&pkt);
		}

		ncount++;
	}

	/*for (i=0; i<framenum; i++){
		//Read PCM
		if (fread(frame_buf, 1, size, in_file) <= 0){
			printf("Failed to read raw data! \n");
			return -1;
		}else if(feof(in_file)){
			break;
		}


		pFrame->data[0] = frame_buf;  //PCM Data

		pFrame->pts=i*100;
		got_frame=0;
		//Encode
		ret = avcodec_encode_audio2(pCodecCtx, &pkt,pFrame, &got_frame);
		if(ret < 0){
			printf("Failed to encode!\n");
			return -1;
		}
		if (got_frame==1){
			printf("Succeed to encode 1 frame! \tsize:%5d\n",pkt.size);
			pkt.stream_index = audio_st->index;
			ret = av_write_frame(pFormatCtx, &pkt);
			av_free_packet(&pkt);
		}
	}
	*/
	//Flush Encoder
	ret = flush_encoder1(pFormatCtx,0);
	if (ret < 0) {
		printf("Flushing encoder failed\n");
		return -1;
	}

	//Write Trailer
	av_write_trailer(pFormatCtx);

	//Clean
	if (audio_st){
		avcodec_close(audio_st->codec);
		av_free(input_frame);
		av_free(frame_bufIN);
	}
	avio_close(pFormatCtx->pb);
	avformat_free_context(pFormatCtx);

	fclose(in_file);

	return 0;
}