
Recherche avancée
Autres articles (95)
-
Mediabox : ouvrir les images dans l’espace maximal pour l’utilisateur
8 février 2011 — La visualisation des images est restreinte par la largeur accordée par le design du site (dépendant du thème utilisé). Elles sont donc visibles sous un format réduit. Afin de profiter de l’ensemble de la place disponible sur l’écran de l’utilisateur, il est possible d’ajouter une fonctionnalité d’affichage de l’image dans une boîte multimédia apparaissant au-dessus du reste du contenu.
Pour ce faire il est nécessaire d’installer le plugin "Mediabox".
Configuration de la boite multimédia
Dès (...) -
Les autorisations surchargées par les plugins
27 avril 2010, parMediaspip core
autoriser_auteur_modifier() afin que les visiteurs soient capables de modifier leurs informations sur la page d’auteurs -
Des sites réalisés avec MediaSPIP
2 mai 2011, parCette page présente quelques-uns des sites fonctionnant sous MediaSPIP.
Vous pouvez bien entendu ajouter le vôtre grâce au formulaire en bas de page.
Sur d’autres sites (8562)
-
[ffmpeg]How to use libfdk_aac to encode pcm to constant bitrate
27 décembre 2024, par qyt — I have tried both libfdk_aac and aac, but the encoded PCM audio always has a variable bit rate. Why is this happening? How can I make it encode with a constant bit rate?
The code is as follows :


#include 
#include 
#include 
extern "C"
{
#include <libavcodec></libavcodec>avcodec.h>
#include <libavdevice></libavdevice>avdevice.h>
#include <libavfilter></libavfilter>avfilter.h>
#include <libavfilter></libavfilter>buffersrc.h>
#include <libavfilter></libavfilter>buffersink.h>
#include <libavformat></libavformat>avformat.h>
#include <libavutil></libavutil>avutil.h>
#include <libavutil></libavutil>imgutils.h>
#include <libavutil></libavutil>opt.h>
#include <libavutil></libavutil>channel_layout.h> 
#include <libswresample></libswresample>swresample.h>
#include <libswscale></libswscale>swscale.h>
}

int main(int argc, char** argv) {
 AVCodecContext* codec_context = NULL;
 const AVCodec* codec = NULL;
 AVFrame* frame = NULL;
 AVPacket* pkt = NULL;
 FILE* input_file = NULL;
 FILE* output_file = NULL;
 int ret;

 // Open input file
 const char* input_filename = "D:\\audio\\b.pcm";
 const char* output_filename = "D:\\audio\\input.aac";
 input_file = fopen(input_filename, "rb");
 output_file = fopen(output_filename, "wb");
 if (!input_file || !output_file) {
 fprintf(stderr, "Could not open input or output file\n");
 exit(1);
 }

 // Find the AAC encoder
 codec = avcodec_find_encoder_by_name("libfdk_aac");
 if (!codec) {
 fprintf(stderr, "Codec not found\n");
 exit(1);
 }

 codec_context = avcodec_alloc_context3(codec);
 if (!codec_context) {
 fprintf(stderr, "Could not allocate audio codec context\n");
 exit(1);
 }

 // Set codec parameters
 codec_context->sample_fmt = AV_SAMPLE_FMT_S16;
 codec_context->sample_rate = 44100;
 codec_context->bit_rate = 256000;
 codec_context->rc_buffer_size = codec_context->bit_rate; 
 codec_context->rc_min_rate = codec_context->bit_rate; 
 codec_context->rc_max_rate = codec_context->bit_rate; 
 av_channel_layout_default(&codec_context->ch_layout, 2);
 // Open codec
 if (avcodec_open2(codec_context, codec, &opts) < 0) {
 fprintf(stderr, "Could not open codec\n");
 exit(1);
 }

 // Initialize packet
 pkt = av_packet_alloc();
 if (!pkt) {
 fprintf(stderr, "Could not allocate AVPacket\n");
 exit(1);
 }

 // Initialize frame
 frame = av_frame_alloc();
 frame->nb_samples = codec_context->frame_size;
 frame->format = codec_context->sample_fmt;
 frame->ch_layout.nb_channels = 2;

 // Allocate the data buffers
 ret = av_frame_get_buffer(frame, 0);
 if (ret < 0) {
 fprintf(stderr, "Could not allocate audio data buffers\n");
 exit(1);
 }

 // Main loop: read from the input file, encode, write to the output file
 while (fread(frame->data[0], 1, frame->linesize[0], input_file) == frame->linesize[0]) {
 // Send the frame to the encoder
 if (avcodec_send_frame(codec_context, frame) < 0) {
 fprintf(stderr, "Error sending frame to codec\n");
 exit(1);
 }

 // Get the encoded packet
 while (avcodec_receive_packet(codec_context, pkt) == 0) {
 fwrite(pkt->data, 1, pkt->size, output_file);
 av_packet_unref(pkt);
 }
 }

 // Flush the encoder
 avcodec_send_frame(codec_context, NULL);
 while (avcodec_receive_packet(codec_context, pkt) == 0) {
 fwrite(pkt->data, 1, pkt->size, output_file);
 av_packet_unref(pkt);
 }

 // Clean up
 fclose(input_file);
 fclose(output_file);
 av_frame_free(&frame);
 av_packet_free(&pkt);
 avcodec_free_context(&codec_context);

 return 0;
}




I encoded the AAC stream, then used FFmpeg to remux it into an MP4 file (./ffmpeg -i input.aac -c copy output.mp4) and inspected the result with MediaInfo.


The PCM file is S16, 2 channels, 44100 Hz


I also tried writing the MP4 directly; MediaInfo still reports the AAC audio as variable bitrate.


#include 
#include 
#include 
extern "C"
{
#include <libavcodec></libavcodec>avcodec.h>
#include <libavdevice></libavdevice>avdevice.h>
#include <libavfilter></libavfilter>avfilter.h>
#include <libavfilter></libavfilter>buffersrc.h>
#include <libavfilter></libavfilter>buffersink.h>
#include <libavformat></libavformat>avformat.h>
#include <libavutil></libavutil>avutil.h>
#include <libavutil></libavutil>imgutils.h>
#include <libavutil></libavutil>opt.h>
#include <libavutil></libavutil>channel_layout.h> 
#include <libswresample></libswresample>swresample.h>
#include <libswscale></libswscale>swscale.h>
}

int main(int argc, char** argv) {
 AVCodecContext* codec_context = NULL;
 const AVCodec* codec = NULL;
 AVFormatContext* format_context = NULL;
 AVStream* audio_stream = NULL;
 AVFrame* frame = NULL;
 AVPacket* pkt = NULL;
 FILE* input_file = NULL;
 int ret;
 int64_t next_pts = 0;

 // Open input file
 const char* input_filename = "D:\\audio\\b.pcm";
 const char* output_filename = "D:\\audio\\input.mp4";
 input_file = fopen(input_filename, "rb");
 if (!input_file) {
 fprintf(stderr, "Could not open input file\n");
 exit(1);
 }

 // Find the AAC encoder
 codec = avcodec_find_encoder_by_name("libfdk_aac");
 if (!codec) {
 fprintf(stderr, "Codec not found\n");
 exit(1);
 }

 // Allocate AVFormatContext for MP4 output
 avformat_alloc_output_context2(&format_context, NULL, NULL, output_filename);
 if (!format_context) {
 fprintf(stderr, "Could not allocate output context\n");
 exit(1);
 }

 // Add audio stream to the output file
 audio_stream = avformat_new_stream(format_context, codec);
 if (!audio_stream) {
 fprintf(stderr, "Could not allocate stream\n");
 exit(1);
 }

 codec_context = avcodec_alloc_context3(codec);
 if (!codec_context) {
 fprintf(stderr, "Could not allocate audio codec context\n");
 exit(1);
 }
 audio_stream->codecpar->frame_size = codec_context->frame_size = 1024;

 // Set codec parameters
 codec_context->sample_fmt = AV_SAMPLE_FMT_S16;
 codec_context->sample_rate = 44100;
 codec_context->bit_rate = 256000;
 codec_context->rc_buffer_size = codec_context->bit_rate;
 codec_context->rc_min_rate = codec_context->bit_rate;
 codec_context->rc_max_rate = codec_context->bit_rate;
 av_channel_layout_default(&codec_context->ch_layout, 2);

 // Copy settings to stream
 ret = avcodec_parameters_from_context(audio_stream->codecpar, codec_context);
 if (ret < 0) {
 fprintf(stderr, "Failed to copy codec parameters to stream\n");
 exit(1);
 }

 // Open codec
 if (avcodec_open2(codec_context, codec, NULL) < 0) {
 fprintf(stderr, "Could not open codec\n");
 exit(1);
 }

 // Open output file
 if (!(format_context->oformat->flags & AVFMT_NOFILE)) {
 ret = avio_open(&format_context->pb, output_filename, AVIO_FLAG_WRITE);
 if (ret < 0) {
 fprintf(stderr, "Could not open output file '%s'\n", output_filename);
 exit(1);
 }
 }

 // Write file header
 ret = avformat_write_header(format_context, NULL);
 if (ret < 0) {
 fprintf(stderr, "Error occurred when opening output file\n");
 exit(1);
 }

 // Initialize packet and frame
 pkt = av_packet_alloc();
 if (!pkt) {
 fprintf(stderr, "Could not allocate AVPacket\n");
 exit(1);
 }

 frame = av_frame_alloc();
 frame->nb_samples = codec_context->frame_size;
 frame->format = codec_context->sample_fmt;
 frame->ch_layout = codec_context->ch_layout;

 // Allocate the data buffers
 ret = av_frame_get_buffer(frame, 0);
 if (ret < 0) {
 fprintf(stderr, "Could not allocate audio data buffers\n");
 exit(1);
 }

 // Main loop: read from the input file, encode, write to the output file
 while (fread(frame->data[0], 1, frame->linesize[0], input_file) == frame->linesize[0]) {
 frame->pts = next_pts; // Set PTS for the frame
 next_pts += frame->nb_samples; // Increment the next PTS
 // Send the frame to the encoder
 if (avcodec_send_frame(codec_context, frame) < 0) {
 fprintf(stderr, "Error sending frame to codec\n");
 exit(1);
 }

 // Get the encoded packet
 while (avcodec_receive_packet(codec_context, pkt) == 0) {
 pkt->pts = pkt->dts = frame->pts;
 av_packet_rescale_ts(pkt, codec_context->time_base, audio_stream->time_base);
 pkt->stream_index = audio_stream->index;
 av_interleaved_write_frame(format_context, pkt);
 av_packet_unref(pkt);
 }
 }

 // Flush the encoder
 avcodec_send_frame(codec_context, NULL);
 while (avcodec_receive_packet(codec_context, pkt) == 0) {
 pkt->pts = pkt->dts = next_pts;
 av_packet_rescale_ts(pkt, codec_context->time_base, audio_stream->time_base);
 pkt->stream_index = audio_stream->index;
 av_interleaved_write_frame(format_context, pkt);
 av_packet_unref(pkt);
 next_pts += pkt->duration;
 }

 // Write file trailer
 av_write_trailer(format_context);

 // Clean up
 fclose(input_file);
 av_frame_free(&frame);
 av_packet_free(&pkt);
 avcodec_free_context(&codec_context);
 avio_closep(&format_context->pb);
 avformat_free_context(format_context);

 return 0;
}



-
FFmpeg VAAPI : GOP Size Setting Not Applied During Encoding
17 octobre 2024, par sun tony — I'm working on a video transcoding project using FFmpeg with VAAPI hardware acceleration, and I'm encountering an issue where the gop_size parameter is not being respected during encoding. Despite setting the gop_size to 150 in the encoder context, the output video does not have the expected GOP structure. This problem only occurs when using VAAPI for hardware acceleration—when I switch to software decoding or CUDA, the GOP size is applied correctly.


Here’s a simplified version of the relevant code where I set the gop_size :


encoder_ctx->gop_size = 150;



I'm using FFmpeg's VAAPI for hardware-accelerated encoding, and everything else works as expected, except for this GOP size issue. Has anyone encountered this problem, or is there something specific to VAAPI that I'm missing ?


Environment :


- 

- FFmpeg version : (6.0)
- VAAPI (Intel GPU)
- Encoder : H.264








#include 
#include 
extern "C"
{
#include <libavutil></libavutil>hwcontext.h>
#include <libavcodec></libavcodec>avcodec.h>
#include <libavformat></libavformat>avformat.h>
}
static AVFormatContext* ifmt_ctx = NULL, * ofmt_ctx = NULL;
static AVBufferRef* hw_device_ctx = NULL;
static AVCodecContext* decoder_ctx = NULL, * encoder_ctx = NULL;
static int video_stream = -1;
static AVStream* ost;
static int initialized = 0;

/**
 * Pixel-format selection callback installed on the decoder context.
 *
 * Scans the AV_PIX_FMT_NONE-terminated list offered by the decoder and
 * accepts only the VA-API surface format.
 *
 * @param ctx      Decoder context (not used).
 * @param pix_fmts Candidate formats, terminated by AV_PIX_FMT_NONE.
 * @return AV_PIX_FMT_VAAPI if it is offered; otherwise AV_PIX_FMT_NONE after
 *         reporting the failure on stderr.
 */
static enum AVPixelFormat get_vaapi_format(AVCodecContext* ctx,
 const enum AVPixelFormat* pix_fmts)
{
    const enum AVPixelFormat* candidate = pix_fmts;

    while (*candidate != AV_PIX_FMT_NONE) {
        if (*candidate == AV_PIX_FMT_VAAPI)
            return AV_PIX_FMT_VAAPI;
        ++candidate;
    }

    fprintf(stderr, "Unable to decode this file using VA-API.\n");
    return AV_PIX_FMT_NONE;
}

static int open_input_file(const char* filename)
{
 int ret;
 const AVCodec* decoder = NULL;
 AVStream* video = NULL;
 char err_buf[AV_ERROR_MAX_STRING_SIZE] = { 0, };

 if ((ret = avformat_open_input(&ifmt_ctx, filename, NULL, NULL)) < 0) {
 av_strerror(ret, err_buf, AV_ERROR_MAX_STRING_SIZE);
 fprintf(stderr, "Cannot open input file '%s', Error code: %s\n", filename, err_buf);
 return ret;
 }

 if ((ret = avformat_find_stream_info(ifmt_ctx, NULL)) < 0) {
 av_strerror(ret, err_buf, AV_ERROR_MAX_STRING_SIZE);
 fprintf(stderr, "Cannot find input stream information. Error code: %s\n", err_buf);
 return ret;
 }

 ret = av_find_best_stream(ifmt_ctx, AVMEDIA_TYPE_VIDEO, -1, -1, &decoder, 0);
 if (ret < 0) {
 av_strerror(ret, err_buf, AV_ERROR_MAX_STRING_SIZE);
 fprintf(stderr, "Cannot find a video stream in the input file. Error code: %s\n", err_buf);
 return ret;
 }
 video_stream = ret;

 if (!(decoder_ctx = avcodec_alloc_context3(decoder)))
 return AVERROR(ENOMEM);

 video = ifmt_ctx->streams[video_stream];
 if ((ret = avcodec_parameters_to_context(decoder_ctx, video->codecpar)) < 0) {

 av_strerror(ret, err_buf, AV_ERROR_MAX_STRING_SIZE);
 fprintf(stderr, "avcodec_parameters_to_context error. Error code: %s\n", err_buf);
 return ret;
 }

 decoder_ctx->hw_device_ctx = av_buffer_ref(hw_device_ctx);
 if (!decoder_ctx->hw_device_ctx) {
 fprintf(stderr, "A hardware device reference create failed.\n");
 return AVERROR(ENOMEM);
 }
 decoder_ctx->get_format = get_vaapi_format;

 if ((ret = avcodec_open2(decoder_ctx, decoder, NULL)) < 0){
 
 av_strerror(ret, err_buf, AV_ERROR_MAX_STRING_SIZE);
 fprintf(stderr, "Failed to open codec for decoding.Error code: %s\n", err_buf);
 }
 
 return ret;
}

/**
 * Sends one frame to the global encoder and muxes every packet it produces.
 *
 * @param enc_pkt Scratch packet; it is unreferenced on entry and reused for
 *                each encoded packet.
 * @param frame   Frame to encode, or NULL to flush the encoder.
 * @return 0 when the encoder wants more input (EAGAIN) or is fully drained
 *         (EOF); -1 on any encoding or writing error.
 */
static int encode_write(AVPacket* enc_pkt, AVFrame* frame)
{
    char msg[AV_ERROR_MAX_STRING_SIZE] = { 0 };

    av_packet_unref(enc_pkt);

    int status = avcodec_send_frame(encoder_ctx, frame);
    if (status < 0) {
        av_strerror(status, msg, AV_ERROR_MAX_STRING_SIZE);
        fprintf(stderr, "Error during encoding. Error code: %s\n", msg);
    } else {
        for (;;) {
            status = avcodec_receive_packet(encoder_ctx, enc_pkt);
            if (status != 0)
                break;

            // The output file has a single stream; timestamps are converted
            // from the input stream's time base to the output stream's.
            enc_pkt->stream_index = 0;
            av_packet_rescale_ts(enc_pkt,
                                 ifmt_ctx->streams[video_stream]->time_base,
                                 ofmt_ctx->streams[0]->time_base);

            status = av_interleaved_write_frame(ofmt_ctx, enc_pkt);
            if (status < 0) {
                av_strerror(status, msg, AV_ERROR_MAX_STRING_SIZE);
                fprintf(stderr, "Error during writing data to output file. Error code: %s\n", msg);
                return -1;
            }
        }
    }

    // EOF and EAGAIN are the normal ways for the loop to end.
    if (status == AVERROR_EOF)
        return 0;
    return (status == AVERROR(EAGAIN)) ? 0 : -1;
}

/**
 * Decodes one packet and re-encodes every frame it yields.
 *
 * The encoder is configured and opened lazily on the first decoded frame,
 * because the decoder's hw_frames_ctx only exists once a frame has been
 * produced. The output stream is created and the container header written
 * at the same time.
 *
 * @param pkt       Packet to decode; an empty packet flushes the decoder.
 *                  It is also reused as scratch storage by encode_write().
 * @param enc_codec Encoder to open on first use.
 * @return 0 on success or when the decoder needs more input / is drained,
 *         otherwise a negative error code.
 */
static int dec_enc(AVPacket* pkt, const AVCodec* enc_codec)
{
    AVFrame* frame;
    AVDictionary* opts = NULL;

    int ret = 0;
    char err_buf[AV_ERROR_MAX_STRING_SIZE] = { 0, };
    ret = avcodec_send_packet(decoder_ctx, pkt);
    if (ret < 0) {
        av_strerror(ret, err_buf, AV_ERROR_MAX_STRING_SIZE);
        fprintf(stderr, "Error during decoding.Error code : %s\n", err_buf);
        return ret;
    }

    while (ret >= 0) {
        if (!(frame = av_frame_alloc()))
            return AVERROR(ENOMEM);

        ret = avcodec_receive_frame(decoder_ctx, frame);
        if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {
            av_frame_free(&frame);
            return 0;
        }
        else if (ret < 0) {
            av_strerror(ret, err_buf, AV_ERROR_MAX_STRING_SIZE);
            fprintf(stderr, "Error while decoding.Error code : %s\n", err_buf);
            goto fail;
        }

        if (!initialized) {
            /* we need to ref hw_frames_ctx of decoder to initialize encoder's codec.
               Only after we get a decoded frame, can we obtain its hw_frames_ctx */
            encoder_ctx->hw_frames_ctx = av_buffer_ref(decoder_ctx->hw_frames_ctx);
            if (!encoder_ctx->hw_frames_ctx) {
                ret = AVERROR(ENOMEM);
                goto fail;
            }
            /* set AVCodecContext Parameters for encoder, here we keep them stay
             * the same as decoder.
             * xxx: now the sample can't handle resolution change case.
             */
            encoder_ctx->time_base = av_inv_q(decoder_ctx->framerate);
            encoder_ctx->pix_fmt = AV_PIX_FMT_VAAPI;
            encoder_ctx->width = decoder_ctx->width;
            encoder_ctx->height = decoder_ctx->height;
            encoder_ctx->gop_size = 150;
            av_dict_set(&opts, "g", "150", 0); // set gop_size

            ret = avcodec_open2(encoder_ctx, enc_codec, &opts);
            /* avcodec_open2() leaves unconsumed entries in the dictionary;
             * it must be released by the caller on success and on failure. */
            av_dict_free(&opts);
            if (ret < 0) {
                av_strerror(ret, err_buf, AV_ERROR_MAX_STRING_SIZE);
                fprintf(stderr, "Failed to open encode codec. Error code : %s\n", err_buf);
                goto fail;
            }

            if (!(ost = avformat_new_stream(ofmt_ctx, enc_codec))) {
                fprintf(stderr, "Failed to allocate stream for output format.\n");
                ret = AVERROR(ENOMEM);
                goto fail;
            }

            ost->time_base = encoder_ctx->time_base;
            ret = avcodec_parameters_from_context(ost->codecpar, encoder_ctx);
            if (ret < 0) {
                av_strerror(ret, err_buf, AV_ERROR_MAX_STRING_SIZE);
                fprintf(stderr, "Failed to copy the stream parameters. Error code : %s\n", err_buf);
                goto fail;
            }

            /* write the stream header */
            if ((ret = avformat_write_header(ofmt_ctx, NULL)) < 0) {
                av_strerror(ret, err_buf, AV_ERROR_MAX_STRING_SIZE);
                fprintf(stderr, "Error while writing stream header. Error code : %s\n", err_buf);
                goto fail;
            }

            initialized = 1;
        }

        /* Decoded frames carry the picture type they had in the input. The
         * VAAPI encoder treats an incoming I picture type as a request for
         * an IDR frame (see libavcodec/vaapi_encode.c), which makes the
         * output follow the input's keyframe cadence instead of gop_size.
         * Clearing it lets the encoder choose frame types itself. */
        frame->pict_type = AV_PICTURE_TYPE_NONE;

        if ((ret = encode_write(pkt, frame)) < 0)
            fprintf(stderr, "Error during encoding and writing.\n");

    fail:
        av_frame_free(&frame);
        if (ret < 0)
            return ret;
    }
    return 0;
}

int main(int argc, char** argv)
{
 const AVCodec* enc_codec;
 int ret = 0;
 AVPacket* dec_pkt;
 char err_buf[AV_ERROR_MAX_STRING_SIZE] = { 0, };

 if (argc != 4) {
 fprintf(stderr, "Usage: %s <input file="file" /> <encode codec="codec"> <output file="file">\n"
 "The output format is guessed according to the file extension.\n"
 "\n", argv[0]);
 return -1;
 }

 ret = av_hwdevice_ctx_create(&hw_device_ctx, AV_HWDEVICE_TYPE_VAAPI, NULL, NULL, 0);
 if (ret < 0) {
 av_strerror(ret, err_buf, AV_ERROR_MAX_STRING_SIZE);
 fprintf(stderr, "Failed to create a VAAPI device. Error code : %s\n", err_buf);
 return -1;
 }

 dec_pkt = av_packet_alloc();
 if (!dec_pkt) {
 fprintf(stderr, "Failed to allocate decode packet\n");
 goto end;
 }

 if ((ret = open_input_file(argv[1])) < 0)
 goto end;

 if (!(enc_codec = avcodec_find_encoder_by_name(argv[2]))) {
 fprintf(stderr, "Could not find encoder '%s'\n", argv[2]);
 ret = -1;
 goto end;
 }

 if ((ret = (avformat_alloc_output_context2(&ofmt_ctx, NULL, NULL, argv[3]))) < 0) {
 av_strerror(ret, err_buf, AV_ERROR_MAX_STRING_SIZE);
 fprintf(stderr, "Failed to deduce output format from file extension. Error code : %s\n", err_buf);
 goto end;
 }

 if (!(encoder_ctx = avcodec_alloc_context3(enc_codec))) {
 ret = AVERROR(ENOMEM);
 goto end;
 }

 ret = avio_open(&ofmt_ctx->pb, argv[3], AVIO_FLAG_WRITE);
 if (ret < 0) {
 av_strerror(ret, err_buf, AV_ERROR_MAX_STRING_SIZE);
 fprintf(stderr, "Cannot open output file. Error code : %s\n", err_buf);
 goto end;
 }

 /* read all packets and only transcoding video */
 while (ret >= 0) {
 if ((ret = av_read_frame(ifmt_ctx, dec_pkt)) < 0)
 break;

 if (video_stream == dec_pkt->stream_index)
 ret = dec_enc(dec_pkt, enc_codec);

 av_packet_unref(dec_pkt);
 }

 /* flush decoder */
 av_packet_unref(dec_pkt);
 ret = dec_enc(dec_pkt, enc_codec);

 /* flush encoder */
 ret = encode_write(dec_pkt, NULL);

 /* write the trailer for output stream */
 av_write_trailer(ofmt_ctx);

end:
 avformat_close_input(&ifmt_ctx);
 avformat_close_input(&ofmt_ctx);
 avcodec_free_context(&decoder_ctx);
 avcodec_free_context(&encoder_ctx);
 av_buffer_unref(&hw_device_ctx);
 av_packet_free(&dec_pkt);
 return ret;
}
</output></encode>


-
C++ ffmpeg - export to wav error : Invalid PCM packet, data has size 2 but at least a size of 4 was expected
9 septembre 2024, par Chris P — C++ code :


/**
 * Decodes the first audio stream of a media file into interleaved stereo
 * signed 16-bit PCM and wraps the samples in an AudioSegment.
 *
 * The sample rate of the source is preserved; only the sample format and
 * channel layout are converted.
 *
 * NOTE(review): format, codec, parameters, start_second and duration are not
 * used by this implementation. The template arguments of `parameters` are
 * missing from this listing and the signature is reproduced as shown; the
 * `metadata` map type and the `static_cast` target below were inferred from
 * their initialisers — confirm both against the class declaration.
 *
 * @param file_path Path of the file to decode.
 * @return The decoded segment, or a default-constructed AudioSegment when the
 *         file cannot be opened or decoded (the reason is printed to
 *         std::cerr).
 */
AudioSegment AudioSegment::from_file(const std::string& file_path, const std::string& format, const std::string& codec,
 const std::map& parameters, int start_second, int duration) {
    // Owns every FFmpeg object for the duration of the call so that each
    // early return below releases whatever has been acquired so far.
    struct DecodeResources {
        AVFormatContext* format_ctx = nullptr;
        AVCodecContext* codec_ctx = nullptr;
        SwrContext* swr_ctx = nullptr;
        AVFrame* frame = nullptr;
        AVPacket* packet = nullptr;
        AVChannelLayout dst_ch_layout = {};

        ~DecodeResources() {
            av_channel_layout_uninit(&dst_ch_layout);
            av_packet_free(&packet);
            av_frame_free(&frame);
            swr_free(&swr_ctx);
            avcodec_free_context(&codec_ctx);
            avformat_close_input(&format_ctx);
        }
    } res;

    avformat_network_init();
    av_log_set_level(AV_LOG_ERROR); // Adjust logging level as needed

    if (avformat_open_input(&res.format_ctx, file_path.c_str(), nullptr, nullptr) != 0) {
        std::cerr << "Error: Could not open audio file." << std::endl;
        return AudioSegment(); // Return an empty AudioSegment on failure
    }

    if (avformat_find_stream_info(res.format_ctx, nullptr) < 0) {
        std::cerr << "Error: Could not find stream information." << std::endl;
        return AudioSegment();
    }

    // Use the first audio stream in the container.
    int audio_stream_index = -1;
    for (unsigned int i = 0; i < res.format_ctx->nb_streams; i++) {
        if (res.format_ctx->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) {
            audio_stream_index = static_cast<int>(i);
            break;
        }
    }
    if (audio_stream_index == -1) {
        std::cerr << "Error: Could not find audio stream." << std::endl;
        return AudioSegment();
    }

    AVCodecParameters* codec_par = res.format_ctx->streams[audio_stream_index]->codecpar;
    const AVCodec* my_codec = avcodec_find_decoder(codec_par->codec_id);
    if (!my_codec) {
        std::cerr << "Error: Could not open codec." << std::endl;
        return AudioSegment();
    }

    res.codec_ctx = avcodec_alloc_context3(my_codec);
    if (!res.codec_ctx) {
        std::cerr << "Error: Could not allocate codec context." << std::endl;
        return AudioSegment();
    }

    if (avcodec_parameters_to_context(res.codec_ctx, codec_par) < 0) {
        std::cerr << "Error: Could not initialize codec context." << std::endl;
        return AudioSegment();
    }

    if (avcodec_open2(res.codec_ctx, my_codec, nullptr) < 0) {
        std::cerr << "Error: Could not open codec." << std::endl;
        return AudioSegment();
    }

    // The decoder's own sample rate is used as-is. Overwriting it with a
    // fixed value would mislabel any input that is not at that rate.
    const int sample_rate = res.codec_ctx->sample_rate;

    res.swr_ctx = swr_alloc();
    if (!res.swr_ctx) {
        std::cerr << "Error: Could not allocate SwrContext." << std::endl;
        return AudioSegment();
    }

    // Set up the resampling context to convert to interleaved stereo S16.
    constexpr int kOutChannels = 2;
    av_channel_layout_default(&res.dst_ch_layout, kOutChannels);

    av_opt_set_chlayout(res.swr_ctx, "in_chlayout", &res.codec_ctx->ch_layout, 0);
    av_opt_set_int(res.swr_ctx, "in_sample_rate", sample_rate, 0);
    av_opt_set_sample_fmt(res.swr_ctx, "in_sample_fmt", res.codec_ctx->sample_fmt, 0);
    av_opt_set_chlayout(res.swr_ctx, "out_chlayout", &res.dst_ch_layout, 0);
    av_opt_set_int(res.swr_ctx, "out_sample_rate", sample_rate, 0); // Match input sample rate
    av_opt_set_sample_fmt(res.swr_ctx, "out_sample_fmt", AV_SAMPLE_FMT_S16, 0); // Force S16 format

    if (swr_init(res.swr_ctx) < 0) {
        std::cerr << "Error: Failed to initialize the resampling context" << std::endl;
        return AudioSegment();
    }

    res.frame = av_frame_alloc();
    if (!res.frame) {
        std::cerr << "Error: Could not allocate frame." << std::endl;
        return AudioSegment();
    }

    res.packet = av_packet_alloc();
    if (!res.packet) {
        std::cerr << "Error: Could not allocate packet." << std::endl;
        return AudioSegment();
    }

    // Bytes occupied by one output sample across both channels.
    const int frame_width = av_get_bytes_per_sample(AV_SAMPLE_FMT_S16) * kOutChannels;

    std::vector<char> output;
    std::vector<uint8_t> scratch; // conversion buffer, reused across frames

    // Converts `input_samples` samples (or flushes the resampler when `input`
    // is null) and appends exactly the samples that were produced. Appending
    // the whole scratch buffer would pad the audio with zeros whenever the
    // resampler returns fewer samples than the buffer can hold.
    const auto convert_and_append = [&](const uint8_t** input, int input_samples) {
        const int64_t pending = swr_get_delay(res.swr_ctx, sample_rate) + input_samples;
        const int capacity = static_cast<int>(
            av_rescale_rnd(pending, sample_rate, sample_rate, AV_ROUND_UP));
        if (capacity <= 0) {
            return;
        }

        scratch.resize(static_cast<size_t>(capacity) * frame_width);
        uint8_t* out_planes[1] = { scratch.data() };
        const int converted = swr_convert(res.swr_ctx, out_planes, capacity, input, input_samples);
        if (converted < 0) {
            std::cerr << "Error: Failed to convert audio samples." << std::endl;
            return;
        }
        output.insert(output.end(), scratch.data(),
                      scratch.data() + static_cast<size_t>(converted) * frame_width);
    };

    // Pulls every frame the decoder currently has ready through the resampler.
    const auto drain_decoder = [&]() {
        while (avcodec_receive_frame(res.codec_ctx, res.frame) == 0) {
            convert_and_append(const_cast<const uint8_t**>(res.frame->extended_data),
                               res.frame->nb_samples);
            av_frame_unref(res.frame);
        }
    };

    while (av_read_frame(res.format_ctx, res.packet) >= 0) {
        if (res.packet->stream_index == audio_stream_index) {
            if (avcodec_send_packet(res.codec_ctx, res.packet) == 0) {
                drain_decoder();
            }
            else {
                std::cerr << "Error: Failed to send packet to codec context." << std::endl;
            }
        }
        av_packet_unref(res.packet);
    }

    // End of input: flush frames buffered in the decoder, then samples
    // buffered in the resampler.
    if (avcodec_send_packet(res.codec_ctx, nullptr) == 0) {
        drain_decoder();
    }
    convert_and_append(nullptr, 0);

    std::map<std::string, int> metadata = {
        {"sample_width", 2},         // S16 format has 2 bytes per sample
        {"frame_rate", sample_rate}, // Use the input sample rate
        {"channels", kOutChannels},  // Output is always converted to stereo
        {"frame_width", frame_width}
    };

    return AudioSegment(static_cast<const char*>(output.data()), output.size(), metadata);
}

std::ofstream AudioSegment::export_segment_to_wav_file(const std::string& out_f) {
 std::cout << this->get_channels() << std::endl;
 av_log_set_level(AV_LOG_ERROR);
 AVCodecContext* codec_ctx = nullptr;
 AVFormatContext* format_ctx = nullptr;
 AVStream* stream = nullptr;
 AVFrame* frame = nullptr;
 AVPacket* pkt = nullptr;
 int ret;

 // Initialize format context for WAV
 if (avformat_alloc_output_context2(&format_ctx, nullptr, "wav", out_f.c_str()) < 0) {
 throw std::runtime_error("Could not allocate format context.");
 }

 // Find encoder for PCM
 const AVCodec* codec_ptr = avcodec_find_encoder(AV_CODEC_ID_PCM_S16LE);
 if (!codec_ptr) {
 throw std::runtime_error("PCM encoder not found.");
 }

 // Add stream
 stream = avformat_new_stream(format_ctx, codec_ptr);
 if (!stream) {
 throw std::runtime_error("Failed to create new stream.");
 }

 // Allocate codec context
 codec_ctx = avcodec_alloc_context3(codec_ptr);
 if (!codec_ctx) {
 throw std::runtime_error("Could not allocate audio codec context.");
 }

 // Set codec parameters for PCM
 codec_ctx->bit_rate = 128000; // Bitrate
 codec_ctx->sample_rate = this->get_frame_rate(); // Use correct sample rate
 codec_ctx->ch_layout.nb_channels = this->get_channels(); // Set the correct channel count

 // Set the channel layout: stereo or mono
 if (this->get_channels() == 2) {
 av_channel_layout_default(&codec_ctx->ch_layout, 2); // Stereo layout
 }
 else {
 av_channel_layout_default(&codec_ctx->ch_layout, 1); // Mono layout
 }

 codec_ctx->sample_fmt = AV_SAMPLE_FMT_S16; // PCM 16-bit format

 // Open codec
 if (avcodec_open2(codec_ctx, codec_ptr, nullptr) < 0) {
 throw std::runtime_error("Could not open codec.");
 }

 // Set codec parameters to the stream
 if (avcodec_parameters_from_context(stream->codecpar, codec_ctx) < 0) {
 throw std::runtime_error("Could not initialize stream codec parameters.");
 }

 // Open output file
 std::ofstream out_file(out_f, std::ios::binary);
 if (!out_file) {
 throw std::runtime_error("Failed to open output file.");
 }

 if (!(format_ctx->oformat->flags & AVFMT_NOFILE)) {
 if (avio_open(&format_ctx->pb, out_f.c_str(), AVIO_FLAG_WRITE) < 0) {
 throw std::runtime_error("Could not open output file.");
 }
 }

 // Write file header
 if (avformat_write_header(format_ctx, nullptr) < 0) {
 throw std::runtime_error("Error occurred when writing file header.");
 }

 // Initialize packet
 pkt = av_packet_alloc();
 if (!pkt) {
 throw std::runtime_error("Could not allocate AVPacket.");
 }

 // Initialize frame
 frame = av_frame_alloc();
 if (!frame) {
 throw std::runtime_error("Could not allocate AVFrame.");
 }

 // Set the frame properties
 frame->format = codec_ctx->sample_fmt;
 frame->ch_layout = codec_ctx->ch_layout;

 // Number of audio samples available in the data buffer
 int total_samples = data_.size() / (av_get_bytes_per_sample(AV_SAMPLE_FMT_S16) * codec_ctx->ch_layout.nb_channels);
 int samples_read = 0;

 // Set the number of samples per frame dynamically based on the input data
 while (samples_read < total_samples) {
 // Determine how many samples to read in this iteration (don't exceed the total sample count)
 int num_samples = std::min(codec_ctx->frame_size, total_samples - samples_read);
 if (num_samples == 0) {
 num_samples = 1024;
 codec_ctx->frame_size = 1024;
 }
 // Ensure num_samples is not zero
 if (num_samples <= 0) {
 throw std::runtime_error("Invalid number of samples in frame.");
 }

 // Set the number of samples in the frame
 frame->nb_samples = num_samples;

 // Allocate the frame buffer based on the number of samples
 ret = av_frame_get_buffer(frame, 0);
 if (ret < 0) {
 std::cerr << "Error allocating frame buffer: " << ret << std::endl;
 throw std::runtime_error("Could not allocate audio data buffers.");
 }

 // Copy the audio data into the frame's buffer (interleaving if necessary)
 /*if (codec_ctx->ch_layout.nb_channels == 2) {
 // If stereo, interleave planar data into packed format
 for (int i = 0; i < num_samples; ++i) {
 ((int16_t*)frame->data[0])[2 * i] = ((int16_t*)data_.data())[i]; // Left channel
 ((int16_t*)frame->data[0])[2 * i + 1] = ((int16_t*)data_.data())[total_samples + i]; // Right channel
 }
 }
 else {
 // For mono or packed data, directly copy the samples
 std::memcpy(frame->data[0], data_.data() + samples_read * av_get_bytes_per_sample(AV_SAMPLE_FMT_S16) * codec_ctx->ch_layout.nb_channels,
 num_samples * av_get_bytes_per_sample(AV_SAMPLE_FMT_S16) * codec_ctx->ch_layout.nb_channels);
 }
 */
 std::memcpy(frame->data[0], data_.data() + samples_read * av_get_bytes_per_sample(AV_SAMPLE_FMT_S16) * codec_ctx->ch_layout.nb_channels,
 num_samples * av_get_bytes_per_sample(AV_SAMPLE_FMT_S16) * codec_ctx->ch_layout.nb_channels);

 // Send the frame for encoding
 ret = avcodec_send_frame(codec_ctx, frame);
 if (ret < 0) {
 std::cerr << "Error sending frame for encoding: " << ret << std::endl;
 throw std::runtime_error("Error sending frame for encoding.");
 }

 // Receive and write encoded packets
 while (ret >= 0) {
 ret = avcodec_receive_packet(codec_ctx, pkt);
 if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {
 break;
 }
 else if (ret < 0) {
 throw std::runtime_error("Error during encoding.");
 }

 out_file.write(reinterpret_cast(pkt->data), pkt->size);
 av_packet_unref(pkt);
 }

 samples_read += num_samples;
 }

 // Flush the encoder
 if (avcodec_send_frame(codec_ctx, nullptr) < 0) {
 throw std::runtime_error("Error flushing the encoder.");
 }

 while (avcodec_receive_packet(codec_ctx, pkt) >= 0) {
 out_file.write(reinterpret_cast(pkt->data), pkt->size);
 av_packet_unref(pkt);
 }

 // Write file trailer
 av_write_trailer(format_ctx);

 // Cleanup
 av_frame_free(&frame);
 av_packet_free(&pkt);
 avcodec_free_context(&codec_ctx);

 if (!(format_ctx->oformat->flags & AVFMT_NOFILE)) {
 avio_closep(&format_ctx->pb);
 }
 avformat_free_context(format_ctx);

 out_file.close();
 return out_file;
}

</const></char>


Run code :


#include "audio_segment.h"
#include "effects.h"
#include "playback.h"
#include "cppaudioop.h"
#include "exceptions.h"
#include "generators.h"
#include "silence.h"
#include "utils.h"

#include <iostream>
#include <filesystem>

using namespace cppdub;

int main() {
 try {
 // Load the source audio file
 AudioSegment seg_1 = AudioSegment::from_file("../data/test10.mp3");
 std::string out_file_name = "ah-ah-ah.wav";

 // Export the audio segment to a new file with specified settings
 //seg_1.export_segment(out_file_name, "mp3");
 seg_1.export_segment_to_wav_file(out_file_name);


 // Optionally play the audio segment to verify
 // play(seg_1);

 // Load the exported audio file
 AudioSegment seg_2 = AudioSegment::from_file(out_file_name);

 // Play segments
 //play(seg_1);
 play(seg_2);
 }
 catch (const std::exception& e) {
 std::cerr << "An error occurred: " << e.what() << std::endl;
 }

 return 0;
}
</filesystem></iostream>


Error in second call of from_file function :


[pcm_s16le @ 000002d82ca5bfc0] Invalid PCM packet, data has size 2 but at least a size of 4 was expected


The process continues and I can hear seg_2 through the play(seg_2) call, but I can't play the exported WAV file directly (from Windows Explorer).


My guess is that the error comes from a packed vs. planar format mismatch, but I am not quite sure. Maybe a swr_convert call is necessary.