
Recherche avancée
Autres articles (35)
-
Support de tous types de médias
10 avril 2011Contrairement à beaucoup de logiciels et autres plate-formes modernes de partage de documents, MediaSPIP a l’ambition de gérer un maximum de formats de documents différents qu’ils soient de type : images (png, gif, jpg, bmp et autres...) ; audio (MP3, Ogg, Wav et autres...) ; vidéo (Avi, MP4, Ogv, mpg, mov, wmv et autres...) ; contenu textuel, code ou autres (open office, microsoft office (tableur, présentation), web (html, css), LaTeX, Google Earth) (...)
-
Other interesting software
13 avril 2011, parWe don’t claim to be the only ones doing what we do ... and especially not to assert claims to be the best either ... What we do, we just try to do it well and getting better ...
The following list represents softwares that tend to be more or less as MediaSPIP or that MediaSPIP tries more or less to do the same, whatever ...
We don’t know them, we didn’t try them, but you can take a peek.
Videopress
Website : http://videopress.com/
License : GNU/GPL v2
Source code : (...) -
Keeping control of your media in your hands
13 avril 2011, parThe vocabulary used on this site and around MediaSPIP in general, aims to avoid reference to Web 2.0 and the companies that profit from media-sharing.
While using MediaSPIP, you are invited to avoid using words like "Brand", "Cloud" and "Market".
MediaSPIP is designed to facilitate the sharing of creative media online, while allowing authors to retain complete control of their work.
MediaSPIP aims to be accessible to as many people as possible and development is based on expanding the (...)
Sur d’autres sites (6098)
-
Sending raw h264 video and aac audio frames to an RTMP server using ffmpeg
7 décembre 2022, par codeimpaler
I am receiving raw H.264 video and AAC audio frames from an event-driven source. I am trying to send these frames to an RTMP server.
I started working from the ffmpeg example muxing.c, which successfully sends a custom stream to the RTMP server. I figured I just needed to replace its frame data with my own. I found this suggestion online. I have tried How to pack raw h264 stream to flv container and send over rtmp using ffmpeg (not command)
and
How to publish selfmade stream with ffmpeg and c++ to rtmp server ?
and a few other suggestions but none have worked for me. 
I have tried to directly memcpy my byte buffer but my code keeps failing
at ret = avcodec_encode_video2(c, &pkt, frame, &got_packet).
Specifically, I get an invalid access error.
For a little more context, anytime I receive a frame (which is event driven), void RTMPWriter::WriteVideoFrame(...) is called. Assume the constructor has already been called before the first frame is received. 
I am not that familiar with ffmpeg and there could be several things wrong with the code. Any input will be really appreciated.



/* Video stream parameters applied to the encoder context in add_stream(). */
#define STREAM_FRAME_RATE 25 /* 25 images/s */
 #define STREAM_PIX_FMT AV_PIX_FMT_YUV420P /* default pix_fmt */
 /* Scaler flags passed to sws_getContext() in get_video_frame(). */
 #define SCALE_FLAGS SWS_BICUBIC
 /*
  * Builds the complete output pipeline:
  *   1. registers FFmpeg and initialises its network layer,
  *   2. reads the target URL from the "StreamURL" local application setting,
  *   3. allocates an FLV output context (falling back to MPEG),
  *   4. adds and opens the default video/audio encoders of that format,
  *   5. opens the output and writes the stream header.
  *
  * Failures are reported through OutputDebugString. If the output context
  * cannot be allocated, the output cannot be opened, or the header cannot be
  * written, everything allocated so far is released and the constructor
  * returns with oc == NULL, fmt == NULL and have_video == have_audio == 0,
  * so later code can detect the unusable writer instead of dereferencing a
  * half-initialised muxer.
  */
 RTMPWriter::RTMPWriter()
     : seenKeyFrame(false),
     video_st({ 0 }),
     audio_st({ 0 }),
     have_video(0),
     have_audio(0)
 {
     AVCodec *audio_codec = NULL, *video_codec = NULL;
     AVDictionary *opt = NULL;
     int ret;

     /* Initialize libavcodec, and register all codecs and formats. */
     av_register_all();

     avformat_network_init();

     /* Convert the WinRT string setting to a narrow C string for FFmpeg.
      * out_uriA owns the bytes and outlives every use of filename below. */
     String^ StreamURL = "StreamURL";
     String^ out_uri = safe_cast<String^>(ApplicationData::Current->LocalSettings->Values->Lookup(StreamURL));
     std::wstring out_uriW(out_uri->Begin());
     std::string out_uriA(out_uriW.begin(), out_uriW.end());
     const char *filename = out_uriA.c_str();

     /* allocate the output media context */
     avformat_alloc_output_context2(&oc, NULL, "flv", filename);
     if (!oc)
     {
         OutputDebugString(L"Could not deduce output format from file extension: using MPEG.\n");
         avformat_alloc_output_context2(&oc, NULL, "mpeg", filename);
     }
     if (!oc)
     {
         OutputDebugString(L"Could not allocate using MPEG.\n");
         /* Nothing was created; do not touch oc->oformat. */
         fmt = NULL;
         return;
     }

     fmt = oc->oformat;

     /* Releases everything created so far and marks the writer unusable. */
     auto abandon = [this]()
     {
         if (have_video)
             close_stream(oc, &video_st);
         if (have_audio)
             close_stream(oc, &audio_st);
         have_video = 0;
         have_audio = 0;

         if (!(fmt->flags & AVFMT_NOFILE))
             avio_closep(&oc->pb);

         avformat_free_context(oc);
         oc = NULL;
         fmt = NULL;
     };

     /* Add the audio and video streams using the default format codecs
      * and initialize the codecs. */
     if (fmt->video_codec != AV_CODEC_ID_NONE) {
         add_stream(&video_st, oc, &video_codec, fmt->video_codec);
         have_video = 1;
     }
     if (fmt->audio_codec != AV_CODEC_ID_NONE) {
         add_stream(&audio_st, oc, &audio_codec, fmt->audio_codec);
         have_audio = 1;
     }

     /* Now that all the parameters are set, we can open the audio and
      * video codecs and allocate the necessary encode buffers. */
     if (have_video)
     {
         open_video(oc, video_codec, &video_st, opt);
     }

     if (have_audio)
     {
         open_audio(oc, audio_codec, &audio_st, opt);
     }

     av_dump_format(oc, 0, filename, 1);

     /* open the output file, if needed */
     if (!(fmt->flags & AVFMT_NOFILE))
     {
         ret = avio_open(&oc->pb, filename, AVIO_FLAG_WRITE);
         if (ret < 0)
         {
             OutputDebugString(L"Could not open ");
             OutputDebugString(out_uri->Data());
             /* Writing a header without an I/O context cannot succeed. */
             abandon();
             return;
         }
     }

     /* Write the stream header, if any. */
     ret = avformat_write_header(oc, &opt);
     if (ret < 0)
     {
         OutputDebugString(L"Error occurred when writing stream header \n");
         /* The muxer is not initialised; packets and the trailer must not be written. */
         abandon();
         return;
     }
 }

 /*
  * Encodes one incoming video buffer and hands the resulting packet (if the
  * encoder produced one) to the muxer.
  *
  * isKeyFrame, hasDiscontinuity and frameId are accepted for the caller's
  * convenience but are not used by this implementation.
  * videoBufferLength / videoBytes describe the buffer forwarded to
  * get_video_frame().
  *
  * Errors are reported through OutputDebugString; the function returns
  * without writing anything when there is no video encoder, no input buffer,
  * or the encoder fails.
  */
 void RTMPWriter::WriteVideoFrame(
     boolean isKeyFrame,
     boolean hasDiscontinuity,
     UINT64 frameId,
     UINT32 videoBufferLength,
     BYTE *videoBytes)
 {
     AVCodecContext *c = video_st.enc;
     if (!c || !videoBytes) {
         OutputDebugString(L"Error encoding video frame: \n");
         return;
     }

     AVFrame *frame = get_video_frame(videoBufferLength, videoBytes);

     AVPacket pkt = { 0 };
     int got_packet = 0;

     /* encode the image */
     int ret = avcodec_encode_video2(c, &pkt, frame, &got_packet);
     if (ret < 0) {
         OutputDebugString(L"Error encoding video frame: \n");
         return;
     }

     /* The encoder may buffer frames and emit nothing for this call. */
     if (!got_packet)
         return;

     ret = write_frame(oc, &c->time_base, video_st.st, &pkt);
     /* Drop whatever reference is still attached to the packet, e.g. when the muxer rejected it. */
     av_packet_unref(&pkt);

     if (ret < 0) {
         OutputDebugString(L"Error while writing video frame: %s\n");
     }
 }

 /*
  * Loads the caller's buffer into the reusable encoder frame and stamps it
  * with the next presentation timestamp.
  *
  * When the encoder already consumes YUV420P the buffer is loaded straight
  * into video_st.frame. Otherwise it is loaded into video_st.tmp_frame and
  * converted to the encoder's pixel format with a lazily created scaler.
  *
  * Returns video_st.frame; the process exits if the scaler cannot be created.
  */
 AVFrame * RTMPWriter::get_video_frame(
     UINT32 videoBufferLength,
     BYTE *videoBytes)
 {
     AVCodecContext *enc = video_st.enc;
     const bool encoderTakesYuv420p = (enc->pix_fmt == AV_PIX_FMT_YUV420P);

     if (encoderTakesYuv420p) {
         fill_yuv_image(video_st.frame, video_st.next_pts, enc->width, enc->height, videoBufferLength, videoBytes);
     }
     else {
         /* as we only generate a YUV420P picture, we must convert it
          * to the codec pixel format if needed */
         if (video_st.sws_ctx == NULL) {
             video_st.sws_ctx = sws_getContext(enc->width, enc->height,
                                               AV_PIX_FMT_YUV420P,
                                               enc->width, enc->height,
                                               enc->pix_fmt,
                                               SCALE_FLAGS, NULL, NULL, NULL);
             if (video_st.sws_ctx == NULL) {
                 fprintf(stderr,
                         "Could not initialize the conversion context\n");
                 exit(1);
             }
         }

         fill_yuv_image(video_st.tmp_frame, video_st.next_pts, enc->width, enc->height, videoBufferLength, videoBytes);

         const uint8_t * const *srcPlanes = video_st.tmp_frame->data;
         sws_scale(video_st.sws_ctx,
                   srcPlanes, video_st.tmp_frame->linesize,
                   0, enc->height,
                   video_st.frame->data, video_st.frame->linesize);
     }

     /* Stamp the frame, then advance the counter for the next call. */
     video_st.frame->pts = video_st.next_pts;
     video_st.next_pts += 1;

     return video_st.frame;
 }

 /*
  * Copies one tightly packed planar YUV420P picture (Y plane, then Cb, then
  * Cr) from videoBytes into the planes of pict, honouring pict->linesize.
  *
  * pict              destination frame; made writable before copying
  * frame_index       unused, kept for interface compatibility
  * width, height     picture dimensions in pixels
  * videoBufferLength number of bytes available in videoBytes
  * videoBytes        source pixels
  *
  * The previous implementation copied the buffer over pict->data itself,
  * i.e. over the array of plane pointers inside the AVFrame, which corrupts
  * the frame and leads to invalid memory accesses in the encoder.
  *
  * If the buffer is smaller than a full picture nothing is copied and the
  * problem is reported through OutputDebugString.
  *
  * NOTE(review): the surrounding description says the caller receives
  * already-encoded H.264 data; such data is not a YUV picture and should be
  * muxed as packets rather than re-encoded -- confirm what videoBytes holds.
  */
 void RTMPWriter::fill_yuv_image(
     AVFrame *pict,
     int frame_index,
     int width,
     int height,
     UINT32 videoBufferLength,
     BYTE *videoBytes)
 {
     (void)frame_index;

     if (!pict || !videoBytes || width <= 0 || height <= 0) {
         OutputDebugString(L"Invalid arguments for filling the picture\n");
         return;
     }

     /* Chroma planes are subsampled by two in both directions (rounded up). */
     const int chromaWidth = (width + 1) / 2;
     const int chromaHeight = (height + 1) / 2;
     const UINT64 bytesNeeded =
         static_cast<UINT64>(width) * static_cast<UINT64>(height) +
         2 * static_cast<UINT64>(chromaWidth) * static_cast<UINT64>(chromaHeight);

     if (videoBufferLength < bytesNeeded) {
         OutputDebugString(L"Video buffer too small for a YUV420P picture\n");
         return;
     }

     /* when we pass a frame to the encoder, it may keep a reference to it
      * internally;
      * make sure we do not overwrite it here
      */
     int ret = av_frame_make_writable(pict);
     if (ret < 0)
     {
         OutputDebugString(L"Unable to make piture writable");
         return;
     }

     const int planeWidth[3] = { width, chromaWidth, chromaWidth };
     const int planeHeight[3] = { height, chromaHeight, chromaHeight };

     /* Copy row by row: destination rows are linesize bytes apart, source rows are packed. */
     const BYTE *src = videoBytes;
     for (int plane = 0; plane < 3; ++plane) {
         for (int y = 0; y < planeHeight[plane]; ++y) {
             memcpy(pict->data[plane] + y * pict->linesize[plane], src, planeWidth[plane]);
             src += planeWidth[plane];
         }
     }
 }

 /* Audio counterpart of WriteVideoFrame(); the body is empty, so calling it has no effect. */
 void RTMPWriter::WriteAudioFrame()
 {

 }

 /*
  * Adds an output stream to oc and prepares (but does not open) its encoder.
  *
  * ost       receives the new AVStream (ost->st) and encoder context (ost->enc)
  * oc        output format context the stream is added to
  * codec     out: the encoder found for codec_id
  * codec_id  codec to encode with
  *
  * Audio encoders get 64 kbit/s, 44.1 kHz when supported (otherwise the first
  * supported rate) and stereo when supported (otherwise the first supported
  * layout). Video encoders get 400 kbit/s, 352x288, STREAM_FRAME_RATE and
  * STREAM_PIX_FMT. The process exits if the encoder, stream or context
  * cannot be created.
  */
 void RTMPWriter::add_stream(
     OutputStream *ost,
     AVFormatContext *oc,
     AVCodec **codec,
     enum AVCodecID codec_id)
 {
     AVCodecContext *c;
     int i;

     /* find the encoder */
     *codec = avcodec_find_encoder(codec_id);
     if (!(*codec)) {
         OutputDebugString(L"Could not find encoder for '%s'\n");
         exit(1);
     }

     ost->st = avformat_new_stream(oc, NULL);
     if (!ost->st) {
         OutputDebugString(L"Could not allocate stream\n");
         exit(1);
     }
     ost->st->id = oc->nb_streams - 1;

     c = avcodec_alloc_context3(*codec);
     if (!c) {
         OutputDebugString(L"Could not alloc an encoding context\n");
         exit(1);
     }
     ost->enc = c;

     switch ((*codec)->type) {
     case AVMEDIA_TYPE_AUDIO:
         c->sample_fmt = (*codec)->sample_fmts ?
             (*codec)->sample_fmts[0] : AV_SAMPLE_FMT_FLTP;
         c->bit_rate = 64000;

         /* Prefer 44.1 kHz; otherwise take the encoder's first supported rate. */
         c->sample_rate = 44100;
         if ((*codec)->supported_samplerates) {
             c->sample_rate = (*codec)->supported_samplerates[0];
             for (i = 0; (*codec)->supported_samplerates[i]; i++) {
                 if ((*codec)->supported_samplerates[i] == 44100)
                     c->sample_rate = 44100;
             }
         }

         /* Prefer stereo; otherwise take the encoder's first supported layout.
          * The channel count is derived only after the layout is final. */
         c->channel_layout = AV_CH_LAYOUT_STEREO;
         if ((*codec)->channel_layouts) {
             c->channel_layout = (*codec)->channel_layouts[0];
             for (i = 0; (*codec)->channel_layouts[i]; i++) {
                 if ((*codec)->channel_layouts[i] == AV_CH_LAYOUT_STEREO)
                     c->channel_layout = AV_CH_LAYOUT_STEREO;
             }
         }
         c->channels = av_get_channel_layout_nb_channels(c->channel_layout);

         ost->st->time_base = AVRational{ 1, c->sample_rate };
         break;

     case AVMEDIA_TYPE_VIDEO:
         c->codec_id = codec_id;

         c->bit_rate = 400000;
         /* Resolution must be a multiple of two. */
         c->width = 352;
         c->height = 288;
         /* timebase: This is the fundamental unit of time (in seconds) in terms
          * of which frame timestamps are represented. For fixed-fps content,
          * timebase should be 1/framerate and timestamp increments should be
          * identical to 1. */
         ost->st->time_base = AVRational{ 1, STREAM_FRAME_RATE };
         c->time_base = ost->st->time_base;

         c->gop_size = 12; /* emit one intra frame every twelve frames at most */
         c->pix_fmt = STREAM_PIX_FMT;
         if (c->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
             /* just for testing, we also add B-frames */
             c->max_b_frames = 2;
         }
         if (c->codec_id == AV_CODEC_ID_MPEG1VIDEO) {
             /* Needed to avoid using macroblocks in which some coeffs overflow.
              * This does not happen with normal video, it just happens here as
              * the motion of the chroma plane does not match the luma plane. */
             c->mb_decision = 2;
         }
         break;

     default:
         break;
     }

     /* Some formats want stream headers to be separate. */
     if (oc->oformat->flags & AVFMT_GLOBALHEADER)
         c->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
 }

/*
 * Creates an audio AVFrame described by the given sample format, channel
 * layout, sample rate and sample count. Sample buffers are attached only
 * when nb_samples is non-zero. The process exits if an allocation fails.
 */
AVFrame * RTMPWriter::alloc_audio_frame(
    enum AVSampleFormat sample_fmt,
    uint64_t channel_layout,
    int sample_rate, int nb_samples)
{
    AVFrame *audioFrame = av_frame_alloc();
    if (audioFrame == NULL) {
        OutputDebugString(L"Error allocating an audio frame\n");
        exit(1);
    }

    audioFrame->format = sample_fmt;
    audioFrame->channel_layout = channel_layout;
    audioFrame->sample_rate = sample_rate;
    audioFrame->nb_samples = nb_samples;

    /* Without a sample count there is nothing to size the buffers with. */
    if (nb_samples == 0)
        return audioFrame;

    if (av_frame_get_buffer(audioFrame, 0) < 0) {
        OutputDebugString(L"Error allocating an audio buffer\n");
        exit(1);
    }

    return audioFrame;
}




/*
 * Opens the audio encoder held in ost->enc and prepares everything needed
 * to feed it: tone-generator state, the encoder-format frame, an S16 staging
 * frame, the stream's codec parameters and a resampler converting S16 to the
 * encoder's sample format.
 *
 * opt_arg is copied before being handed to the encoder, so the caller's
 * dictionary is left untouched. The process exits on any failure.
 */
void RTMPWriter::open_audio(
    AVFormatContext *oc,
    AVCodec *codec,
    OutputStream *ost,
    AVDictionary *opt_arg)
{
    AVCodecContext *enc = ost->enc;
    int status;

    /* open it */
    AVDictionary *encoderOptions = NULL;
    av_dict_copy(&encoderOptions, opt_arg, 0);
    status = avcodec_open2(enc, codec, &encoderOptions);
    av_dict_free(&encoderOptions);
    if (status < 0) {
        OutputDebugString(L"Could not open audio codec: %s\n");
        exit(1);
    }

    /* init signal generator */
    ost->t = 0;
    ost->tincr = 2 * M_PI * 110.0 / enc->sample_rate;
    /* increment frequency by 110 Hz per second */
    ost->tincr2 = 2 * M_PI * 110.0 / enc->sample_rate / enc->sample_rate;

    /* Encoders with a variable frame size accept any count; otherwise use theirs. */
    const int samplesPerFrame =
        (enc->codec->capabilities & AV_CODEC_CAP_VARIABLE_FRAME_SIZE) ? 10000 : enc->frame_size;

    ost->frame = alloc_audio_frame(enc->sample_fmt, enc->channel_layout,
                                   enc->sample_rate, samplesPerFrame);
    ost->tmp_frame = alloc_audio_frame(AV_SAMPLE_FMT_S16, enc->channel_layout,
                                       enc->sample_rate, samplesPerFrame);

    /* copy the stream parameters to the muxer */
    status = avcodec_parameters_from_context(ost->st->codecpar, enc);
    if (status < 0) {
        OutputDebugString(L"Could not copy the stream parameters\n");
        exit(1);
    }

    /* create resampler context */
    ost->swr_ctx = swr_alloc();
    if (ost->swr_ctx == NULL) {
        OutputDebugString(L"Could not allocate resampler context\n");
        exit(1);
    }

    /* set options: input side first, then output side */
    av_opt_set_int(ost->swr_ctx, "in_channel_count", enc->channels, 0);
    av_opt_set_int(ost->swr_ctx, "in_sample_rate", enc->sample_rate, 0);
    av_opt_set_sample_fmt(ost->swr_ctx, "in_sample_fmt", AV_SAMPLE_FMT_S16, 0);
    av_opt_set_int(ost->swr_ctx, "out_channel_count", enc->channels, 0);
    av_opt_set_int(ost->swr_ctx, "out_sample_rate", enc->sample_rate, 0);
    av_opt_set_sample_fmt(ost->swr_ctx, "out_sample_fmt", enc->sample_fmt, 0);

    /* initialize the resampling context */
    status = swr_init(ost->swr_ctx);
    if (status < 0) {
        OutputDebugString(L"Failed to initialize the resampling context\n");
        exit(1);
    }
}

/*
 * Sends one encoded packet to the muxer.
 *
 * The packet timestamps are converted from *time_base to the stream's time
 * base and the packet is tagged with the stream's index before being written
 * with av_interleaved_write_frame(), whose result is returned.
 */
int RTMPWriter::write_frame(
    AVFormatContext *fmt_ctx,
    const AVRational *time_base,
    AVStream *st,
    AVPacket *pkt)
{
    const AVRational sourceBase = *time_base;

    /* rescale output packet timestamp values from codec to stream timebase */
    av_packet_rescale_ts(pkt, sourceBase, st->time_base);
    pkt->stream_index = st->index;

    OutputDebugString(L"Actually sending video frame: %s\n");

    /* Write the compressed frame to the media file. */
    const int writeResult = av_interleaved_write_frame(fmt_ctx, pkt);
    return writeResult;
}


/*
 * Creates a video AVFrame of the given pixel format and size with its data
 * buffers attached (32-byte alignment requested).
 *
 * Returns NULL when the frame itself cannot be allocated; exits the process
 * when the data buffers cannot be allocated.
 */
AVFrame *RTMPWriter::alloc_picture(
    enum AVPixelFormat pix_fmt,
    int width,
    int height)
{
    AVFrame *pic = av_frame_alloc();
    if (pic == NULL)
        return NULL;

    pic->width = width;
    pic->height = height;
    pic->format = pix_fmt;

    /* allocate the buffers for the frame data */
    const int status = av_frame_get_buffer(pic, 32);
    if (status < 0) {
        fprintf(stderr, "Could not allocate frame data.\n");
        exit(1);
    }

    return pic;
}

/*
 * Opens the video encoder held in ost->enc, allocates the reusable frame in
 * the encoder's pixel format (plus a YUV420P staging frame when the encoder
 * uses another format) and copies the encoder parameters to the stream.
 *
 * opt_arg is copied before being handed to the encoder. The process exits
 * on any failure.
 */
void RTMPWriter::open_video(
    AVFormatContext *oc,
    AVCodec *codec,
    OutputStream *ost,
    AVDictionary *opt_arg)
{
    AVCodecContext *enc = ost->enc;

    /* open the codec with a private copy of the options */
    AVDictionary *encoderOptions = NULL;
    av_dict_copy(&encoderOptions, opt_arg, 0);
    int status = avcodec_open2(enc, codec, &encoderOptions);
    av_dict_free(&encoderOptions);
    if (status < 0) {
        OutputDebugString(L"Could not open video codec: %s\n");
        exit(1);
    }

    /* allocate and init a re-usable frame */
    ost->frame = alloc_picture(enc->pix_fmt, enc->width, enc->height);
    if (ost->frame == NULL) {
        OutputDebugString(L"Could not allocate video frame\n");
        exit(1);
    }

    /* If the output format is not YUV420P, then a temporary YUV420P
     * picture is needed too. It is then converted to the required
     * output format. */
    ost->tmp_frame = NULL;
    const bool needsStagingFrame = (enc->pix_fmt != AV_PIX_FMT_YUV420P);
    if (needsStagingFrame) {
        ost->tmp_frame = alloc_picture(AV_PIX_FMT_YUV420P, enc->width, enc->height);
        if (ost->tmp_frame == NULL) {
            OutputDebugString(L"Could not allocate temporary picture\n");
            exit(1);
        }
    }

    /* copy the stream parameters to the muxer */
    status = avcodec_parameters_from_context(ost->st->codecpar, enc);
    if (status < 0) {
        OutputDebugString(L"Could not copy the stream parameters\n");
        exit(1);
    }
}

/*
 * Releases everything an OutputStream owns: the scaler and resampler
 * contexts, both frames and the encoder context. The oc argument is unused.
 */
void RTMPWriter::close_stream(AVFormatContext *oc, OutputStream *ost)
{
    /* conversion helpers */
    sws_freeContext(ost->sws_ctx);
    swr_free(&ost->swr_ctx);

    /* frame buffers */
    av_frame_free(&ost->tmp_frame);
    av_frame_free(&ost->frame);

    /* encoder */
    avcodec_free_context(&ost->enc);
}

/*
 * Finishes the output and releases every FFmpeg object owned by the writer:
 * writes the trailer, closes both encoders, closes the output I/O context
 * (when the format uses one) and frees the format context.
 *
 * Does nothing when no output context exists. The constructor only logs a
 * failed allocation, so oc may be NULL here and must not be dereferenced.
 */
RTMPWriter::~RTMPWriter()
{
    if (!oc)
        return;

    av_write_trailer(oc);

    /* Close each codec. */
    if (have_video)
        close_stream(oc, &video_st);
    if (have_audio)
        close_stream(oc, &audio_st);

    if (fmt && !(fmt->flags & AVFMT_NOFILE))
        /* Close the output file. */
        avio_closep(&oc->pb);

    /* free the stream */
    avformat_free_context(oc);
    oc = NULL;
}



-
What's wrong with how I save a vector of AVFrames as mp4 video using the h264 encoder ?
8 avril 2023, par noklaI am trying to encode a vector of AVFrames to an MP4 file using the h264 codec.


The code runs without errors, but when I try to open the saved video file with either Windows Media Player or Adobe Media Encoder, it says that the file is in an unsupported format.


I went through it with a debugger and everything seemed to work fine.



This is the function I used to save the video :


void SaveVideo(std::string& output_filename, std::vector<avframe> video)
{
 // Initialize FFmpeg
 avformat_network_init();

 // Open the output file context
 AVFormatContext* format_ctx = nullptr;
 int ret = avformat_alloc_output_context2(&format_ctx, nullptr, nullptr, output_filename.c_str());
 if (ret < 0) {
 wxMessageBox("Error creating output context: ");
 wxMessageBox(av_err2str(ret));
 return;
 }

 // Open the output file
 ret = avio_open(&format_ctx->pb, output_filename.c_str(), AVIO_FLAG_WRITE);
 if (ret < 0) {
 std::cerr << "Error opening output file: " << av_err2str(ret) << std::endl;
 avformat_free_context(format_ctx);
 return;
 }

 // Create the video stream
 const AVCodec* codec = avcodec_find_encoder(AV_CODEC_ID_H264);
 if (!codec) {
 std::cerr << "Error finding H.264 encoder" << std::endl;
 avformat_free_context(format_ctx);
 return;
 }

 AVStream* stream = avformat_new_stream(format_ctx, codec);
 if (!stream) {
 std::cerr << "Error creating output stream" << std::endl;
 avformat_free_context(format_ctx);
 return;
 }

 // Set the stream parameters
 stream->codecpar->codec_id = AV_CODEC_ID_H264;
 stream->codecpar->codec_type = AVMEDIA_TYPE_VIDEO;
 stream->codecpar->width =video.front().width;
 stream->codecpar->height = video.front().height;
 stream->codecpar->format = AV_PIX_FMT_YUV420P;
 stream->codecpar->bit_rate = 400000;
 AVRational framerate = { 1, 30};
 stream->time_base = av_inv_q(framerate);

 // Open the codec context
 AVCodecContext* codec_ctx = avcodec_alloc_context3(codec);
 codec_ctx->codec_tag = 0;
 codec_ctx->time_base = stream->time_base;
 codec_ctx->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
 if (!codec_ctx) {
 std::cout << "Error allocating codec context" << std::endl;
 avformat_free_context(format_ctx);
 return;
 }

 ret = avcodec_parameters_to_context(codec_ctx, stream->codecpar);
 if (ret < 0) {
 std::cout << "Error setting codec context parameters: " << av_err2str(ret) << std::endl;
 avcodec_free_context(&codec_ctx);
 avformat_free_context(format_ctx);
 return;
 }
 AVDictionary* opt = NULL;
 ret = avcodec_open2(codec_ctx, codec, &opt);
 if (ret < 0) {
 wxMessageBox("Error opening codec: ");
 wxMessageBox(av_err2str(ret));
 avcodec_free_context(&codec_ctx);
 avformat_free_context(format_ctx);
 return;
 }

 // Allocate a buffer for the frame data
 AVFrame* frame = av_frame_alloc();
 if (!frame) {
 std::cerr << "Error allocating frame" << std::endl;
 avcodec_free_context(&codec_ctx);
 avformat_free_context(format_ctx);
 return;
 }

 frame->format = codec_ctx->pix_fmt;
 frame->width = codec_ctx->width;
 frame->height = codec_ctx->height;

 ret = av_frame_get_buffer(frame, 0);
 if (ret < 0) {
 std::cerr << "Error allocating frame buffer: " << av_err2str(ret) << std::endl;
 av_frame_free(&frame);
 avcodec_free_context(&codec_ctx);
 avformat_free_context(format_ctx);
 return;
 }

 // Allocate a buffer for the converted frame data
 AVFrame* converted_frame = av_frame_alloc();
 if (!converted_frame) {
 std::cerr << "Error allocating converted frame" << std::endl;
 av_frame_free(&frame);
 avcodec_free_context(&codec_ctx);
 avformat_free_context(format_ctx);
 return;
 }

 converted_frame->format = AV_PIX_FMT_YUV420P;
 converted_frame->width = codec_ctx->width;
 converted_frame->height = codec_ctx->height;

 ret = av_frame_get_buffer(converted_frame, 0);
 if (ret < 0) {
 std::cerr << "Error allocating converted frame buffer: " << av_err2str(ret) << std::endl;
 av_frame_free(&frame);
 av_frame_free(&converted_frame);
 avcodec_free_context(&codec_ctx);
 avformat_free_context(format_ctx);
 return;
 }

 // Initialize the converter
 SwsContext* converter = sws_getContext(
 codec_ctx->width, codec_ctx->height, codec_ctx->pix_fmt,
 codec_ctx->width, codec_ctx->height, AV_PIX_FMT_YUV420P,
 SWS_BICUBIC, nullptr, nullptr, nullptr
 );
 if (!converter) {
 std::cerr << "Error initializing converter" << std::endl;
 av_frame_free(&frame);
 av_frame_free(&converted_frame);
 avcodec_free_context(&codec_ctx);
 avformat_free_context(format_ctx);
 return;
 }

 // Write the header to the output file
 ret = avformat_write_header(format_ctx, nullptr);
 if (ret < 0) {
 std::cerr << "Error writing header to output file: " << av_err2str(ret) << std::endl;
 av_frame_free(&frame);
 av_frame_free(&converted_frame);
 sws_freeContext(converter);
 avcodec_free_context(&codec_ctx);
 avformat_free_context(format_ctx);
 return;
 }

 // Iterate over the frames and write them to the output file
 int frame_count = 0;
 for (auto& frame: video) {
 {
 // Convert the frame to the output format
 sws_scale(converter,
 srcFrame.data, srcFrame.linesize, 0, srcFrame.height,
 converted_frame->data, converted_frame->linesize
 );

 // Set the frame properties
 converted_frame->pts = av_rescale_q(frame_count, stream->time_base, codec_ctx->time_base);
 frame_count++;
 //converted_frame->time_base.den = codec_ctx->time_base.den;
 //converted_frame->time_base.num = codec_ctx->time_base.num;
 // Encode the frame and write it to the output
 ret = avcodec_send_frame(codec_ctx, converted_frame);
 if (ret < 0) {
 std::cerr << "Error sending frame for encoding: " << av_err2str(ret) << std::endl;
 av_frame_free(&frame);
 av_frame_free(&converted_frame);
 sws_freeContext(converter);
 avcodec_free_context(&codec_ctx);
 avformat_free_context(format_ctx);
 return;
 }
 AVPacket* pkt = av_packet_alloc();
 if (!pkt) {
 std::cerr << "Error allocating packet" << std::endl;
 return;
 }
 while (ret >= 0) {
 ret = avcodec_receive_packet(codec_ctx, pkt);
 if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {
 std::string a = av_err2str(ret);
 break;
 }
 else if (ret < 0) {
 wxMessageBox("Error during encoding");
 wxMessageBox(av_err2str(ret));
 av_packet_unref(pkt);
 av_frame_free(&frame);
 av_frame_free(&converted_frame);
 sws_freeContext(converter);
 avcodec_free_context(&codec_ctx);
 avformat_free_context(format_ctx);
 return;
 }

 // Write the packet to the output file
 av_packet_rescale_ts(pkt, codec_ctx->time_base, stream->time_base);
 pkt->stream_index = stream->index;
 ret = av_interleaved_write_frame(format_ctx, pkt);
 av_packet_unref(pkt);
 if (ret < 0) {
 std::cerr << "Error writing packet to output file: " << av_err2str(ret) << std::endl;
 av_frame_free(&frame);
 av_frame_free(&converted_frame);
 sws_freeContext(converter);
 avcodec_free_context(&codec_ctx);
 avformat_free_context(format_ctx);
 return;
 }
 }
 }
 }

 // Flush the encoder
 ret = avcodec_send_frame(codec_ctx, nullptr);
 if (ret < 0) {
 std::cerr << "Error flushing encoder: " << av_err2str(ret) << std::endl;
 av_frame_free(&frame);
 av_frame_free(&converted_frame);
 sws_freeContext(converter);
 avcodec_free_context(&codec_ctx);
 avformat_free_context(format_ctx);
 return;
 }

 while (ret >= 0) {
 AVPacket* pkt = av_packet_alloc();
 if (!pkt) {
 std::cerr << "Error allocating packet" << std::endl;
 return;
 }
 ret = avcodec_receive_packet(codec_ctx, pkt);
 if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {
 wxMessageBox("Error recieving packet");
 wxMessageBox(av_err2str(ret));
 break;
 }
 else if (ret < 0) {
 std::cerr << "Error during encoding: " << av_err2str(ret) << std::endl;
 av_packet_unref(pkt);
 av_frame_free(&frame);
 av_frame_free(&converted_frame);
 sws_freeContext(converter);
 avcodec_free_context(&codec_ctx);
 avformat_free_context(format_ctx);
 return;
 }

 // Write the packet to the output file
 av_packet_rescale_ts(pkt, codec_ctx->time_base, stream->time_base);
 pkt->stream_index = stream->index;
 ret = av_interleaved_write_frame(format_ctx, pkt);
 av_packet_unref(pkt);
 if (ret < 0) {
 std::cerr << "Error writing packet to output file: " << av_err2str(ret) << std::endl;
 av_frame_free(&frame);
 av_frame_free(&converted_frame);
 sws_freeContext(converter);
 avcodec_free_context(&codec_ctx);
 avformat_free_context(format_ctx);
 return;
 }
 }

 // Write the trailer to the output file
 ret = av_write_trailer(format_ctx);
 if (ret < 0) {
 std::cerr << "Error writing trailer to output file: " << av_err2str(ret) << std::endl;
 }

 // Free all resources
 av_frame_free(&frame);
 av_frame_free(&converted_frame);
 sws_freeContext(converter);
 avcodec_free_context(&codec_ctx);
 avformat_free_context(format_ctx);
}

</avframe>


** I know it is not the prettiest way to write this code, I just wanted to try and do something like that.


** This is an altered version of the function, as the original one was inside a class. I changed it so you could compile it, but it might have some errors if I forgot to change something.


Any help would be appreciated.


-
Getting shifted timestamps when encoding a fragmented h264 mp4 with ffmpeg
14 septembre 2022, par Martin CastinI am trying to encode a fragmented h264 mp4 with ffmpeg. I tried the following command :


ffmpeg -i input.mp4 -movflags +frag_keyframe+separate_moof+omit_tfhd_offset+empty_moov output.mp4



It does give me a fragmented mp4 but the timestamps of the frames seem to be shifted by 0.04s when I read the video with mpv. The first frame has a timestamp of 0.04s instead of 0s, as in the input video (1920x1080, 50 fps). I encountered the problem both with ffmpeg 5.1 and ffmpeg 3.4.11.


I tried to add several flags, such as -avoid_negative_ts make_zero or -copyts -output_ts_offset -0.04, but it did not help.

I am also trying to achieve this using the ffmpeg libav libraries in C++, but did not get a better result. Here are the code fragments I used.


avformat_alloc_output_context2(&oc, NULL, NULL, filename);

 if (oc_->oformat->flags & AVFMT_GLOBALHEADER) {
 codecCtx_->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
 }
...
 AVDictionary* opts = NULL;

 av_dict_set(&opts, "movflags", "frag_keyframe+separate_moof+omit_tfhd_offset+empty_moov", 0);

 ret = avformat_write_header(oc_, &opts);



Do you know how to avoid this behaviour of shifted timestamps for fragmented mp4, either with ffmpeg or libav ?


Edit : example videos and complete code example


I also tried with the following ffmpeg build


ffmpeg version 5.0.1-static https://johnvansickle.com/ffmpeg/ Copyright (c) 2000-2022 the FFmpeg developers
built with gcc 8 (Debian 8.3.0-6)
configuration: --enable-gpl --enable-version3 --enable-static --disable-debug --disable-ffplay --disable-indev=sndio --disable-outdev=sndio --cc=gcc --enable-fontconfig --enable-frei0r --enable-gnutls --enable-gmp --enable-libgme --enable-gray --enable-libaom --enable-libfribidi --enable-libass --enable-libvmaf --enable-libfreetype --enable-libmp3lame --enable-libopencore-amrnb --enable-libopencore-amrwb --enable-libopenjpeg --enable-librubberband --enable-libsoxr --enable-libspeex --enable-libsrt --enable-libvorbis --enable-libopus --enable-libtheora --enable-libvidstab --enable-libvo-amrwbenc --enable-libvpx --enable-libwebp --enable-libx264 --enable-libx265 --enable-libxml2 --enable-libdav1d --enable-libxvid --enable-libzvbi --enable-libzimg
libavutil 57. 17.100 / 57. 17.100
libavcodec 59. 18.100 / 59. 18.100
libavformat 59. 16.100 / 59. 16.100
libavdevice 59. 4.100 / 59. 4.100
libavfilter 8. 24.100 / 8. 24.100
libswscale 6. 4.100 / 6. 4.100
libswresample 4. 3.100 / 4. 3.100
libpostproc 56. 3.100 / 56. 3.100



and with the sintel trailer as input video, which is 24fps, and I thus get a timeshift of 83ms. Here is the output I get.


Here is a complete code example, slightly adapted from the
muxing.c
ffmpeg example (audio removed and adapted for c++). This code shows exactly the same problem.

You can just comment the line 383 (that is calling
av_dict_set
) to switch back to a not fragmented mp4 that will not have the timestamp shift.

/*
 * Copyright (c) 2003 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

/**
 * @file
 * libavformat API example.
 *
 * Output a media file in any supported libavformat format. The default
 * codecs are used.
 * @example muxing.c
 */

#include <cstdlib>
#include <cstdio>
#include <cstring>
#include <cmath>

extern "C"
{
#define __STDC_CONSTANT_MACROS
#include <libavutil></libavutil>avassert.h>
#include <libavutil></libavutil>channel_layout.h>
#include <libavutil></libavutil>opt.h>
#include <libavutil></libavutil>mathematics.h>
#include <libavutil></libavutil>timestamp.h>
#include <libavcodec></libavcodec>avcodec.h>
#include <libavformat></libavformat>avformat.h>
#include <libswscale></libswscale>swscale.h>
#include <libswresample></libswresample>swresample.h>
}

/* Not referenced in the visible part of the file; presumably the length of
 * the generated stream in seconds -- TODO confirm against its use. */
#define STREAM_DURATION 10.0
#define STREAM_FRAME_RATE 25 /* 25 images/s */
#define STREAM_PIX_FMT AV_PIX_FMT_YUV420P /* default pix_fmt */

/* Scaler flag; its use is outside the visible part of the file. */
#define SCALE_FLAGS SWS_BICUBIC

/*
 * State kept for one output AVStream: the stream itself, its encoder and the
 * scratch objects used while producing and encoding frames.
 * Member order is significant for aggregate initialisation.
 */
struct OutputStream {
    AVStream *st;               /* stream inside the output format context */
    AVCodecContext *enc;        /* encoder feeding that stream */

    int64_t next_pts;           /* pts of the next frame that will be generated */
    int samples_count;

    AVFrame *frame;             /* reusable frame */
    AVFrame *tmp_frame;         /* second reusable frame */

    AVPacket *tmp_pkt;          /* reusable packet, allocated in add_stream() */

    float t, tincr, tincr2;

    struct SwsContext *sws_ctx; /* software scaler context */
    struct SwrContext *swr_ctx; /* resampler context */
};

static void log_packet(const AVFormatContext *fmt_ctx, const AVPacket *pkt)
{
 AVRational *time_base = &fmt_ctx->streams[pkt->stream_index]->time_base;

// printf("pts:%s pts_time:%s dts:%s dts_time:%s duration:%s duration_time:%s stream_index:%d\n",
// av_ts2str(pkt->pts), av_ts2timestr(pkt->pts, time_base),
// av_ts2str(pkt->dts), av_ts2timestr(pkt->dts, time_base),
// av_ts2str(pkt->duration), av_ts2timestr(pkt->duration, time_base),
// pkt->stream_index);
}

/*
 * Submits one frame to encoder c and writes every packet the encoder then
 * has ready to stream st of fmt_ctx, reusing pkt as scratch storage.
 *
 * Returns 1 when the encoder reported end of stream, 0 otherwise.
 * The process exits on any encoder or muxer error.
 */
static int write_frame(AVFormatContext *fmt_ctx, AVCodecContext *c,
                       AVStream *st, AVFrame *frame, AVPacket *pkt)
{
    // send the frame to the encoder
    if (avcodec_send_frame(c, frame) < 0) {
        fprintf(stderr, "Error sending a frame to the encoder");
        exit(1);
    }

    int status;
    for (;;) {
        status = avcodec_receive_packet(c, pkt);

        /* Either the encoder wants more input or it is fully drained. */
        const bool nothingMore = (status == AVERROR(EAGAIN)) || (status == AVERROR_EOF);
        if (nothingMore)
            break;

        if (status < 0) {
            fprintf(stderr, "Error encoding a frame\n");
            exit(1);
        }

        /* rescale output packet timestamp values from codec to stream timebase */
        av_packet_rescale_ts(pkt, c->time_base, st->time_base);
        pkt->stream_index = st->index;

        /* Write the compressed frame to the media file. */
        log_packet(fmt_ctx, pkt);
        status = av_interleaved_write_frame(fmt_ctx, pkt);
        /* pkt is now blank (av_interleaved_write_frame() takes ownership of
         * its contents and resets pkt), so that no unreferencing is necessary.
         * This would be different if one used av_write_frame(). */
        if (status < 0) {
            fprintf(stderr, "Error while writing output packet\n");
            exit(1);
        }
    }

    return status == AVERROR_EOF ? 1 : 0;
}

/*
 * Adds an output stream to oc and prepares (but does not open) its encoder.
 *
 * ost       receives the scratch packet (tmp_pkt), the new AVStream (st) and
 *           the encoder context (enc)
 * oc        output format context the stream is added to
 * codec     out: the encoder found for codec_id
 * codec_id  codec to encode with
 *
 * Video encoders are configured for 400 kbit/s, 352x288, STREAM_FRAME_RATE
 * and STREAM_PIX_FMT; other media types get no type-specific settings here.
 * The process exits if any allocation or lookup fails.
 */
static void add_stream(OutputStream *ost, AVFormatContext *oc,
                       const AVCodec **codec,
                       enum AVCodecID codec_id)
{
    AVCodecContext *c;

    /* find the encoder */
    *codec = avcodec_find_encoder(codec_id);
    if (!(*codec)) {
        fprintf(stderr, "Could not find encoder for '%s'\n",
                avcodec_get_name(codec_id));
        exit(1);
    }

    ost->tmp_pkt = av_packet_alloc();
    if (!ost->tmp_pkt) {
        fprintf(stderr, "Could not allocate AVPacket\n");
        exit(1);
    }

    ost->st = avformat_new_stream(oc, NULL);
    if (!ost->st) {
        fprintf(stderr, "Could not allocate stream\n");
        exit(1);
    }
    ost->st->id = oc->nb_streams-1;
    c = avcodec_alloc_context3(*codec);
    if (!c) {
        fprintf(stderr, "Could not alloc an encoding context\n");
        exit(1);
    }
    ost->enc = c;

    switch ((*codec)->type) {
    case AVMEDIA_TYPE_VIDEO:
        c->codec_id = codec_id;

        c->bit_rate = 400000;
        /* Resolution must be a multiple of two. */
        c->width    = 352;
        c->height   = 288;
        /* timebase: This is the fundamental unit of time (in seconds) in terms
         * of which frame timestamps are represented. For fixed-fps content,
         * timebase should be 1/framerate and timestamp increments should be
         * identical to 1. */
        /* Brace-initialised: the C compound literal form is not standard C++. */
        ost->st->time_base = AVRational{ 1, STREAM_FRAME_RATE };
        c->time_base       = ost->st->time_base;

        c->gop_size      = 12; /* emit one intra frame every twelve frames at most */
        c->pix_fmt       = STREAM_PIX_FMT;
        if (c->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
            /* just for testing, we also add B-frames */
            c->max_b_frames = 2;
        }
        if (c->codec_id == AV_CODEC_ID_MPEG1VIDEO) {
            /* Needed to avoid using macroblocks in which some coeffs overflow.
             * This does not happen with normal video, it just happens here as
             * the motion of the chroma plane does not match the luma plane. */
            c->mb_decision = 2;
        }
        break;

    default:
        break;
    }

    /* Some formats want stream headers to be separate. */
    if (oc->oformat->flags & AVFMT_GLOBALHEADER)
        c->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
}

/**************************************************************/
/* video output */

/*
 * Allocate a video frame of the given pixel format and size, including its
 * data buffers.
 *
 * Returns the new frame, or NULL when the AVFrame itself cannot be allocated.
 * A failure to allocate the data buffers terminates the process with exit(1).
 */
static AVFrame *alloc_picture(enum AVPixelFormat pix_fmt, int width, int height)
{
    AVFrame *frame = av_frame_alloc();

    if (frame == NULL)
        return NULL;

    frame->format = pix_fmt;
    frame->width  = width;
    frame->height = height;

    /* Attach data buffers to the frame (alignment argument 0). */
    if (av_frame_get_buffer(frame, 0) < 0) {
        fprintf(stderr, "Could not allocate frame data.\n");
        exit(1);
    }

    return frame;
}

/*
 * Open the video encoder of a stream and allocate the frames it works with.
 *
 * oc       output context (not used by this function)
 * codec    encoder to open
 * ost      stream state; frame and tmp_frame are (re)initialised here
 * opt_arg  options copied and passed to avcodec_open2(); the caller's
 *          dictionary itself is left untouched
 *
 * Every failure terminates the process with exit(1).
 */
static void open_video(AVFormatContext *oc, const AVCodec *codec,
                       OutputStream *ost, AVDictionary *opt_arg)
{
    AVCodecContext *enc = ost->enc;
    AVDictionary *codec_opts = NULL;
    int err;

    /* Work on a private copy of the options while opening the codec. */
    av_dict_copy(&codec_opts, opt_arg, 0);
    err = avcodec_open2(enc, codec, &codec_opts);
    av_dict_free(&codec_opts);
    if (err < 0) {
        fprintf(stderr, "Could not open video codec\n");
        exit(1);
    }

    /* Reusable frame in the encoder's own pixel format. */
    ost->frame = alloc_picture(enc->pix_fmt, enc->width, enc->height);
    if (ost->frame == NULL) {
        fprintf(stderr, "Could not allocate video frame\n");
        exit(1);
    }

    /* The synthetic picture is generated as YUV420P; an encoder using any
     * other pixel format gets an extra YUV420P staging frame that is
     * converted into ost->frame later. */
    if (enc->pix_fmt == AV_PIX_FMT_YUV420P) {
        ost->tmp_frame = NULL;
    } else {
        ost->tmp_frame = alloc_picture(AV_PIX_FMT_YUV420P, enc->width, enc->height);
        if (ost->tmp_frame == NULL) {
            fprintf(stderr, "Could not allocate temporary picture\n");
            exit(1);
        }
    }

    /* Hand the encoder parameters over to the muxer's stream. */
    if (avcodec_parameters_from_context(ost->st->codecpar, enc) < 0) {
        fprintf(stderr, "Could not copy the stream parameters\n");
        exit(1);
    }
}

/*
 * Paint a synthetic test pattern into a frame with three planes, the last
 * two at half resolution in both directions. The pattern shifts with
 * frame_index so that consecutive frames differ.
 */
static void fill_yuv_image(AVFrame *pict, int frame_index,
                           int width, int height)
{
    const int chroma_w = width / 2;
    const int chroma_h = height / 2;
    int row, col;

    /* Y plane: full resolution */
    for (row = 0; row < height; row++) {
        uint8_t *luma = pict->data[0] + row * pict->linesize[0];

        for (col = 0; col < width; col++)
            luma[col] = col + row + frame_index * 3;
    }

    /* Cb and Cr planes: half resolution */
    for (row = 0; row < chroma_h; row++) {
        uint8_t *cb = pict->data[1] + row * pict->linesize[1];
        uint8_t *cr = pict->data[2] + row * pict->linesize[2];

        for (col = 0; col < chroma_w; col++) {
            cb[col] = 128 + row + frame_index * 2;
            cr[col] = 64 + col + frame_index * 5;
        }
    }
}

/*
 * Produce the next synthetic video frame for a stream.
 *
 * Returns ost->frame filled with the next picture and stamped with the next
 * pts, or NULL once next_pts has passed STREAM_DURATION seconds. Failing to
 * make the frame writable or to create the conversion context terminates
 * the process with exit(1).
 */
static AVFrame *get_video_frame(OutputStream *ost)
{
    AVCodecContext *enc = ost->enc;

    /* Stop generating once the requested duration has been exceeded. */
    const int past_end = av_compare_ts(ost->next_pts, enc->time_base,
                                       STREAM_DURATION, (AVRational){ 1, 1 }) > 0;
    if (past_end)
        return NULL;

    /* The encoder may still hold a reference to the frame from an earlier
     * call; get a writable buffer before drawing into it. */
    if (av_frame_make_writable(ost->frame) < 0)
        exit(1);

    if (enc->pix_fmt == AV_PIX_FMT_YUV420P) {
        /* The encoder takes YUV420P directly: draw straight into the frame. */
        fill_yuv_image(ost->frame, ost->next_pts, enc->width, enc->height);
    } else {
        /* The picture is only generated as YUV420P, so draw into the staging
         * frame and convert to the encoder's pixel format. The conversion
         * context is created on first use. */
        if (ost->sws_ctx == NULL) {
            ost->sws_ctx = sws_getContext(enc->width, enc->height,
                                          AV_PIX_FMT_YUV420P,
                                          enc->width, enc->height,
                                          enc->pix_fmt,
                                          SCALE_FLAGS, NULL, NULL, NULL);
            if (ost->sws_ctx == NULL) {
                fprintf(stderr,
                        "Could not initialize the conversion context\n");
                exit(1);
            }
        }

        fill_yuv_image(ost->tmp_frame, ost->next_pts, enc->width, enc->height);
        sws_scale(ost->sws_ctx,
                  (const uint8_t * const *) ost->tmp_frame->data,
                  ost->tmp_frame->linesize, 0, enc->height,
                  ost->frame->data, ost->frame->linesize);
    }

    /* Stamp the frame, then advance the counter for the next call. */
    ost->frame->pts = ost->next_pts;
    ost->next_pts++;

    return ost->frame;
}

/*
 * Generate the next video frame, encode it and pass the result to the muxer.
 * When get_video_frame() has no more frames it returns NULL, and that NULL
 * is forwarded to write_frame() as the frame argument.
 *
 * Returns 1 when encoding is finished, 0 otherwise.
 */
static int write_video_frame(AVFormatContext *oc, OutputStream *ost)
{
    AVFrame *next = get_video_frame(ost);

    return write_frame(oc, ost->enc, ost->st, next, ost->tmp_pkt);
}

/*
 * Release everything an OutputStream owns: encoder context, both frames,
 * the scratch packet and the scaler/resampler contexts.
 *
 * oc   output context (not used by this function)
 * ost  stream state to tear down; all released pointers are left NULL
 */
static void close_stream(AVFormatContext *oc, OutputStream *ost)
{
    (void)oc;

    avcodec_free_context(&ost->enc);
    av_frame_free(&ost->frame);
    av_frame_free(&ost->tmp_frame);
    av_packet_free(&ost->tmp_pkt);

    /* sws_freeContext() takes the pointer by value, so unlike the other
     * *_free(&ptr) calls here it cannot reset it; clear it by hand so no
     * dangling pointer is left behind. */
    sws_freeContext(ost->sws_ctx);
    ost->sws_ctx = NULL;

    swr_free(&ost->swr_ctx);
}

/**************************************************************/
/* media file output */

int main(int argc, char **argv)
{
 OutputStream video_st = { 0 }, audio_st = { 0 };
 const AVOutputFormat *fmt;
 const char *filename;
 AVFormatContext *oc;
 const AVCodec *audio_codec, *video_codec;
 int ret;
 int have_video = 0, have_audio = 0;
 int encode_video = 0, encode_audio = 0;
 AVDictionary *opt = NULL;
 int i;

 if (argc < 2) {
 printf("usage: %s output_file\n"
 "API example program to output a media file with libavformat.\n"
 "This program generates a synthetic audio and video stream, encodes and\n"
 "muxes them into a file named output_file.\n"
 "The output format is automatically guessed according to the file extension.\n"
 "Raw images can also be output by using '%%d' in the filename.\n"
 "\n", argv[0]);
 return 1;
 }

 filename = argv[1];

 av_dict_set(&opt, "movflags", "frag_keyframe+separate_moof+omit_tfhd_offset+empty_moov", 0);

 /* allocate the output media context */
 avformat_alloc_output_context2(&oc, NULL, NULL, filename);
 if (!oc) {
 printf("Could not deduce output format from file extension: using MPEG.\n");
 avformat_alloc_output_context2(&oc, NULL, "mpeg", filename);
 }
 if (!oc)
 return 1;

 fmt = oc->oformat;

 /* Add the audio and video streams using the default format codecs
 * and initialize the codecs. */
 if (fmt->video_codec != AV_CODEC_ID_NONE) {
 add_stream(&video_st, oc, &video_codec, fmt->video_codec);
 have_video = 1;
 encode_video = 1;
 }

 /* Now that all the parameters are set, we can open the audio and
 * video codecs and allocate the necessary encode buffers. */
 if (have_video)
 open_video(oc, video_codec, &video_st, opt);


 av_dump_format(oc, 0, filename, 1);

 /* open the output file, if needed */
 if (!(fmt->flags & AVFMT_NOFILE)) {
 ret = avio_open(&oc->pb, filename, AVIO_FLAG_WRITE);
 if (ret < 0) {
 fprintf(stderr, "Could not open '%s'\n", filename);
 return 1;
 }
 }

 /* Write the stream header, if any. */
 ret = avformat_write_header(oc, &opt);
 if (ret < 0) {
 fprintf(stderr, "Error occurred when opening output file\n");
 return 1;
 }

 while (encode_video || encode_audio) {
 /* select the stream to encode */
 if (encode_video &&
 (!encode_audio || av_compare_ts(video_st.next_pts, video_st.enc->time_base,
 audio_st.next_pts, audio_st.enc->time_base) <= 0)) {
 encode_video = !write_video_frame(oc, &video_st);
 }
 }

 av_write_trailer(oc);

 /* Close each codec. */
 if (have_video)
 close_stream(oc, &video_st);
 if (have_audio)
 close_stream(oc, &audio_st);

 if (!(fmt->flags & AVFMT_NOFILE))
 /* Close the output file. */
 avio_closep(&oc->pb);

 /* free the stream */
 avformat_free_context(oc);

 return 0;
}