Newest 'ffmpeg' Questions - Stack Overflow
Posts published on the site
-
How to write H264 raw stream into mp4 using ffmpeg directly
28 March, by Yelsin
I want to wrap H.264 NALUs (x264-encoded) into mp4 using ffmpeg (SDK 2.1), but the output mp4 file will not play. I don't know how to set the pts and dts. Here is my code, based on "Raw H264 frames in mpegts container using libavcodec" and on muxing.c from www.ffmpeg.org. My H.264 stream has no B-frames; every NALU starts with 00 00 00 01, and the stream begins with SPS/PPS followed by the H.264 data.
#include "stdafx.h"
#include "Stream2Mp4.h"
#include <libavutil/opt.h>
#include <libavutil/mathematics.h>
#include <libavutil/timestamp.h>
#include <libavformat/avformat.h>
#include <libswresample/swresample.h>
#include <libswresample/swresample.h>

#define STREAM_FRAME_RATE 25
#define STREAM_PIX_FMT    AV_PIX_FMT_YUV420P /* default pix_fmt */

static int ptsInc = 0;
static int vi = -1;
static int waitkey = 1;

// < 0 = error
//   0 = I-Frame
//   1 = P-Frame
//   2 = B-Frame
//   3 = S-Frame
int getVopType( const void *p, int len )
{
    if ( !p || 6 >= len )
        return -1;

    unsigned char *b = (unsigned char*)p;

    // Verify NAL marker
    if ( b[ 0 ] || b[ 1 ] || 0x01 != b[ 2 ] )
    {
        b++;
        if ( b[ 0 ] || b[ 1 ] || 0x01 != b[ 2 ] )
            return -1;
    } // end if

    b += 3;

    // Verify VOP id
    if ( 0xb6 == *b )
    {
        b++;
        return ( *b & 0xc0 ) >> 6;
    } // end if

    switch( *b )
    {
        case 0x65 : return 0;
        case 0x61 : return 1;
        case 0x01 : return 2;
    } // end switch

    return -1;
}

int get_nal_type( void *p, int len )
{
    if ( !p || 5 >= len )
        return -1;

    unsigned char *b = (unsigned char*)p;

    // Verify NAL marker
    if ( b[ 0 ] || b[ 1 ] || 0x01 != b[ 2 ] )
    {
        b++;
        if ( b[ 0 ] || b[ 1 ] || 0x01 != b[ 2 ] )
            return -1;
    } // end if

    b += 3;
    return *b;
}

/* Add an output stream */
AVStream *add_stream(AVFormatContext *oc, AVCodec **codec, enum AVCodecID codec_id)
{
    AVCodecContext *c;
    AVStream *st;

    /* find the encoder */
    *codec = avcodec_find_encoder(codec_id);
    if (!*codec) {
        printf("could not find encoder for '%s' \n", avcodec_get_name(codec_id));
        exit(1);
    }

    st = avformat_new_stream(oc, *codec);
    if (!st) {
        printf("could not allocate stream \n");
        exit(1);
    }
    st->id = oc->nb_streams-1;
    c = st->codec;
    vi = st->index;

    switch ((*codec)->type) {
    case AVMEDIA_TYPE_AUDIO:
        c->sample_fmt  = (*codec)->sample_fmts ? (*codec)->sample_fmts[0] : AV_SAMPLE_FMT_FLTP;
        c->bit_rate    = 64000;
        c->sample_rate = 44100;
        c->channels    = 2;
        break;

    case AVMEDIA_TYPE_VIDEO:
        c->codec_id = codec_id;
        c->bit_rate = 90000;
        c->width    = 480;
        c->height   = 354;
        c->time_base.den = 15;
        c->time_base.num = 1;
        c->gop_size = 12;
        c->pix_fmt  = STREAM_PIX_FMT;
        if (c->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
            c->max_b_frames = 2;
        }
        if (c->codec_id == AV_CODEC_ID_MPEG1VIDEO) {
            c->mb_decision = 2;
        }
        break;

    default:
        break;
    }

    if (oc->oformat->flags & AVFMT_GLOBALHEADER) {
        c->flags |= CODEC_FLAG_GLOBAL_HEADER;
    }

    return st;
}

void open_video(AVFormatContext *oc, AVCodec *codec, AVStream *st)
{
    int ret;
    AVCodecContext *c = st->codec;

    /* open the codec */
    ret = avcodec_open2(c, codec, NULL);
    if (ret < 0) {
        printf("could not open video codec");
        //exit(1);
    }
}

int CreateMp4(AVFormatContext *&m_pOc, void *p, int len)
{
    int ret;
    const char* pszFileName = "output002.mp4";
    AVOutputFormat *fmt;
    AVCodec *video_codec;
    AVStream *m_pVideoSt;

    if (0x67 != get_nal_type(p, len)) {
        printf("can not detect nal type");
        return -1;
    }

    av_register_all();

    avformat_alloc_output_context2(&m_pOc, NULL, NULL, pszFileName);
    if (!m_pOc) {
        printf("Could not deduce output format from file extension: using MPEG. \n");
        avformat_alloc_output_context2(&m_pOc, NULL, "mpeg", pszFileName);
    }
    if (!m_pOc) {
        return 1;
    }

    fmt = m_pOc->oformat;
    if (fmt->video_codec != AV_CODEC_ID_NONE) {
        m_pVideoSt = add_stream(m_pOc, &video_codec, fmt->video_codec);
    }
    if (m_pVideoSt) {
        open_video(m_pOc, video_codec, m_pVideoSt);
    }

    av_dump_format(m_pOc, 0, pszFileName, 1);

    /* open the output file, if needed */
    if (!(fmt->flags & AVFMT_NOFILE)) {
        ret = avio_open(&m_pOc->pb, pszFileName, AVIO_FLAG_WRITE);
        if (ret < 0) {
            printf("could not open '%s': %s\n", pszFileName);
            return 1;
        }
    }

    /* Write the stream header, if any */
    ret = avformat_write_header(m_pOc, NULL);
    if (ret < 0) {
        printf("Error occurred when opening output file");
        return 1;
    }
}

/* write h264 data to mp4 file */
void WriteVideo(AVFormatContext *&m_pOc, void* data, int nLen)
{
    int ret;
    if ( 0 > vi ) {
        printf("vi less than 0");
        //return -1;
    }
    AVStream *pst = m_pOc->streams[ vi ];

    // Init packet
    AVPacket pkt;
    AVCodecContext *c = pst->codec;
    av_init_packet( &pkt );
    pkt.flags |= ( 0 >= getVopType( data, nLen ) ) ? AV_PKT_FLAG_KEY : 0;
    pkt.stream_index = pst->index;
    pkt.data = (uint8_t*)data;
    pkt.size = nLen;

    // Wait for key frame
    if ( waitkey )
        if ( 0 == ( pkt.flags & AV_PKT_FLAG_KEY ) )
            return;
        else
            waitkey = 0;

    pkt.pts = (ptsInc++) * (90000/STREAM_FRAME_RATE);
    //pkt.dts = (ptsInc++) * (90000/STREAM_FRAME_RATE);

    ret = av_interleaved_write_frame( m_pOc, &pkt );
    if (ret < 0) {
        printf("cannot write frame");
    }
}

void CloseMp4(AVFormatContext *&m_pOc)
{
    waitkey = -1;
    vi = -1;

    if (m_pOc)
        av_write_trailer(m_pOc);

    if (m_pOc && !(m_pOc->oformat->flags & AVFMT_NOFILE))
        avio_close(m_pOc->pb);

    if (m_pOc) {
        avformat_free_context(m_pOc);
        m_pOc = NULL;
    }
}

Could anybody help me? Thank you very much!
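If the underlying goal is only to get a playable mp4 from an existing Annex B H.264 elementary stream, the ffmpeg command-line tool can do the remux without re-encoding and without touching the C API, which sidesteps the pts/dts bookkeeping entirely. A minimal sketch driving it from Python; the file names and the 25 fps value are assumptions to adjust:

import subprocess

# Remux a raw Annex B H.264 elementary stream into mp4 without re-encoding.
# -framerate tells the raw h264 demuxer how to generate timestamps, because
# the elementary stream carries none; 25 matches STREAM_FRAME_RATE above.
subprocess.run(
    ['ffmpeg', '-y', '-framerate', '25', '-i', 'input.h264',
     '-c', 'copy', 'output.mp4'],
    check=True)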
-
torchaudio.list_audio_backends() returns an empty list, even though ffmpeg is installed as a system library [closed]
28 March, by Alberto Agudo Dominguez
I installed torchaudio 2.5.1 and a system-wide install of ffmpeg on Windows, and I get:
PS C:\Users\> ffmpeg -version
ffmpeg version 2025-01-05-git-19c95ecbff-essentials_build-www.gyan.dev Copyright (c) 2000-2025 the FFmpeg developers
built with gcc 14.2.0 (Rev1, Built by MSYS2 project)
configuration: --enable-gpl --enable-version3 --enable-static --disable-w32threads --disable-autodetect --enable-fontconfig --enable-iconv --enable-gnutls --enable-libxml2 --enable-gmp --enable-bzlib --enable-lzma --enable-zlib --enable-libsrt --enable-libssh --enable-libzmq --enable-avisynth --enable-sdl2 --enable-libwebp --enable-libx264 --enable-libx265 --enable-libxvid --enable-libaom --enable-libopenjpeg --enable-libvpx --enable-mediafoundation --enable-libass --enable-libfreetype --enable-libfribidi --enable-libharfbuzz --enable-libvidstab --enable-libvmaf --enable-libzimg --enable-amf --enable-cuda-llvm --enable-cuvid --enable-dxva2 --enable-d3d11va --enable-d3d12va --enable-ffnvcodec --enable-libvpl --enable-nvdec --enable-nvenc --enable-vaapi --enable-libgme --enable-libopenmpt --enable-libopencore-amrwb --enable-libmp3lame --enable-libtheora --enable-libvo-amrwbenc --enable-libgsm --enable-libopencore-amrnb --enable-libopus --enable-libspeex --enable-libvorbis --enable-librubberband
libavutil      59. 54.101 / 59. 54.101
libavcodec     61. 31.100 / 61. 31.100
libavfilter    10.  6.101 / 10.  6.101
libswresample   5.  4.100 /  5.  4.100
libpostproc    58.  4.100 / 58.  4.100
PS C:\Users\> python
Python 3.12.5 (tags/v3.12.5:ff3bc82, Aug 6 2024, 20:45:27) [MSC v.1940 64 bit (AMD64)] on win32
Type "help", "copyright", "credits" or "license" for more information.
>>> import torchaudio
>>> torchaudio.list_audio_backends()
[]
So ffmpeg is on the PATH and is recognized by the console, but not by torchaudio.
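One thing worth checking (the sketch below is only a diagnostic, not a fix): torchaudio's ffmpeg backend loads the FFmpeg shared libraries (the avutil/avcodec/avformat/... DLLs) at runtime, so having ffmpeg.exe on PATH is not enough by itself, and the static "essentials" build does not ship those DLLs. Recent torchaudio releases also only support specific FFmpeg major versions, so the compatibility notes for the installed release are worth consulting. The DLL names below are assumptions for FFmpeg 6.x and 7.x builds; the soundfile backend needs only the soundfile package.

import shutil
from ctypes.util import find_library

# Having ffmpeg.exe on PATH is separate from having the shared libraries
# that torchaudio's ffmpeg backend loads at import time.
print("ffmpeg executable:", shutil.which("ffmpeg"))

# Look for the FFmpeg DLLs on PATH. The names are per major version, e.g.
# avutil-58/avcodec-60 for FFmpeg 6.x and avutil-59/avcodec-61 for FFmpeg 7.x.
for dll in ("avutil-58", "avcodec-60", "avutil-59", "avcodec-61"):
    print(dll, "->", find_library(dll))

# The soundfile backend has no FFmpeg dependency at all:
#   pip install soundfile
import torchaudio
print(torchaudio.list_audio_backends())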
-
How to initialize a video decoder with an already decoded frame?
27 March, by Ragdoll Car
Let's say I have an FFmpeg H.264 video decoder (the stream is x264-encoded), initialized with some parameters; I am using C++. Under normal conditions, we push an encoded I-frame into the decoder and then the encoded P-frames that reference it.
I have a special case where my I-frame is already decoded. So I want to:
- push the already decoded I-frame into my decoder
- push the referencing encoded P-frames into my decoder
How can I initialize the decoder state with an already decoded I-frame? Currently, to bypass this limitation, I have to:
- create a new temporary encoder
- encode the already decoded I-frame
- decode the encoded I-frame
- and only then push the referencing encoded P-frames
-
Audio delay after resuming FFmpeg on Windows
27 March, by Iman Sajadpur
I'm building screen-recording software for Windows using Python. I use FFmpeg for recording and psutil to pause and resume the process.
Here is a sample of my code:
import psutil
import subprocess

process = subprocess.Popen([
    'ffmpeg', '-y',
    '-rtbufsize', '100M',
    '-f', 'gdigrab',
    '-thread_queue_size', '1024',
    '-probesize', '50M',
    '-r', '24',
    '-draw_mouse', '1',
    '-video_size', '1920x1080',
    '-i', 'desktop',
    '-f', 'dshow',
    '-channel_layout', 'stereo',
    '-thread_queue_size', '1024',
    '-i', 'audio=Microphone (2- High Definition Audio Device)',  # my audio device
    '-c:v', 'h264_nvenc',  # encoding via Nvidia
    '-r', '24',
    '-preset', 'p1',
    '-pix_fmt', 'yuv444p',
    '-fps_mode', 'cfr',
    '-c:a', 'aac',
    '-ac', '2',
    '-b:a', '128k',
    'output.mp4'])

ffmpeg_proc = psutil.Process(process.pid)

# pause
ffmpeg_proc.suspend()

# resume
ffmpeg_proc.resume()
The issue is that after resuming, the audio becomes choppy and delayed, while the video continues smoothly.
I have tried the following flags, but they didn't solve the issue:
-analyzeduration -fflags +genpts -async -use_wallclock_as_timestamps -af aresample=async=1
How can I properly pause and resume FFmpeg without causing audio delay? Is there any other method to handle this properly?
Thanks for any suggestions.
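One workaround that avoids suspending the process at all, assuming short gaps between segments are acceptable: stop the current recording cleanly (ffmpeg quits when it reads 'q' on its stdin), start a fresh segment on resume, and join the segments afterwards with the concat demuxer. A sketch; the capture options are trimmed for brevity (reuse the full list from above), and the dshow device name is machine-specific:

import subprocess

# Capture options trimmed to the essentials; substitute the full option list
# from the question. The dshow audio device name is machine-specific.
CAPTURE = ['ffmpeg', '-y',
           '-f', 'gdigrab', '-framerate', '24', '-i', 'desktop',
           '-f', 'dshow', '-i', 'audio=Microphone (2- High Definition Audio Device)',
           '-c:v', 'h264_nvenc', '-c:a', 'aac']

segments = []

def start_segment(index):
    name = f'segment_{index:03d}.mp4'
    segments.append(name)
    # stdin=PIPE lets us stop ffmpeg cleanly by sending 'q'
    return subprocess.Popen(CAPTURE + [name], stdin=subprocess.PIPE)

proc = start_segment(0)
# ... recording ...

# "pause": finish the current segment instead of suspending the process
proc.stdin.write(b'q')
proc.stdin.flush()
proc.wait()

# "resume": begin a new segment
proc = start_segment(1)
# ... recording ...
proc.stdin.write(b'q')
proc.stdin.flush()
proc.wait()

# Join the segments without re-encoding, using the concat demuxer.
with open('segments.txt', 'w') as f:
    f.writelines(f"file '{s}'\n" for s in segments)
subprocess.run(['ffmpeg', '-y', '-f', 'concat', '-safe', '0',
                '-i', 'segments.txt', '-c', 'copy', 'output.mp4'],
               check=True)

Stream-copying into the concat demuxer works here because every segment is recorded with the same codecs and parameters.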
-
How can I remove silence from an MP3 programmatically?
27 March, by Benjamin Oakes
I have MP3 files that sometimes have silence at the end. I would like to remove this silence automatically. From what I can tell, it is "perfect" silence (zero amplitude), not background noise. The length of the content and of the silence varies.
I found some other questions about cropping to the first 30 seconds, or cropping from X to X+N seconds, using ffmpeg. I would think I could use a similar approach, as long as I have a way to find where the silence starts. How would I do that programmatically? For example, one possible solution would be a command that finds the beginning of the "silence". I'd expect a sequence like this:
end=$(ffmpeg some-command-to-find-start-of-silence)
ffmpeg -t "$end" -acodec copy -i inputfile.mp3 outputfile.mp3
The solution does not have to use ffmpeg, but it does need to be available on Ubuntu.
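For the "command that finds the beginning of the silence", ffmpeg's silencedetect filter is one option: it logs silence_start/silence_end times to stderr, which can be parsed and fed into the trim step. A sketch in Python, assuming the last detected silence really does run to the end of the file; the noise threshold and minimum duration are guesses to tune:

import re
import subprocess

SRC, DST = 'inputfile.mp3', 'outputfile.mp3'

# Pass 1: detect silence. silencedetect writes its report to stderr.
probe = subprocess.run(
    ['ffmpeg', '-i', SRC, '-af', 'silencedetect=noise=-60dB:d=0.5', '-f', 'null', '-'],
    stderr=subprocess.PIPE, text=True)

starts = re.findall(r'silence_start:\s*([0-9.]+)', probe.stderr)
if starts:
    # Assume the last silence_start marks the start of the trailing silence.
    end = starts[-1]
    # Pass 2: copy everything up to that point without re-encoding.
    subprocess.run(['ffmpeg', '-y', '-i', SRC, '-t', end, '-acodec', 'copy', DST],
                   check=True)

ffmpeg also ships a silenceremove filter, and sox (packaged for Ubuntu) has a similar silence effect; either can trim trailing silence in a single pass, though filtering means the audio is re-encoded rather than copied.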