Newest 'ffmpeg' Questions - Stack Overflow

http://stackoverflow.com/questions/tagged/ffmpeg

Articles published on the site

  • Pipe raw OpenCV images to FFmpeg

    15 February 2017, by BrianTheLion

    Here's a fairly straightforward example of reading from a webcam using OpenCV's Python bindings:

    '''capture.py'''
    import cv, sys
    cap = cv.CaptureFromCAM(0)                    # 0 is for /dev/video0
    while True:
        if not cv.GrabFrame(cap): break
        frame = cv.RetrieveFrame(cap)
        sys.stdout.write(frame.tostring())
    

    Now I want to pipe the output to ffmpeg as in:

    $ python capture.py | ffmpeg -f image2pipe -pix_fmt bgr8 -i - -s 640x480 foo.avi

    Sadly, I can't get the ffmpeg magic incantation quite right and it fails with

      libavutil     50.15. 1 / 50.15. 1
      libavcodec    52.72. 2 / 52.72. 2
      libavformat   52.64. 2 / 52.64. 2
      libavdevice   52. 2. 0 / 52. 2. 0
      libavfilter    1.19. 0 /  1.19. 0
      libswscale     0.11. 0 /  0.11. 0
      libpostproc   51. 2. 0 / 51. 2. 0
    Output #0, avi, to 'out.avi':
        Stream #0.0: Video: flv, yuv420p, 640x480, q=2-31, 19660 kb/s, 90k tbn, 30 tbc
    [image2pipe @ 0x1508640]max_analyze_duration reached
    [image2pipe @ 0x1508640]Estimating duration from bitrate, this may be inaccurate
    Input #0, image2pipe, from 'pipe:':
      Duration: N/A, bitrate: N/A
        Stream #0.0: Video: 0x0000, bgr8, 25 fps, 25 tbr, 25 tbn, 25 tbc
    swScaler: 0x0 -> 640x480 is invalid scaling dimension
    
    • The captured frames are definitely 640x480.
    • I'm pretty sure the pixel order for the OpenCV image type (IplImage) is GBR, one byte per channel. At least, that's what seems to be coming off the camera.

    I'm no ffmpeg guru. Has anyone done this successfully?
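
    For reference, one plausible incantation (a sketch under assumptions, not a verified command; it presumes 640x480 BGR frames at roughly 30 fps): image2pipe expects a stream of encoded images such as JPEG or PNG, while raw frames are normally fed through the rawvideo demuxer, which needs the size, pixel format, and frame rate spelled out because a raw byte stream carries no headers.

    $ python capture.py | ffmpeg -f rawvideo -pix_fmt bgr24 -s 640x480 -r 30 -i - foo.avi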

  • FFMPEG AAC encoding causes audio to be lower in pitch

    14 February 2017, by Paul Knopf

    I built a sample application that encodes AAC (from PortAudio) into an MP4 container (no video stream).

    The resulting audio is lower in pitch.

    #include "stdafx.h"
    #include "TestRecording.h"
    #include "libffmpeg.h"
    
    TestRecording::TestRecording()
    {
    }
    
    
    TestRecording::~TestRecording()
    {
    }
    
    struct RecordingContext
    {
        RecordingContext()
        {
            formatContext = NULL;
            audioStream = NULL;
            audioFrame = NULL;
            audioFrameframeNumber = 0;
        }
    
        libffmpeg::AVFormatContext* formatContext;
        libffmpeg::AVStream* audioStream;
        libffmpeg::AVFrame* audioFrame;
        int audioFrameframeNumber;
    };
    
    static int AudioRecordCallback(const void *inputBuffer, void *outputBuffer,
        unsigned long framesPerBuffer,
        const PaStreamCallbackTimeInfo* timeInfo,
        PaStreamCallbackFlags statusFlags,
        void *userData)
    {
        RecordingContext* recordingContext = (RecordingContext*)userData;
    
        libffmpeg::avcodec_fill_audio_frame(recordingContext->audioFrame,
            recordingContext->audioFrame->channels,
            recordingContext->audioStream->codec->sample_fmt,
            static_cast<const uint8_t*>(inputBuffer),
            (framesPerBuffer * sizeof(float) * recordingContext->audioFrame->channels),
            0);
    
        libffmpeg::AVPacket pkt;
        libffmpeg::av_init_packet(&pkt);
        pkt.data = NULL;
        pkt.size = 0;
    
        int gotpacket;
        int result = avcodec_encode_audio2(recordingContext->audioStream->codec, &pkt, recordingContext->audioFrame, &gotpacket);
    
        if (result < 0)
        {
            LOGINT_WITH_MESSAGE(ERROR, result, "Couldn't encode the audio frame to acc");
            return paContinue;
        }
    
        if (gotpacket)
        {
            pkt.stream_index = recordingContext->audioStream->index;
            recordingContext->audioFrameframeNumber++;
    
            // this codec requires no bitstream filter, just send it to the muxer!
            result = libffmpeg::av_write_frame(recordingContext->formatContext, &pkt);
            if (result < 0)
            {
                LOG(ERROR) << "Couldn't write the encoded audio frame";
                libffmpeg::av_free_packet(&pkt);
                return paContinue;
            }
    
            libffmpeg::av_free_packet(&pkt);
        }
    
        return paContinue;
    }
    
    static bool InitializeRecordingContext(RecordingContext* recordingContext)
    {
        int result = libffmpeg::avformat_alloc_output_context2(&recordingContext->formatContext, NULL, NULL, "C:\\Users\\Paul\\Desktop\\test.mp4");
        if (result < 0)
        {
            LOGINT_WITH_MESSAGE(ERROR, result, "Couldn't create output format context");
            return false;
        }
    
        libffmpeg::AVCodec *audioCodec;
        audioCodec = libffmpeg::avcodec_find_encoder(libffmpeg::AV_CODEC_ID_AAC);
        if (audioCodec == NULL)
        {
            LOG(ERROR) << "Couldn't find the encoder for AAC";
        }
    
        recordingContext->audioStream = libffmpeg::avformat_new_stream(recordingContext->formatContext, audioCodec);
        if (!recordingContext->audioStream)
        {
            LOG(ERROR) << "Couldn't create the audio stream";
            return false;
        }
    
        recordingContext->audioStream->codec->bit_rate = 64000;
        recordingContext->audioStream->codec->sample_fmt = libffmpeg::AV_SAMPLE_FMT_FLTP;
        recordingContext->audioStream->codec->sample_rate = 48000;
        recordingContext->audioStream->codec->channel_layout = AV_CH_LAYOUT_STEREO;
        recordingContext->audioStream->codec->channels = libffmpeg::av_get_channel_layout_nb_channels(recordingContext->audioStream->codec->channel_layout);
    
        recordingContext->audioStream->codecpar->bit_rate = recordingContext->audioStream->codec->bit_rate;
        recordingContext->audioStream->codecpar->format = recordingContext->audioStream->codec->sample_fmt;
        recordingContext->audioStream->codecpar->sample_rate = recordingContext->audioStream->codec->sample_rate;
        recordingContext->audioStream->codecpar->channel_layout = recordingContext->audioStream->codec->channel_layout;
        recordingContext->audioStream->codecpar->channels = recordingContext->audioStream->codec->channels;
    
        result = libffmpeg::avcodec_open2(recordingContext->audioStream->codec, audioCodec, NULL);
        if (result < 0)
        {
            LOGINT_WITH_MESSAGE(ERROR, result, "Couldn't open the audio codec");
            return false;
        }
    
        // create a new frame to store the audio samples
        recordingContext->audioFrame = libffmpeg::av_frame_alloc();
        if (!recordingContext->audioFrame)
        {
            LOG(ERROR) << "Couldn't alloce the output audio frame";
            return false;
        }
    
        recordingContext->audioFrame->nb_samples = recordingContext->audioStream->codec->frame_size;
        recordingContext->audioFrame->channel_layout = recordingContext->audioStream->codec->channel_layout;
        recordingContext->audioFrame->channels = recordingContext->audioStream->codec->channels;
        recordingContext->audioFrame->format = recordingContext->audioStream->codec->sample_fmt;
        recordingContext->audioFrame->sample_rate = recordingContext->audioStream->codec->sample_rate;
    
        result = libffmpeg::av_frame_get_buffer(recordingContext->audioFrame, 0);
        if (result < 0)
        {
            LOG(ERROR) << "Coudln't initialize the output audio frame buffer";
            return false;
        }
    
        // some formats want stream headers to be separate
        if (!strcmp(recordingContext->formatContext->oformat->name, "mp4") || !strcmp(recordingContext->formatContext->oformat->name, "mov") || !strcmp(recordingContext->formatContext->oformat->name, "3gp"))
        {
            recordingContext->audioStream->codec->flags |= CODEC_FLAG_GLOBAL_HEADER;
        }
    
        // open the output file
        if (!(recordingContext->formatContext->oformat->flags & AVFMT_NOFILE))
        {
            result = libffmpeg::avio_open(&recordingContext->formatContext->pb, recordingContext->formatContext->filename, AVIO_FLAG_WRITE);
            if (result < 0)
            {
                LOGINT_WITH_MESSAGE(ERROR, result, "Couldn't open the output file");
                return false;
            }
        }
    
        // write the stream headers
        result = libffmpeg::avformat_write_header(recordingContext->formatContext, NULL);
        if (result < 0)
        {
            LOGINT_WITH_MESSAGE(ERROR, result, "Couldn't write the headers to the file");
            return false;
        }
    
        return true;
    }
    
    static bool FinalizeRecordingContext(RecordingContext* recordingContext)
    {
        int result = 0;
    
        // write the trailing information
        if (recordingContext->formatContext->pb)
        {
            result = libffmpeg::av_write_trailer(recordingContext->formatContext);
            if (result < 0)
            {
                LOGINT_WITH_MESSAGE(ERROR, result, "Couldn't write the trailer information");
                return false;
            }
        }
    
        // close all the codecs
        for (int i = 0; i < (int)recordingContext->formatContext->nb_streams; i++)
        {
            result = libffmpeg::avcodec_close(recordingContext->formatContext->streams[i]->codec);
            if (result < 0)
            {
                LOGINT_WITH_MESSAGE(ERROR, result, "Couldn't close the codec");
                return false;
            }
        }
    
        // close the output file
        if (recordingContext->formatContext->pb)
        {
            if (!(recordingContext->formatContext->oformat->flags & AVFMT_NOFILE))
            {
                result = libffmpeg::avio_close(recordingContext->formatContext->pb);
                if (result < 0)
                {
                    LOGINT_WITH_MESSAGE(ERROR, result, "Couldn't close the output file");
                    return false;
                }
            }
        }
    
        // free the format context and all of its data
        libffmpeg::avformat_free_context(recordingContext->formatContext);
    
        recordingContext->formatContext = NULL;
        recordingContext->audioStream = NULL;
    
        if (recordingContext->audioFrame)
        {
            libffmpeg::av_frame_free(&recordingContext->audioFrame);
            recordingContext->audioFrame = NULL;
        }
    
        return true;
    }
    
    int TestRecording::Test()
    {
        PaError result = paNoError;
    
        result = Pa_Initialize();
        if (result != paNoError) LOGINT_WITH_MESSAGE(ERROR, result, "Error initializing audio device framework");
    
        RecordingContext recordingContext;
        if (!InitializeRecordingContext(&recordingContext))
        {
            LOG(ERROR) << "Couldn't start recording file";
            return 0;
        }
    
        auto defaultDevice = Pa_GetDefaultInputDevice();
        auto deviceInfo = Pa_GetDeviceInfo(defaultDevice);
    
        PaStreamParameters  inputParameters;
        inputParameters.device = defaultDevice;
        inputParameters.channelCount = 2;
        inputParameters.sampleFormat = paFloat32;
        inputParameters.suggestedLatency = deviceInfo->defaultLowInputLatency;
        inputParameters.hostApiSpecificStreamInfo = NULL;
    
        PaStream* stream = NULL;
        result = Pa_OpenStream(
            &stream,
            &inputParameters,
            NULL,
            48000,
            1024,
            paClipOff,
            AudioRecordCallback,
            &recordingContext);
        if (result != paNoError) LOGINT_WITH_MESSAGE(ERROR, result, "Couldn't open the audio stream");
    
        result = Pa_StartStream(stream);
        if (result != paNoError) LOGINT_WITH_MESSAGE(ERROR, result, "Couldn't start the audio stream");
    
        Sleep(1000 * 5);
    
        result = Pa_StopStream(stream);
        if (result != paNoError) LOGINT_WITH_MESSAGE(ERROR, result, "Couldn't stop the audio stream");
    
        if (!FinalizeRecordingContext(&recordingContext)) LOG(ERROR) << "Couldn't stop recording file";
    
        result = Pa_CloseStream(stream);
        if (result != paNoError) LOGINT_WITH_MESSAGE(ERROR, result, "Couldn't close the audio stream");
    
        return 0;
    }
    

    Here is the stdout, in case it helps.

    https://gist.github.com/pauldotknopf/9f24a604ce1f8a081aa68da1bf169e98

    Why is the audio lower in pitch? I assume I am overlooking a parameter that needs to match between PortAudio and FFmpeg. Is there something super obvious that I am missing?
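
    One hedged lead (an assumption, not a confirmed diagnosis): avcodec_fill_audio_frame performs no sample-format conversion, so handing PortAudio's interleaved paFloat32 buffer to a codec configured for the planar AV_SAMPLE_FMT_FLTP gives the encoder misordered samples; a capture device actually running at a rate other than the declared 48000 Hz is another classic cause of pitch shift. A minimal sketch of manual deinterleaving, assuming two channels and a frame allocated with av_frame_get_buffer as above (the helper name is hypothetical):

    // Hypothetical helper: split PortAudio's interleaved stereo samples
    // (L R L R ...) into the two separate planes that AV_SAMPLE_FMT_FLTP
    // expects. Assumes frame->data[0] and frame->data[1] were allocated
    // with room for frameCount samples each.
    static void DeinterleaveStereoToPlanar(const float* interleaved,
        libffmpeg::AVFrame* frame,
        unsigned long frameCount)
    {
        float* left = reinterpret_cast<float*>(frame->data[0]);
        float* right = reinterpret_cast<float*>(frame->data[1]);
        for (unsigned long i = 0; i < frameCount; ++i)
        {
            left[i] = interleaved[2 * i];
            right[i] = interleaved[2 * i + 1];
        }
    }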

  • How to efficiently create an H.264 MPEG from two clips with known different motion intensity?

    14 February 2017, by Serge

    Given an audio track and an image, we create a static-image "video" and then append a short clip that has rather intense motion.

    The audio and image vary from run to run; the appended animation is always the same. Rendering is done with ffmpeg on a remote server, and the rendered file must be an H.264-encoded .mpg.

    Encoding speed is crucial. Is there a fast and effective way to generate and merge the two clips?

    At the moment we use the following ffmpeg commands:

    # create first clip from image
    ffmpeg -loop 1 -r 24 -i $IMAGE -i $AUDIO -t $AUDIO_LENGTH -c:a aac -profile:a aac_low -ar 48000 -b:a 192k -bsf:a aac_adtstoasc -strict -2 -y -c:v libx264 -profile:v high -preset veryfast  -tune stillimage -crf 24 -x264opts bframes=2 -pix_fmt yuv420p -safe 0  clip1.mpg
    # . . .
    # then append the animation
    ffmpeg -f concat -safe 0 -i list.txt -c copy -y -safe 0 final.mpg
    

    The intuition is that we can benefit from knowing the exact timing of the first clip (the static image) and of the second (the intense animation), much as it is determined in the first pass of a two-pass encode.

    Advice from someone experienced with the H.264 codec and MPEG would be appreciated.
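
    One approach worth sketching (hedged; the file name animation.mov is hypothetical, and it presumes the fixed animation is known before render time): since the appended clip never changes, pre-encode it once with exactly the same parameters as the still-image clip (codec, profile, resolution, frame rate, pixel format, and audio settings), so each run only pays for the cheap still-image encode and the concat demuxer stream-copies both parts without re-encoding.

    # one-time: pre-encode the fixed animation with parameters matching clip1
    ffmpeg -i animation.mov -r 24 -c:v libx264 -profile:v high -pix_fmt yuv420p -x264opts bframes=2 -c:a aac -profile:a aac_low -ar 48000 -b:a 192k -strict -2 -y clip2.mpg
    # per run: encode only the still-image clip, then concatenate by stream copy
    ffmpeg -f concat -safe 0 -i list.txt -c copy -y final.mpg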

  • Cause of MPEG-DASH delay

    14 February 2017, by evan

    I have created an MPEG-DASH stream from an MP4 file: I transcoded my videos to MP4 using the ffmpeg library, then produced the MPEG-DASH output with MP4Box. The resulting stream seems to have a buffering problem (a buffer stall, I guess) and a five-second delay whenever I seek in the video; the startup delay seems fine. What is the cause of this delay? Is it coming from my ffmpeg transcode commands, or from MP4Box and the dashing process? I need to find the source in order to solve it. Any ideas? I would really appreciate any help.

    These are my ffmpeg commands:

    ffmpeg -i main720.MTS -movflags faststart -vcodec libx264 -r 24 -tune zerolatency -tune fastdecode -bf 0 -slices 0 -x264opts intra_refresh=1 -g 96 -b:v 700k -maxrate 700k -bufsize 400k -an -s 640x360 -ss 00:00:00 -t 00:02:00 main720_700_video.mp4

    ffmpeg -i main720.MTS -movflags faststart -acodec libmp3lame -b:a 128k -vn -ss 00:00:00 -t 00:02:00 main720_700_audio.mp4

    and this is my MP4Box command:

    MP4Box -dash 4000 -frag 4000 -profile onDemand -rap -segment-name %s_ -out manifest.mpd main720_300_video.mp4 main720_700_video.mp4 main720_300_audio.mp4
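
    One variable worth isolating (a hedged test, not a diagnosis): seeking in DASH generally snaps to the nearest random-access point, and the video command above combines -g 96 at 24 fps (a keyframe only every 4 seconds) with x264's intra_refresh=1, which replaces periodic keyframes with a rolling refresh and leaves few clean seek points. A hypothetical variant with plain 1-second GOPs and shorter fragments to test against:

    ffmpeg -i main720.MTS -movflags faststart -vcodec libx264 -r 24 -g 24 -keyint_min 24 -sc_threshold 0 -b:v 700k -maxrate 700k -bufsize 400k -an -s 640x360 -ss 00:00:00 -t 00:02:00 main720_700_video.mp4

    MP4Box -dash 2000 -frag 500 -profile onDemand -rap -segment-name %s_ -out manifest.mpd main720_700_video.mp4 main720_700_audio.mp4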

  • Stream publishing using ffmpeg rtmp: network bandwidth not fully utilized

    14 February 2017, by DeducibleSteak

    I'm developing an application that needs to publish a media stream to an rtmp "ingestion" url (as used in YouTube Live, or as input to Wowza Streaming Engine, etc), and I'm using the ffmpeg library (programmatically, from C/C++, not the command line tool) to handle the rtmp layer. I've got a working version ready, but am seeing some problems when streaming higher bandwidth streams to servers with worse ping. The problem exists both when using the ffmpeg "native"/builtin rtmp implementation and the librtmp implementation.

    When streaming to a local target server with low ping through a good network (specifically, a local Wowza server), my code has so far handled every stream I've thrown at it and managed to upload everything in real time - which is important, since this is meant exclusively for live streams.

    However, when streaming to a remote server with a worse ping (e.g. the youtube ingestion urls on a.rtmp.youtube.com, which for me have 50+ms pings), lower bandwidth streams work fine, but with higher bandwidth streams the network is underutilized - for example, for a 400kB/s stream, I'm only seeing ~140kB/s network usage, with a lot of frames getting delayed/dropped, depending on the strategy I'm using to handle network pushback.

    Now, I know this is not a problem with the network connection to the target server, because I can successfully upload the stream in real time when using the ffmpeg command line tool to the same target server or using my code to stream to a local Wowza server which then forwards the stream to the youtube ingestion point.

    So the network connection is not the problem and the issue seems to lie with my code.

    I've timed various parts of my code and found that when the problem appears, calls to av_write_frame / av_interleaved_write_frame (I never mix & match them, I am always using one version consistently in any specific build, it's just that I've experimented with both to see if there is any difference) sometimes take a really long time - I've seen those calls sometimes take up to 500-1000ms, though the average "bad case" is in the 50-100ms range. Not all calls to them take this long, most return instantly, but the average time spent in these calls grows bigger than the average frame duration, so I'm not getting a real time upload anymore.

    The main suspect, it seems to me, could be the rtmp Acknowledgement Window mechanism, where a sender of data waits for a confirmation of receipt after sending every N bytes, before sending any more data - this would explain the available network bandwidth not being fully used, since the client would simply sit there and wait for a response (which takes a longer time because of the lower ping), instead of using the available bandwidth. Though I haven't looked at ffmpeg's rtmp/librtmp code to see if it actually implements this kind of throttling, so it could be something else entirely.

    The full code of the application is too much to post here, but here are some important snippets:

    Format context creation:

    const int nAVFormatContextCreateError = avformat_alloc_output_context2(&m_pAVFormatContext, nullptr, "flv", m_sOutputUrl.c_str());
    

    Stream creation:

    m_pVideoAVStream = avformat_new_stream(m_pAVFormatContext, nullptr);
    m_pVideoAVStream->id = m_pAVFormatContext->nb_streams - 1;
    
    m_pAudioAVStream = avformat_new_stream(m_pAVFormatContext, nullptr);
    m_pAudioAVStream->id = m_pAVFormatContext->nb_streams - 1;
    

    Video stream setup:

    m_pVideoAVStream->codecpar->codec_type = AVMEDIA_TYPE_VIDEO;
    m_pVideoAVStream->codecpar->codec_id = AV_CODEC_ID_H264;
    m_pVideoAVStream->codecpar->width = nWidth;
    m_pVideoAVStream->codecpar->height = nHeight;
    m_pVideoAVStream->codecpar->format = AV_PIX_FMT_YUV420P;
    m_pVideoAVStream->codecpar->bit_rate = 10 * 1000 * 1000;
    m_pVideoAVStream->time_base = AVRational { 1, 1000 };
    
    m_pVideoAVStream->codecpar->extradata_size = int(nTotalSizeRequired);
    m_pVideoAVStream->codecpar->extradata = (uint8_t*)av_malloc(m_pVideoAVStream->codecpar->extradata_size + AV_INPUT_BUFFER_PADDING_SIZE);
    // Fill in the extradata here - I'm sure I'm doing that correctly.
    

    Audio stream setup:

    m_pAudioAVStream->time_base = AVRational { 1, 1000 };
    // Let's leave creation of m_pAudioCodecContext out of the scope of this question, I'm quite sure everything is done right there.
    const int nAudioCodecCopyParamsError = avcodec_parameters_from_context(m_pAudioAVStream->codecpar, m_pAudioCodecContext);
    

    Opening the connection:

    const int nAVioOpenError = avio_open2(&m_pAVFormatContext->pb, m_sOutputUrl.c_str(), AVIO_FLAG_WRITE);
    

    Starting the stream:

    AVDictionary * pOptions = nullptr;
    const int nWriteHeaderError = avformat_write_header(m_pAVFormatContext, &pOptions);
    

    Sending a video frame:

    AVPacket pkt = { 0 };
    av_init_packet(&pkt);
    pkt.dts = nTimestamp;
    pkt.pts = nTimestamp;
    pkt.duration = nDuration; // I know that I have the wrong duration sometimes, but I don't think that's the issue.
    pkt.data = pFrameData;
    pkt.size = pFrameDataSize;
    pkt.flags = bKeyframe ? AV_PKT_FLAG_KEY : 0;
    pkt.stream_index = m_pVideoAVStream->index;
    const int nWriteFrameError = av_write_frame(m_pAVFormatContext, &pkt); // This is where too much time is spent.
    

    Sending an audio frame:

    AVPacket pkt = { 0 };
    av_init_packet(&pkt);
    pkt.pts = m_nTimestampMs;
    pkt.dts = m_nTimestampMs;
    pkt.duration = m_nDurationMs;
    pkt.stream_index = m_pAudioAVStream->index;
    const int nWriteFrameError = av_write_frame(m_pAVFormatContext, &pkt);
    

    Any ideas? Am I on the right track with thinking about the Acknowledgement Window? Am I doing something else completely wrong?
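
    As a follow-up to the timing observations above, a minimal sketch of that instrumentation (the wrapper and the per-frame budget nFrameDurationMs are hypothetical, not part of the application); it quantifies how often and for how long av_write_frame stalls before the delay is attributed to the acknowledgement window:

    #include <chrono>
    #include <iostream>

    // Hypothetical wrapper: time a single av_write_frame() call and report
    // writes that exceed the per-frame real-time budget.
    static int TimedWriteFrame(AVFormatContext* pFormatContext, AVPacket* pPacket,
        int64_t nFrameDurationMs)
    {
        const auto tStart = std::chrono::steady_clock::now();
        const int nError = av_write_frame(pFormatContext, pPacket);
        const auto nElapsedMs = std::chrono::duration_cast<std::chrono::milliseconds>(
            std::chrono::steady_clock::now() - tStart).count();
        if (nElapsedMs > nFrameDurationMs)
            std::cerr << "slow write: " << nElapsedMs << " ms" << std::endl;
        return nError;
    }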