On other sites (362)

  • What is wrong when providing arguments to sws_scale?

    30 April 2019, by hamidi

    In the following code, I can’t figure out what’s wrong:

    uint8_t *dstData[4];
    int dstLinesize[4];
    AVPixelFormat convertToPixFmt = AV_PIX_FMT_RGBA;
    int ret;

    // ...

    printf("tmp_frame format: %d (%s) %dx%d\n", tmp_frame->format, av_get_pix_fmt_name((AVPixelFormat)tmp_frame->format), tmp_frame->width, tmp_frame->height);
    // The above line prints: tmp_frame format: 23 (nv12) 480x480

    int size = av_image_get_buffer_size(convertToPixFmt, tmp_frame->width, tmp_frame->height, 1);
    uint8_t *buffer = (uint8_t *) av_malloc(size);

    ret = av_image_copy_to_buffer(buffer, size,
       (const uint8_t * const *)&tmp_frame->data[i],
       (const int *)&tmp_frame->linesize[i], (AVPixelFormat)tmp_frame->format,
       tmp_frame->width, tmp_frame->height, 1);
    ASSERT(ret >= 0);

    ret = av_image_fill_arrays(dstData, dstLinesize, buffer, convertToPixFmt, dest_width, dest_height, 1);
    ASSERT(ret >= 0);

    ret = sws_scale(
       convertContext,
       dstData,
       dstLinesize,
       0,
       dest_width,
       convertedFrame->data,
       convertedFrame->linesize);
    printf("sws_scale returns %d\n", ret);  // prints: sws_scale returns 0
    ASSERT(ret == tmp_frame->height);

    // ...

    It’s part of a program that uses dxva2 to obtain tmp_frame. I based the code on hw_decode.c and am sure there’s no mistake in that part. The tmp_frame is correctly produced in NV12 format. The error occurs exactly when I call sws_scale, and it is:

    bad src image pointers

    So I don’t know how to provide the pointers so that I don’t get this error and sws_scale works properly.
    Any idea?
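
    For comparison, here is a minimal sketch (not taken from the post) of how sws_scale is usually wired up when converting a decoded NV12 frame to RGBA. It reuses tmp_frame, dest_width and dest_height from the code above, and assumes the scaler context is created for the frame's actual source format rather than for the destination layout:

    // Sketch: the source arrays passed to sws_scale must describe the input
    // frame (NV12 here), and the context must be created with that format.
    struct SwsContext *sws = sws_getContext(
        tmp_frame->width, tmp_frame->height, (AVPixelFormat)tmp_frame->format,
        dest_width, dest_height, AV_PIX_FMT_RGBA,
        SWS_FAST_BILINEAR, NULL, NULL, NULL);

    uint8_t *rgbaData[4];
    int rgbaLinesize[4];
    // Allocate a destination image with the requested RGBA geometry.
    av_image_alloc(rgbaData, rgbaLinesize, dest_width, dest_height, AV_PIX_FMT_RGBA, 1);

    int outH = sws_scale(sws,
        (const uint8_t * const *)tmp_frame->data,  // source plane pointers
        tmp_frame->linesize,                       // source strides
        0, tmp_frame->height,                      // full source slice
        rgbaData, rgbaLinesize);                   // destination arrays
    // On success, outH equals dest_height.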

    I have updated the question to include my whole code:

    static AVBufferRef *hw_device_ctx = NULL;
    static enum AVPixelFormat hw_pix_fmt;
    static FILE *output_file = NULL;

    int main(int argc, char *argv[])
    {
       AVFormatContext *input_ctx = NULL;
       int video_stream, ret;
       AVStream *video = NULL;
       AVCodecContext *decoder_ctx = NULL;
       AVCodec *decoder = NULL;
       AVPacket packet;
       enum AVHWDeviceType type;
       int i;

       if (argc < 2)
       {
           fprintf(stderr, "Usage: %s <input file="file" />\n", argv[0]);
           return -1;
       }

       type = av_hwdevice_find_type_by_name("dxva2");
       ASSERT(type != AV_HWDEVICE_TYPE_NONE);
       ASSERT(avformat_open_input(&input_ctx, argv[1], NULL, NULL) == 0);
       ASSERT(avformat_find_stream_info(input_ctx, NULL) >= 0);
       video_stream = av_find_best_stream(input_ctx, AVMEDIA_TYPE_VIDEO, -1, -1, &decoder, 0);
       ASSERT(video_stream >= 0);
       decoder_ctx = avcodec_alloc_context3(decoder);
       ASSERT(decoder_ctx);
       video = input_ctx->streams[video_stream];
       ASSERT(avcodec_parameters_to_context(decoder_ctx, video->codecpar) >= 0);
       ASSERT(av_hwdevice_ctx_create(&hw_device_ctx, type, NULL, NULL, 0) >= 0);
       decoder_ctx->hw_device_ctx = av_buffer_ref(hw_device_ctx);
       ASSERT(avcodec_open2(decoder_ctx, decoder, NULL) >= 0);
       printf("video info: %dx%d\n", decoder_ctx->width, decoder_ctx->height);

       AVFrame *frame = av_frame_alloc();
       ASSERT(frame);
       AVFrame *sw_frame = av_frame_alloc();
       ASSERT(sw_frame);
       AVFrame* convertedFrame = av_frame_alloc();
       ASSERT(convertedFrame);

       AVPixelFormat convertToPixFmt = AV_PIX_FMT_RGBA;
       //int dest_width = 320, dest_height = 200;
       int dest_width = decoder_ctx->width, dest_height = decoder_ctx->height;
       SwsContext* convertContext = sws_getContext(decoder_ctx->width, decoder_ctx->height, AV_PIX_FMT_YUV420P,
           dest_width, dest_height, convertToPixFmt,
           SWS_FAST_BILINEAR, NULL, NULL, NULL);
       ASSERT(convertContext);
       int convertedFrameAspectBufferSize = avpicture_get_size(convertToPixFmt, dest_width, dest_height);
       void *convertedFrameBuffer = av_malloc(convertedFrameAspectBufferSize);
       avpicture_fill((AVPicture*)convertedFrame, (uint8_t *)convertedFrameBuffer, convertToPixFmt, dest_width, dest_height);
       output_file = fopen("1.out", "w+");

       for (int i = 0; /*i < 20*/; i++)
       {
           ret = av_read_frame(input_ctx, &packet);
           if (ret == AVERROR_EOF)
               break;
           ASSERT(ret >= 0);
           if (video_stream != packet.stream_index)
               continue;
           int ret = avcodec_send_packet(decoder_ctx, &packet);
           ASSERT(ret >= 0);
           //printf("%p", decoder->hw_configs->hwaccel);
           ret = avcodec_receive_frame(decoder_ctx, frame);
           if (ret < 0)
               printf("%d\t%d\n", i, ret);
           AVFrame *tmp_frame;
           if (frame->format > 0)  // hw enabled
           {
               ASSERT(av_hwframe_transfer_data(sw_frame, frame, 0) >= 0);
               tmp_frame = sw_frame;
           }
           else
           {
               tmp_frame = frame;
           }
           printf("frame format: %d (%s) %dx%d\n", frame->format, av_get_pix_fmt_name((AVPixelFormat)frame->format), frame->width, frame->height);
           printf("sw_frame format: %d (%s) %dx%d\n", sw_frame->format, av_get_pix_fmt_name((AVPixelFormat)sw_frame->format), sw_frame->width, sw_frame->height);
           printf("tmp_frame format: %d (%s) %dx%d\n", tmp_frame->format, av_get_pix_fmt_name((AVPixelFormat)tmp_frame->format), tmp_frame->width, tmp_frame->height);
           /*
           video info: 480x480
           frame format: 53 (dxva2_vld) 480x480
           sw_frame format: 23 (nv12) 480x480
           [swscaler @ 004cb2c0] bad src image pointers
           */

           int size = av_image_get_buffer_size(convertToPixFmt, tmp_frame->width, tmp_frame->height, 1);
           uint8_t *buffer = (uint8_t *) av_malloc(size);

           ret = av_image_copy_to_buffer(buffer, size,
               (const uint8_t * const *)&tmp_frame->data[i],
               (const int *)&tmp_frame->linesize[i], (AVPixelFormat)tmp_frame->format,
               tmp_frame->width, tmp_frame->height, 1);
           ASSERT(ret > 0);

           ret = av_image_fill_arrays(dstData, dstLinesize, buffer, convertToPixFmt, dest_width, dest_height, 1);
           ASSERT(ret > 0);

           ret = sws_scale(
               convertContext,
               tmp_frame->data,
               tmp_frame->linesize,
               0,
               dest_width,
               convertedFrame->data,
               convertedFrame->linesize);
           printf("sws_scale returns %d\n", ret);
           ASSERT(ret == tmp_frame->height);
           ret = fwrite(convertedFrame->data, tmp_frame->height * tmp_frame->width, 1, output_file);
           ASSERT(ret == 1);
           break;
       }
       av_frame_free(&frame);
       av_packet_unref(&packet);
       avcodec_free_context(&decoder_ctx);
       avformat_close_input(&input_ctx);
       av_buffer_unref(&hw_device_ctx);

       return 0;
    }
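
    As an aside, avpicture_get_size() and avpicture_fill() used in the listing above are deprecated in current FFmpeg. A small sketch of the equivalent setup for convertedFrame with the non-deprecated API, reusing convertToPixFmt, dest_width and dest_height from the code above:

    // Sketch: describe the destination frame, then let FFmpeg allocate
    // suitably aligned buffers for it.
    AVFrame *convertedFrame = av_frame_alloc();
    convertedFrame->format = convertToPixFmt;      // AV_PIX_FMT_RGBA
    convertedFrame->width  = dest_width;
    convertedFrame->height = dest_height;
    ret = av_frame_get_buffer(convertedFrame, 0);  // 0 = default alignment
    ASSERT(ret >= 0);
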
  • ffmpeg: how to ignore initial empty audio frames when decoding, in order to loop a sound

    1 December 2020, by cs guy

    I am trying to loop an ogg sound file. The goal is to make a loopable audio interface for my mobile app.

    I decode the given ogg file into a buffer, and that buffer is sent to the audio card for playing. All good until the audio finishes (end of file). When it finishes I use av_seek_frame(avFormatContext, streamInfoIndex, 0, AVSEEK_FLAG_FRAME); to loop back to the beginning and continue decoding into the same buffer. At first sight I thought this would give me perfect loops. One problem I had was that the decoder gives me extra empty frames at the end, so I ignored them by keeping track of how many samples have been decoded:

    durationInMillis = avFormatContext->duration * 1000;
    numOfTotalSamples =
            (uint64_t) avFormatContext->duration *
            (uint64_t) pLocalCodecParameters->sample_rate *
            (uint64_t) pLocalCodecParameters->channels /
            (uint64_t) AV_TIME_BASE;

    When the threshold is reached I ignore the frames sent by the codec. I thought this was it and ran some tests. I recorded 5 minutes of my app and compared the result in FL Studio by manually adding the same sound clip several times to match the length of my recording:

    Here it is after 5 minutes:

    [screenshot: FL Studio comparison after 5 minutes]

    In the first loops the difference is very small, so I thought it was working, and I used this for several days until I tested it on a 5 minute recording. As the looping approached the 5 minute mark the difference became huge. My code is not looping the audio correctly. I suspect that the codec is adding 1 or 2 empty frames at the very beginning of each loop, caused by av_seek_frame, knowing that a frame can contain several audio samples. These probably accumulate and cause the mismatch.

    My question is: how can I drop the empty frames that the codec sends while decoding, so that I can create a perfect loop of the audio?

    My code is below. Please be aware that, to make it more readable, I deleted lots of if checks that were intended for safety; these removed checks are always false, so it doesn’t matter for the reader.


    helper.cpp

    int32_t
    outputAudioFrame(AVCodecContext *avCodecContext, AVFrame *avResampledDecFrame, int32_t &ret,
                     LockFreeQueue<float> *&buffer, int8_t *&mediaLoadPointer,
                     AVFrame *avDecoderFrame, SwrContext *swrContext,
                     std::atomic_bool *&signalExitFuture,
                     uint64_t &currentNumSamples, uint64_t &numOfTotalSamples) {
        // resampling is done here but its boiler code so I removed it.
        auto *floatArrPtr = (float *) (avResampledDecFrame->data[0]);

        int32_t numOfSamples = avResampledDecFrame->nb_samples * avResampledDecFrame->channels;

        for (int32_t i = 0; i < numOfSamples; i++) {
            if (currentNumSamples == numOfTotalSamples) {
                break;
            }

            buffer->push(*floatArrPtr);
            currentNumSamples++;
            floatArrPtr++;
        }

        return 0;
    }


    int32_t decode(int32_t &ret, AVCodecContext *avCodecContext, AVPacket *avPacket,
                   LockFreeQueue<float> *&buffer,
                   AVFrame *avDecoderFrame,
                   AVFrame *avResampledDecFrame,
                   std::atomic_bool *&signalExitFuture,
                   int8_t *&mediaLoadPointer, SwrContext *swrContext,
                   uint64_t &currentNumSamples, uint64_t &numOfTotalSamples) {

        ret = avcodec_send_packet(avCodecContext, avPacket);
        if (ret < 0) {
            LOGE("decode: Error submitting a packet for decoding %s", av_err2str(ret));
            return ret;
        }

        // get all the available frames from the decoder
        while (ret >= 0) {
            ret = avcodec_receive_frame(avCodecContext, avDecoderFrame);
            if (ret < 0) {
                // those two return values are special and mean there is no output
                // frame available, but there were no errors during decoding
                if (ret == AVERROR_EOF || ret == AVERROR(EAGAIN)) {
                    //LOGD("avcodec_receive_frame returned special %s", av_err2str(ret));
                    return 0;
                }

                LOGE("avcodec_receive_frame Error during decoding %s", av_err2str(ret));
                return ret;
            }

            ret = outputAudioFrame(avCodecContext, avResampledDecFrame, ret, buffer,
                                   mediaLoadPointer, avDecoderFrame, swrContext, signalExitFuture,
                                   currentNumSamples, numOfTotalSamples);

            av_frame_unref(avDecoderFrame);
            av_frame_unref(avResampledDecFrame);

            if (ret < 0)
                return ret;
        }

        return 0;
    }


    Main.cpp

    while (!*signalExitFuture) {
        while ((ret = av_read_frame(avFormatContext, avPacket)) >= 0) {

            ret = decode(ret, avCodecContext, avPacket, buffer, avDecoderFrame,
                         avResampledDecFrame, signalExitFuture,
                         mediaLoadPointer, swrContext,
                         currentNumSamples, numOfTotalSamples);

            // The packet must be freed with av_packet_unref() when it is no longer needed.
            av_packet_unref(avPacket);

            if (ret < 0) {
                LOGE("Error! %s", av_err2str(ret));

                goto cleanup;
            }
        }

        if (ret == AVERROR_EOF) {

            ret = av_seek_frame(avFormatContext, streamInfoIndex, 0, AVSEEK_FLAG_FRAME);

            currentNumSamples = 0;
            avcodec_flush_buffers(avCodecContext);
        }
    }
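
    One possible way to attack the suspected leading padding, sketched with the variable names used above: instead of assuming that the first sample decoded after a seek is sample 0, trim the output by timestamp. This is only an outline, it assumes the extra output shows up with timestamps before zero (if the demuxer reports it via skip-samples side data instead, the decoder may already trim it), and the logic would sit near the top of outputAudioFrame, which would additionally need the stream's time_base (called streamTimeBase here, a hypothetical extra parameter):

    // Sketch: drop decoded samples whose timestamps fall before the start of
    // the stream, so priming output emitted right after a seek is not pushed
    // into `buffer`.
    // streamTimeBase (hypothetical) = avFormatContext->streams[streamInfoIndex]->time_base
    int64_t ts = avDecoderFrame->best_effort_timestamp;    // in stream time_base units
    if (ts != AV_NOPTS_VALUE) {
        int64_t startSample = av_rescale_q(ts, streamTimeBase,
                                           (AVRational){1, avCodecContext->sample_rate});
        if (startSample < 0) {
            int64_t samplesToSkip = -startSample * avResampledDecFrame->channels;
            if (samplesToSkip > numOfSamples)
                samplesToSkip = numOfSamples;
            floatArrPtr  += samplesToSkip;   // skip the priming samples
            numOfSamples -= samplesToSkip;
        }
    }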


  • How to get video pixel location from screen pixel location?

    22 February, by AmLearning

    Wall of text, so I tried breaking it up into sections to make it better. Sorry in advance.


    The problem

    I have some video files that I am reading with ffmpeg to get the colors at specific pixels, and all seems well, but I just ran into a problem with finding the right pixel to input. I realized (or mistakenly believe) that the pixel location (x,y) on the screen will be different from the local pixel location, so to speak, of the video (i.e. if I want to get pixel 50,0 of the video, that will be different from my screen's pixel 50,0 because the resolutions don't match). I was trying to think of a way to convert my screen's pixel location into the "local pixel location", and I have two ideas, but I am not sure if either of them is any good. Note that I am currently using cmd+shift+4 on macOS to get the screen coordinates, and the video is playing fullscreen as in the screenshot below.


    Ideas

    1. [screenshot of the fullscreen video showing the vertical offset] If I manually measure and account for this vertical offset, would it effectively convert the screen coordinate into the "local" one?

    2. If I instead adjust my SwsContext to put the destination height and width as those of my screen, will it effectively replace the need to convert screen coordinates to the video coordinates?

    Problems with the Ideas

    The problems I see with the first solution are that I am assuming there is no hidden horizontal offset (or, conversely, that all of the width of the video is actually renderable on the screen). Additionally, this solution would only give an approximate result, as I would need to manually measure the offsets, screen width, and screen height using the method I am currently using to get the screen coordinates.

    With the second solution, aside from the question of whether it will even work, the problem is that I can no longer measure which screen coordinates I want, because I can't seem to get rid of those black bars in VLC.
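
    For what it's worth, here is a rough sketch of the arithmetic behind the first idea, assuming the player scales the frame uniformly and centers it on the screen (which is what fullscreen letterboxing normally does); the function and its parameter names are made up for illustration:

    #include <math.h>

    // Map a screen coordinate to a video pixel for a centered, uniformly
    // scaled (letterboxed/pillarboxed) fullscreen image.
    static void screen_to_video(double screen_x, double screen_y,
                                double screen_w, double screen_h,
                                double video_w, double video_h,
                                int *video_x, int *video_y)
    {
        // Uniform scale factor chosen so the whole frame fits on the screen.
        double scale = fmin(screen_w / video_w, screen_h / video_h);

        // Size of the displayed image and the black-bar offsets around it.
        double disp_w = video_w * scale, disp_h = video_h * scale;
        double off_x = (screen_w - disp_w) / 2.0;  // pillarbox (left/right bars)
        double off_y = (screen_h - disp_h) / 2.0;  // letterbox (top/bottom bars)

        *video_x = (int)((screen_x - off_x) / scale);
        *video_y = (int)((screen_y - off_y) / scale);
    }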


    Some Testing I did

    Given that if the black bars are part of the video itself my entire problem would be fixed (maybe?), I tried checking whether the black bars were part of the video, and when I looked at the frame data's first pixel, it was black. The problem then is that if the black bars are entirely part of the video, why are the colors I get for some pixels slightly off (I am checking with ColorSync Utility)? These colors aren't just slightly off as in wrong; it seems more that they belong to a slightly offset region of the video.

    However, this may be somewhat explained if ffmpeg reads right to left. When I put the top left corner of the video into the program and looked again at the pixel data in the frame for that location (the location again calculated by assuming the video location would be the same as the screen location), instead of getting white I got a bluish color, much like the glove in the top right corner.


    The Watered Down Code

    struct SwsContext *rescaler = NULL;
    rescaler = sws_getContext(codec_context->width, codec_context->height, codec_context->pix_fmt,
                              codec_context->width, codec_context->height, AV_PIX_FMT_RGB0,
                              SWS_FAST_BILINEAR, NULL, NULL, 0);

    // Get Packets (containers for frames but not guaranteed to have a full frame) and Frames
    while (av_read_frame(avformatcontext, packet) >= 0)
    {
        // determine if packet is video packet
        if (packet->stream_index != video_index)
        {
            continue;
        }

        // send packet to decoder
        if (avcodec_send_packet(codec_context, packet) < 0)
        {
            perror("Failed to decode packet");
        }

        // get frame from decoder
        int response = avcodec_receive_frame(codec_context, frame);
        if (response == AVERROR(EAGAIN))
        {
            continue;
        }
        else if (response < 0)
        {
            perror("Failed to get frame");
        }

        // convert frame to RGB0 colorspace, 4 bytes per pixel, 1 per channel
        response = sws_scale_frame(rescaler, scaled_frame, frame);
        if (response < 0) {
            perror("Failed to change colorspace");
        }

        // get data and write it
        int pixel_number = y*(scaled_frame->linesize[0]/4)+x; // divide by four gets pixel linesize (4 byte per pixel)
        int byte_number = 4*(pixel_number-1); // position of pixel in array
        // start of debugging things
        int temp = scaled_frame->data[0][byte_number]; // R
        int one_after = scaled_frame->data[0][byte_number+1]; // G
        int two_after = scaled_frame->data[0][byte_number+2]; // B
        int als; // where i put the breakpoint
        // end of debugging things
    }
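
    For reference, a minimal sketch of how one pixel of a packed RGB0 frame is usually addressed. linesize[0] is a stride in bytes and may include padding beyond width*4, so indexing is normally done per row in bytes rather than by flattening the whole frame into a pixel index (the x, y and scaled_frame names are the ones from the code above):

    // Sketch: address pixel (x, y) of an RGB0/RGBA frame. Each pixel is 4
    // bytes and each row starts at a multiple of linesize[0] bytes.
    uint8_t *row = scaled_frame->data[0] + y * scaled_frame->linesize[0];
    uint8_t r = row[4 * x + 0];
    uint8_t g = row[4 * x + 1];
    uint8_t b = row[4 * x + 2];
    // row[4 * x + 3] is the unused filler byte in RGB0.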


    In Summary


    I have no idea what is happening.

    I take the data for a pixel and compare it to what ColorSync Utility says should be there, but it is always slightly off, as though the pixel I was actually reading were offset from the one I thought I was reading. Therefore, I want to find a way to get the pixel location in a video given a screen coordinate when the video is in fullscreen, but I have no idea how to do that (aside from a few ideas that are probably bad at best).

    Also, does FFmpeg lay out the frame data right to left?


    A Video Better Showing My Problem


    https://www.youtube.com/watch?v=NSEErs2lC3A
