
Recherche avancée
Autres articles (38)
-
Support audio et vidéo HTML5
10 avril 2011 — MediaSPIP utilise les balises HTML5 video et audio pour la lecture de documents multimédia en profitant des dernières innovations du W3C supportées par les navigateurs modernes.
Pour les navigateurs plus anciens, le lecteur flash Flowplayer est utilisé.
Le lecteur HTML5 utilisé a été spécifiquement créé pour MediaSPIP : il est complètement modifiable graphiquement pour correspondre à un thème choisi.
Ces technologies permettent de distribuer vidéo et son à la fois sur des ordinateurs conventionnels (...) -
HTML5 audio and video support
13 avril 2011 — MediaSPIP uses HTML5 video and audio tags to play multimedia files, taking advantage of the latest W3C innovations supported by modern browsers.
The MediaSPIP player used has been created specifically for MediaSPIP and can be easily adapted to fit in with a specific theme.
For older browsers the Flowplayer flash fallback is used.
MediaSPIP allows for media playback on major mobile platforms with the above (...) -
De l’upload à la vidéo finale [version standalone]
31 janvier 2010 — Le chemin d’un document audio ou vidéo dans SPIPMotion est divisé en trois étapes distinctes.
Upload et récupération d’informations de la vidéo source
Dans un premier temps, il est nécessaire de créer un article SPIP et de lui joindre le document vidéo "source".
Au moment où ce document est joint à l’article, deux actions supplémentaires au comportement normal sont exécutées : La récupération des informations techniques des flux audio et video du fichier ; La génération d’une vignette : extraction d’une (...)
Sur d’autres sites (4374)
-
FFmpeg parse NALs from H264 bitstream
25 août 2020, par sipwiz
I'm able to use FFmpeg to encode a dummy frame into an H264 bitstream. What I'd additionally like to do is extract the individual NALs from the bitstream.


From lots of hunting around it seems like using an `AVParser` and `av_parser_parse2` is the way to do it? I can see the functionality is there in h264_parser.c; I just can't work out how to hook it up. Although maybe `AVParser` only deals in frames, and `AVBitStreamFilter` or something else is needed?

In my example below I am successfully encoding to H264 and transmitting the results over RTP. `ffplay` is able to receive and display the H264 RTP packets it receives, so I'm confident the H264 encoding is working correctly.

#include <cstdio>
#include <ctime>
#include <iomanip>
#include <iostream>
#include <sstream>
#include <string>

#include "strutils.h"

extern "C"
{
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libavformat/avio.h>
#include <libavutil/imgutils.h>
#include <libavutil/time.h>
#include <libswscale/swscale.h>
}

// Dimensions applied to both the encoder context and the dummy frame in main().
#define WIDTH 640
#define HEIGHT 480
// Used as the encoder time base denominator and to pace the send loop in main().
#define FRAMES_PER_SECOND 30
// Short format name handed to av_guess_format() and the URL the muxer writes to.
#define RTP_OUTPUT_FORMAT "rtp"
#define RTP_URL "rtp://127.0.0.1:5024"
// Size of the scratch buffer passed to av_make_error_string().
#define ERROR_LEN 128
#define codecID AVCodecID::AV_CODEC_ID_H264 // AVCodecID::AV_CODEC_ID_VP8;

// File-scope FFmpeg handles; main() assigns all of them except _swsContext,
// which is declared here but not referenced by the visible code.
SwsContext* _swsContext;
AVCodec* _codec;
AVCodecContext* _codecCtx;
AVFormatContext* _formatContext;
AVStream* _rtpOutStream;
// Scratch buffer reused for every formatted FFmpeg error message.
char _errorLog[ERROR_LEN];
// Parser created with av_parser_init(codecID) and fed each encoded packet in main().
AVCodecParserContext* _codecParserCtx;

int main()
{
 std::cout << "FFmpeg Encoder and RTP Stream Test" << std::endl;

 av_log_set_level(AV_LOG_DEBUG);

 // Initialise codec context.
 _codec = avcodec_find_encoder(codecID);
 if (_codec == NULL) {
 throw std::runtime_error("Could not find codec for ID " + std::to_string(codecID) + ".");
 }

 _codecCtx = avcodec_alloc_context3(_codec);
 if (!_codecCtx) {
 std::cerr << "Failed to initialise codec context." << std::endl;;
 }

 _codecCtx->width = WIDTH;
 _codecCtx->height = HEIGHT;
 //_codecCtx->bit_rate = 500000;
 _codecCtx->time_base.den = FRAMES_PER_SECOND;
 _codecCtx->time_base.num = 1;
 //_codecCtx->gop_size = 10;
 //_codecCtx->max_b_frames = 1;
 _codecCtx->pix_fmt = AVPixelFormat::AV_PIX_FMT_YUV420P;

 int res = avcodec_open2(_codecCtx, _codec, NULL);
 if (res < 0) {
 std::cerr << "Failed to open codec: " << av_make_error_string(_errorLog, ERROR_LEN, res) << std::endl;
 }

 // Set up a parser to extract NAL's from the H264 bit stream.
 // Note this is not needed for sending the RTP (I need to separate the NALs for another reason).
 _codecParserCtx = av_parser_init(codecID);
 if (!_codecParserCtx) {
 std::cerr << "Failed to initialise codec parser." << std::endl;
 }

 // Initialise RTP output stream.
 AVOutputFormat* fmt = av_guess_format(RTP_OUTPUT_FORMAT, NULL, NULL);
 if (!fmt) {
 std::cerr << "Failed to guess output format for " << RTP_OUTPUT_FORMAT << "." << std::endl;
 }

 res = avformat_alloc_output_context2(&_formatContext, fmt, fmt->name, RTP_URL);
 if (res < 0) {
 std::cerr << "Failed to allocate output context: " << av_make_error_string(_errorLog, ERROR_LEN, res) << std::endl;
 }

 _rtpOutStream = avformat_new_stream(_formatContext, _codec);
 if (!_rtpOutStream) {
 std::cerr << "Failed to allocate output stream." << std::endl;
 }

 res = avio_open(&_formatContext->pb, _formatContext->url, AVIO_FLAG_WRITE);
 if (res < 0) {
 std::cerr << "Failed to open RTP output context for writing: " << av_make_error_string(_errorLog, ERROR_LEN, res) << std::endl;
 }

 res = avcodec_parameters_from_context(_rtpOutStream->codecpar, _codecCtx);
 if (res < 0) {
 std::cerr << "Failed to copy codec parameters to stream: " << av_make_error_string(_errorLog, ERROR_LEN, res) << std::endl;
 }

 res = avformat_write_header(_formatContext, NULL);
 if (res < 0) {
 std::cerr << "Failed to write output header: " << av_make_error_string(_errorLog, ERROR_LEN, res) << std::endl;
 }

 av_dump_format(_formatContext, 0, RTP_URL, 1);

 // Set a dummy frame with a YUV420 image.
 AVFrame* frame = av_frame_alloc();
 frame->format = AVPixelFormat::AV_PIX_FMT_YUV420P;
 frame->width = WIDTH;
 frame->height = HEIGHT;
 frame->pts = 0;

 res = av_frame_get_buffer(frame, 0);
 if (res < 0) {
 std::cerr << "Failed on av_frame_get_buffer: " << av_make_error_string(_errorLog, ERROR_LEN, res) << std::endl;
 }

 res = av_frame_make_writable(frame);
 if (res < 0) {
 std::cerr << "Failed on av_frame_make_writable: " << av_make_error_string(_errorLog, ERROR_LEN, res) << std::endl;
 }

 for (int y = 0; y < HEIGHT; y++) {
 for (int x = 0; x < WIDTH; x++) {
 frame->data[0][y * frame->linesize[0] + x] = x + y + 1 * 3;
 }
 }

 for (int y = 0; y < HEIGHT / 2; y++) {
 for (int x = 0; x < WIDTH / 2; x++) {
 frame->data[1][y * frame->linesize[1] + x] = 128 + y + 2;
 frame->data[2][y * frame->linesize[2] + x] = 64 + y + 5;
 }
 }

 std::cout << "press any key to start the stream..." << std::endl;
 getchar();

 // Start the loop to encode the static dummy frame and output on the RTP stream.
 AVPacket* pkt = av_packet_alloc();
 uint8_t* data{ nullptr };
 int dataSize;

 while (true) {
 int sendres = avcodec_send_frame(_codecCtx, frame);
 if (sendres != 0) {
 std::cerr << "avcodec_send_frame error: " << av_make_error_string(_errorLog, ERROR_LEN, sendres) << std::endl;
 }

 // Read encoded packets.
 int ret = 0;
 while (ret >= 0) {

 ret = avcodec_receive_packet(_codecCtx, pkt);

 if (ret == AVERROR(EAGAIN)) {
 // Encoder needs more data.
 break;
 }
 else if (ret < 0) {
 std::cerr << "Failed to encode frame: " << av_make_error_string(_errorLog, ERROR_LEN, sendres) << std::endl;
 break;
 }
 else {
 std::cout << "Encoded packet pts " << pkt->pts << ", size " << pkt->size << "." << std::endl;
 std::cout << toHex(pkt->data, pkt->data + pkt->size) << std::endl;

 int pktOffset = 0;

 // TODO: Find a way to separate the NALs from the Annex B H264 byte stream in the AVPacket data.
 //AVBitStreamFilter 
 
 while (pkt->size > pktOffset) {
 int bytesRead = av_parser_parse2(_codecParserCtx, _codecCtx, &data, &dataSize, pkt->data + pktOffset, pkt->size - pktOffset, AV_NOPTS_VALUE, AV_NOPTS_VALUE, 0);

 if (bytesRead == 0) {
 std::cout << "Failed to parse data from packet." << std::endl;
 break;
 }
 else if (bytesRead < 0) {
 std::cerr << "av_parser_parse2 error: " << av_make_error_string(_errorLog, ERROR_LEN, bytesRead) << std::endl;
 break;
 }
 else {
 std::cout << "Codec parser bytes read " << bytesRead << "." << std::endl;
 pktOffset += bytesRead;
 }
 }
 }

 // Write the encoded packet to the RTP stream.
 int sendRes = av_write_frame(_formatContext, pkt);
 if (sendRes < 0) {
 std::cerr << "Failed to write frame to output stream: " << av_make_error_string(_errorLog, ERROR_LEN, sendres) << std::endl;
 break;
 }

 std::cout << "press any key to continue..." << std::endl;
 getchar();
 }

 av_usleep(1000000 / FRAMES_PER_SECOND);

 frame->pts++;
 }

 av_packet_free(&pkt);
 av_frame_free(&frame);
 avcodec_close(_codecCtx);
 avcodec_free_context(&_codecCtx);
 avformat_free_context(_formatContext);

 return 0;
}
</sstream></string></iostream></iomanip></ctime>


The output from the first available frame that contains 4 separate NALs is below (apologies for the size). Since the H264 byte stream is using an Annex B format the NALs can be easily extracted, the delimiter being either `00000001` or `000001`. If possible I'd rather use the proper FFmpeg way of parsing instead of re-inventing the wheel.

FFmpegCppEncodingTest\x64\Debug>FFmpegCppEncodingTest.exe
FFmpeg Encoder and RTP Stream Test
[libx264 @ 00000252de7e2180] using mv_range_thread = 24
[libx264 @ 00000252de7e2180] using cpu capabilities: MMX2 SSE2Fast SSSE3 SSE4.2 AVX FMA3 BMI2 AVX2
[libx264 @ 00000252de7e2180] profile High, level 3.0, 4:2:0, 8-bit
[rtp @ 00000252df9d4c80] No default whitelist set
[udp @ 00000252df9d46c0] No default whitelist set
[udp @ 00000252df501440] No default whitelist set
Output #0, rtp, to 'rtp://127.0.0.1:5024':
 Metadata:
 encoder : Lavf58.49.100
 Stream #0:0, 0, 1/90000: Video: h264 (libx264), 1 reference frame, yuv420p, 640x480 (0x0), 0/1, q=-1--1, 90k tbn
press any key to start the stream...

[libx264 @ 00000252de7e2180] frame= 0 QP=23.20 NAL=3 Slice:I Poc:0 I:1200 P:0 SKIP:0 size=8262 bytes
Encoded packet pts 0, size 8262.
000000016764001eacd940a03da10000030001000003003c0f162d960000000168ebe3cb22c00000010605ffff9ddc45e9bde6d948b7962cd820d923eeef78323634202d20636f726520313631202d20482e3236342f4d5045472d342041564320636f646563202d20436f70796c65667420323030332d32303230202d20687474703a2f2f7777772e766964656f6c616e2e6f72672f783236342e68746d6c202d206f7074696f6e733a2063616261633d31207265663d33206465626c6f636b3d313a303a3020616e616c7973653d3078333a3078313133206d653d686578207375626d653d37207073793d31207073795f72643d312e30303a302e3030206d697865645f7265663d31206d655f72616e67653d3136206368726f6d615f6d653d31207472656c6c69733d31203878386463743d312063716d3d3020646561647a6f6e653d32312c313120666173745f70736b69703d31206368726f6d615f71705f6f66667365743d2d3220746872656164733d3132206c6f6f6b61686561645f746872656164733d3220736c696365645f746872656164733d30206e723d3020646563696d6174653d3120696e7465726c616365643d3020626c757261795f636f6d7061743d3020636f6e73747261696e65645f696e7472613d3020626672616d65733d3320625f707972616d69643d3220625f61646170743d3120625f626961733d30206469726563743d3120776569676874623d31206f70656e5f676f703d3020776569676874703d32206b6579696e743d323530206b6579696e745f6d696e3d3235207363656e656375743d343020696e7472615f726566726573683d302072635f6c6f6f6b61686561643d34302072633d637266206d62747265653d31206372663d32332e302071636f6d703d302e36302071706d696e3d302071706d61783d3639207170737465703d342069705f726174696f3d312e34302061713d313a312e30300080000001658884005fe89f8677b600fd616763bed58622ce7651a4f1cda99202677e71f334ec6194d4a6485e585812544cbac6755f3e5a4908543c2eab60170fc741f23ef68edc93ff16bfa36c98d7549ea2e1f74d1bafe3cedb15a1b08bb86d804455f4f87d5b5c8ac84edabaa228ef41bb002d16db3be5223900e12129b18fd7b186d33d5f2043ad324200f476fb48272ec5cd166e98419f6c1297e6033031f4fa0ecf2ed7d0c6038e1598dc7923d15b4d918f00bbe7e4dc1296494b0bf6f209c3e84b8bd37c0283ec289b865f471aa8f93495c3cc7ed16332f88dc0bc48b5c3801769dee0dd244492271a89202f6d264031de9810eea8098e9075f56b8a375233ec48651e70ceaee5f0d40922762c46f40681dfdf9c3bd54cffc61a7b1c
6c030723be13381247f3ff8de05d21d0053b3022815e6c4b236c84940e4a5de6db03b8f1731279c921bf2a0c92ba62000deaea30e23b2067b2da7c6583cc0d81e652c3fb56a8ae4ffed900983481bf7d14a208748d8ba767ed1b5a68b62c8f548ad5d1857c283b9ac4310767f54a06d6341d7a6a0c05bd1493f8b54b1992f0e528030571f494cf9857c419288115cbc872518338ac9497454439fc28632913f04a514653f1dbae98db306f207aef6467e5cbf977f5936880012e69160cdcab02d91d8fcf8d760cb459bec58b688aadcd9704d47d7e335edd0d14552b305148a2b43681adec4ba9320c3d63d7e742d43d77c08c4c9e46c28cb4f2e55f5384227ac138d3eccdc0dd2884075cbb329f37843e09df7a65e2fcc9555e579840c702385d53a97b021074ede6ed5cc56fe5a43e41751de604e4c1397b60463cbe2393ebce670fb6b56ccdd93f05f989376115d72f30d0d1b7956e56d85885d398f88869eb9ec752048429d30b952c5e8cd1dd624f80a3c8102c37e5b05380b10873ea7d3fcee4a4cf78dd653c62102d4428ee7ef401d2cf74e07b23e26a16c9db26ec390d86c504879e25c698992fdb0628ab4ff80d95125d992fd9ab56433d4c3c7f9294bf3f33e60de3019a9881b86c7a84c9e46cd5a2283dafa974652758bf232d401ec692dc14611d0495e804005a35b474cd63b874e9ffa0166133bc167391dc5c36bc18d90bef2bcf6d7df8f960463cbe2393ebce66e3f643b54afe41d1fca0ff7cdb115d72f4598b6746adcadb0b10e828a6133767c69f2273e853a60b41e590f9f0d004f5475f23e098f4122ece175e98d64a25f26ac44dd65c4618f1b779f0d6e57744aee7835729e311a171be863c1b2df729d000335e412e5b62fbdf60c97eedef55c769b947865ba9a3ccad2411722fe24e15c701c1cc7f875310c7f3367f029042ae2fdaaf3b117fbdd61a6f975e3e32184d21b0c2438b63764015d6eb4b010f821af01a58e881351a5d10a3dff7d754665530cb1d6464e122a3b94e2560dcd9704d47d7e32703c820259d65f0b165686d035bd89bcacba5363d7e742d43f3c4d719bb3e27491c85694e9bb54b18bf9f0d0025e4b1344792ed9ed5b705d7b048a19b83341d580440fb417cda076bdc0a5e996c04a96514311fc938cce9866a812810f519e097defb064bf76f7aad7c8b52d052cb12d364571208b917f126ecc75017f88b2a98c40ff2ccfc4c9e4764eaa353dafa99dddf597599569569e0285afe833ce80f471533fb71727714b151f867708d447d86200111e3e23e2fd9193b068ae0cb9a5ed5604295da0df192f72f03d14d6846571208b917f12709726012e63fc3a988208549ae33767c625f3909da846eed67365f369059c1636dae0a30935216cb7d
3827f2454180348f111bc34375c81a4a8acc61e862cf9c760a73d1b2e536b2274000a64b3f1fa5e5eab3cbe2393ebce4adce11c6fa605b3e824cf9b622bae5e8cb697eee987ad616245f3e1f132791dd18dc88f6bea73e05acbaccab500712b0ec4306644fc66a0100166af1e219f190b596898bf13a5cd73cb68bb249603bc487705be58118f2f88e4faf39264330a406aa7d4242af01db0fe0b97a337cedada4bb9ebbff66cc9af64f22328a9e981758f76e936edd95a93aa0fc6dae0a30912d9c8d61e6f7eba5924c5e650d462a401104de6ea146111326a9a130356bbb66d927c938ccdb90e80138a51832fc97defb064bf76f7aad8cb052eed750fb787474cced9cbf893765f51b4717eee02cd869eccfc4c9e4471d65654f5dabc0f387ca5fca870c6bab5fd067e1d03bedfe6afa61f4b9fc5f6a725b6ba2ed537b74adae12ba909651c088a0000600e5a99f2c2ab7365c1351f5f8bc12708dafafa28fc183890cb86d035bd89be14b7e32bd7e742d4c22526b8cdd9fa6e3ce42b4a74f0527362fe7c340f2691f998b1cbdd10b5070baf45611282f18cae646e49a07ce550af5930043d728c0bc2e75b43d0d960766000010f3e1648304bef7d8325fbb7bd571aae2117725c06d3b98946899b236ef71520ad12e63fc3a98834a999f8993c88e3acaca9ebb565ad70f94bf950ec373c9f5c146197f3bb6e64793af2190341263531ff0b4fe0202c57d83b392fe164019c487981b169f5e1afb1e5f11c9f5e722c8cf82247f75a8005e681403b61fc172f3171cbf974c3d6b0b13edee334434f5f00568769da8469526b9b359b482b51f04ee5c28e9aa59f0c802badfee1d2f9e3763b47a898260e95e97a865be63dc9ab693f2ae97d43efa556e6cb826a3ebf19956472192b5f9e936cc2ae42f662d2f67343112f6bf6831e2c67b32010b2e43fba2d4bbb5d82a2f239e827e08b8364af2d8ba50dedff0976c07fae005428f34316369d7486cf5fa1527e2f975e3d2910a63961ac0c0942991f5fc2d5f046aa8d41138e574a9bdc81bc735d484b28e05060001e73f1809b8ac1b9b2e09a8fafc5dd73847797a00944c83d522ee32c1e606597a1e1b4590692a93defbf9d4d7c8f297ee447b5f52a7e43d68c7fcb518be0aab61704ffa33eb4f5abbeb8107f9b6b55db1e8902f580c8b4fecd6c350d64fddaf26d110a9a6342f6ab0214aed06f8c9a91292317b355150d133646ddee29f1206be22caa63109a0a953ec9e479209a4b8bdda7ca856b34af4b768e7d240421f04c8d10d0670bb0fef1893c210ee0ad01b47974d53d1b8a0e678e8009b51ad99fdd7dcac5c98aba3e2d7e47f4436f353ea62597b2f812be6d88aeb97a3402edd9a4bb9ebc001ded08bec9e4779a9a3f72afa9df9
ee6ca79196a1cfd23c6a1f4e9bfa77dc050b1badbd0ad6c4803812e3e745165c95df8a100b83cf67f30a381b8eb276a188aadcd9704d47d7e2ed445cecdd9f71e27e39091771960f30162056649e6553ffc42192d1d3ec9e478ed9e51f3b1f1e889b59b4ff9695f95b075a0cfc3a8781d1f5fc2f8b508537b01c911aba80e0f7f6ade4db9fe1aa0e09873834aa67fd8aadcd9704d47d7e32711edbb3926513127170da06b7b137c022fe5a2c8349548f64c7c099bb3f37554b410a72bfd2163afba18455199378e17612413c22e64793b84fbb5ae812be93c02295949ddad71eeb37cb5d10ce41dae168a8204bd56797c4727d79c919463a6ddba17833e82a886ec22bae5e630697fa5c6dc8f74633ed16ffb2791e24a693bd45c9b3b8a18f60eda105618af3421474872181d1f5fc2e8bd474277fec0c66eb0fde2b879e4516a2bb46ad25f23642eac000142e36275078416cd9704d47d7e2ee7afcbff150ca08eaa7063090e2c1e60647e9cc67ccaa7ff8847c5a117d93c8f247347ee55f53d5d86b39b32ad445a51bcc4305ef9f4efb80a1638041208ee1ad0941d4d2b5621efabaa0f76e01c83550d89d431155b9b2e09a8fafc5c3835fa1b44bebaa9c848bb8cb07981948b3324f32a9ffe21c3eae2d1d3ec9dbf3c3c61776dad4eea9a7d3960fcb89067ed861230dd1ca200aeb7e14f1dfb161bfccad72cdcb432b63970829f3c27b00000df2ab81fcec556e6cb826a3ebf177e48cfcee0c31399538618487160f303353247e98f5f9d0b5289ee57f64f23bb9cd25b957d379beaca793d093d5bf9491385d866c0cee402002e355d17e5268a85c06a4b7f014325db22fbaffdaec23d71fbae530c5a1021d8148b66e3fb4960463cbe2393ebce45ac55915f9f7dd1f6f827be6d88aeb97955334eb971b723dd1967ea63132791e5ea68dcf6be9dd11380fabfebbc5a20b161a0cfc3590c1b4015d6ef84bdd8990c62578232d46c68a49cd5f4558b48340f84ed466155b9b2e09a8fafc5cb277537e978923b34b35c3681adec4af0c2cca7ccaa7ff885065a5054d7c8f375349dea2e51e37bcd9a7232d2dabe69060fdc28def0f575e6d4b76c944a9033069b846fac6797b7a6e719078ad5f4571e541c2eb2c90ad17b55810a576837c5a37ac08414dee67a508b1b33b672fe24dca3a65ea663fc3a98881d565a117d93b83ec8e51e222fc41ce1a82975e3d2ff540cea18c084b472c802badcd41b0d55a40cde3d59eba888ae543e17d86a1b6d517d00000b0a6c5ca7e420b66cb826a3ebf1779cdcfcee16528d6f1be8c2438b07981960a445ed1641a4aa525ad08e132791e8efdcb3e763e28b0f55cdcccab513335702e0a30f7e01b5a6803f9f1292b5761b0c6f23f7afb0e536891b2e5
381680001119c6f453af231598d4563d1d501e9b0a102dfeb01e63855b51bfc9e34d3ff6d3e374183e5c4264143604c9e479df432c3dafa9f26655e93c8cb510ae56f310c19d01f8e1df4e09ef52c9b08268513e566c0f7f6d2364799227d8faf5eb4e1440004b3d1cad00a97aacf2f88e4faf3929fa62c2de6272d268377cdb115d72f46821f7c6e36e47ba31be5a3a7d93c8f0b5349dea2e51fa67cd9a7232d44ac71c5c43067456780201000a575fbb1fc8376f6a12140b88bbdbff4b965cf101a27b80c15c62b9b3b0831a8ce8f76a709d7ec1890bdd092d7891246899b236ef6df88ae7322fddc059b19768b7fd93c8f125349dea2e53d0eb6b369ff2d292ac307ee147482b181d1f5fc2e19331908bf382bce41df292d2ea5c0664239ec62a13eda0b27fd8aadcd9704d47d7e2eda7de728b848caa8a7063090e2c1e603a476231e8b20d25526e4b422fb2791e4ca68fdcabe9be7430897876d082bcc143f10a3a451bc028fafe168614c2cbf2f495f3a136eea1e78a7168e309ea1b8163a9f9082d9b2e09a8fafc5d5330c98fd85c531388f5dc4a65c3681adec4af2386fb74590692a9304fdf17d93c8f00516767b5f526dc85737332ad2c1dc9ec4428e9936420ff357d323cae70bf97cf608456c2b6b48ed379bd8a6709e1f368ce001c5d303a9eb882d9b2e09a8fafc5d505724801b199d77127170da06b7b137f0a3cb5a2c8349549b27c54fb2791e2a6525b957d37706622661db4230e3ceb5382ec34b86675cc8f27819629ce218ece5f088aff6a34d53d1b8a4640000c3e3e119f971afb1e5f11c9f5e724f1ab446bb6b7b6b5c0ebe6d88aeb97a36a94e93a61eb5858a49d7640a6be47a9d372cf9d8f89fafdae6e6655a890ec974e0a30f5fc886b4d007f400eb459bd73ac9a491e5ca105252674b898325fe5ebf5e7dd0800479b39fe02533c25cb0cc5fa025a46db088d7b5a09570d5b4d0f485bb16c38108780d61e730027294a1f1cf8dab11ffff7c41551ee757ddff993a03622a7e10b591b7b4821f2d05baae32b8a930b4e379e82132a25985d6c93ae2af0235fe000369a441a3bca02997c65646ddedbcbd31a1c45954c6207ac1ee132791dc046bfb957d395c7bf9dd7a1278727066b4819f86eb8b30802bade339e9604c0441b117141d0b0299881b0ed7624b62030c7bf94d83b22e60eeb8a033940f44db6271bdc740d6f62572a41d0d5ebf3a16a4fb31e54d7c8eef5dcc4f6bea75d0bad267fcb4a5624799033f0e38614200aeac7eb1e00b011833aa57b3d46dfdc45708dfb5ea55013bdcc857afaaf5bc43259ee1f1bbb0e4170b28e46792896c592d974542088a9c5c979ad60949e0f791761f0874fdd2bde624f15c68f45fbb80b36363cd95f19bb3ad83d
3e52758f0b498fe052967620ca8a790035810a3306d00575bf3378cba74c37f2132a7bf7d6445d7e334b282706179595aa7d00c98f0ba24a476a330aadcd9704d47d7e332aae8ae84383e93d8858e8b43681adec4af62c394f9954fff1065bbc21099bb3f2cb68b52d29c9c98408d501ba1052dc0d73409f483de9efa0588b49e7e8b6ae728e2964605c61b8df1127e5420476495ca360059af5f3c3b485308b8007bfc0d2d3da3f5aa9f06c20f3835c1c833efb4cc5ddaeaa6ec3090e2c1e6065be222d68b20d2552872b3b1fb2791e89e6925cabea3cf189b333fe5a896be4e8e0a30d821ded7323c753a8622f4e6edcb5d3e76c98019803097f11b5c46697f9492e677420c6a33a3dda9c28035324a3c1fe8da195150d133646ddee46258dbc8bf770166c8850653ce733f543458aad29d2474c172fb3477691e1475843e0994743121c2ec3fd280aa1c987f61f8cbedb048316f2d04708552ec1dfaf0926245c1ce51a6f60e520a26c359e3f6432b2b17262ae8f8b5f5f327f3064e9179fe5a301403b61fc172f2a0680b8b49773d780053fbb1fb2791dd8aaa83cec7c77fb026a67fcb4a8ff92e173d70a34119cd579b52df62e9c8a56b0921e39962f5a67db8a23de7d44d263a875083fda0e22f6ab0214aed06f8c967177686cc19d6e01045c8bf8938951cd1af88b2a98c41308ca9e33767c6f790ed5a5395e8bb11d03d7422a18a911087c1322c7773705d87bf4eb0a223b0717ea440304943372e3ae57c75e8d66820b7c0839589900002536f7667d11f72b17262ae8f8b5f9e5f4d2639748bad7028076c3f82e5e8e1c54f2e987ad61624a9b3b1fb2791ddb2aa83cec7c773e39b333fe5a7ffd4db5e218331a63ba2e64793bc26d7a495017342a35a1e57870405354f05d272be909296e1453e7960463cbe2393ebce495f6d3102303e1e0d028076c3f82e5e8b859171692ee7aefff3663704cdd9f9f9443b5694e57c2d8c740f5d08a8a5244421f04c8be7651c1761ef12800d2ef18c6f62390ff516762980bf6a1d277bb09b3d1b22f40c16ec1023ae59412d95958b9315747c5afb0fdbc6d4b879f7ab413c4376115d72f29c748b8b49773d77ffab31b84c9e4776caaa7f3b1eedac4863cc3b6840cb1677410a3a41e23964015d6ee3bb78f67983aa7cc0eedab1352ed0f9cc7e4ab0000973e1ea1fbc2ab7365c1351f5f8bc2a44d3956d2d7def8df46121c583cc0cab491ef2bd7e742d50c552d78cdd9fb079a6908539368f101d1dd0c231e2c9b0987c13b309e3ee05097e7818efa5a0f136a9988b62ddb8a2bd299e59d3bc687226b91dab720000215fdc2b3fbafb958b9315747c5afcec833f3b69794f52c2b781db0fe0b97a370efbd9a61eb5858a26cd08bec9e449762
5634f5dab37e8c05ca56a4ec7868de621833b143bfae64792dd857a3edcdea00e0a45dcee0acd67a3645e894102237ff145402a5eab3cbe2393ebce458e76766ee5f4b54b0af8076c3f82e5e54d098bd749773d7802e160e679ce67f057dd0fa14e4d9e8c078774308353f5940f13409f1059f33ebe002adb44f9832e493b3b512855930686a5afa90594acdaa00357d0ae8811668636e0d4c1afb1e5f11c9f5e722daed4bbb7066e838857c03b61fc172f31daf9d46e36e47ba379d5e9faa7d93c78091aa263c91fe97387ce20ce8715abac1fb851d32f82bc200aeb77204203b5134ba7a5b062ace99deba909651c35085c061940f50ffb155b9b2e09a8fafc5e0738471cdfd08d529c18c2438b07981cc5e7bf2d1641a4aa52b36845f64f23d29d51fb957d4fbc51cd94f232d44a4d3a4ae0a30d56bfd4402002f0bfae4615e86b0b0bd01370d3ecaa1ac59fc7c115884b5cbdc4b60c2d724bcdac218722622c178968dbd95a6528c9dbf72b6ec9acdbebd0c155faabbfb4485787cdb115d72f462c9dbd749773d77ff59b474fb2791dd72aa83cec7c76cd6c35565c778059e51ce0a2f0a053b573f458e1107d59c37d9e02a366f07df791152347ab93847887d8351e360ec8b983bae085aef2ccc6b5899ef71d035bd895e16180cf32a9ffe20ee507b535f23bf5555cf9d8f774578c3f2ffaef0e82d8dad067e1d396148fafe175d894ad41444d0b25f8ba6fc5ad88ebbe31a6d1795863dfca6c1d917307773a84d25785adf5e2feef3e80a69f194550c29083fe8f336f29d3601940893c65f0ec3434117431f33e6d9f046aa13f045c1b2579755d48a7c3544bb5f6f974dc3e954ebb8fee5e5c996a989d3d87d6c8ec4d2311f161ac0d52c645fcd5f4fadb48e09f4347d85a8e5f1c4f4eb53b93f7ea4a12e1cb3e7f6a67c05eff6fc46edd40c960dcd9704d47d7e32ad86c9ab18384e9842ffff0c2438b07980ecec9b43a2c834954a223e3e68869eaf0b8ebf63234f39e1fefbb979b2692b9221831c1a573041ee6251cb86da02b3932b56dcaa8076a37fe67949a863f4c590208b176ea109fad54f8361079c1ae1224a28ffe1b36dc716332d0da06b7b12eedf6fe457afce85aa804d08bc66ecec9d91b92cd9f6a1d5202c71d3af185d246b2040b1a4e99dbe8588a54810e60a1f4a687ef6f29602a586dd3c7b25533eb9a3c6643498399f2d24d240ac0b750858b2b43681adec4bbcc1d29b1ebf3a16a8813e1f132791e4bfb911ed7d4aca11867bc8cb4b2ef0b5fd067e1efb82b242128d562d2785378f1c9d288453b93c593cea732e0ede777c6da51ac9f30d1e3321a4c1ccf9692f53d64b73493290dc8bb8cb07981ca85a601e6553ffc41f57d9f8993c8dae76a63d45ca6962a7
8cf79196a04605465c1461283133500800b918f95c7b663bebcc0b2ddccd7023fd54a7ea2111a72afbf4446bec797c4727d79c94e032f00155de10b4a1267cdb115d72f297e3cb5b49773d780016a935c66ecfd2821f76b4a72c74ab11c83d74206788175902058755d88bb325d6fd065f6efaea76942d65f4d87e0135eb326b96423ce289e98b3c46b0556e6cb826a3ebf194142092abbc4398da06121c583cc0e3529300f32a9ffe20cb69fa290431f250a3353dafa9d34609a31ff2d421bdc346c2e09f82364fb38aeb7348541a866485d72d7fba7bce39fd4a82468849cc3dd6ad4f2e708304bef7d8325fbb7bd58df51f89dca4adb1208b917f12713df7512e63fc3a98875c2cfb3f1327869de2164c7923d3d1402dbf098d4d8c8a8cb828c250e7e6a010017789be48e069ebc1f7e9e7795f98ac429aefd96200029985f7502097aacf2f88e4faf3912e99289fecfe6c4f80901db0fe0b97a3ab6edada4bb9ebbffd9cff4520863f512ee447b5f53cf7c6b2eb32ad3ff9a479d43ea5f76776dcc8f1e46ceb4ab66182a09feba509c5161edfae23c2c7a84f8911dcdd8331c6ad8f119d1eed4e13a71490f6b568e13c6159600822e45fc49c24ba8db08bf770166c6bfa329e7399f990e43b5694e9953dab9699a3bb380f522210f8264e29328e0bb0fb62ca16b2745d97e6eb81b4cb18de29ec12d38f2e1240b8635b33e88fb958b9315747c5afb2ecb2a9e3127c197c0500ed87f05cbd184e76e2d25dcf5dffef541704c9e478228b4972afa9f01ceb9a59956a0ffdb246eee0a26f08000380a163e7392c9337e8bf763948eea906a289a4aa1f1d503dc4a237ec7905b365c1351f5f8bbdf39d65efbd8778ecd8938b86d035bd895ca1fa5ad1641a4aa49016763f64f23b894d24b957d4f516d36667fcb4a98d2c2b5cb8519f86490bcda96fb071f30ed5b628d92b1f5b3a408545faa47484739200000481fe9d9311db6ab0214aed06f8b90f031073c0e36d1dcc54344cd91b77b8a5cf2f3731fe1d4c41854a54f19bb3f3e1b43b5694e4cf86863a07ae8460f252e4d11e4bd650bb470bb0daa83286f28d8f0a874fcb5776f0babe0ee12bfb91b456000026f171e3f5e32b2b17262ae8f8b5fa2da20bae6e75a63744926ec22bae5e8ea65ef2e987ad61626e5a953ec9e478239a4b72afa76ed8863cc3b68460a25f13e0509471cef1b991e4f1de917e90fa133d793e55aca3735cf718c8ef600008461b169f971afb1e5f11c9f5e725f644d2653d6051c2b481db0fe0b97a377845cfa4bb9ebc004c2cf8bec9e477f39a3b3dafa9e2a3e6ce4ff96a1fe29cf37051846dceedb991e4f121b78440475092b9ab56f1e90c0ecb5f17c1eb143926c05f585a29f5e1afb1e5f11c9f5e7255
82e772569f4791d3e827886ec22bae5e541fb171692ee7aeffd6ec194f39ccfcb71b9dd694e575bc00e5ee86103178925e409f484a4d4a502c4598b0ac156cef3960965d1aeb9ee3f59f9c6e35244d36d2a6db9e4ee60327b32a1fd9d1e3321a4c1ccf9683e22d4bbb7ff4ecc0c90245dc6583cc056f14463d1641a4aa4989528fd93c8eda5346e7b5f5353b16b391ff2d2a4be5315f04819e7ca49caf36a5bee82a3f594a8dcee9cf2d1072b497c1ea57cb48217e8ed90ad17b55810a576837c5bc86e76b7857035e24432676ce5fc49bb474cbd4cc7f87531056ba91d4d7c8eef5346e7b5f4d5f500c7c876d082be5587b70a3a42ca395fcd5f4e5287583748b5ab1ca688d
Codec parser bytes read 8262, data size 0.
nal: .
[rtp @ 00000252df4ef8c0] Sending NAL 7 of len 24 M=0
[rtp @ 00000252df4ef8c0] Sending NAL 8 of len 6 M=0
[rtp @ 00000252df4ef8c0] Sending NAL 6 of len 673 M=0
[rtp @ 00000252df4ef8c0] Sending NAL 5 of len 7545 M=1
[rtp @ 00000252df4ef8c0] NAL size 7545 > 1460
press any key to continue...

[libx264 @ 00000252de7e2180] frame= 1 QP=23.21 NAL=2 Slice:P Poc:8 I:52 P:101 SKIP:1047 size=482 bytes
Encoded packet pts 4, size 482.
00000001419a246c45fffa5d37ecc1d1a2448b600952115b6c1b9880c2721414b9ad381385c2d2db0c0fc3041714814ca3fbfc1c85bb5cf888b1442cdf4fd78bed0ff15512df4949c046d5ed117d4ca9c2fe8e16d8b2a0ee8e8ca9ef07d709242427eec2e62e7c5ddd87a9cc7c7fa3c97bcf657971f49b92b8be0b5ec4d9de8d8abe9aa061abf1d193ca02fe38a4c37e5ca55fac90c7e3d20a050d0684cf50614872855915c4e51caffc4e16e50cfee7c2f92c574efd752e2493c2cb07447541446f498625f89c0396f244bd0674dac31a45e98cbdd7f1f447bf8c84b2c288e3693bcfc2c1a4c0789fdce4fa71181f99a2911c044284c0e9c801e9e7417fc7a65a6f02f8482bd969a8776ecbfff27f823c294ed9e28c56c1816d0f40d2c009f83beb246f5f22e39375fae6db239e9d560e8370f61653ec068631bfe84c2ba6376d1435ca231555a828d724ac0a38fc7986b92997c1a18940bc569d2c652b836b6d368c84ff7ebee187f31f84e6289aa7987ffe660ea59897174f5266bbb471b3ec50070d29b08ca8c92b8c2987da5e80448e99667627e55996a00c56753f9fc65fa75d742e5e15d89ecb007496045027a101244ea4f27792ef3210023196008043fa7e1ca05aa3b1e4a8a6ac5e384440cb5d11d9ec2d1117473875947c2f1aacc37c
Failed to parse data from packet.
[rtp @ 00000252df4ef8c0] Sending NAL 1 of len 478 M=1
press any key to continue...

[libx264 @ 00000252de7e2180] frame= 2 QP=26.00 NAL=2 Slice:B Poc:4 I:0 P:102 SKIP:1098 size=91 bytes
Encoded packet pts 2, size 91.
00000001419e42789bff42138e8cab7ce34f0aaf2f3fb0c41aac77dad7803c8a422c3668a09d337695ffad27dd3d2a1499cf8812c8873f3308741b44759e97059270a4f8678646dfa543ae4da163dacc33a85b2694e7e3c052a861
Codec parser bytes read 91, data size 0.
nal: .
[rtp @ 00000252df4ef8c0] Sending NAL 1 of len 87 M=1
press any key to continue...

[libx264 @ 00000252de7e2180] frame= 3 QP=28.00 NAL=0 Slice:B Poc:2 I:0 P:74 SKIP:1126 size=82 bytes
Encoded packet pts 1, size 82.
00000001019e6174457f4a9778ce66461da66e887d240ad470ec49fe325654c49141af33481787c812ab8d6e27331c0203d4fe099ef254623da56868fdac9a5e5f4e08ec8ef08390748186902972dbe37080
Failed to parse data from packet.
[rtp @ 00000252df4ef8c0] Sending NAL 1 of len 78 M=1
press any key to continue...



-
JavaCPP FFMpeg to JavaSound
8 août 2020, par TW2
I have a problem reading audio using the JavaCPP FFmpeg library. I don’t know how to pass it to Java Sound, and I also don’t know whether my code is correct.


Let’s see the more important part of my code (video is OK so I drop this) :


The variables :


//==========================================================================
// FFMpeg 4.x - Video and Audio
//==========================================================================

private final AVFormatContext pFormatCtx = new AVFormatContext(null);
private final AVDictionary OPTIONS_DICT = null;
private AVPacket pPacket = new AVPacket();
 
//==========================================================================
// FFMpeg 4.x - Audio
//==========================================================================
 
private AVCodec pAudioCodec;
private AVCodecContext pAudioCodecCtx;
private final List<streaminfo> audioStreams = new ArrayList<>();
private int audio_data_size;
private final BytePointer audio_data = new BytePointer(0);
private int audio_ret;
private AVFrame pAudioDecodedFrame = null;
private AVCodecParserContext pAudioParser;
private SwrContext audio_swr_ctx = null;
</streaminfo>


Then I call prepare functions in this order :


/**
 * Allocates the packet, opens {@code file} with the demuxer, reads the stream
 * information and records every video and audio stream of the container in
 * {@code videoStreams} / {@code audioStreams}.
 *
 * @throws Exception if the packet cannot be allocated, the file cannot be
 *         opened, no stream information is found, or a stream kind required
 *         by the current {@code type} is missing.
 */
private void prepareFirst() throws Exception{
    oldFile = file;

    // Initialize packet and check for error
    pPacket = av_packet_alloc();
    if(pPacket == null){
        throw new Exception("ALL: Couldn't allocate packet");
    }

    // Open the media file
    if (avformat_open_input(pFormatCtx, file.getPath(), null, null) != 0) {
        throw new Exception("ALL: Couldn't open file");
    }

    // Retrieve stream information
    if (avformat_find_stream_info(pFormatCtx, (PointerPointer)null) < 0) {
        throw new Exception("ALL: Couldn't find stream information");
    }

    // Dump information about file onto standard error
    av_dump_format(pFormatCtx, 0, file.getPath(), 0);

    // Collect every audio/video stream
    for (int i = 0; i < pFormatCtx.nb_streams(); i++) {
        switch(pFormatCtx.streams(i).codecpar().codec_type()){
            case AVMEDIA_TYPE_VIDEO -> videoStreams.add(new StreamInfo(i, pFormatCtx.streams(i)));
            case AVMEDIA_TYPE_AUDIO -> audioStreams.add(new StreamInfo(i, pFormatCtx.streams(i)));
        }
    }

    // A video stream is required unless we only play audio, and vice versa.
    // (The two messages were previously swapped.)
    if(videoStreams.isEmpty() && type != PlayType.AudioOnly){
        throw new Exception("Didn't find a video stream");
    }
    if(audioStreams.isEmpty() && type != PlayType.VideoOnly){
        throw new Exception("Didn't find an audio stream");
    }
}

/**
 * Prepares audio decoding for the first audio stream, if any: finds and opens
 * the decoder, allocates the decoded frame and sets up a resampler that
 * converts the stream's native format to 44.1 kHz stereo signed 16-bit.
 *
 * Does nothing when {@code audioStreams} is empty.
 *
 * @throws Exception if the decoder is missing or cannot be opened, the frame
 *         or resampler cannot be allocated/initialised, or the sample buffer
 *         layout cannot be computed.
 */
private void prepareAudio() throws Exception{
    //++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    // AUDIO
    //------------------------------------------------------------------

    if(audioStreams.isEmpty()){
        return;
    }

    //===========================
    //------------

//    // Let's search for AVCodec
//    pAudioCodec = avcodec_find_decoder(pFormatCtx.streams(audioStreams.get(0).getStreamIndex()).codecpar().codec_id());
//    if (pAudioCodec == null) {
//        throw new Exception("AUDIO: Unsupported codec or not found!");
//    }
//
//    // Let's alloc AVCodecContext
//    pAudioCodecCtx = avcodec_alloc_context3(pAudioCodec);
//    if (pAudioCodecCtx == null) {
//        throw new Exception("AUDIO: Unallocated codec context or not found!");
//    }

    // Get a pointer to the codec context for the audio stream
    // NOTE(review): AVStream.codec appears to be deprecated in FFmpeg 4.x; the
    // commented-out codecpar-based path above is the likely replacement — confirm.
    pAudioCodecCtx = pFormatCtx.streams(audioStreams.get(0).getStreamIndex()).codec();

    // Find the decoder for the audio stream
    pAudioCodec = avcodec_find_decoder(pAudioCodecCtx.codec_id());
    if (pAudioCodec == null) {
        throw new Exception("AUDIO: Unsupported codec or not found!");
    }

    //===========================
    //------------

    /* open it */
    if (avcodec_open2(pAudioCodecCtx, pAudioCodec, OPTIONS_DICT) < 0) {
        throw new Exception("AUDIO: Could not open codec");
    }

    pAudioDecodedFrame = av_frame_alloc();
    if (pAudioDecodedFrame == null){
        throw new Exception("AUDIO: DecodedFrame allocation failed");
    }

    // The resampler expects a channel *layout* mask for its input, not a
    // channel count. Fall back to the default layout for the channel count
    // when the decoder did not report one.
    long inChannelLayout = pAudioCodecCtx.channel_layout();
    if (inChannelLayout == 0) {
        inChannelLayout = av_get_default_channel_layout(pAudioCodecCtx.channels());
    }

    audio_swr_ctx = swr_alloc_set_opts(
            null,                           // existing Swr context or NULL
            AV_CH_LAYOUT_STEREO,            // output channel layout (AV_CH_LAYOUT_*)
            AV_SAMPLE_FMT_S16,              // output sample format (AV_SAMPLE_FMT_*).
            44100,                          // output sample rate (frequency in Hz)
            inChannelLayout,                // input channel layout (AV_CH_LAYOUT_*)
            pAudioCodecCtx.sample_fmt(),    // input sample format (AV_SAMPLE_FMT_*).
            pAudioCodecCtx.sample_rate(),   // input sample rate (frequency in Hz)
            0,                              // logging level offset
            null                            // parent logging context, can be NULL
    );
    if (audio_swr_ctx == null) {
        throw new Exception("AUDIO: Could not allocate resampler context");
    }

    if (swr_init(audio_swr_ctx) < 0) {
        throw new Exception("AUDIO: Could not initialize resampler context");
    }

    // nb_channels is a channel count (2 for stereo); AV_CH_LAYOUT_STEREO is a
    // bit mask and must not be used here.
    // NOTE(review): audio_data is created as BytePointer(0); verify it is large
    // enough for 44100 stereo S16 samples before anything writes through the
    // pointers set up here.
    int fillResult = av_samples_fill_arrays(
            pAudioDecodedFrame.data(),      // audio_data,
            pAudioDecodedFrame.linesize(),  // linesize
            audio_data,                     // buf
            2,                              // nb_channels
            44100,                          // nb_samples
            AV_SAMPLE_FMT_S16,              // sample_fmt
            0                               // align
    );
    if (fillResult < 0) {
        throw new Exception("AUDIO: Could not set up sample buffer");
    }

    // Audio treatment end ---------------------------------------------
    //==================================================================
}



And then when I launch the thread :


/**
 * Reads packets from the opened file until av_read_frame reports an error or
 * end of file, and hands each one to the video or audio decoder depending on
 * the stream it belongs to.
 *
 * @throws Exception propagated from decodeVideo() / decodeAudio()
 */
private void doPlay() throws Exception{
    av_init_packet(pPacket);

    // Read packets one at a time
    while (av_read_frame(pFormatCtx, pPacket) >= 0) {
        try {
            final int streamIndex = pPacket.stream_index();

            if (type != PlayType.AudioOnly && streamIndex == videoStreams.get(0).getStreamIndex()) {
                // Packet from the selected video stream
                decodeVideo();
            } else if (type != PlayType.VideoOnly && streamIndex == audioStreams.get(0).getStreamIndex()) {
                // Packet from the selected audio stream; empty packets carry nothing to decode
                if(pPacket.size() > 0){
                    decodeAudio();
                }
            }
        } finally {
            // Release every packet, including those of streams that are not
            // played and those whose decoding failed, so none is leaked.
            renewPacket();
        }
    }
}

/**
 * Releases the data held by the shared packet and puts the packet back into
 * a blank state so the next av_read_frame call can fill it again.
 */
private void renewPacket(){
    // Drop the reference to the buffer that av_read_frame attached
    av_packet_unref(pPacket);

    // Explicitly blank the payload fields, then restore the remaining defaults
    pPacket.data(null).size(0);
    av_init_packet(pPacket);
}



And again, this is where I don’t read audio :


/**
 * Sends the current packet to the audio decoder and plays every frame the
 * decoder produces for it.
 *
 * A single packet may yield zero, one or several frames, so the decoder is
 * drained with avcodec_receive_frame until it asks for more input. If the
 * decoder refuses the packet with EAGAIN, its pending frames are drained
 * first and the packet is sent again.
 *
 * @throws Exception if the decoder reports EOF or EINVAL for the packet, or
 *                   if converting a decoded frame fails
 */
private void decodeAudio() throws Exception{
    boolean packetPending = true;

    while (packetPending) {
        audio_ret = avcodec_send_packet(pAudioCodecCtx, pPacket);
        System.out.println("packet sent return value: " + audio_ret);

        if(audio_ret == AVERROR_EOF || audio_ret == AVERROR_EINVAL()) {
            StringBuilder sb = new StringBuilder();
            Formatter formatter = new Formatter(sb, Locale.US);
            formatter.format("AVERROR(EAGAIN): %d, AVERROR_EOF: %d, AVERROR(EINVAL): %d\n", AVERROR_EAGAIN(), AVERROR_EOF, AVERROR_EINVAL());
            formatter.format("Audio frame getting error (%d)!\n", audio_ret);
            throw new Exception(sb.toString());
        }

        // EAGAIN: the decoder's output queue is full, so the packet was NOT
        // consumed and must be sent again once frames have been read.
        packetPending = (audio_ret == AVERROR_EAGAIN());

        boolean gotFrame = false;
        while (true) {
            audio_ret = avcodec_receive_frame(pAudioCodecCtx, pAudioDecodedFrame);
            System.out.println("frame received return value: " + audio_ret);
            if (audio_ret < 0) {
                // EAGAIN (more input needed), EOF, or a decoding error:
                // nothing further to play for now.
                break;
            }
            gotFrame = true;
            playDecodedAudioFrame();
        }

        // Safety net: never spin if the decoder neither accepts the packet
        // nor hands out a frame.
        if (packetPending && !gotFrame) {
            break;
        }
    }
}

/**
 * Converts the frame currently held in pAudioDecodedFrame to 44.1 kHz stereo
 * signed 16-bit PCM and plays the converted samples.
 *
 * @throws Exception if the sample size cannot be determined or the
 *                   conversion fails
 */
private void playDecodedAudioFrame() throws Exception{
    final int outChannels = 2;      // matches AV_CH_LAYOUT_STEREO configured on the resampler
    final int outSampleRate = 44100; // matches the resampler's output rate

    audio_data_size = av_get_bytes_per_sample(AV_SAMPLE_FMT_S16);
    if (audio_data_size <= 0) {
        /* This should not occur, checking just for paranoia */
        throw new Exception("Failed to calculate data size");
    }
    final int bytesPerFrame = outChannels * audio_data_size;

    // Upper bound of samples this frame can produce, with headroom for
    // samples the resampler still holds back from earlier frames.
    final int inSamples = pAudioDecodedFrame.nb_samples();
    final int maxOutSamples =
            (int) Math.ceil((double) outSampleRate * inSamples / pAudioCodecCtx.sample_rate()) + 256;

    // Output is packed (interleaved), hence a single plane large enough for
    // maxOutSamples stereo samples.
    BytePointer outBuffer = new BytePointer((long) maxOutSamples * bytesPerFrame);
    PointerPointer<BytePointer> outPlanes = new PointerPointer<BytePointer>(1);
    byte[] pcm;
    try {
        outPlanes.put(0, outBuffer);

        // Pass the whole plane array as input: planar sample formats keep
        // each channel in its own plane, so plane 0 alone is not enough.
        int out_samples = swr_convert(
            audio_swr_ctx,
            outPlanes,
            maxOutSamples,
            pAudioDecodedFrame.data(),
            inSamples
        );
        if (out_samples < 0) {
            throw new Exception("AUDIO: Error while converting");
        }

        pcm = new byte[out_samples * bytesPerFrame];
        outBuffer.get(pcm, 0, pcm.length);
    } finally {
        outPlanes.deallocate();
        outBuffer.deallocate();
    }

    if (pcm.length == 0) {
        return;
    }

    // The format describes the CONVERTED data, not the decoder's native output.
    // NOTE(review): AV_SAMPLE_FMT_S16 uses native byte order; little-endian is
    // assumed here — confirm if big-endian hosts must be supported.
    AudioFormat audioFormat = new AudioFormat(
        outSampleRate,
        16,
        outChannels,
        true,
        false
    );

    // NOTE(review): opening and draining a line per frame leaves small gaps
    // between frames; a line kept open for the whole playback would be smoother.
    try (SourceDataLine sdl = AudioSystem.getSourceDataLine(audioFormat)) {
        sdl.open(audioFormat);
        sdl.start();
        sdl.write(pcm, 0, pcm.length);
        sdl.drain();
        sdl.stop();
    } catch (LineUnavailableException ex) {
        Logger.getLogger(AVEntry.class.getName()).log(Level.SEVERE, null, ex);
    }
}



Do you have an idea ?


-
Screeching white sound coming while playing audio as a raw stream
27 avril 2020, par Sri Nithya Sharabheshwarananda

I. Background



- 

- I am trying to make an application which helps to match subtitles to the audio waveform very accurately at the waveform level, at the word level or even at the character level.
- The audio is expected to be Sanskrit chants (Yoga, rituals etc.) which are extremely long compound words [ example - aṅganyā-sokta-mātaro-bījam is traditionally one word broken only to assist reading ]
- The input transcripts / subtitles might be roughly in sync at the sentence/verse level but surely would not be in sync at the word level.
- The application should be able to figure out points of silence in the audio waveform, so that it can guess the start and end points of each word (or even letter/consonant/vowel in a word), such that the audio-chanting and visual-subtitle at the word level (or even at letter/consonant/vowel level) perfectly match, and the corresponding UI just highlights or animates the exact word (or even letter) in the subtitle line which is being chanted at that moment, and also show that word (or even the letter/consonant/vowel) in bigger font. This app's purpose is to assist learning Sanskrit chanting.
- It is not expected to be a 100% automated process, nor 100% manual but a mix where the application should assist the human as much as possible.













II. Following is the first code I wrote for this purpose, wherein



- 

- First I open an MP3 (or any other audio format) file,
- Seek to some arbitrary point in the timeline of the audio file // as of now playing from zero offset
- Get the audio data in raw format for 2 purposes - (1) playing it and (2) drawing the waveform.
- Playing the raw audio data using standard java audio libraries











III. The problem I am facing is, between every cycle there is screeching sound.



- 

- Probably I need to close the line between cycles ? Sounds simple, I can try.
- But I am also wondering if this overall approach itself is correct ? Any tip, guide, suggestion, link would be really helpful.
- Also, I just hard-coded the sample rate etc. (44100 Hz etc.); are these good default presets, or should they depend on the input format?









IV. Here is the code



import com.github.kokorin.jaffree.StreamType;
import com.github.kokorin.jaffree.ffmpeg.FFmpeg;
import com.github.kokorin.jaffree.ffmpeg.FFmpegProgress;
import com.github.kokorin.jaffree.ffmpeg.FFmpegResult;
import com.github.kokorin.jaffree.ffmpeg.NullOutput;
import com.github.kokorin.jaffree.ffmpeg.PipeOutput;
import com.github.kokorin.jaffree.ffmpeg.ProgressListener;
import com.github.kokorin.jaffree.ffprobe.Stream;
import com.github.kokorin.jaffree.ffmpeg.UrlInput;
import com.github.kokorin.jaffree.ffprobe.FFprobe;
import com.github.kokorin.jaffree.ffprobe.FFprobeResult;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicLong;
import javax.sound.sampled.AudioFormat;
import javax.sound.sampled.AudioSystem;
import javax.sound.sampled.DataLine;
import javax.sound.sampled.SourceDataLine;


/**
 * Small experiment that inspects an audio file with ffprobe/ffmpeg and then
 * plays it by letting ffmpeg decode it to raw PCM that is pumped straight
 * into a Java Sound output line.
 */
public class FFMpegToRaw {
    // Directory containing the ffmpeg / ffprobe executables
    Path BIN = Paths.get("f:\\utilities\\ffmpeg-20190413-0ad0533-win64-static\\bin");
    // Media file to inspect and play
    String VIDEO_MP4 = "f:\\org\\TEMPLE\\DeviMahatmyamRecitationAudio\\03_01_Devi Kavacham.mp3";
    FFprobe ffprobe;
    FFmpeg ffmpeg;

    /**
     * Prints the streams reported by ffprobe, then measures the exact
     * duration by transcoding the input to a null output.
     *
     * @throws Exception if ffprobe or ffmpeg fails
     */
    public void basicCheck() throws Exception {
        if (BIN != null) {
            ffprobe = FFprobe.atPath(BIN);
        } else {
            ffprobe = FFprobe.atPath();
        }
        FFprobeResult result = ffprobe
                .setShowStreams(true)
                .setInput(VIDEO_MP4)
                .execute();

        for (Stream stream : result.getStreams()) {
            System.out.println("Stream " + stream.getIndex()
                    + " type " + stream.getCodecType()
                    + " duration " + stream.getDuration(TimeUnit.SECONDS));
        }
        if (BIN != null) {
            ffmpeg = FFmpeg.atPath(BIN);
        } else {
            ffmpeg = FFmpeg.atPath();
        }

        // Sometimes ffprobe can't show the exact duration; transcode to a NULL output with ffmpeg to get it
        final AtomicLong durationMillis = new AtomicLong();
        FFmpegResult fFmpegResult = ffmpeg
                .addInput(
                        UrlInput.fromUrl(VIDEO_MP4)
                )
                .addOutput(new NullOutput())
                .setProgressListener(new ProgressListener() {
                    @Override
                    public void onProgress(FFmpegProgress progress) {
                        durationMillis.set(progress.getTimeMillis());
                    }
                })
                .execute();
        System.out.println("audio size - "+fFmpegResult.getAudioSize());
        System.out.println("Exact duration: " + durationMillis.get() + " milliseconds");
    }

    /**
     * Decodes the input to raw PCM with ffmpeg and plays it while it is
     * being produced.
     *
     * The raw format requested from ffmpeg is derived from the same settings
     * as the Java Sound {@link AudioFormat}, so byte order, sample rate and
     * channel count always agree on both sides of the pipe.
     *
     * @throws Exception if the audio line cannot be opened or ffmpeg fails
     */
    public void toRawAndPlay() throws Exception {
        ProgressListener listener = new ProgressListener() {
            @Override
            public void onProgress(FFmpegProgress progress) {
                System.out.println(progress.getFrame());
            }
        };

        // code derived from : https://stackoverflow.com/questions/32873596/play-raw-pcm-audio-received-in-udp-packets

        int sampleRate = 44100;//24000;//Hz
        int sampleSize = 16;//Bits
        int channels = 1;
        boolean signed = true;
        boolean bigEnd = false;
        // The ffmpeg raw format must use the SAME byte order as the AudioFormat
        // below; playing big-endian samples as little-endian yields loud noise.
        String format = bigEnd ? "s16be" : "s16le";

        //https://trac.ffmpeg.org/wiki/audio types
        final AudioFormat af = new AudioFormat(sampleRate, sampleSize, channels, signed, bigEnd);
        final DataLine.Info info = new DataLine.Info(SourceDataLine.class, af);
        final SourceDataLine line = (SourceDataLine) AudioSystem.getLine(info);

        line.open(af, 4096); // format , buffer size
        try {
            line.start();

            // Bytes per sample frame; the line only accepts whole frames.
            final int frameSize = af.getFrameSize();

            OutputStream destination = new OutputStream() {
                // Bytes of a frame that was split across two chunks
                private final byte[] pending = new byte[frameSize];
                private int pendingLen = 0;
                private final byte[] single = new byte[1];

                @Override public void write(int b) throws IOException {
                    single[0] = (byte) b;
                    write(single, 0, 1);
                }

                @Override public void write(byte[] b, int off, int len) throws IOException {
                    // First complete the frame left over from the previous chunk
                    if (pendingLen > 0) {
                        int take = Math.min(frameSize - pendingLen, len);
                        System.arraycopy(b, off, pending, pendingLen, take);
                        pendingLen += take;
                        off += take;
                        len -= take;
                        if (pendingLen < frameSize) {
                            return;
                        }
                        line.write(pending, 0, frameSize);
                        pendingLen = 0;
                    }

                    // Play all whole frames of this chunk
                    int whole = len - (len % frameSize);
                    if (whole > 0) {
                        line.write(b, off, whole);
                    }

                    // Keep the trailing partial frame for the next chunk instead
                    // of dropping it: losing a single byte would shift every
                    // following sample and turn the audio into noise.
                    int rest = len - whole;
                    if (rest > 0) {
                        System.arraycopy(b, off + whole, pending, 0, rest);
                        pendingLen = rest;
                    }
                }
            };

            // src : http://blog.wudilabs.org/entry/c3d357ed/?lang=en-US
            FFmpegResult result = FFmpeg.atPath(BIN).
                addInput(UrlInput.fromPath(Paths.get(VIDEO_MP4))).
                addOutput(PipeOutput.pumpTo(destination).
                    disableStream(StreamType.VIDEO). // -vn
                    addArguments("-ar", String.valueOf(sampleRate)). // audio sample rate ("-r" would be the video frame rate)
                    addArguments("-ac", String.valueOf(channels)).
                    setFormat(format) // -f
                ).
                setProgressListener(listener).
                execute();

            // let everything that was queued play out
            line.drain();
            line.stop();

            System.out.println("result = "+result.toString());
        } finally {
            // release the audio device even when ffmpeg fails
            line.close();
        }
    }

    public static void main(String[] args) throws Exception {
        FFMpegToRaw raw = new FFMpegToRaw();
        raw.basicCheck();
        raw.toRawAndPlay();
    }
}





Thank You