YARP
Yet Another Robot Platform
 
Loading...
Searching...
No Matches
FfmpegGrabber.cpp
Go to the documentation of this file.
1/*
2 * SPDX-FileCopyrightText: 2006-2021 Istituto Italiano di Tecnologia (IIT)
3 * SPDX-FileCopyrightText: 2006-2010 RobotCub Consortium
4 * SPDX-FileCopyrightText: 2006 Jonas Ruesch
5 * SPDX-FileCopyrightText: 2006 Arjan Gijsberts
6 * SPDX-License-Identifier: BSD-3-Clause
7 */
8
9#include "FfmpegGrabber.h"
10#include "ffmpeg_api.h"
11
12#include <yarp/os/all.h>
13#include <yarp/sig/all.h>
14#include <yarp/os/Log.h>
16
17#include <cstdio>
18
19#define MAX_AUDIO_FRAME_SIZE 192000 // 1 second of 48khz 32bit audio
20
21using namespace yarp::os;
22using namespace yarp::dev;
23using namespace yarp::sig;
24using namespace yarp::sig::file;
25
26namespace {
27YARP_LOG_COMPONENT(FFMPEGGRABBER, "yarp.device.ffmpeg_grabber")
28}
29
31{
32public:
36 int index;
37
40
41 // video buffers
49
52 bytesDecoded(0),
54 index(-1),
55 pCodecCtx(nullptr),
56 pCodec(nullptr),
57 pFrame(nullptr),
58 pFrameRGB(nullptr),
59 pAudio(nullptr),
60 buffer(nullptr),
61 audioBuffer(nullptr),
62 audioBufferAt(nullptr),
64 {
65 }
66
68 {
69 return frameFinished!=0;
70 }
71
73 {
74 return index;
75 }
76
77 virtual ~DecoderState()
78 {
79 if (pCodecCtx!=nullptr) {
81 }
82 if (audioBuffer!=nullptr) {
83 delete [] audioBuffer;
84 }
85 if (buffer!=nullptr) {
86 delete [] buffer;
87 }
88 if (pFrameRGB!=nullptr) {
90 }
91 if (pFrame!=nullptr) {
93 }
94 if (pAudio!=nullptr) {
96 }
97 }
98
99 int getStream(AVFormatContext *pFormatCtx, AVMediaType code, const char *name)
100 {
// Scan the container's streams for the first one whose codec type matches
// `code` (e.g. AVMEDIA_TYPE_VIDEO / AVMEDIA_TYPE_AUDIO); `name` is used only
// in the log messages. Returns the member `index` so later calls such as
// getCodec() operate on the chosen stream; -1 means "not found".
// NOTE(review): this listing is missing original lines 105 and 113 —
// presumably the assignments recording the matched stream into
// `videoStream`/`index`. Confirm against the upstream source before editing.
101 // Find the first stream
102 int videoStream=-1;
103 for(int i=0; i<(int)(pFormatCtx->nb_streams); i++) {
104 if(pFormatCtx->streams[i]->codecpar->codec_type==code) {
106 yCInfo(FFMPEGGRABBER, "First %s stream is stream #%d", name, i);
107 break;
108 }
109 }
110 if(videoStream==-1) {
111 yCError(FFMPEGGRABBER, "Could not find %s stream", name);
112 }
114
// Return the stream index selected above (member state, not the local).
115 return index;
116 }
117
118 bool getCodec(AVFormatContext *pFormatCtx)
119 {
120 // Get a pointer to the codec context for the video stream
121 pCodecCtx=pFormatCtx->streams[index]->codec;
122
123 // Find the decoder for the video stream
124 pCodec = avcodec_find_decoder(pFormatCtx->streams[index]->codecpar->codec_id);
125 if(pCodec==nullptr) {
126 yCError(FFMPEGGRABBER, "Codec not found");
127 return false; // Codec not found
128 }
129
130 // Open codec
131 if (avcodec_open2(pCodecCtx, pCodec, nullptr) < 0) {
132 yCError(FFMPEGGRABBER, "Could not open codec");
133 return false; // Could not open codec
134 }
135
136 return true;
137 }
138
139
141 {
142 // Allocate video frame
144
145 // Allocate an AVFrame structure
147 if(pFrameRGB==nullptr) {
148 yCError(FFMPEGGRABBER, "Could not allocate a frame");
149 return false;
150 }
151
152 // Determine required buffer size and allocate buffer
153 int numBytes=avpicture_get_size(AV_PIX_FMT_RGB24, pCodecCtx->width,
154 pCodecCtx->height);
155 buffer=new uint8_t[numBytes];
156
157 // Assign appropriate parts of buffer to image planes in pFrameRGB
159 pCodecCtx->width, pCodecCtx->height);
160 return true;
161 }
162
164 {
169 "channels %d, sample_rate %d, frame_size %d",
170 pCodecCtx->channels,
171 pCodecCtx->sample_rate,
172 pCodecCtx->frame_size);
173 return true;
174 }
175
177 {
178 return pCodecCtx->width;
179 }
180
182 {
183 return pCodecCtx->height;
184 }
185
186
188 {
189 return pCodecCtx->sample_rate;
190 }
191
193 {
194 return pCodecCtx->channels;
195 }
196
// Decode one audio packet into `sound`. Walks the packet until every byte
// has been consumed, then (on the final iteration) copies the decoded
// samples from `audioBuffer` into the YARP Sound, interleaved per channel,
// and stamps the sample rate.
// NOTE(review): this listing is missing original lines 213, 228 and
// 235-236 — likely an av_frame_unref() on the reused frame, the
// `num_channels`/`num_samples` computations, and the sound.resize() call.
// Verify against the upstream source before editing.
197 bool getAudio(AVPacket& packet,Sound& sound)
198 {
199 int ct = 0;
200 int bytesRead = 0;
201 int bytesWritten = 0;
202 int gotFrame = 0;
203 while (bytesRead<packet.size) {
204 ct = audioBufferLen;
// `tmp` is a shallow copy advanced past the bytes already consumed.
205 AVPacket tmp = packet;
206 tmp.data += bytesRead;
207 tmp.size -= bytesRead;
// Lazily allocate the decode frame on first use; reuse it afterwards.
208 if (!pAudio) {
209 if (!(pAudio = av_frame_alloc())) {
210 yCFatal(FFMPEGGRABBER, "out of memory");
211 }
212 } else {
214 }
// NOTE(review): decodes from `packet`, not the advanced `tmp` — looks
// suspicious but is preserved here as-is; confirm upstream.
215 int r = avcodec_decode_audio4(pCodecCtx, pAudio, &gotFrame, &packet);
216 ct = 0;
// On a complete frame, `ct` becomes the decoded payload size in bytes.
217 if (gotFrame) {
218 ct = av_samples_get_buffer_size(nullptr,
219 pCodecCtx->channels,
220 pAudio->nb_samples,
221 pCodecCtx->sample_fmt,
222 1);
223 }
224 if (r<0) {
225 yCError(FFMPEGGRABBER, "error decoding audio");
226 return false;
227 }
229 int num_rate = getRate();
230 //audioBufferAt += ct;
231 //audioBufferLen += ct;
232 bytesRead += r;
233 bytesWritten += ct;
// Once the whole packet is consumed, publish the samples to `sound`.
234 if (bytesRead==packet.size) {
237 sound.setFrequency(num_rate);
238
// Copy interleaved samples: outer loop over samples, inner over channels.
239 int idx = 0;
240 for (int i=0; i<num_samples; i++) {
241 for (int j=0; j<num_channels; j++) {
242 sound.set(audioBuffer[idx],i,j);
243 idx++;
244 }
245 }
246 }
247 }
248 return true;
249 }
250
// Decode one video packet. When a full frame is produced, lazily create a
// (function-static, shared) swscale context and convert the native pixel
// format to RGB24 into pFrameRGB. Returns nonzero iff a frame was finished.
// NOTE(review): this listing is missing original lines 254, 264, 269 and
// 273 — the avcodec_decode_video2() call, the sws_getContext() invocation
// and its flags, and the sws_scale() call. Verify against upstream before
// editing; only the surviving lines are documented here.
251 bool getVideo(AVPacket& packet)
252 {
253 // Decode video frame
255 &packet);
256
257 // Did we get a video frame?
258 if(frameFinished) {
259 // Convert the image from its native format to RGB
260 int w = pCodecCtx->width;
261 int h = pCodecCtx->height;
// Shared across all instances: a single cached scaler context.
// NOTE(review): `static` here is not thread-safe and assumes one
// stream geometry per process — confirm this is acceptable upstream.
262 static struct SwsContext *img_convert_ctx = nullptr;
263 if (img_convert_ctx==nullptr) {
265 pCodecCtx->pix_fmt,
266 w, h, AV_PIX_FMT_RGB24,
267 //0,
268 //SWS_BILINEAR,
270 nullptr, nullptr, nullptr);
271 }
272 if (img_convert_ctx!=nullptr) {
274 ((AVPicture*)pFrame)->linesize, 0,
275 pCodecCtx->height,
276 ((AVPicture*)pFrameRGB)->data,
277 ((AVPicture*)pFrameRGB)->linesize);
278 } else {
// Fatal: without a scaler context no frame can ever be delivered.
279 yCFatal(FFMPEGGRABBER, "Software scaling not working");
280 }
281 }
282 return frameFinished;
283 }
284
285
287 {
288 if (frameFinished) {
290 flex.setPixelCode(VOCAB_PIXEL_RGB);
291 flex.setQuantum((pFrameRGB->linesize[0]));
292 flex.setExternal(pFrameRGB->data[0],
293 pCodecCtx->width,
294 pCodecCtx->height);
295 image.copy(flex);
296 }
297
298 return frameFinished;
299 }
300
302 {
303 return frameFinished;
304 }
305};
306
313
314
315#define HELPER(x) (*((FfmpegHelper*)x))
316
317
318const char *xstrdup(const char *str)
319{
320 if (str[0] == '-') {
321 return nullptr;
322 }
323 return strdup(str);
324}
325
329{
330 bool audio = (ppFormatCtx==nullptr);
333
335 std::string vs;
336
337 if (!audio) {
338 //formatParams.prealloced_context = 1;
339 vs = m_v4ldevice;
340 } else {
341 vs = m_audio;
342 }
343 yCInfo(FFMPEGGRABBER, "Device %s",vs.c_str());
344
345 m_uri = vs;
346
347 if (audio) {
349 "sample_rate",
351 0);
353 "channels",
355 0);
356 } else {
357 if (config.check("time_base_num") && config.check("time_base_den")) {
358 char buf[256];
359 sprintf(buf, "%d/%d",
362 av_dict_set(&formatParams, "framerate", buf, 0);
363 }
364
365 if (config.check("channel")) {
367 "channel",
368 m_channel,
369 0);
370 }
371 if (config.check("standard")) {
373 "standard",
374 m_standard.c_str(),
375 0);
376 }
378 "width",
379 m_width,
380 0);
382 "height",
383 m_height,
384 0);
385 }
386
387 std::string videoDevice = (m_v4l1 ? "video4linux" : "video4linux2");
388 iformat = av_find_input_format(audio ? "audio_device" : videoDevice.c_str());
389
391 vs.c_str(),
392 iformat,
393 &formatParams);
394
395 bool ok = (result==0);
396 if (!ok) {
397 yCError(FFMPEGGRABBER, "%s: ffmpeg error %d", vs.c_str(), result);
398 }
399
400 if (ok) {
401 if (ppFormatCtx!=nullptr) {
402 if (config.check("audio",
403 "optional audio device")) {
404 ok = openV4L(config,nullptr,ppFormatCtx2);
405 }
406 }
407 }
408
409 return ok;
410}
411
412
413
416{
418 iformat = av_find_input_format("dv1394");
419 yCInfo(FFMPEGGRABBER, "Checking for digital video in %s", m_devname.c_str());
420
422
423 return avformat_open_input(ppFormatCtx, strdup(m_devname.c_str()), iformat, nullptr) == 0;
424}
425
426
428 const char *fname)
429{
430 m_uri = fname;
431 return avformat_open_input(ppFormatCtx, fname, nullptr, nullptr) == 0;
432}
433
434
436{
437 if (!this->parseParams(config)) { return false; }
438
439 imageSync = false;
440 imageSync = (m_sync=="image");
441
442 needRateControl = true; // default for recorded media
443
444 if (m_nodelay) {
445 needRateControl = false;
446 }
447
448 // Register all formats and codecs
451
452 // Open video file
453 if (m_v4l|| m_v4l1 || m_v4l2) {
454 needRateControl = false; // reading from live media
455 if (!openV4L(config,&pFormatCtx,&pFormatCtx2)) {
456 yCError(FFMPEGGRABBER, "Could not open Video4Linux input");
457 return false;
458 }
459 } else if (m_ieee1394) {
460 needRateControl = false; // reading from live media
461 if (!openFirewire(config,&pFormatCtx)) {
462 yCError(FFMPEGGRABBER, "Could not open ieee1394 input");
463 return false;
464 }
465 } else {
466 if (!openFile(&pFormatCtx,m_source.c_str())) {
467 yCError(FFMPEGGRABBER, "Could not open media file %s", m_source.c_str());
468 return false; // Couldn't open file
469 }
470 }
471
472
473 // Retrieve stream information
474 if(avformat_find_stream_info(pFormatCtx, nullptr)<0) {
475 yCError(FFMPEGGRABBER, "Could not find stream information in %s", m_uri.c_str());
476 return false; // Couldn't find stream information
477 }
478
479 // Dump information about file onto standard error
480 av_dump_format(pFormatCtx, 0, m_uri.c_str(), false);
481
482 if (pFormatCtx2!=nullptr) {
483
484 if(avformat_find_stream_info(pFormatCtx2, nullptr)<0) {
485 yCError(FFMPEGGRABBER, "Could not find stream information in %s", m_uri.c_str());
486 return false; // Couldn't find stream information
487 }
488
489 // Dump information about file onto standard error
490 av_dump_format(pFormatCtx2, 0, m_uri.c_str(), false);
491 }
492
493
494 if (pFormatCtx2!=nullptr) {
496 } else {
498 }
499
503
505 DecoderState& videoDecoder = helper.videoDecoder;
506 DecoderState& audioDecoder = helper.audioDecoder;
507
508
509 // Find the first video stream
510 int videoStream = videoDecoder.getStream(pFormatCtx,
512 "video");
513 // Find the first audio stream
514 int audioStream = audioDecoder.getStream(pAudioFormatCtx,
516 "audio");
517
518 if (videoStream==-1&&audioStream==-1) {
519 return false;
520 }
521
522 _hasVideo = (videoStream!=-1);
523 _hasAudio = (audioStream!=-1);
524
525 bool ok = true;
526 if (_hasVideo) {
527 ok = ok && videoDecoder.getCodec(pFormatCtx);
528 }
529 if (_hasAudio) {
530 ok = ok && audioDecoder.getCodec(pAudioFormatCtx);
531 }
532 if (!ok) {
533 return false;
534 }
535
536 if (_hasVideo) {
537 ok = ok && videoDecoder.allocateImage();
538 }
539 if (_hasAudio) {
540 ok = ok && audioDecoder.allocateSound();
541 }
542 if (!ok) {
543 return false;
544 }
545
546 if (_hasVideo) {
547 m_w = videoDecoder.getWidth();
548 m_h = videoDecoder.getHeight();
549 }
550 if (_hasAudio) {
551 m_channels = audioDecoder.getChannels();
552 m_rate = audioDecoder.getRate();
553 }
555 " video size %dx%d, audio %dHz with %d channels, %s sync",
556 m_w,
557 m_h,
558 m_rate,
560 imageSync ? "image" : "audio");
561
562 if (!(_hasVideo||_hasAudio)) {
563 return false;
564 }
565 active = true;
566 return true;
567}
568
570{
571 if (formatParamsVideo) {
573 formatParamsVideo = nullptr;
574 }
575 if (formatParamsAudio) {
577 formatParamsAudio = nullptr;
578 }
579
580 if (!active) {
581 return false;
582 }
583
584 // Close the video file
585 if (pFormatCtx!=nullptr) {
587 }
588 if (pFormatCtx2!=nullptr) {
590 }
591 if (system_resource!=nullptr) {
592 delete &HELPER(system_resource);
593 system_resource = nullptr;
594 }
595
596 active = false;
597 return true;
598}
599
601{
602 if (!_hasVideo) {
603 return false;
604 }
605 Sound sound;
606 return getAudioVisual(image,sound);
607}
608
609bool FfmpegGrabber::getSound(yarp::sig::Sound& sound, size_t min_number_of_samples, size_t max_number_of_samples, double max_samples_timeout_s)
610{
611 if (!_hasAudio) {
612 return false;
613 }
614 ImageOf<PixelRgb> image;
615 return getAudioVisual(image, sound);
616}
617
618
620 yarp::sig::Sound& sound)
621{
622
624 DecoderState& videoDecoder = helper.videoDecoder;
625 DecoderState& audioDecoder = helper.audioDecoder;
626
627 bool tryAgain = false;
628 bool triedAgain = false;
629
630 do {
631
632 bool gotAudio = false;
633 bool gotVideo = false;
634 if (startTime<0.5) {
636 }
637 double time_target = 0;
638 while(av_read_frame(pFormatCtx, &packet)>=0) {
639 // Is this a packet from the video stream?
640 bool done = false;
641 if (packet.stream_index==videoDecoder.getIndex()) {
642 done = videoDecoder.getVideo(packet);
643 image.resize(1,1);
644 if (done) {
645 yCTrace(FFMPEGGRABBER, "got a video frame");
646 gotVideo = true;
647 }
648 } if (packet.stream_index==audioDecoder.getIndex()) {
649 done = audioDecoder.getAudio(packet,sound);
650 if (done) {
651 yCTrace(FFMPEGGRABBER, "got an audio frame");
652 gotAudio = true;
653 }
654 }
655 AVRational& time_base = pFormatCtx->streams[packet.stream_index]->time_base;
656 double rbase = av_q2d(time_base);
657
659
661 if (((imageSync?gotVideo:videoDecoder.haveFrame())||!_hasVideo)&&
662 ((imageSync?1:gotAudio)||!_hasAudio)) {
663 if (_hasVideo) {
664 videoDecoder.getVideo(image);
665 } else {
666 image.resize(0,0);
667 }
668 if (needRateControl) {
669 double now = (SystemClock::nowSystem()-startTime)*m_pace;
670 double delay = time_target-now;
671 if (delay>0) {
673 }
674 }
675
676 if (!_hasAudio) {
677 sound.resize(0,0);
678 }
679 return true;
680 }
681 }
682
684
685 if (tryAgain) {
686 if (!m_loop) {
687 return false;
688 }
691 triedAgain = true;
692 }
693 } while (tryAgain);
694
695 return false;
696}
const char * xstrdup(const char *str)
#define MAX_AUDIO_FRAME_SIZE
@ VOCAB_PIXEL_RGB
Definition Image.h:44
#define HELPER(x)
uint8_t * buffer
bool getCodec(AVFormatContext *pFormatCtx)
bool getVideo(AVPacket &packet)
AVFrame * pFrameRGB
int16_t * audioBufferAt
bool getAudio(AVPacket &packet, Sound &sound)
bool getVideo(ImageOf< PixelRgb > &image)
int getStream(AVFormatContext *pFormatCtx, AVMediaType code, const char *name)
int16_t * audioBuffer
AVFrame * pFrame
AVCodec * pCodec
AVCodecContext * pCodecCtx
virtual ~DecoderState()
AVFrame * pAudio
bool parseParams(const yarp::os::Searchable &config) override
Parse the DeviceDriver parameters.
bool openV4L(yarp::os::Searchable &config, AVFormatContext **ppFormatCtx, AVFormatContext **ppFormatCtx2)
int m_w
Width of the images a grabber produces.
bool openFile(AVFormatContext **ppFormatCtx, const char *fname)
int m_h
Height of the images a grabber produces.
void * system_resource
bool openFirewire(yarp::os::Searchable &config, AVFormatContext **ppFormatCtx)
AVFormatContext * pAudioFormatCtx
bool getSound(yarp::sig::Sound &sound, size_t min_number_of_samples, size_t max_number_of_samples, double max_samples_timeout_s) override
Get a sound from a device.
bool getImage(yarp::sig::ImageOf< yarp::sig::PixelRgb > &image) override
Get an image from the frame grabber.
bool open(yarp::os::Searchable &config) override
Open the DeviceDriver.
AVDictionary * formatParamsAudio
AVFormatContext * pFormatCtx2
std::string m_uri
Uri of the images a grabber produces.
virtual bool getAudioVisual(yarp::sig::ImageOf< yarp::sig::PixelRgb > &image, yarp::sig::Sound &sound) override
Get an image and sound.
AVFormatContext * pFormatCtx
AVDictionary * formatParamsVideo
bool close() override
Close the DeviceDriver.
DecoderState videoDecoder
DecoderState audioDecoder
A mini-server for performing network communication in the background.
A base class for nested structures that can be searched.
Definition Searchable.h:31
virtual bool check(const std::string &key) const =0
Check if there exists a property of the given name.
static double nowSystem()
static void delaySystem(double seconds)
Image class with user control of representation details.
Definition Image.h:363
Typed image class.
Definition Image.h:605
void resize(size_t imgWidth, size_t imgHeight)
Reallocate an image to be of a desired size, throwing away its current contents.
Definition Image.cpp:402
Class for storing sounds See Audio in YARP for additional documentation on YARP audio.
Definition Sound.h:25
void setFrequency(int freq)
Set the frequency of the sound (i.e.
Definition Sound.cpp:361
void resize(size_t samples, size_t channels=1)
Set the sound size.
Definition Sound.cpp:270
void set(audio_sample value, size_t sample, size_t channel=0)
Definition Sound.cpp:334
#define yCInfo(component,...)
#define yCError(component,...)
#define yCAssert(component, x)
#define yCTrace(component,...)
#define YARP_LOG_COMPONENT(name,...)
#define yCFatal(component,...)
For streams capable of holding different kinds of content, check what they actually have.
Definition jointData.cpp:13
An interface to the operating system, including Port based communication.