YARP
Yet Another Robot Platform
 
Loading...
Searching...
No Matches
SoundFileMp3.cpp
Go to the documentation of this file.
1/*
2 * SPDX-FileCopyrightText: 2006-2021 Istituto Italiano di Tecnologia (IIT)
3 * SPDX-FileCopyrightText: 2006-2010 RobotCub Consortium
4 * SPDX-License-Identifier: BSD-3-Clause
5 */
6
8
9#include <yarp/conf/system.h>
10
11#include <yarp/os/NetInt16.h>
12#include <yarp/os/NetInt32.h>
14#include <yarp/os/Vocab.h>
15
16#include <yarp/sig/Sound.h>
17#include <yarp/os/Log.h>
18#include <yarp/os/LogStream.h>
19
20#include <cstdio>
21#include <cstring>
22#include <fstream>
23
24#if defined (YARP_HAS_FFMPEG)
25extern "C"
26{
27 #include <libavutil/opt.h>
28 #include <libavcodec/avcodec.h>
29 #include <libavcodec/version.h>
30 #include <libavutil/channel_layout.h>
31 #include <libavutil/common.h>
32 #include <libavutil/imgutils.h>
33 #include <libavutil/mathematics.h>
34 #include <libavutil/samplefmt.h>
35}
36#endif
37
38using namespace yarp::os;
39using namespace yarp::sig;
40using namespace yarp::sig::file;
41
// File-local log component: SOUNDFILE_MP3 is the first argument of every yCError() call in
// this file, and "yarp.sig.SoundFileMp3" is the name it is registered under.
42namespace
43{
44 YARP_LOG_COMPONENT(SOUNDFILE_MP3, "yarp.sig.SoundFileMp3")
45}
46
47//#######################################################################################################
// Buffer-size constants, only defined when YARP is built with FFmpeg support.
48#if defined (YARP_HAS_FFMPEG)
// Number of bytes requested from the input stream per read in read_mp3_istream().
49#define AUDIO_INBUF_SIZE 20480
// NOTE(review): not referenced on any line visible in this listing; presumably the
// remaining-bytes threshold below which read_mp3_istream() refills its buffer - confirm.
50#define AUDIO_REFILL_THRESH 4096
51#endif
52
53//#######################################################################################################
54#if defined (YARP_HAS_FFMPEG)
// NOTE(review): the signature line of this function is missing from the listing; it is
// presumably the decode() helper that read_mp3_istream() calls as
// decode(c, pkt, decoded_frame, sound_data) - confirm.
//
// Converts decoded audio frames into yarp Sound data and appends them to sound_data.
// The body reads dec_ctx (codec context), frame (decoded frame) and sound_data (output).
//
// Return value as visible here:
//  - false, with an error log, when ret or data_size is negative;
//  - false, without any log, when ret is AVERROR(EAGAIN) or AVERROR_EOF;
//  - true from the final return statement.
// NOTE(review): every branch inside the loop that ends with a non-positive ret returns,
// so the final "return true" looks reachable only if the loop is left some other way -
// the assignments to ret and data_size are on lines missing from this listing; confirm.
56{
57 int i, ch;
58 int ret, data_size;
59 /* send the packet with the compressed data to the decoder */
61 if (ret < 0)
62 {
63 yCError(SOUNDFILE_MP3, "Error submitting the packet to the decoder");
64 return false;
65 }
66 /* read all the output frames (in general there may be any number of them) */
67 while (ret >= 0)
68 {
// EAGAIN / EOF are reported to the caller as false, exactly like a real failure.
70 if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
71 {
72 return false;
73 }
74 else if (ret < 0)
75 {
76 yCError(SOUNDFILE_MP3, "Error during decoding");
77 return false;
78 }
79 //this seems to be 2: S16P
81 if (data_size < 0)
82 {
83 /* This should not occur, checking just for paranoia */
84 yCError(SOUNDFILE_MP3, "Failed to calculate data size\n");
85 return false;
86 }
87
// frame_sound is declared on a line missing from this listing; it is sized to hold
// exactly one decoded frame (nb_samples x channels).
89 frame_sound.resize(frame->nb_samples, dec_ctx->channels);
// First frame only: give the (still channel-less) output the decoder's channel count.
90 if (sound_data.getChannels()==0) { sound_data.resize(0, dec_ctx->channels);}
91
92 for (i = 0; i < frame->nb_samples; i++) //1152
93 {
94 for (ch = 0; ch < dec_ctx->channels; ch++) //2
95 {
// Each frame->data[ch] is read as an array of short int, one array per channel.
// NOTE(review): this presumes planar signed 16-bit output; the sample format is
// not checked on any visible line - confirm.
96 short int val = *((short int*)frame->data[ch] + i);
97 frame_sound.set(val,i,ch);
98 }
99 }
// Append this frame to the samples accumulated so far.
100 sound_data += frame_sound;
101 }
102 return true;
103}
104
105int check_sample_fmt(const AVCodec * codec, enum AVSampleFormat sample_fmt)
106{
107 const enum AVSampleFormat* p = codec->sample_fmts;
108
109 while (*p != AV_SAMPLE_FMT_NONE)
110 {
111 if (*p == sample_fmt) {
112 return 1;
113 }
114 p++;
115 }
116 return 0;
117}
118
119int select_sample_rate(const AVCodec * codec)
120{
121 const int* p;
122 int best_samplerate = 0;
123
124 if (!codec->supported_samplerates) {
125 return 44100;
126 }
127
128 p = codec->supported_samplerates;
129 while (*p)
130 {
131 if (!best_samplerate || abs(44100 - *p) < abs(44100 - best_samplerate)) {
133 }
134 p++;
135 }
136 return best_samplerate;
137}
138
// Encodes one frame with ctx and writes every packet the encoder produces to os.
//
// ctx   : encoder context
// frame : raw audio to encode; write_mp3_file() passes NULL here for its "flush the encoder" call
// pkt   : packet whose data/size are written to the stream after each receive step
// os    : output stream receiving the raw packet bytes
//
// Returns false, with an error log, when ret is negative after the send step or after a
// receive step; returns true when the receive step reports AVERROR(EAGAIN) or AVERROR_EOF.
// NOTE(review): the statements that assign ret (the send and receive calls) are on lines
// missing from this listing - confirm against the real source.
139bool encode(AVCodecContext* ctx, AVFrame* frame, AVPacket* pkt, std::fstream& os)
140{
141 int ret;
142
143 // send the frame for encoding
145 if (ret < 0)
146 {
147 yCError(SOUNDFILE_MP3, "Error sending the frame to the encoder\n");
148 return false;
149 }
150
151 // read all the available output packets (in general there may be any
152 // number of them)
153 while (ret >= 0)
154 {
// EAGAIN / EOF mean there is nothing more to collect for this frame: report success.
156 if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
157 {
158 return true;
159 }
160 else if (ret < 0)
161 {
162 yCError(SOUNDFILE_MP3, "Error encoding audio frame\n");
163 return false;
164 }
// The packet payload is written as-is; the stream state is not checked after the write.
165 os.write((const char*)(pkt->data), pkt->size);
167 }
168 return true;
169}
170
171/* select layout with the highest channel count */
// Returns AV_CH_LAYOUT_STEREO when the codec publishes no channel_layouts list; otherwise
// walks the zero-terminated list and returns best_ch_layout.
// NOTE(review): the declaration of best_ch_layout and the comparison that guards its update
// are on lines missing from this listing - confirm against the real source.
// NOTE(review): the list elements are uint64_t while the return type is int, so a layout
// value that does not fit in an int would be narrowed on return - confirm this is intended.
172int select_channel_layout(const AVCodec * codec)
173{
174 const uint64_t * p;
176 int best_nb_channels = 0;
177
178 if (!codec->channel_layouts) {
179 return AV_CH_LAYOUT_STEREO;
180 }
181
182 p = codec->channel_layouts;
183 while (*p)
184 {
186
188 {
189 best_ch_layout = *p;
191 }
192 p++;
193 }
194 return best_ch_layout;
195}
196#endif
197
198//#######################################################################################################
// Encodes sound_data and writes the result to the file named filename.
//
// sound_data : samples to encode, read through getSamples() and get(sample, channel)
// filename   : path of the output file, opened for binary writing
// bitrate    : stored in the codec context as the compressed output bit rate
//
// Returns false when built without FFmpeg, or when the codec lookup, context allocation,
// sample-format check, codec open, file open, packet/frame allocation or buffer allocation
// fails. Returns true otherwise - including when encode() reported a failure, which is
// only logged.
//
// NOTE(review): several statements (codec lookup, packet/frame/buffer allocation, the
// per-frame call that sets ret, final cleanup) are on lines missing from this listing.
199bool yarp::sig::file::write_mp3_file(const Sound& sound_data, const char* filename, size_t bitrate)
200{
201#if !defined (YARP_HAS_FFMPEG)
202
203 yCError(SOUNDFILE_MP3) << "write_mp3_file() not supported: lib ffmpeg not found";
204 return false;
205#else
206 const AVCodec * codec = nullptr;
207 AVCodecContext * c = nullptr;
208 AVFrame * frame = nullptr;
209 AVPacket * pkt = nullptr;
210 int ret;
211 std::fstream fos;
212 uint16_t * samples = nullptr;
213
214#if LIBAVCODEC_VERSION_MAJOR < 58
215 //register all the codecs, deprecated and useless in libffmpeg4.0
217#endif
218
219 // find the MP3 encoder
221 if (!codec)
222 {
223 yCError(SOUNDFILE_MP3, "Codec not found");
224 return false;
225 }
226
227 c = avcodec_alloc_context3(codec);
228 if (!c)
229 {
230 yCError(SOUNDFILE_MP3, "Could not allocate audio codec context");
231 return false;
232 }
233
// NOTE(review): from here on, none of the early "return false" paths visibly releases c
// (nor pkt / frame once they exist) - looks like a leak on every error path; confirm.
234 // the compressed output bitrate
235 c->bit_rate = bitrate;
236
237 // check that the encoder supports s16 pcm input
238 c->sample_fmt = AV_SAMPLE_FMT_S16;
239 if (!check_sample_fmt(codec, c->sample_fmt))
240 {
241 yCError(SOUNDFILE_MP3, "Encoder does not support sample format %s",
242 av_get_sample_fmt_name(c->sample_fmt));
243 return false;
244 }
245
246 // select other audio parameters supported by the encoder
// NOTE(review): sample rate and channel layout come from the codec's capabilities only;
// no visible line consults sound_data's own frequency or channel count - confirm.
247 c->sample_rate = select_sample_rate(codec);
248 c->channel_layout = select_channel_layout(codec);
249 c->channels = av_get_channel_layout_nb_channels(c->channel_layout);
250
251 // open it
252 if (avcodec_open2(c, codec, NULL) < 0)
253 {
254 yCError(SOUNDFILE_MP3, "Could not open codec");
255 return false;
256 }
257
258 fos.open(filename, std::fstream::out | std::fstream::binary);
259 if (fos.is_open()==false)
260 {
261 yCError(SOUNDFILE_MP3, "Cannot open %s for writing", filename);
262 return false;
263 }
264
265 // packet for holding encoded output
267 if (!pkt)
268 {
269 yCError(SOUNDFILE_MP3, "could not allocate the packet");
270 fos.close();
271 return false;
272 }
273
274 // frame containing input raw audio
276 if (!frame)
277 {
278 yCError(SOUNDFILE_MP3, "Could not allocate audio frame");
279 fos.close();
280 return false;
281 }
282
// The frame is configured to carry exactly one encoder frame (c->frame_size samples).
283 frame->nb_samples = c->frame_size;
284 frame->format = c->sample_fmt;
285 frame->channel_layout = c->channel_layout;
286
287 // allocate the data buffers
289 if (ret < 0)
290 {
291 yCError(SOUNDFILE_MP3, "Could not allocate audio data buffers");
292 fos.close();
293 return false;
294 }
295
296 // encode
// The sound is cut into nframes complete encoder frames.
// NOTE(review): rem_lastframe is computed but not used on any visible line, so the
// trailing soundsize % frame_size samples do not appear to be encoded - confirm.
297 size_t soundsize = sound_data.getSamples();
298 size_t nframes = soundsize / c->frame_size;
299 size_t rem_lastframe = soundsize % c->frame_size;
301 for (size_t i = 0; i < nframes; i++)
302 {
// NOTE(review): exit(1) terminates the whole process from inside a library routine,
// without closing fos or releasing the codec objects.
304 if (ret < 0) {
305 exit(1);
306 }
307
// Samples are written interleaved into the single plane frame->data[0]: sample j of
// channel k goes to index j * channels + k.
308 samples = (uint16_t*)frame->data[0];
309 for (int j = 0; j < c->frame_size; j++)
310 {
311 for (int k = 0; k < c->channels; k++) {
312 samples[j * c->channels + k] = sound_data.get(j + i * c->frame_size, k);
313 }
314 }
// A failed encode() is only logged; the loop carries on with the next frame.
315 if (encode(c, frame, pkt, fos) == false)
316 {
317 yCError(SOUNDFILE_MP3, "Encode failed, memory could be corrupted, should I exit?");
318 }
319 }
320
321 // flush the encoder
322 if (encode(c, NULL, pkt, fos) == false)
323 {
324 yCError(SOUNDFILE_MP3, "Encode failed, memory could be corrupted, should I exit?");
325 }
326
327 fos.close();
328
332
333 return true;
334#endif
335}
336
// Decodes audio from istream into sound_data.
//
// sound_data : receives the decoded samples (through decode()) and the decoder's sample rate
// istream    : source of the compressed bytes, read in chunks of up to AUDIO_INBUF_SIZE
//
// Returns false when built without FFmpeg, when codec / parser / context / frame setup
// fails, when the first read yields no bytes, or when the parser reports an error.
// Returns true otherwise; the return value of decode() is ignored.
//
// NOTE(review): the declaration of inbuf, the allocation of pkt, the codec lookup and the
// condition guarding the refill block are on lines missing from this listing.
// NOTE(review): none of the early "return false" paths visibly releases parser, c or
// decoded_frame - looks like a leak on error paths; confirm.
337bool read_mp3_istream(Sound& sound_data, std::istream& istream)
338{
339#if !defined (YARP_HAS_FFMPEG)
340 yCError(SOUNDFILE_MP3) << "read_mp3_istream() not supported: lib ffmpeg not found";
341 return false;
342#else
343 const AVCodec* codec = nullptr;
344 AVCodecContext* c = nullptr;
345 AVCodecParserContext* parser = nullptr;
346 int len, ret;
348 uint8_t* data = nullptr;
349 size_t data_size;
350 AVPacket* pkt = nullptr;
351 AVFrame* decoded_frame = nullptr;
352
354
355#if LIBAVCODEC_VERSION_MAJOR < 58
356 //register all the codecs, deprecated and useless in libffmpeg4.0
358#endif
359
360 // find the MPEG audio decoder
362 if (!codec)
363 {
364 yCError(SOUNDFILE_MP3, "Codec not found");
365 return false;
366 }
367 parser = av_parser_init(codec->id);
368 if (!parser)
369 {
370 yCError(SOUNDFILE_MP3, "Parser not found");
371 return false;
372 }
373 c = avcodec_alloc_context3(codec);
374 if (!c)
375 {
376 yCError(SOUNDFILE_MP3, "Could not allocate audio codec context");
377 return false;
378 }
379 //open the codec
380 if (avcodec_open2(c, codec, NULL) < 0)
381 {
382 yCError(SOUNDFILE_MP3, "Could not open codec");
383 return false;
384 }
385
386 // decode until eof
// data points at the not-yet-parsed bytes inside inbuf; data_size counts them.
387 data = inbuf;
388 istream.read((char*)(inbuf), AUDIO_INBUF_SIZE);
389 data_size = istream.gcount();
390 if (data_size == 0)
391 {
392 yCError(SOUNDFILE_MP3, "Cannot process invalid (empty) stream");
393 return false;
394 }
395 while (data_size > 0)
396 {
// The output frame is allocated once, on the first iteration, and then reused.
397 if (!decoded_frame)
398 {
399 if (!(decoded_frame = av_frame_alloc()))
400 {
401 yCError(SOUNDFILE_MP3, "Could not allocate audio frame");
402 return false;
403 }
404 }
// The parser's return value is used below as the number of input bytes it consumed;
// pkt->size is non-zero when it produced a packet.
405 ret = av_parser_parse2(parser, c, &pkt->data, &pkt->size, data, data_size, AV_NOPTS_VALUE, AV_NOPTS_VALUE, 0);
406 if (ret < 0)
407 {
408 yCError(SOUNDFILE_MP3, "Error while parsing");
409 return false;
410 }
411 data += ret;
412 data_size -= ret;
413 if (pkt->size) {
414 decode(c, pkt, decoded_frame, sound_data);
415 }
// Refill: move the unparsed tail to the start of inbuf and top the buffer up from the stream.
// NOTE(review): the condition that guards this block is on a line missing from this listing.
417 {
418 memmove(inbuf, data, data_size);
419 data = inbuf;
420 istream.read((char*)(data + data_size), AUDIO_INBUF_SIZE - data_size);
421 len = istream.gcount();
422 if (len > 0) {
423 data_size += len;
424 }
425 }
426 }
427 // flush the decoder
428 pkt->data = NULL;
429 pkt->size = 0;
430 decode(c, pkt, decoded_frame, sound_data);
431
432 //set the sample rate (is it ok? maybe some codecs allow variable sample rate?)
433 sound_data.setFrequency(c->sample_rate);
434
435 //cleanup
437 av_parser_close(parser);
440 return true;
441#endif
442}
443
444bool yarp::sig::file::read_mp3_file(Sound& sound_data, const char* filename)
445{
446 std::fstream fis;
447 fis.open(filename, std::fstream::in | std::fstream::binary);
448 if (fis.is_open() == false)
449 {
450 yCError(SOUNDFILE_MP3, "Cannot open %s for reading", filename);
451 return false;
452 }
453
454 bool b = read_mp3_istream(sound_data, fis);
455 fis.close();
456 return b;
457}
458
// NOTE(review): the signature line of this function is missing from the listing; the body
// uses the names data, bytestream and streamsize, so it is presumably read_mp3_bytestream()
// - confirm.
//
// Decodes audio held in memory: the streamsize bytes starting at bytestream are copied into
// a std::string, wrapped in an input string stream and passed to read_mp3_istream(), whose
// result is returned unchanged.
// NOTE(review): std::istringstream needs <sstream>; no such include appears among the
// #include lines visible in this listing - confirm it is pulled in.
460{
461 std::istringstream iss(std::string(bytestream, streamsize));
462 return read_mp3_istream(data, iss);
463}
int16_t * samples
bool ret
bool read_mp3_istream(Sound &sound_data, std::istream &istream)
A mini-server for performing network communication in the background.
void close() override
Stop port activity.
bool open(const std::string &name) override
Start port operation, with a specific name, with automatically-chosen network parameters.
T * read(bool shouldWait=true) override
Read an available object from the port.
Class for storing sounds. See Audio in YARP for additional documentation on YARP audio.
Definition Sound.h:25
void setFrequency(int freq)
Set the frequency of the sound.
Definition Sound.cpp:361
size_t getChannels() const
Get the number of channels of the sound.
Definition Sound.cpp:603
void resize(size_t samples, size_t channels=1)
Set the sound size.
Definition Sound.cpp:270
audio_sample get(size_t sample, size_t channel=0) const
Definition Sound.cpp:294
size_t getSamples() const
Get the number of samples contained in the sound.
Definition Sound.cpp:598
#define yCError(component,...)
#define YARP_LOG_COMPONENT(name,...)
NetInt32 encode(const std::string &str)
Convert a string into a vocabulary identifier.
Definition Vocab.cpp:11
std::string decode(NetInt32 code)
Convert a vocabulary identifier into a string.
Definition Vocab.cpp:33
An interface to the operating system, including Port based communication.
bool write_mp3_file(const Sound &data, const char *filename, size_t bitrate=64000)
Write a sound to a mp3 file.
bool read_mp3_file(Sound &data, const char *filename)
Read a sound from a .mp3 audio file.
bool read_mp3_bytestream(Sound &data, const char *bytestream, size_t streamsize)
Read a sound from a byte array.
#define YARP_UNUSED(var)
Definition api.h:162