YARP
Yet Another Robot Platform
 
Loading...
Searching...
No Matches
whisperSpeechTranscription.h
Go to the documentation of this file.
1/*
2 * SPDX-FileCopyrightText: 2023-2023 Istituto Italiano di Tecnologia (IIT)
3 * SPDX-License-Identifier: BSD-3-Clause
4 */
5
6#ifndef WHISPERSPEECHTRANSCRIPTION_H
7#define WHISPERSPEECHTRANSCRIPTION_H
8
11#include <yarp/os/Bottle.h>
12#include <stdio.h>
13
14#include "whisper.h"
15
16using namespace yarp::os;
17
34{
35private:
// Verbosity flag, enabled by default. NOTE(review): how it is consumed is not visible in this header - confirm in the implementation file.
36 bool m_verbose = true;
// "No symbols" flag, enabled by default. NOTE(review): presumably controls whether non-text symbols are stripped from the result - TODO confirm.
37 bool m_no_symbols = true;
// Language string, defaulting to "auto". Presumably the value handled by setLanguage()/getLanguage() - confirm.
38 std::string m_language="auto";
// Model identifier with no default value. NOTE(review): presumably the Whisper model to load - confirm against open().
39 std::string m_model;
40 std::vector<float> m_pcmf32; // mono-channel F32 PCM
41 std::vector<std::vector<float>> m_pcmf32s; // stereo-channel F32 PCM
// Pointer to a whisper_context (type not declared in this file; presumably from whisper.h). Starts as nullptr.
42 struct whisper_context* m_ctx= nullptr;
// Whisper parameter structure; it has no in-class initializer here, so it must be set elsewhere before use.
43 whisper_full_params m_wparams;
44
// Processor count, defaulting to 1. NOTE(review): unlike the other members it lacks the m_ prefix; its consumer is not visible here.
45 int32_t n_processors = 1;
46
47public:
54
55 //DeviceDriver interface
/**
 * Open the DeviceDriver.
 * @param config searchable configuration object; the keys read from it are not visible in this header.
 * @return bool status; presumably true on success - confirm against the implementation.
 */
56 bool open(yarp::os::Searchable& config) override;
/**
 * Close the DeviceDriver.
 * @return bool status; presumably true on success - confirm against the implementation.
 */
57 bool close() override;
58
59 //ISpeechTranscription interface
/**
 * Sets the language for speech transcription.
 * @param language the language to use, passed by const reference; accepted values are not visible in this header.
 * @return bool status; presumably true on success - confirm against the implementation.
 */
60 virtual bool setLanguage(const std::string& language) override;
/**
 * Gets the current language set for speech transcription.
 * @param language non-const reference; presumably receives the current language - TODO confirm.
 * @return bool status; presumably true on success - confirm against the implementation.
 */
61 virtual bool getLanguage(std::string& language) override;
/**
 * Performs the speech transcription.
 * @param sound the sound to transcribe, passed by const reference.
 * @param transcription non-const reference; presumably receives the transcribed text - TODO confirm.
 * @param score non-const reference; presumably receives a confidence value for the result - TODO confirm its range.
 * @return bool status; presumably true on success - confirm against the implementation.
 */
62 virtual bool transcribe(const yarp::sig::Sound& sound, std::string& transcription, double& score) override;
63};
64
65#endif
WhisperSpeechTranscription: A YARP device which performs audio-to-text transcription using OpenAI Whisper.
WhisperSpeechTranscription(const WhisperSpeechTranscription &)=delete
WhisperSpeechTranscription & operator=(const WhisperSpeechTranscription &)=delete
virtual bool setLanguage(const std::string &language) override
Sets the language for speech transcription.
bool open(yarp::os::Searchable &config) override
Open the DeviceDriver.
bool close() override
Close the DeviceDriver.
WhisperSpeechTranscription & operator=(WhisperSpeechTranscription &&)=delete
virtual bool transcribe(const yarp::sig::Sound &sound, std::string &transcription, double &score) override
Performs the speech transcription.
WhisperSpeechTranscription(WhisperSpeechTranscription &&)=delete
virtual bool getLanguage(std::string &language) override
Gets the current language set for speech transcription.
Interface implemented by all device drivers.
A generic interface for speech transcription.
A mini-server for performing network communication in the background.
A base class for nested structures that can be searched.
Definition Searchable.h:31
Class for storing sounds. See Audio in YARP for additional documentation on YARP audio.
Definition Sound.h:25
An interface to the operating system, including Port based communication.