40 if (config.
check(
"threads",
"number of threads")){
41 m_wparams.n_threads = config.
find(
"threads").asInt16();}
// NOTE(review): the "processors" value is stored into m_wparams.n_threads, the same
// field the "threads" option writes just before, so supplying "processors" replaces any
// "threads" value. This looks like a copy-paste slip — confirm which variable the
// processor count was meant to go to.
42 if (config.
check(
"processors",
"number of processors")){
43 m_wparams.n_threads = config.
find(
"processors").asInt16();}
// NOTE(review): m_wparams.initial_prompt receives the c_str() of the object returned by
// asString(). If asString() returns a std::string by value (presumably it does — verify
// against the config API), that temporary is destroyed at the end of the statement and
// the stored pointer dangles; the text would need to live in a longer-lived std::string.
44 if (config.
check(
"initial_prompt")) {
45 m_wparams.initial_prompt = config.
find(
"initial_prompt").asString().c_str();}
46 if (config.
check(
"duration",
"duration of audio to process in milliseconds")) {
47 m_wparams.duration_ms = config.
find(
"duration").asInt32();}
48 if (config.
check(
"offset_ms")) {
49 m_wparams.offset_ms = config.
find(
"offset_ms").asInt32();}
50 if (config.
check(
"speed_up")) {
51 m_wparams.speed_up = config.
find(
"speed_up").asBool();}
52 if (config.
check(
"thold_pt",
"word timestamp probability threshold")) {
53 m_wparams.thold_pt = config.
find(
"thold_pt").asFloat32();}
54 if (config.
check(
"entropy_thold",
"entropy threshold for decoder fail")) {
55 m_wparams.entropy_thold = config.
find(
"entropy_thold").asFloat32();}
56 if (config.
check(
"logprob_thold",
"log probability threshold for decoder fail")) {
57 m_wparams.logprob_thold = config.
find(
"logprob_thold").asFloat32();}
// Optional "print_timestamps" flag. It is stored in m_wparams.print_timestamps (the value
// reported as "timestamps = %d" in the processing log message) and read as a boolean like
// the other print_* options. It was previously written into m_wparams.logprob_thold,
// which clobbered the "logprob_thold" option parsed just before and left the flag unset.
58 if (config.check("print_timestamps", "print_timestamps")) {
59 m_wparams.print_timestamps = config.find("print_timestamps").asBool();}
60 if (config.
check(
"model",
"file containing the model")) {
61 m_model = config.
find(
"model").asString();}
62 if (config.
check(
"translate",
"translate from source language to English")) {
63 m_wparams.translate = config.
find(
"translate").asBool();}
66 if(config.
check(
"print_realtime",
"print_realtime")) {
67 m_wparams.print_realtime = config.
find(
"print_realtime").asBool();}
68 if(config.
check(
"print_progress",
"print_progress")) {
69 m_wparams.print_progress = config.
find(
"print_progress").asBool();}
70 if (config.
check(
"split_on_word",
"split on word rather than on token")) {
71 m_wparams.split_on_word = config.
find(
"split_on_word").asBool();}
72 if (config.
check(
"best_of",
"number of best candidates to keep")) {
73 m_wparams.greedy.best_of = config.
find(
"best_of").asInt32();}
74 if (config.
check(
"detect-language",
"exit after automatically detecting language")) {
75 m_wparams.detect_language = config.
find(
"detect-language").asBool();}
76 if (config.check("language", "spoken language ('auto' for auto-detect)")) {
// Copy the configured text into m_language first and point m_wparams.language at that
// storage. Taking c_str() directly from the value returned by asString() would leave
// m_wparams.language pointing at a destroyed temporary, and m_language would then be
// built from that dangling pointer.
// NOTE(review): assumes m_language is a std::string member; whenever m_language is
// reassigned later, m_wparams.language must be refreshed from it.
77 m_language = config.find("language").asString();
78 m_wparams.language = m_language.c_str();
80 if (config.
check(
"beam_size",
" beam size for beam search")) {
81 m_wparams.beam_search.beam_size = config.
find(
"beam_size").asInt32();
87 if (config.
check(
"max-context",
"maximum number of text context tokens to store")) {
89 if (config.
check(
"max-len",
"maximum segment length in characters")) {
91 if (config.
check(
"no-fallback",
"do not use temperature fallback while decoding")) {
93 if (config.
check(
"remove_symbols",
"remove [] symbols from the text transcript")) {
94 m_no_symbols = config.
find(
"remove_symbols").asBool();}
96 m_wparams.token_timestamps =
false ||
max_len > 0;
98 m_wparams.temperature_inc =
no_fallback ? 0.0f : m_wparams.temperature_inc;
113 if (m_ctx ==
nullptr)
// m_wparams.language is a C string, so `language != "en"` would compare the pointer with
// the literal's address, not the text. Compare the contents instead; a null language is
// treated as "not English", as the pointer comparison did.
129 if (m_wparams.language == nullptr || std::string(m_wparams.language) != "en" || m_wparams.translate)
131 m_wparams.language =
"en";
132 m_wparams.translate =
false;
136 if (m_wparams.detect_language)
138 m_wparams.language =
"auto";
140 yCDebug(
WHISPER_SPEECHTR,
"%s: processing (%d samples, %.1f sec), %d threads, %d processors, lang = %s, task = %s, timestamps = %d ...\n",
142 m_wparams.n_threads, n_processors,
144 m_wparams.translate ?
"translate" :
"transcribe",
145 m_wparams.print_timestamps);
189 m_pcmf32.resize(1000);
194 for (
size_t i = 0;
i < m_pcmf32.size();
i++)
215 transcription += std::string(text);
223 std::regex
pattern1(
"\\[[^\\]]*\\]");
224 std::string input = transcription;
225 transcription = std::regex_replace(input,
pattern1,
"");
230 if (transcription.empty()) {score = 0.0;}