/* pico2wave.c
 *
 * Copyright (C) 2009 Mathieu Parent <math.parent@gmail.com>
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * Convert text to .wav using svox text-to-speech system.
 *
 */


#include <popt.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <picoapi.h>
#include <picoapid.h>
#include <picoos.h>


/* adaptation layer defines */

/* Size in bytes of the memory area that main() allocates and hands to
   pico_initialize(). */
#define PICO_MEM_SIZE       2500000
/* Not referenced anywhere in this file. */
#define DummyLen            100000000

/* string constants */

/* Size in bytes of the buffer that main() passes to pico_getData(). */
#define MAX_OUTBUF_SIZE     128
/* Prefix prepended to every lingware file name; relative to the current
   working directory. */
const char * PICO_LINGWARE_PATH = "./lang/";
/* Name under which main() registers its single voice definition. */
const char * PICO_VOICE_NAME    = "PicoVoice";

/* supported voices
   Pico does not separately specify the voice and locale.
   All tables below are parallel: entry i of each one describes the same
   voice, and main() uses the index of the --lang match in
   picoSupportedLang[] to pick the lingware files. */
const char * picoSupportedLangIso3[] = {
    "eng", "eng", "deu", "spa", "fra", "ita"
};
const char * picoSupportedCountryIso3[] = {
    "USA", "GBR", "DEU", "ESP", "FRA", "ITA"
};
const char * picoSupportedLang[] = {
    "en-US", "en-GB", "de-DE", "es-ES", "fr-FR", "it-IT"
};
const char * picoInternalLang[] = {
    "en-US", "en-GB", "de-DE", "es-ES", "fr-FR", "it-IT"
};
const char * picoInternalTaLingware[] = {
    "en-US_ta.bin", "en-GB_ta.bin", "de-DE_ta.bin",
    "es-ES_ta.bin", "fr-FR_ta.bin", "it-IT_ta.bin"
};
const char * picoInternalSgLingware[] = {
    "en-US_lh0_sg.bin", "en-GB_kh0_sg.bin", "de-DE_gl0_sg.bin",
    "es-ES_zl0_sg.bin", "fr-FR_nk0_sg.bin", "it-IT_cm0_sg.bin"
};
const char * picoInternalUtppLingware[] = {
    "en-US_utpp.bin", "en-GB_utpp.bin", "de-DE_utpp.bin",
    "es-ES_utpp.bin", "fr-FR_utpp.bin", "it-IT_utpp.bin"
};
/* Number of voices, derived from the table that --lang is matched against
   so that it cannot drift out of sync when a voice is added or removed. */
const int picoNumSupportedVocs =
    (int) (sizeof picoSupportedLang / sizeof picoSupportedLang[0]);

/* adapation layer global variables */ |
53 |
void * picoMemArea = NULL; |
54 |
pico_System picoSystem = NULL; |
55 |
pico_Resource picoTaResource = NULL; |
56 |
pico_Resource picoSgResource = NULL; |
57 |
pico_Resource picoUtppResource = NULL; |
58 |
pico_Engine picoEngine = NULL; |
59 |
pico_Char * picoTaFileName = NULL; |
60 |
pico_Char * picoSgFileName = NULL; |
61 |
pico_Char * picoUtppFileName = NULL; |
62 |
pico_Char * picoTaResourceName = NULL; |
63 |
pico_Char * picoSgResourceName = NULL; |
64 |
pico_Char * picoUtppResourceName = NULL; |
65 |
int picoSynthAbort = 0; |
66 |
|
67 |
|
68 |
int main(int argc, const char *argv[]) { |
69 |
char * wavefile = NULL; |
70 |
char * lang = "en-US"; |
71 |
int langIndex = -1, langIndexTmp = -1; |
72 |
char * text; |
73 |
int8_t * buffer; |
74 |
size_t bufferSize = 256; |
75 |
|
76 |
/* Parsing options */ |
77 |
poptContext optCon; /* context for parsing command-line options */ |
78 |
int opt; /* used for argument parsing */ |
79 |
|
80 |
struct poptOption optionsTable[] = { |
81 |
{ "wave", 'w', POPT_ARG_STRING, &wavefile, 0, |
82 |
"Write output to this WAV file (extension SHOULD be .wav)", "filename.wav" }, |
83 |
{ "lang", 'l', POPT_ARG_STRING | POPT_ARGFLAG_SHOW_DEFAULT, &lang, 0, |
84 |
"Language", "lang" }, |
85 |
POPT_AUTOHELP |
86 |
POPT_TABLEEND |
87 |
}; |
88 |
optCon = poptGetContext(NULL, argc, argv, optionsTable, POPT_CONTEXT_POSIXMEHARDER); |
89 |
poptSetOtherOptionHelp(optCon, "<words>"); |
90 |
|
91 |
/* Reporting about invalid extra options */ |
92 |
while ((opt = poptGetNextOpt(optCon)) != -1) { |
93 |
switch (opt) { |
94 |
default: |
95 |
fprintf(stderr, "Invalid option %s: %s\n", |
96 |
poptBadOption(optCon, 0), poptStrerror(opt)); |
97 |
poptPrintHelp(optCon, stderr, 0); |
98 |
exit(1); |
99 |
} |
100 |
} |
101 |
|
102 |
/* Mandatory option: --wave */ |
103 |
if(!wavefile) { |
104 |
fprintf(stderr, "Mandatory option: %s\n\n", |
105 |
"--wave=filename.wav"); |
106 |
poptPrintHelp(optCon, stderr, 0); |
107 |
exit(1); |
108 |
} |
109 |
/* option: --lang */ |
110 |
for(langIndexTmp =0; langIndexTmp<picoNumSupportedVocs; langIndexTmp++) { |
111 |
if(!strcmp(picoSupportedLang[langIndexTmp], lang)) { |
112 |
langIndex = langIndexTmp; |
113 |
break; |
114 |
} |
115 |
} |
116 |
if(langIndex == -1) { |
117 |
fprintf(stderr, "Unknown language: %s\nValid languages:\n", |
118 |
lang); |
119 |
for(langIndexTmp =0; langIndexTmp<picoNumSupportedVocs; langIndexTmp++) { |
120 |
fprintf(stderr, "%s\n", picoSupportedLang[langIndexTmp]); |
121 |
} |
122 |
lang = "en-US"; |
123 |
fprintf(stderr, "\n"); |
124 |
poptPrintHelp(optCon, stderr, 0); |
125 |
exit(1); |
126 |
} |
127 |
|
128 |
/* Remaining argument is <words> */ |
129 |
const char **extra_argv; |
130 |
extra_argv = poptGetArgs(optCon); |
131 |
if(extra_argv) { |
132 |
text = (char *) &(*extra_argv)[0]; |
133 |
} else { |
134 |
//TODO: stdin not supported yet. |
135 |
fprintf(stderr, "Missing argument: %s\n\n", |
136 |
"<words>"); |
137 |
poptPrintHelp(optCon, stderr, 0); |
138 |
exit(1); |
139 |
} |
140 |
|
141 |
poptFreeContext(optCon); |
142 |
|
143 |
buffer = malloc( bufferSize ); |
144 |
|
145 |
int ret, getstatus; |
146 |
pico_Char * inp = NULL; |
147 |
pico_Char * local_text = NULL; |
148 |
short outbuf[MAX_OUTBUF_SIZE/2]; |
149 |
pico_Int16 bytes_sent, bytes_recv, text_remaining, out_data_type; |
150 |
pico_Retstring outMessage; |
151 |
|
152 |
picoSynthAbort = 0; |
153 |
|
154 |
picoMemArea = malloc( PICO_MEM_SIZE ); |
155 |
if((ret = pico_initialize( picoMemArea, PICO_MEM_SIZE, &picoSystem ))) { |
156 |
pico_getSystemStatusMessage(picoSystem, ret, outMessage); |
157 |
fprintf(stderr, "Cannot initialize pico (%i): %s\n", ret, outMessage); |
158 |
goto terminate; |
159 |
} |
160 |
|
161 |
/* Load the text analysis Lingware resource file. */ |
162 |
picoTaFileName = (pico_Char *) malloc( PICO_MAX_DATAPATH_NAME_SIZE + PICO_MAX_FILE_NAME_SIZE ); |
163 |
strcpy((char *) picoTaFileName, PICO_LINGWARE_PATH); |
164 |
strcat((char *) picoTaFileName, (const char *) picoInternalTaLingware[langIndex]); |
165 |
if((ret = pico_loadResource( picoSystem, picoTaFileName, &picoTaResource ))) { |
166 |
pico_getSystemStatusMessage(picoSystem, ret, outMessage); |
167 |
fprintf(stderr, "Cannot load text analysis resource file (%i): %s\n", ret, outMessage); |
168 |
goto unloadTaResource; |
169 |
} |
170 |
|
171 |
/* Load the signal generation Lingware resource file. */ |
172 |
picoSgFileName = (pico_Char *) malloc( PICO_MAX_DATAPATH_NAME_SIZE + PICO_MAX_FILE_NAME_SIZE ); |
173 |
strcpy((char *) picoSgFileName, PICO_LINGWARE_PATH); |
174 |
strcat((char *) picoSgFileName, (const char *) picoInternalSgLingware[langIndex]); |
175 |
if((ret = pico_loadResource( picoSystem, picoSgFileName, &picoSgResource ))) { |
176 |
pico_getSystemStatusMessage(picoSystem, ret, outMessage); |
177 |
fprintf(stderr, "Cannot load signal generation Lingware resource file (%i): %s\n", ret, outMessage); |
178 |
goto unloadSgResource; |
179 |
} |
180 |
|
181 |
/* Load the utpp Lingware resource file if exists - NOTE: this file is optional |
182 |
and is currently not used. Loading is only attempted for future compatibility. |
183 |
If this file is not present the loading will still succeed. // |
184 |
picoUtppFileName = (pico_Char *) malloc( PICO_MAX_DATAPATH_NAME_SIZE + PICO_MAX_FILE_NAME_SIZE ); |
185 |
strcpy((char *) picoUtppFileName, PICO_LINGWARE_PATH); |
186 |
strcat((char *) picoUtppFileName, (const char *) picoInternalUtppLingware[langIndex]); |
187 |
ret = pico_loadResource( picoSystem, picoUtppFileName, &picoUtppResource ); |
188 |
pico_getSystemStatusMessage(picoSystem, ret, outMessage); |
189 |
printf("pico_loadResource: %i: %s\n", ret, outMessage); |
190 |
*/ |
191 |
|
192 |
/* Get the text analysis resource name. */ |
193 |
picoTaResourceName = (pico_Char *) malloc( PICO_MAX_RESOURCE_NAME_SIZE ); |
194 |
if((ret = pico_getResourceName( picoSystem, picoTaResource, (char *) picoTaResourceName ))) { |
195 |
pico_getSystemStatusMessage(picoSystem, ret, outMessage); |
196 |
fprintf(stderr, "Cannot get the text analysis resource name (%i): %s\n", ret, outMessage); |
197 |
goto unloadUtppResource; |
198 |
} |
199 |
|
200 |
/* Get the signal generation resource name. */ |
201 |
picoSgResourceName = (pico_Char *) malloc( PICO_MAX_RESOURCE_NAME_SIZE ); |
202 |
if((ret = pico_getResourceName( picoSystem, picoSgResource, (char *) picoSgResourceName ))) { |
203 |
pico_getSystemStatusMessage(picoSystem, ret, outMessage); |
204 |
fprintf(stderr, "Cannot get the signal generation resource name (%i): %s\n", ret, outMessage); |
205 |
goto unloadUtppResource; |
206 |
} |
207 |
|
208 |
|
209 |
/* Create a voice definition. */ |
210 |
if((ret = pico_createVoiceDefinition( picoSystem, (const pico_Char *) PICO_VOICE_NAME ))) { |
211 |
pico_getSystemStatusMessage(picoSystem, ret, outMessage); |
212 |
fprintf(stderr, "Cannot create voice definition (%i): %s\n", ret, outMessage); |
213 |
goto unloadUtppResource; |
214 |
} |
215 |
|
216 |
/* Add the text analysis resource to the voice. */ |
217 |
if((ret = pico_addResourceToVoiceDefinition( picoSystem, (const pico_Char *) PICO_VOICE_NAME, picoTaResourceName ))) { |
218 |
pico_getSystemStatusMessage(picoSystem, ret, outMessage); |
219 |
fprintf(stderr, "Cannot add the text analysis resource to the voice (%i): %s\n", ret, outMessage); |
220 |
goto unloadUtppResource; |
221 |
} |
222 |
|
223 |
/* Add the signal generation resource to the voice. */ |
224 |
if((ret = pico_addResourceToVoiceDefinition( picoSystem, (const pico_Char *) PICO_VOICE_NAME, picoSgResourceName ))) { |
225 |
pico_getSystemStatusMessage(picoSystem, ret, outMessage); |
226 |
fprintf(stderr, "Cannot add the signal generation resource to the voice (%i): %s\n", ret, outMessage); |
227 |
goto unloadUtppResource; |
228 |
} |
229 |
|
230 |
/* Create a new Pico engine. */ |
231 |
if((ret = pico_newEngine( picoSystem, (const pico_Char *) PICO_VOICE_NAME, &picoEngine ))) { |
232 |
pico_getSystemStatusMessage(picoSystem, ret, outMessage); |
233 |
fprintf(stderr, "Cannot create a new pico engine (%i): %s\n", ret, outMessage); |
234 |
goto disposeEngine; |
235 |
} |
236 |
|
237 |
local_text = (pico_Char *) text ; |
238 |
text_remaining = strlen((const char *) local_text) + 1; |
239 |
|
240 |
inp = (pico_Char *) local_text; |
241 |
|
242 |
size_t bufused = 0; |
243 |
|
244 |
picoos_Common common = (picoos_Common) pico_sysGetCommon(picoSystem); |
245 |
|
246 |
picoos_SDFile sdOutFile = NULL; |
247 |
|
248 |
picoos_bool done = TRUE; |
249 |
if(TRUE != (done = picoos_sdfOpenOut(common, &sdOutFile, |
250 |
(picoos_char *) wavefile, SAMPLE_FREQ_16KHZ, PICOOS_ENC_LIN))) |
251 |
{ |
252 |
fprintf(stderr, "Cannot open output wave file\n"); |
253 |
ret = 1; |
254 |
goto disposeEngine; |
255 |
} |
256 |
|
257 |
/* synthesis loop */ |
258 |
while (text_remaining) { |
259 |
/* Feed the text into the engine. */ |
260 |
if((ret = pico_putTextUtf8( picoEngine, inp, text_remaining, &bytes_sent ))) { |
261 |
pico_getSystemStatusMessage(picoSystem, ret, outMessage); |
262 |
fprintf(stderr, "Cannot put Text (%i): %s\n", ret, outMessage); |
263 |
goto disposeEngine; |
264 |
} |
265 |
|
266 |
text_remaining -= bytes_sent; |
267 |
inp += bytes_sent; |
268 |
|
269 |
do { |
270 |
if (picoSynthAbort) { |
271 |
goto disposeEngine; |
272 |
} |
273 |
/* Retrieve the samples and add them to the buffer. */ |
274 |
getstatus = pico_getData( picoEngine, (void *) outbuf, |
275 |
MAX_OUTBUF_SIZE, &bytes_recv, &out_data_type ); |
276 |
if((getstatus !=PICO_STEP_BUSY) && (getstatus !=PICO_STEP_IDLE)){ |
277 |
pico_getSystemStatusMessage(picoSystem, getstatus, outMessage); |
278 |
fprintf(stderr, "Cannot get Data (%i): %s\n", getstatus, outMessage); |
279 |
goto disposeEngine; |
280 |
} |
281 |
if (bytes_recv) { |
282 |
if ((bufused + bytes_recv) <= bufferSize) { |
283 |
memcpy(buffer+bufused, (int8_t *) outbuf, bytes_recv); |
284 |
bufused += bytes_recv; |
285 |
} else { |
286 |
done = picoos_sdfPutSamples( |
287 |
sdOutFile, |
288 |
bufused / 2, |
289 |
(picoos_int16*) (buffer)); |
290 |
bufused = 0; |
291 |
memcpy(buffer, (int8_t *) outbuf, bytes_recv); |
292 |
bufused += bytes_recv; |
293 |
} |
294 |
} |
295 |
} while (PICO_STEP_BUSY == getstatus); |
296 |
/* This chunk of synthesis is finished; pass the remaining samples. */ |
297 |
if (!picoSynthAbort) { |
298 |
done = picoos_sdfPutSamples( |
299 |
sdOutFile, |
300 |
bufused / 2, |
301 |
(picoos_int16*) (buffer)); |
302 |
} |
303 |
picoSynthAbort = 0; |
304 |
} |
305 |
|
306 |
if(TRUE != (done = picoos_sdfCloseOut(common, &sdOutFile))) |
307 |
{ |
308 |
fprintf(stderr, "Cannot close output wave file\n"); |
309 |
ret = 1; |
310 |
goto disposeEngine; |
311 |
} |
312 |
|
313 |
disposeEngine: |
314 |
if (picoEngine) { |
315 |
pico_disposeEngine( picoSystem, &picoEngine ); |
316 |
pico_releaseVoiceDefinition( picoSystem, (pico_Char *) PICO_VOICE_NAME ); |
317 |
picoEngine = NULL; |
318 |
} |
319 |
unloadUtppResource: |
320 |
if (picoUtppResource) { |
321 |
pico_unloadResource( picoSystem, &picoUtppResource ); |
322 |
picoUtppResource = NULL; |
323 |
} |
324 |
unloadSgResource: |
325 |
if (picoSgResource) { |
326 |
pico_unloadResource( picoSystem, &picoSgResource ); |
327 |
picoSgResource = NULL; |
328 |
} |
329 |
unloadTaResource: |
330 |
if (picoTaResource) { |
331 |
pico_unloadResource( picoSystem, &picoTaResource ); |
332 |
picoTaResource = NULL; |
333 |
} |
334 |
terminate: |
335 |
if (picoSystem) { |
336 |
pico_terminate(&picoSystem); |
337 |
picoSystem = NULL; |
338 |
} |
339 |
exit(ret); |
340 |
} |
341 |
|