# Speech Synthesis

TODO

## Example

This is a simple [C](c.md) program (using [float](float.md) for simplicity of demonstration) that creates basic vowel sounds using formant synthesis (run e.g. as `gcc program.c -lm && ./a.out | aplay`; 8000 Hz 8-bit audio is assumed):

```
#include <stdio.h>
#include <math.h>

double vowelParams[] = { // vocal tract shapes, can be found in literature
  // formant1 formant2 width1 width2 amplitude1 amplitude2
  850,        1650,    500,   500,   1,         0.2,       // a
  390,        2300,    500,   450,   1,         0.9,       // e
  240,        2500,    300,   500,   1,         0.5,       // i
  250,        600,     500,   400,   1,         0.9,       // o
  300,        400,     400,   400,   1,         1.0        // u
};

double tone(double t, double f) // tone of given frequency
{
  return sin(f * t * 2 * M_PI);
}

/* simple linear ("triangle") function for modelling spectral shape of one
  formant with given frequency location, width and amplitude */
double formant(double freq, double f, double w, double a)
{
  double r = ((freq - f + w / 2) * 2 * a) / w;

  if (freq > f)
    r = -1 * (r - a) + a;

  return r > 1 ? 1 : (r < 0 ? 0 : r);
}

/* gives one sample of speech, takes two formants as input, fundamental
  frequency and possible offset of both formants (can model "bigger/smaller
  head") */
double speech(double t, double fundamental, double offset,
  double f1, double f2,
  double w1, double w2,
  double a1, double a2)
{
  int harmonic = 1; // number of harmonic frequency

  double r = 0;

  /* now generate harmonics (multiples of fundamental frequency) as the source,
    and multiply them by the envelope given by formants (no need to deal with
    multiplication of spectra; as we're constructing the result from basic
    frequencies, we can simply multiply each one directly): */

  while (1)
  {
    double f = harmonic * fundamental;
    double formant1 = formant(f,f1 + offset,w1,a1);
    double formant2 = formant(f,f2 + offset,w2,a2);

    // envelope = max(formant1,formant2)
    r += (formant1 > formant2 ? formant1 : formant2) * 0.1 * tone(t,f);

    if (f > 10000) // stop generating harmonics above 10000 Hz
      break;

    harmonic++;
  }

  return r > 1.0 ? 1.0 : (r < 0 ? 0 : r); // clamp between 0 and 1
}

int main(void)
{
  for (int i = 0; i < 50000; ++i)
  {
    double t = ((double) i) / 8000.0;
    double *vowel = vowelParams + ((i / 4000) % 5) * 6; // change vowels

    putchar(128 + 127 * speech(t,150,-100,vowel[0],vowel[1],vowel[2],vowel[3],vowel[4],vowel[5]));
  }

  return 0;
}
```