% Saeki et al., IJCAI-23 (Main Track): zero-shot multilingual text-to-speech.
% The month uses the predefined BibTeX macro (unquoted) so each style can
% render it as a name; a braced numeral would be printed literally.
@inproceedings{ijcai2023p0575,
  author    = {Saeki, Takaaki and Maiti, Soumi and Li, Xinjian and Watanabe, Shinji and Takamichi, Shinnosuke and Saruwatari, Hiroshi},
  title     = {Learning to Speak from Text: Zero-Shot Multilingual Text-to-Speech with Unsupervised Text Pretraining},
  booktitle = {Proceedings of the Thirty-Second International Joint Conference on Artificial Intelligence, {IJCAI-23}},
  editor    = {Elkind, Edith},
  publisher = {International Joint Conferences on Artificial Intelligence Organization},
  pages     = {5179--5187},
  year      = {2023},
  month     = aug,
  note      = {Main Track},
  doi       = {10.24963/ijcai.2023/575},
  url       = {https://doi.org/10.24963/ijcai.2023/575},
}