% Conference paper in the IJCAI-18 proceedings (pp. 1546--1553).
% NOTE(review): BibTeX parses "Thao Le Minh" as given = "Thao Le", family = "Minh".
% If the family name is actually "Le Minh", rewrite it as "Le Minh, Thao" -- confirm
% against the published paper before changing.
@inproceedings{ijcai2018p214,
  author    = {Thao Le Minh and Nobuyuki Shimizu and Takashi Miyazaki and Koichi Shinoda},
  title     = {Deep Learning Based Multi-modal Addressee Recognition in Visual Scenes with Utterances},
  booktitle = {Proceedings of the Twenty-Seventh International Joint Conference on Artificial Intelligence, {IJCAI-18}},
  publisher = {International Joint Conferences on Artificial Intelligence Organization},
  pages     = {1546--1553},
  year      = {2018},
  month     = jul,
  doi       = {10.24963/ijcai.2018/214},
  url       = {https://doi.org/10.24963/ijcai.2018/214},
}