% IJCAI-25 Survey Track paper (see the note field). Names use the "Last, First" form and
% month uses the standard three-letter macro so the entry renders correctly under any style.
@inproceedings{ijcai2025p1164,
  author    = {Jain, Anubhooti and Vatsa, Mayank and Singh, Richa},
  title     = {Words Over Pixels? Rethinking Vision in Multimodal Large Language Models},
  booktitle = {Proceedings of the Thirty-Fourth International Joint Conference on Artificial Intelligence, {IJCAI-25}},
  editor    = {Kwok, James},
  publisher = {International Joint Conferences on Artificial Intelligence Organization},
  pages     = {10481--10489},
  year      = {2025},
  month     = aug,
  note      = {Survey Track},
  doi       = {10.24963/ijcai.2025/1164},
  url       = {https://doi.org/10.24963/ijcai.2025/1164},
}