% IJCAI-25 proceedings paper (Survey Track, per the note field).
% Conventions used below: names in "Last, First" form, month given as the
% standard unquoted three-letter macro, bare DOI, and whole-word brace
% protection for the capital that follows the question mark in the title.
@inproceedings{ijcai2025p1164,
  author    = {Jain, Anubhooti and Vatsa, Mayank and Singh, Richa},
  title     = {Words Over Pixels? {Rethinking} Vision in Multimodal Large Language Models},
  booktitle = {Proceedings of the Thirty-Fourth International Joint Conference on
               Artificial Intelligence, {IJCAI-25}},
  editor    = {Kwok, James},
  publisher = {International Joint Conferences on Artificial Intelligence Organization},
  pages     = {10481--10489},
  year      = {2025},
  month     = aug,
  note      = {Survey Track},
  doi       = {10.24963/ijcai.2025/1164},
  url       = {https://doi.org/10.24963/ijcai.2025/1164},
}