% IJCAI-25 Main Track paper; month uses the standard macro so styles can localize it.
@inproceedings{ijcai2025p913,
  author    = {Park, Seungcheol and Lee, Sojin and Kim, Jongjin and Lee, Jinsik and Jo, Hyunjik and Kang, U},
  title     = {Accurate Sublayer Pruning for Large Language Models by Exploiting Latency and Tunability Information},
  booktitle = {Proceedings of the Thirty-Fourth International Joint Conference on Artificial Intelligence, {IJCAI-25}},
  editor    = {Kwok, James},
  publisher = {International Joint Conferences on Artificial Intelligence Organization},
  pages     = {8213--8221},
  year      = {2025},
  month     = aug,
  note      = {Main Track},
  doi       = {10.24963/ijcai.2025/913},
  url       = {https://doi.org/10.24963/ijcai.2025/913},
}