% IJCAI-23 main-track conference paper (see the note field); key follows the publisher's export scheme.
@inproceedings{ijcai2023p522,
  author    = {Zhu, Tianchen and Qiu, Yue and Zhou, Haoyi and Li, Jianxin},
  title     = {Towards Long-delayed Sparsity: Learning a Better Transformer through Reward Redistribution},
  booktitle = {Proceedings of the Thirty-Second International Joint Conference on Artificial Intelligence, {IJCAI-23}},
  editor    = {Elkind, Edith},
  publisher = {International Joint Conferences on Artificial Intelligence Organization},
  pages     = {4693--4701},
  year      = {2023},
  month     = aug,
  note      = {Main Track},
  doi       = {10.24963/ijcai.2023/522},
  url       = {https://doi.org/10.24963/ijcai.2023/522},
}