% IJCAI-25 main-track paper (see `note`); `url` is the resolver form of `doi`.
@inproceedings{ijcai2025p318,
  author    = {Guo, Shuxi and Xu, Zikang and Liu, Jiahao and Zhang, Jinyi and Qi, Qi and Sun, Haifeng and Huang, Jun and Liao, Jianxin and Wang, Jingyu},
  title     = {Efficient Inter-Operator Scheduling for Concurrent Recommendation Model Inference on {GPU}},
  booktitle = {Proceedings of the Thirty-Fourth International Joint Conference on Artificial Intelligence, {IJCAI-25}},
  editor    = {Kwok, James},
  publisher = {International Joint Conferences on Artificial Intelligence Organization},
  pages     = {2856--2864},
  year      = {2025},
  month     = aug,
  note      = {Main Track},
  doi       = {10.24963/ijcai.2025/318},
  url       = {https://doi.org/10.24963/ijcai.2025/318},
}