% IJCAI-25 conference paper (note field: Main Track).
% `month` uses the standard unquoted three-letter macro so the bibliography
% style controls how the month is rendered; names are in "Last, First" form.
@inproceedings{ijcai2025p61,
  author    = {Wang, En and Lin, Xingyu and Su, Du and Bao, Chenfu and Lv, Zhonghou and Yang, Funing and Xu, Yuanbo and Liu, Wenbin},
  title     = {Indirect Online Preference Optimization via Reinforcement Learning},
  booktitle = {Proceedings of the Thirty-Fourth International Joint Conference on Artificial Intelligence, {IJCAI-25}},
  editor    = {Kwok, James},
  publisher = {International Joint Conferences on Artificial Intelligence Organization},
  pages     = {538--546},
  year      = {2025},
  month     = aug,
  note      = {Main Track},
  doi       = {10.24963/ijcai.2025/61},
  url       = {https://doi.org/10.24963/ijcai.2025/61},
}