% IJCAI-25 conference paper (note field records "Main Track").
% The doi field holds the bare DOI; url is the matching doi.org resolver link.
@inproceedings{ijcai2025p53,
  author    = {Mendu, Sai Krishna and Yenala, Harish and Gulati, Aditi and Kumar, Shanu and Agrawal, Parag},
  title     = {Towards Safer Pretraining: Analyzing and Filtering Harmful Content in Webscale Datasets for Responsible {LLMs}},
  booktitle = {Proceedings of the Thirty-Fourth International Joint Conference on Artificial Intelligence, {IJCAI-25}},
  editor    = {Kwok, James},
  publisher = {International Joint Conferences on Artificial Intelligence Organization},
  pages     = {466--474},
  year      = {2025},
  month     = aug,
  note      = {Main Track},
  doi       = {10.24963/ijcai.2025/53},
  url       = {https://doi.org/10.24963/ijcai.2025/53},
}