# papers.yml

- year: 2024
  papers: 
  - title: 'AR-Pro: Counterfactual Explanations for Anomaly Repair with Formal Properties'
    link: https://arxiv.org/abs/2410.24178
    authors: Xiayan Ji, Anton Xue, Eric Wong, Oleg Sokolsky, Insup Lee
    conference: Neural Information Processing Systems (NeurIPS), 2024
    short: NeurIPS 2024
    website: 
    blog: 
    github: https://github.com/xjiae/arpro
    external: false
    
  - title: 'The FIX Benchmark: Extracting Features Interpretable to eXperts'
    link: https://arxiv.org/abs/2409.13684
    authors: Helen Jin, Shreya Havaldar, Chaehyeon Kim, Anton Xue, Weiqiu You, Helen Qu, Marco Gatti, Daniel A. Hashimoto, Bhuvnesh Jain, Amin Madani, Masao Sako, Lyle Ungar, Eric Wong
    conference: 
    short: 
    website: https://brachiolab.github.io/fix/
    blog: https://debugml.github.io/fix/
    github: https://github.com/BrachioLab/exlib/tree/main/fix
    external: false

  - title: 'Logicbreaks: A Framework for Understanding Subversion of Rule-based Inference'
    link: https://arxiv.org/abs/2407.00075
    authors: Anton Xue, Avishree Khare, Rajeev Alur, Surbhi Goel, Eric Wong
    conference: International Conference on Learning Representations (ICLR), 2025
    short: ICLR 2025
    blog: https://debugml.github.io/logicbreaks/
    github: https://github.com/AntonXue/tf_logic
    external: false

  - title: 'Crowd-sourced machine learning prediction of long COVID using data from the National COVID Cohort Collaborative'
    link: https://www.sciencedirect.com/science/article/pii/S2352396424003694
    authors: Timothy Bergquist, Johanna Loomba, Emily Pfaff, Fangfang Xia, Zixuan Zhao, Yitan Zhu, Elliot Mitchell, Biplab Bhattacharya, Gaurav Shetty, Tamanna Munia, Grant Delong, Abdul Tariq, Zachary Butzin-Dozier, Yunwen Ji, Haodong Li, Jeremy Coyle, Seraphina Shi, Rachael V. Philips, Andrew Mertens, Romain Pirracchio, Mark van der Laan, John M. Colford Jr., Alan Hubbard, Jifan Gao, Guanhua Chen, Neelay Velingker, Ziyang Li, Yinjun Wu, Adam Stein, Jiani Huang, Zongyu Dai, Qi Long, Mayur Naik, John Holmes, Danielle Mowery, Eric Wong, Ravi Parikh, Emily Getzen, Jake Hightower, Jennifer Blase
    conference: eBioMedicine
    short: eBioMedicine
    website: 
    blog: 
    github: 
    external: true


  - title: 'Avoiding Copyright Infringement via Machine Unlearning'
    link: https://arxiv.org/abs/2406.10952
    authors: Guangyao Dou, Zheyuan Liu, Qing Lyu, Kaize Ding, Eric Wong
    conference: Findings of the Association for Computational Linguistics (NAACL), 2025
    short: NAACL-Findings 2025
    blog: 
    github: 
    external: true

  - title: 'Data-Efficient Learning with Neural Programs'
    link: https://arxiv.org/abs/2406.06246
    authors: Alaia Solko-Breslin, Seewon Choi, Ziyang Li, Neelay Velingker, Rajeev Alur, Mayur Naik, Eric Wong
    conference: Neural Information Processing Systems (NeurIPS), 2024
    short: NeurIPS 2024
    blog: https://debugml.github.io/neural-programs/
    github: https://github.com/alaiasolkobreslin/ISED/tree/v1.0.0
    external: false

  - title: 'Towards Compositionality in Concept Learning'
    link: 
    authors: Adam Stein, Aaditya Naik, Yinjun Wu, Mayur Naik, Eric Wong
    conference: International Conference on Machine Learning (ICML), 2024
    short: ICML 2024
    blog: 
    github: 
    external: false

  - title: 'DISCRET: Synthesizing Faithful Explanations For Treatment Effect Estimation'
    link: 
    authors: Yinjun Wu, Mayank Keoliya, Kan Chen, Neelay Velingker, Ziyang Li, Emily J Getzen, Qi Long, Mayur Naik, Ravi B Parikh, Eric Wong
    conference: International Conference on Machine Learning (ICML), 2024
    short: ICML 2024
    blog: 
    github: 
    external: false

  - title: 'JailbreakBench: An Open Robustness Benchmark for Jailbreaking Large Language Models'
    link: https://arxiv.org/abs/2404.01318
    authors: Patrick Chao, Edoardo Debenedetti, Alexander Robey, Maksym Andriushchenko, Francesco Croce, Vikash Sehwag, Edgar Dobriban, Nicolas Flammarion, George J. Pappas, Florian Tramer, Hamed Hassani, Eric Wong
    conference: Neural Information Processing Systems (NeurIPS), 2024
    short: NeurIPS 2024
    website: https://jailbreakbench.github.io/
    blog: 
    github: https://github.com/JailbreakBench/jailbreakbench/
    external: false

  - title: 'Defending Large Language Models against Jailbreak Attacks via Semantic Smoothing'
    link: https://arxiv.org/abs/2402.16192
    authors: Jiabao Ji, Bairu Hou, Alexander Robey, George J. Pappas, Hamed Hassani, Yang Zhang, Eric Wong, Shiyu Chang
    conference: 
    short: 
    blog: 
    github: 
    external: true
    
  - title: 'Evaluating Groups of Features via Consistency, Contiguity, and Stability'
    link: 
    authors: Chaehyeon Kim, Weiqiu You, Shreya Havaldar, Eric Wong
    conference: International Conference on Learning Representations (ICLR), 2024 Tiny Papers Track 
    short: ICLR 2024, Tiny Papers (Oral)
    blog: 
    github: 
    external: false

  - title: 'SalUn: Empowering Machine Unlearning via Gradient-based Weight Saliency in Both Image Classification and Generation'
    link: https://arxiv.org/abs/2310.12508
    authors: Chongyu Fan, Jiancheng Liu, Yihua Zhang, Dennis Wei, Eric Wong, Sijia Liu
    conference: International Conference on Learning Representations (ICLR), 2024
    short: ICLR 2024
    blog: 
    github: https://github.com/OPTML-Group/Unlearn-Saliency
    external: true

- year: 2023
  papers: 
  - title: 'Initialization Matters for Adversarial Transfer Learning'
    link: https://arxiv.org/abs/2312.05716
    authors: Andong Hua, Jindong Gu, Zhiyu Xue, Nicholas Carlini, Eric Wong, Yao Qin
    conference: IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR), 2024 
    short: CVPR 2024
    blog: 
    github: 
    external: true

  - title: 'Comparing Styles across Languages'
    link: https://arxiv.org/abs/2310.07135
    authors: Shreya Havaldar, Matthew Pressimone, Eric Wong, Lyle Ungar
    conference: Empirical Methods in Natural Language Processing (EMNLP), 2023
    short: EMNLP 2023
    blog: 
    github: 
    external: false

  - title: 'Sum-of-Parts Models: Faithful Attributions for Groups of Features'
    link: https://arxiv.org/abs/2310.16316
    authors: Weiqiu You, Helen Qu, Marco Gatti, Bhuvnesh Jain, Eric Wong
    conference: XAI in Action Workshop at NeurIPS 2023
    short: 
    blog: https://debugml.github.io/sum-of-parts/
    github: https://github.com/DebugML/sop
    external: false

  - title: 'Jailbreaking Black Box Large Language Models in Twenty Queries'
    link: https://arxiv.org/abs/2310.08419
    authors: Patrick Chao, Alexander Robey, Edgar Dobriban, Hamed Hassani, George J. Pappas, Eric Wong
    conference: 3rd IEEE Conference on Secure and Trustworthy Machine Learning, 2025
    short: SaTML 2025
    blog: https://jailbreaking-llms.github.io/
    github: https://github.com/patrickrchao/JailbreakingLLMs
    external: false
    
  - title: 'SmoothLLM: Defending Large Language Models Against Jailbreaking Attacks'
    link: https://arxiv.org/abs/2310.03684
    authors: Alexander Robey, Eric Wong, Hamed Hassani, George J. Pappas
    conference: 
    short: 
    blog: https://debugml.github.io/smooth-llm/
    github: https://github.com/arobey1/smooth-llm
    external: false
    
  - title: 'TorchQL: A Programming Framework for Integrity Constraints in Machine Learning'
    link: https://arxiv.org/abs/2308.06686
    authors: Aaditya Naik, Adam Stein, Yinjun Wu, Eric Wong, Mayur Naik
    conference: Object-oriented Programming, Systems, Languages, and Applications (OOPSLA), 2024
    short: OOPSLA 2024
    blog: 
    github: https://github.com/TorchQL/torchql
    external: false
  
  - title: Stability Guarantees for Feature Attributions with Multiplicative Smoothing
    link: https://arxiv.org/abs/2307.05902
    authors: Anton Xue, Rajeev Alur, Eric Wong
    conference: Neural Information Processing Systems (NeurIPS), 2023  
    short: NeurIPS 2023
    blog: https://debugml.github.io/multiplicative-smoothing/
    github: https://github.com/DebugML/mus
    external: false

  - title: 'TopEx: Topic-based Explanations for Model Comparison'
    link: https://arxiv.org/abs/2306.00976
    authors: Shreya Havaldar, Adam Stein, Eric Wong, Lyle Ungar
    conference: International Conference on Learning Representations (ICLR), 2023 Tiny Papers Track 
    short: ICLR 2023, Tiny Papers
    blog: 
    github: 
    external: false

  - title: Rectifying Group Irregularities in Explanations for Distribution Shift
    link: https://arxiv.org/abs/2305.16308
    authors: Adam Stein, Yinjun Wu, Eric Wong, Mayur Naik
    conference: 
    short: 
    blog: 
    github: 
    external: false

  - title: Do Machine Learning Models Learn Statistical Rules Inferred from Data?
    link: https://arxiv.org/abs/2303.01433
    authors: Aaditya Naik, Yinjun Wu, Mayur Naik, Eric Wong
    conference: International Conference on Machine Learning (ICML), 2023
    short: ICML 2023
    blog: https://debugml.github.io/SQRL/
    github: https://github.com/DebugML/sqrl
    external: false

  - title: In-context Example Selection with Influences
    link: https://arxiv.org/abs/2302.11042
    authors: Tai Nguyen, Eric Wong
    conference: 
    short: 
    blog: https://debugml.github.io/incontext-influences/
    github: https://github.com/DebugML/incontext_influences
    external: false

  - title: Adversarial Prompting for Black Box Foundation Models
    link: https://arxiv.org/abs/2302.04237
    authors: Natalie Maus*, Patrick Chao*, Eric Wong, Jacob Gardner 
    conference: 
    short: DLSP 2023 Keynote
    blog: https://debugml.github.io/adversarial-prompts/
    github: https://github.com/DebugML/adversarial_prompting
    external: false

  - title: Faithful Chain-of-Thought Reasoning
    link: https://arxiv.org/abs/2301.13379
    authors: Qing Lyu*, Shreya Havaldar*, Adam Stein*, Li Zhang, Delip Rao, Eric Wong, Marianna Apidianaki, Chris Callison-Burch
    short: IJCNLP-AACL, 2023
    conference: IJCNLP-AACL, 2023
    blog: https://debugml.github.io/fcot/
    github: https://github.com/veronica320/Faithful-COT
    external: false

- year: 2022
  papers: 
  - title: A data-based perspective on transfer learning
    link: https://arxiv.org/abs/2207.05739
    authors: Saachi Jain*, Hadi Salman*, Alaa Khaddaj*, Eric Wong, Sung Min Park, Aleksander Madry
    conference: IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR), 2023  
    short: CVPR 2023
    blog: https://gradientscience.org/data-transfer/
    github: https://github.com/MadryLab/data-transfer
    external: true

  - title: When does bias transfer in transfer learning
    link: https://arxiv.org/abs/2207.02842
    authors: Hadi Salman*, Saachi Jain*, Andrew Ilyas*, Logan Engstrom*, Eric Wong, Aleksander Madry 
    conference: 
    short: 
    blog: https://gradientscience.org/bias-transfer/
    github: https://github.com/MadryLab/bias-transfer
    external: true

  - title: Missingness bias in model debugging
    link: https://arxiv.org/abs/2204.08945
    authors: Saachi Jain*, Hadi Salman*, Eric Wong, Pengchuan Zhang, Vibhav Vineet, Sai Vemprala, Aleksander Madry
    conference: International Conference on Learning Representations (ICLR), 2022  
    short: ICLR 2022
    blog: https://gradientscience.org/missingness/
    github: https://github.com/MadryLab/missingness
    external: true

- year: 2021
  papers: 
  - title: Certified patch robustness via smoothed vision transformers
    link: https://arxiv.org/abs/2110.07719
    authors: Hadi Salman*, Saachi Jain*, Eric Wong*, Aleksander Madry 
    conference: IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR), 2022  
    short: CVPR 2022
    blog: https://gradientscience.org/smoothing/
    github: https://github.com/MadryLab/smoothed-vit
    external: true

  - title: "DeepSplit: Scalable verification of deep neural networks via operator splitting"
    link: https://arxiv.org/abs/2106.09117
    authors: Shaoru Chen*, Eric Wong*, J. Zico Kolter, Mahyar Fazlyab  
    conference: IEEE Open Journal of Control Systems (OJCS), 2022  
    short: OJCS 2022
    blog: 
    github: 
    external: true

  - title: Leveraging Sparse Linear Layers for Debuggable Deep Networks
    link: https://arxiv.org/abs/2105.04857
    authors: Eric Wong*, Shibani Santurkar*, Aleksander Madry  
    conference: International Conference on Machine Learning (ICML), 2021 *Long Oral*
    short: ICML 2021 (Oral)
    blog: https://gradientscience.org/glm_saga/
    github: https://github.com/madrylab/debuggabledeepnetworks
    external: true

- year: 2020
  papers: 
  - title: Learning perturbation sets for robust machine learning
    link: https://arxiv.org/abs/2007.08450
    authors: Eric Wong, J. Zico Kolter 
    conference: International Conference on Learning Representations (ICLR), 2021  
    short: ICLR 2021
    blog: https://locuslab.github.io/2020-07-20-perturbation/
    github: https://github.com/locuslab/perturbation_learning/
    external: true

  - title: Overfitting in adversarially robust deep learning 
    link: https://arxiv.org/abs/2002.11569
    authors: Leslie Rice*, Eric Wong*, J. Zico Kolter 
    conference: International Conference on Machine Learning (ICML), 2020
    short: ICML 2020
    blog: 
    github: https://github.com/locuslab/robust_overfitting/ 
    external: true

  - title: Neural network virtual sensors for fuel injection quantities with provable performance specifications
    link: https://arxiv.org/abs/2007.00147
    authors: Eric Wong, Tim Schneider, Joerg Schmitt, Frank R. Schmidt, J. Zico Kolter  
    conference: IEEE Intelligent Vehicles Symposium (IV), 2020
    short: IEEE IV 2020
    blog: 
    github: 
    external: true

  - title: "Fast is better than free: revisiting adversarial training"
    link: https://arxiv.org/abs/2001.03994
    authors: Eric Wong*, Leslie Rice*, J. Zico Kolter  
    conference: International Conference on Learning Representations (ICLR), 2020
    short: ICLR 2020
    blog: 
    github: 
    external: true

- year: 2019
  papers: 
  - title: Adversarial robustness against the union of multiple perturbation models
    link: https://arxiv.org/abs/1909.04068
    authors: Pratyush Maini, Eric Wong, J. Zico Kolter  
    conference: International Conference on Machine Learning (ICML), 2020
    short: ICML 2020
    blog: 
    github: https://github.com/locuslab/robust_union/
    external: true

  - title: Wasserstein adversarial examples
    link: https://arxiv.org/abs/1902.07906
    authors: Eric Wong, Frank R. Schmidt, J. Zico Kolter  
    conference: International Conference on Machine Learning (ICML), 2019  
    short: ICML 2019
    blog: 
    github: 
    external: true

- year: 2018
  papers: 
  - title: Scaling provable adversarial defenses
    link: https://arxiv.org/abs/1805.12514
    authors: Eric Wong, Frank R. Schmidt, Jan Hendrik Metzen, J. Zico Kolter  
    conference: Neural Information Processing Systems (NeurIPS), 2018
    short: NeurIPS 2018
    blog: 
    github: https://github.com/locuslab/convex_adversarial/
    external: true

- year: 2017
  papers: 
  - title: Provable defenses against adversarial examples via the convex outer adversarial polytope
    link: https://arxiv.org/abs/1711.00851
    authors: Eric Wong, J. Zico Kolter  
    conference: "International Conference on Machine Learning (ICML), 2018; Best defense paper at [NIPS 2017 ML &amp; Security Workshop](https://machine-learning-and-security.github.io/)"
    short: ICML 2018
    blog: https://locuslab.github.io/2019-03-12-provable/
    github: https://github.com/locuslab/convex_adversarial/
    external: true

  - title: A Semismooth Newton Method for Fast, Generic Convex Programming
    link: https://arxiv.org/abs/1705.00772
    authors: Alnur Ali*, Eric Wong*, J. Zico Kolter  
    conference: International Conference on Machine Learning (ICML), 2017  
    short: ICML 2017
    blog: 
    github: https://github.com/locuslab/newton_admm/
    external: true

- year: 2015
  papers: 
  - title: An SVD and Derivative Kernel Approach to Learning from Geometric Data
    link: http://zicokolter.com/publications/wong2015svdkernel.pdf
    authors: Eric Wong, J. Zico Kolter  
    conference: Conference on Artificial Intelligence (AAAI), 2015
    short: AAAI 2015
    blog: 
    github: 
    external: true