BibTeX Entry


% NeurIPS 2021 spotlight paper. The percent sign in the note field is escaped
% because BibTeX copies field text verbatim into the .bbl file, where a bare
% percent sign would start a LaTeX comment and swallow the rest of the line.
@inproceedings{RastegarpanahGummadiCrovella:NeurIPS21,
  author    = {Rastegarpanah, Bashir and Gummadi, Krishna P. and Crovella, Mark},
  title     = {Auditing Black-Box Prediction Models for Data Minimization Compliance},
  booktitle = {Proceedings of {NeurIPS}},
  year      = {2021},
  month     = dec,
  address   = {Online},
  url       = {http://www.cs.bu.edu/faculty/crovella/paper-archive/minimization-audit-Neurips21.pdf},
  note      = {Selected as a Spotlight Presentation (3\% acceptance rate)},
  abstract  = {In this paper, we focus on auditing black-box prediction models for compliance with the GDPR's data minimization principle. The principle restricts prediction models to use the minimal information that is necessary for performing the task at hand. Given the challenge of the black-box setting, our key idea is to check if each of the prediction model's input features are individually necessary, by simply imputing (i.e., assigning) them some constant value and measuring the extent to which the prediction model's outcomes would change. We introduce a metric for data minimization that is based on model instability under different simple imputations. We extend the applicability of this metric from a finite sample model to a distributional setting by introducing a probabilistic data minimization guarantee, which we derive using a Bayesian approach. Furthermore, we address the auditing problem under a constraint on the number of queries to the prediction system. We formulate the problem of allocating a query budget to feasible simple imputations for investigating model instability as a multi-armed bandit framework with probabilistic success metrics, for which we design efficient algorithms. We consider two auditing problems for providing a probabilistic guarantee with a given confidence: a decision problem given a data minimization level, and a measurement problem given a fixed query budget. Our experiments with real-world prediction systems show that our auditing algorithms significantly outperform simpler benchmarks in both measurement and decision problems.}
}