BibTeX Entry

% Conference paper record for "Selecting a Characteristic Set of Reviews".
% The field list in this listing had no entry-type header and no closing
% brace, so BibTeX treated it as ignorable text outside any entry; both are
% supplied here so the record can actually be cited.
% The citation key is newly assigned (lastnameYYYYkeyword scheme); adjust it
% if documents already cite this work under a different key.
% The empty url field was dropped: it carried no data and the doi field
% already identifies the paper.
% Acronyms in booktitle are brace-protected so styles cannot recase them.
% NOTE(review): address appears to hold the conference venue rather than the
% publisher's city -- confirm before relocating it (e.g. to biblatex venue).
@inproceedings{lappas2012selecting,
  author    = {Lappas, Theodoros and Crovella, Mark and Terzi, Evimari},
  title     = {Selecting a Characteristic Set of Reviews},
  booktitle = {Proceedings of the {ACM} {SIGKDD} Conference on Knowledge Discovery and Data Mining ({KDD}) 2012},
  year      = {2012},
  month     = aug,
  address   = {Beijing, China},
  doi       = {10.1145/2339530.2339663},
  abstract  = {Online reviews provide consumers with valuable information that guides their decisions on a variety of fronts: from entertainment and shopping to medical services. Although the proliferation of online reviews gives insights about different aspects of a product, it can also prove a serious drawback: consumers cannot and will not read thousands of reviews before making a purchase decision. This need to extract useful information from large review corpora has spawned considerable prior work, but so far all have drawbacks. Review summarization (generating statistical descriptions of review sets) sacrifices the immediacy and narrative structure of reviews. Likewise, review selection (identifying a subset of `helpful' or `important' reviews) leads to redundant or non-representative summaries. In this paper, we fill the gap between existing review-summarization and review-selection methods by selecting a small subset of reviews that together preserve the statistical properties of the entire review corpus. We formalize this task as a combinatorial optimization problem and show that it is not only NP-hard, but also NP-hard to approximate. We also design practical and effective algorithms that prove to work well in practice. Our experiments with real-life review corpora on different types of products demonstrate the practical utility of our methods, and our user studies indicate that our methods provide a better summary than prior approaches.},
}