BibTeX Entry


% Conference paper (ECCB 2020) introducing Extensible Matrix Factorization (EMF)
% for imputing genetic interactions. Per the note field, the paper also appears
% in an OUP Bioinformatics special issue; the doi field carries that identifier.
@inproceedings{FanEtAl:ECCB20,
  author    = {Fan, Jason and Li, Xuan Cindy and Crovella, Mark and Leiserson, Mark D. M.},
  title     = {Matrix (Factorization) Reloaded: Flexible Methods for Imputing Genetic Interactions with Cross-Species and Side Information},
  booktitle = {Proceedings of the 19th European Conference on Computational Biology},
  address   = {Online},
  month     = aug,
  year      = {2020},
  doi       = {10.1093/bioinformatics/btaa818},
  url       = {http://www.cs.bu.edu/faculty/crovella/paper-archive/eccb20-gi-mc.pdf},
  note      = {Also appears in OUP Bioinformatics Special Issue, 2020},
  abstract  = {Motivation: Mapping genetic interactions (GIs) can reveal important insights into cellular function, and has potential translational applications. There has been great progress in developing high-throughput experimental systems for measuring GIs (e.g. with double knockouts) as well as in defining computational methods for inferring (imputing) unknown interactions. However, existing computational methods for imputation have largely been developed for and applied in baker's yeast, even as experimental systems have begun to allow measurements in other contexts. Importantly, existing methods face a number of limitations in requiring specific side information and with respect to computational cost. Further, few have addressed how GIs can be imputed when data is scarce. Results: In this paper we address these limitations by presenting a new imputation framework, called Extensible Matrix Factorization (EMF). EMF is a framework of composable models that flexibly exploit cross-species information in the form of GI data across multiple species, and arbitrary side information in the form of kernels (e.g. from protein-protein interaction networks). We perform a rigorous set of experiments on these models in matched GI datasets from baker's and fission yeast. These include the first such experiments on genome-scale GI datasets in multiple species in the same study. We find that EMF models that exploit side and cross-species information improve imputation, especially in data-scarce settings. Further, we show that EMF outperforms the state-of-the-art deep learning method while incurring orders of magnitude less computational cost. Availability: Implementations of models and experiments are available at: https://github.com/lrgr/emf. Contact: mdml@cs.umd.edu},
}