% Generated by Paperpile. Check out http://paperpile.com for more information.
% BibTeX export options can be customized via Settings -> BibTeX.
@ARTICLE{Pais2014-sr,
title = "Assessing the efficiency of multiple sequence alignment
programs",
author = "Pais, Fabiano Sviatopolk-Mirsky and Ruy, Patr\'{\i}cia de
C\'{a}ssia and Oliveira, Guilherme and Coimbra, Roney Santos",
affiliation = "Center for Excellence in Bioinformatics, Centro de Pesquisas
Ren\'{e} Rachou (CPqRR), Funda\c{c}\~{a}o Oswaldo Cruz
(FIOCRUZ/Minas), Belo Horizonte, MG Brazil.",
abstract = "BACKGROUND: Multiple sequence alignment (MSA) is an extremely
useful tool for molecular and evolutionary biology and there
are several programs and algorithms available for this
purpose. Although previous studies have compared the alignment
accuracy of different MSA programs, their computational time
and memory usage have not been systematically evaluated. Given
the unprecedented amount of data produced by next generation
deep sequencing platforms, and increasing demand for
large-scale data analysis, it is imperative to optimize the
application of software. Therefore, a balance between
alignment accuracy and computational cost has become a
critical indicator of the most suitable MSA program. We
compared both accuracy and cost of nine popular MSA programs,
namely CLUSTALW, CLUSTAL OMEGA, DIALIGN-TX, MAFFT, MUSCLE,
POA, Probalign, Probcons and T-Coffee, against the benchmark
alignment dataset BAliBASE and discuss the relevance of some
implementations embedded in each program's algorithm. Accuracy
of alignment was calculated with the two standard scoring
functions provided by BAliBASE, the sum-of-pairs and
total-column scores, and computational costs were determined
by collecting peak memory usage and time of execution.
RESULTS: Our results indicate that mostly the
consistency-based programs Probcons, T-Coffee, Probalign and
MAFFT outperformed the other programs in accuracy. Whenever
sequences with large N/C terminal extensions were present in
the BAliBASE suite, Probalign, MAFFT and also CLUSTAL OMEGA
outperformed Probcons and T-Coffee. The drawback of these
programs is that they are more memory-greedy and slower than
POA, CLUSTALW, DIALIGN-TX, and MUSCLE. CLUSTALW and MUSCLE
were the fastest programs, being CLUSTALW the least RAM memory
demanding program. CONCLUSIONS: Based on the results presented
herein, all four programs Probcons, T-Coffee, Probalign and
MAFFT are well recommended for better accuracy of multiple
sequence alignments. T-Coffee and recent versions of MAFFT can
deliver faster and reliable alignments, which are specially
suited for larger datasets than those encountered in the
BAliBASE suite, if multi-core computers are available. In
fact, parallelization of alignments for multi-core computers
should probably be addressed by more programs in a near
future, which will certainly improve performance
significantly.",
journal = "Algorithms Mol. Biol.",
volume = 9,
number = 1,
pages = "4",
month = "6~" # mar,
year = 2014
}
@ARTICLE{Freyhult2007-et,
title = "Exploring genomic dark matter: a critical assessment of the
performance of homology search methods on noncoding {RNA}",
author = "Freyhult, Eva K and Bollback, Jonathan P and Gardner, Paul P",
affiliation = "The Linnaeus Centre for Bioinformatics, Uppsala University,
75124 Uppsala, Sweden.",
abstract = "Homology search is one of the most ubiquitous bioinformatic
tasks, yet it is unknown how effective the currently available
tools are for identifying noncoding RNAs (ncRNAs). In this
work, we use reliable ncRNA data sets to assess the
effectiveness of methods such as BLAST, FASTA, HMMer, and
Infernal. Surprisingly, the most popular homology search
methods are often the least accurate. As a result, many
studies have used inappropriate tools for their analyses. On
the basis of our results, we suggest homology search
strategies using the currently available tools and some
directions for future development.",
journal = "Genome Res.",
publisher = "Cold Spring Harbor Lab",
volume = 17,
number = 1,
pages = "117--125",
month = "6~" # jan,
year = 2007,
keywords = "My publications;citations.bib;paulall.bib"
}
@ARTICLE{Yang2009-oc,
title = "Comparison of public peak detection algorithms for {MALDI}
mass spectrometry data analysis",
author = "Yang, Chao and He, Zengyou and Yu, Weichuan",
affiliation = "Department of Electronic and Computer Engineering, The Hong
Kong University of Science and Technology, Clear Water Bay,
Hong Kong, PR China. [email protected]",
abstract = "BACKGROUND: In mass spectrometry (MS) based proteomic data
analysis, peak detection is an essential step for subsequent
analysis. Recently, there has been significant progress in the
development of various peak detection algorithms. However,
neither a comprehensive survey nor an experimental comparison
of these algorithms is yet available. The main objective of
this paper is to provide such a survey and to compare the
performance of single spectrum based peak detection methods.
RESULTS: In general, we can decompose a peak detection
procedure into three consequent parts: smoothing, baseline
correction and peak finding. We first categorize existing peak
detection algorithms according to the techniques used in
different phases. Such a categorization reveals the
differences and similarities among existing peak detection
algorithms. Then, we choose five typical peak detection
algorithms to conduct a comprehensive experimental study using
both simulation data and real MALDI MS data. CONCLUSION: The
results of comparison show that the continuous wavelet-based
algorithm provides the best average performance.",
journal = "BMC Bioinformatics",
volume = 10,
pages = "4",
month = "6~" # jan,
year = 2009
}
@ARTICLE{Altschul1990-ht,
title = "Basic local alignment search tool",
author = "Altschul, S F and Gish, W and Miller, W and Myers, E W and
Lipman, D J",
journal = "J. Mol. Biol.",
volume = 215,
number = 3,
pages = "403--410",
month = oct,
year = 1990,
keywords = "Mendeley Import (Jan 23);paulall.bib"
}
@ARTICLE{Boutros2014-zm,
title = "Toward better benchmarking: challenge-based methods assessment in
cancer genomics",
author = "Boutros, Paul C and Margolin, Adam A and Stuart, Joshua M and
Califano, Andrea and Stolovitzky, Gustavo",
abstract = "Rapid technological development has created an urgent need for
improved evaluation of algorithms for the analysis of cancer
genomics data. We outline how challenge-based assessment may help
fill this gap by leveraging crowd-sourcing to distribute effort
and reduce bias.",
journal = "Genome Biol.",
volume = 15,
number = 9,
pages = "462",
month = "17~" # sep,
year = 2014,
language = "en"
}
@ARTICLE{Boulesteix2013-vb,
title = "A plea for neutral comparison studies in computational
sciences",
author = "Boulesteix, A and Lauer, S and Eugster, M J
A",
affiliation = "Department of Medical Informatics, Biometry and Epidemiology,
Ludwig-Maximilians-University of Munich, Munich, Germany.",
abstract = "In computational science literature including, e.g.,
bioinformatics, computational statistics or machine learning,
most published articles are devoted to the development of
``new methods'', while comparison studies are generally
appreciated by readers but surprisingly given poor
consideration by many journals. This paper stresses the
importance of neutral comparison studies for the objective
evaluation of existing methods and the establishment of
standards by drawing parallels with clinical research. The
goal of the paper is twofold. Firstly, we present a survey of
recent computational papers on supervised classification
published in seven high-ranking computational science
journals. The aim is to provide an up-to-date picture of
current scientific practice with respect to the comparison of
methods in both articles presenting new methods and articles
focusing on the comparison study itself. Secondly, based on
the results of our survey we critically discuss the necessity,
impact and limitations of neutral comparison studies in
computational sciences. We define three reasonable criteria a
comparison study has to fulfill in order to be considered as
neutral, and explicate general considerations on the
individual components of a ``tidy neutral comparison study''.
R codes for completely replicating our statistical analyses
and figures are available from the companion website
http://www.ibe.med.uni-muenchen.de/organisation/mitarbeiter/020\_professuren/boulesteix/plea2013.",
journal = "PLoS One",
volume = 8,
number = 4,
pages = "e61562",
month = "24~" # apr,
year = 2013
}
@ARTICLE{Bayzid2013-hc,
title = "Naive binning improves phylogenomic analyses",
author = "Bayzid, Md Shamsuzzoha and Warnow, Tandy",
affiliation = "Department of Computer Science, The University of Texas at
Austin, Austin, TX 78712, USA.",
abstract = "MOTIVATION: Species tree estimation in the presence of
incomplete lineage sorting (ILS) is a major challenge for
phylogenomic analysis. Although many methods have been
developed for this problem, little is understood about the
relative performance of these methods when estimated gene
trees are poorly estimated, owing to inadequate phylogenetic
signal. RESULTS: We explored the performance of some methods
for estimating species trees from multiple markers on
simulated datasets in which gene trees differed from the
species tree owing to ILS. We included *BEAST, concatenated
analysis and several 'summary methods': BUCKy, MP-EST,
minimize deep coalescence, matrix representation with
parsimony and the greedy consensus. We found that *BEAST and
concatenation gave excellent results, often with substantially
improved accuracy over the other methods. We observed that
*BEAST's accuracy is largely due to its ability to co-estimate
the gene trees and species tree. However, *BEAST is
computationally intensive, making it challenging to run on
datasets with 100 or more genes or with more than 20 taxa. We
propose a new approach to species tree estimation in which the
genes are partitioned into sets, and the species tree is
estimated from the resultant 'supergenes'. We show that this
technique improves the scalability of *BEAST without affecting
its accuracy and improves the accuracy of the summary methods.
Thus, naive binning can improve phylogenomic analysis in the
presence of ILS. CONTACT: [email protected] SUPPLEMENTARY
INFORMATION: Supplementary data are available at
Bioinformatics online.",
journal = "Bioinformatics",
volume = 29,
number = 18,
pages = "2277--2284",
month = "15~" # sep,
year = 2013
}
@ARTICLE{Kolodny2005-ry,
title = "Comprehensive evaluation of protein structure alignment
methods: scoring by geometric measures",
author = "Kolodny, Rachel and Koehl, Patrice and Levitt, Michael",
affiliation = "Department of Structural Biology, Fairchild Building, Stanford
University, Stanford CA 94305, USA. [email protected]",
abstract = "We report the largest and most comprehensive comparison of
protein structural alignment methods. Specifically, we
evaluate six publicly available structure alignment programs:
SSAP, STRUCTAL, DALI, LSQMAN, CE and SSM by aligning all
8,581,970 protein structure pairs in a test set of 2930
protein domains specially selected from CATH v.2.4 to ensure
sequence diversity. We consider an alignment good if it
matches many residues, and the two substructures are
geometrically similar. Even with this definition, evaluating
structural alignment methods is not straightforward. At first,
we compared the rates of true and false positives using
receiver operating characteristic (ROC) curves with the CATH
classification taken as a gold standard. This proved
unsatisfactory in that the quality of the alignments is not
taken into account: sometimes a method that finds less good
alignments scores better than a method that finds better
alignments. We correct this intrinsic limitation by using four
different geometric match measures (SI, MI, SAS, and GSAS) to
evaluate the quality of each structural alignment. With this
improved analysis we show that there is a wide variation in
the performance of different methods; the main reason for this
is that it can be difficult to find a good structural
alignment between two proteins even when such an alignment
exists. We find that STRUCTAL and SSM perform best, followed
by LSQMAN and CE. Our focus on the intrinsic quality of each
alignment allows us to propose a new method, called
``Best-of-All'' that combines the best results of all methods.
Many commonly used methods miss 10-50\% of the good
Best-of-All alignments. By putting existing structural
alignments into proper perspective, our study allows better
comparison of protein structures. By highlighting limitations
of existing methods, it will spur the further development of
better structural alignment methods. This will have
significant biological implications now that structural
comparison has come to play a central role in the analysis of
experimental work on protein structure, protein function and
protein evolution.",
journal = "J. Mol. Biol.",
volume = 346,
number = 4,
pages = "1173--1188",
month = "4~" # mar,
year = 2005
}
@ARTICLE{Sheldrick2008-xy,
title = "A short history of {SHELX}",
author = "Sheldrick, G M",
affiliation = "Department of Structural Chemistry, University of Goettingen,
Tammannstrasse 4, D-37077 Goettingen, Germany.",
abstract = "An account is given of the development of the SHELX system of
computer programs from SHELX-76 to the present day. In
addition to identifying useful innovations that have come into
general use through their implementation in SHELX, a critical
analysis is presented of the less-successful features, missed
opportunities and desirable improvements for future releases
of the software. An attempt is made to understand how a
program originally designed for photographic intensity data,
punched cards and computers over 10000 times slower than an
average modern personal computer has managed to survive for so
long. SHELXL is the most widely used program for
small-molecule refinement and SHELXS and SHELXD are often
employed for structure solution despite the availability of
objectively superior programs. SHELXL also finds a niche for
the refinement of macromolecules against high-resolution or
twinned data; SHELXPRO acts as an interface for macromolecular
applications. SHELXC, SHELXD and SHELXE are proving useful for
the experimental phasing of macromolecules, especially because
they are fast and robust and so are often employed in
pipelines for high-throughput phasing. This paper could serve
as a general literature citation when one or more of the
open-source SHELX programs (and the Bruker AXS version
SHELXTL) are employed in the course of a crystal-structure
determination.",
journal = "Acta Crystallogr. A",
volume = 64,
number = "Pt 1",
pages = "112--122",
month = jan,
year = 2008
}
@ARTICLE{Jelizarow2010-zf,
title = "Over-optimism in bioinformatics: an illustration",
author = "Jelizarow, M and Guillemot, V and Tenenhaus, A
and Strimmer, K and Boulesteix, A",
affiliation = "Department of Medical Informatics, Biometry and Epidemiology,
University of Munich, Munich, Germany.",
abstract = "MOTIVATION: In statistical bioinformatics research, different
optimization mechanisms potentially lead to 'over-optimism' in
published papers. So far, however, a systematic critical study
concerning the various sources underlying this over-optimism
is lacking. RESULTS: We present an empirical study on
over-optimism using high-dimensional classification as
example. Specifically, we consider a 'promising' new
classification algorithm, namely linear discriminant analysis
incorporating prior knowledge on gene functional groups
through an appropriate shrinkage of the within-group
covariance matrix. While this approach yields poor results in
terms of error rate, we quantitatively demonstrate that it can
artificially seem superior to existing approaches if we 'fish
for significance'. The investigated sources of over-optimism
include the optimization of datasets, of settings, of
competing methods and, most importantly, of the method's
characteristics. We conclude that, if the improvement of a
quantitative criterion such as the error rate is the main
contribution of a paper, the superiority of new algorithms
should always be demonstrated on independent validation data.
AVAILABILITY: The R codes and relevant data can be downloaded
from
http://www.ibe.med.uni-muenchen.de/organisation/mitarbeiter/020\_professuren/boulesteix/overoptimism/,
such that the study is completely reproducible.",
journal = "Bioinformatics",
volume = 26,
number = 16,
pages = "1990--1998",
month = "15~" # aug,
year = 2010
}
@ARTICLE{Thompson1994-eu,
title = "{CLUSTAL} W: improving the sensitivity of progressive multiple
sequence alignment through sequence weighting, position-specific
gap penalties and weight matrix choice",
author = "Thompson, J D and Higgins, D G and Gibson, T J",
journal = "Nucleic Acids Res.",
volume = 22,
number = 22,
pages = "4673--4680",
month = nov,
year = 1994,
keywords = "Mendeley Import (Jan 23);paulall.bib"
}
@ARTICLE{Lu2013-fs,
title = "Comparative study of de novo assembly and genome-guided
assembly strategies for transcriptome reconstruction based on
{RNA-Seq}",
author = "Lu, Bingxin and Zeng, Zhenbing and Shi, Tieliu",
affiliation = "Center for Bioinformatics and Computational Biology, Shanghai
Key Laboratory of Regulatory Biology, Institute of Biomedical
Sciences and School of Life Sciences, East China Normal
University, Shanghai 200241, China.",
abstract = "Transcriptome reconstruction is an important application of
RNA-Seq, providing critical information for further analysis
of transcriptome. Although RNA-Seq offers the potential to
identify the whole picture of transcriptome, it still presents
special challenges. To handle these difficulties and
reconstruct transcriptome as completely as possible, current
computational approaches mainly employ two strategies: de novo
assembly and genome-guided assembly. In order to find the
similarities and differences between them, we firstly chose
five representative assemblers belonging to the two classes
respectively, and then investigated and compared their
algorithm features in theory and real performances in
practice. We found that all the methods can be reduced to
graph reduction problems, yet they have different conceptual
and practical implementations, thus each assembly method has
its specific advantages and disadvantages, performing worse
than others in certain aspects while outperforming others in
anther aspects at the same time. Finally we merged assemblies
of the five assemblers and obtained a much better assembly.
Additionally we evaluated an assembler using genome-guided de
novo assembly approach, and achieved good performance. Based
on these results, we suggest that to obtain a comprehensive
set of recovered transcripts, it is better to use a
combination of de novo assembly and genome-guided assembly.",
journal = "Sci. China Life Sci.",
volume = 56,
number = 2,
pages = "143--155",
month = feb,
year = 2013
}
@ARTICLE{Wilson2006-ih,
title = "Where's the real bottleneck in scientific computing?",
author = "Wilson, G V",
abstract = "When I first started doing computational science in 1986, a new
generation of fast, cheap chips had just ushered in the current
era of low-cost supercomputers, in which multiple processors
work in parallel on a single problem. Suddenly, it seemed as
though everyone ...",
journal = "Am. Sci.",
publisher = "americanscientist.org",
year = 2006
}
@ARTICLE{Felsenstein1995-ic,
title = "Phylogeny programs",
author = "Felsenstein, J",
journal = "Internet address: http://evolution.gs.washington.edu/phylip/software.html",
year = 1995
}
@ARTICLE{Mann1947-re,
title = "On a Test of Whether one of Two Random Variables is
Stochastically Larger than the Other",
author = "Mann, H B and Whitney, D R",
abstract = "Let x and y be two random variables with continuous cumulative
distribution functions f and g. A statistic U depending on the
relative ranks of the x's and y's is proposed for testing the
hypothesis f = g. Wilcoxon proposed an equivalent test in the
Biometrics Bulletin, December, 1945, but gave only a few points
of the distribution of his statistic. Under the hypothesis f = g
the probability of obtaining a given U in a sample of n x's and
m y's is the solution of a certain recurrence relation involving
n and m. Using this recurrence relation tables have been
computed giving the probability of U for samples up to n = m =
8. At this point the distribution is almost normal. From the
recurrence relation explicit expressions for the mean, variance,
and fourth moment are obtained. The 2rth moment is shown to have
a certain form which enabled us to prove that the limit
distribution is normal if m, n go to infinity in any arbitrary
manner. The test is shown to be consistent with respect to the
class of alternatives $f(x) > g(x)$ for every x.",
journal = "Ann. Math. Stat.",
publisher = "Institute of Mathematical Statistics",
volume = 18,
number = 1,
pages = "50--60",
year = 1947
}
@ARTICLE{Knowles2008-jj,
title = "Why does a method that fails continue to be used?",
author = "Knowles, L Lacey",
affiliation = "Department of Ecology and Evolutionary Biology, Museum of
Zoology, University of Michigan, Ann Arbor, Michigan 48109,
USA. [email protected]",
abstract = "As a critical framework for addressing a diversity of
evolutionary and ecological questions, any method that
provides accurate and detailed phylogeographic inference would
be embraced. What is difficult to understand is the continued
use of a method that not only fails, but also has never been
shown to work--nested clade analysis is applied widely even
though the conditions under which the method will provide
reliable results have not yet been demonstrated. This
contradiction between performance and popularity is even more
perplexing given the recent methodological and computational
advances for making historical inferences, which include
estimating population genetic parameters and testing different
biogeographic scenarios. Here I briefly review the history of
criticisms and rebuttals that focus specifically on the high
rate of incorrect phylogeographic inference of nested-clade
analysis, with the goal of understanding what drives its
unfettered popularity. In this case, the appeal of what
nested-clade analysis claims to do--not what the method
actually achieves--appears to explain its paradoxical status
as a favorite method that fails. What a method promises, as
opposed to how it performs, must be considered separately when
evaluating whether the method represents a valuable tool for
historical inference.",
journal = "Evolution",
volume = 62,
number = 11,
pages = "2713--2717",
month = nov,
year = 2008
}
@INCOLLECTION{Kandemir2002-sv,
title = "Compiler Optimizations for Low Power Systems",
booktitle = "Power Aware Computing",
author = "Kandemir, Mahmut and Vijaykrishnan, N and Irwin, Mary Jane",
editor = "Graybill, Robert and Melhem, Rami",
publisher = "Springer US",
pages = "191--210",
series = "Series in Computer Science",
year = 2002,
language = "en"
}
@ARTICLE{Norel2011-cq,
title = "The self-assessment trap: can we all be better than average?",
author = "Norel, R and Rice, J J and Stolovitzky, G",
journal = "Mol. Syst. Biol.",
publisher = "EMBO Press",
volume = 7,
number = 1,
pages = "537",
month = "1~" # jan,
year = 2011
}
@ARTICLE{Posada1998-qq,
title = "{MODELTEST}: testing the model of {DNA} substitution",
author = "Posada, D and Crandall, K A",
affiliation = "Department of Zoology, Brigham Young University, 574 WIDB,
Provo, UT 84602-5255, USA. [email protected]",
abstract = "SUMMARY: The program MODELTEST uses log likelihood scores to
establish the model of DNA evolution that best fits the data.
AVAILABILITY: The MODELTEST package, including the source code
and some documentation is available at
http://bioag.byu.edu/zoology/crandall\_lab/modeltest.html.",
journal = "Bioinformatics",
volume = 14,
number = 9,
pages = "817--818",
year = 1998
}
@INCOLLECTION{Otwinowski1997-xj,
title = "[20] Processing of {X-ray} diffraction data collected in
oscillation mode",
booktitle = "Methods in Enzymology",
author = "Otwinowski, Z and Minor, W",
abstract = "Publisher Summary X-ray data can be collected with zero-, one-,
and two-dimensional detectors, zero-dimensional (single counter)
being the simplest and two-dimensional the most efficient in
terms of measuring diffracted X-rays in all directions. To
analyze the single-crystal diffraction data collected with these
detectors, several computer programs have been developed.
Two-dimensional detectors and related software are now
predominantly used to measure and integrate diffraction from
single crystals of biological macromolecules. Macromolecular
crystallography is an iterative process. To monitor the
progress, the HKL package provides two tools: (1) statistics,
both weighted ($\chi$2) and unweighted (R-merge), where the
Bayesian reasoning and multicomponent error model helps obtain
proper error estimates and (2) visualization of the process,
which helps an operator to confirm that the process of data
reduction, including the resulting statistics, is correct and
allows the evaluation of the problems for which there are no
good statistical criteria. Visualization also provides
confidence that the point of diminishing returns in data
collection and reduction has been reached. At that point, the
effort should be directed to solving the structure. The methods
presented in the chapter have been applied to solve a large
variety of problems, from inorganic molecules with 5 {\AA} unit
cell to rotavirus of 700 {\AA} diameters crystallized in 700
\texttimes{} 1000 \texttimes{} 1400 {\AA} cell.",
publisher = "Academic Press",
volume = "276",
pages = "307--326",
year = 1997
}
@ARTICLE{Marx2013-zi,
title = "Biology: The big challenges of big data",
author = "Marx, V",
journal = "Nature",
volume = 498,
number = 7453,
pages = "255--260",
month = "13~" # jun,
year = 2013,
language = "en"
}
@ARTICLE{Woolley2010-ld,
title = "Evidence for a collective intelligence factor in the
performance of human groups",
author = "Woolley, A W and Chabris, C F and
Pentland, A and Hashmi, N and Malone, T W",
affiliation = "Carnegie Mellon University, Tepper School of Business,
Pittsburgh, PA 15213, USA. [email protected]",
abstract = "Psychologists have repeatedly shown that a single statistical
factor--often called ``general intelligence''--emerges from
the correlations among people's performance on a wide variety
of cognitive tasks. But no one has systematically examined
whether a similar kind of ``collective intelligence'' exists
for groups of people. In two studies with 699 people, working
in groups of two to five, we find converging evidence of a
general collective intelligence factor that explains a group's
performance on a wide variety of tasks. This ``c factor'' is
not strongly correlated with the average or maximum individual
intelligence of group members but is correlated with the
average social sensitivity of group members, the equality in
distribution of conversational turn-taking, and the proportion
of females in the group.",
journal = "Science",
volume = 330,
number = 6004,
pages = "686--688",
month = "29~" # oct,
year = 2010
}
@ARTICLE{Easterbrook1991-wp,
title = "Publication bias in clinical research",
author = "Easterbrook, P J and Berlin, J A and Gopalan, R and Matthews,
D R",
affiliation = "Division of Internal Medicine, Johns Hopkins University School
of Medicine, Baltimore, MD 21205.",
abstract = "In a retrospective survey, 487 research projects approved by
the Central Oxford Research Ethics Committee between 1984 and
1987, were studied for evidence of publication bias. As of
May, 1990, 285 of the studies had been analysed by the
investigators, and 52\% of these had been published. Studies
with statistically significant results were more likely to be
published than those finding no difference between the study
groups (adjusted odds ratio [OR] 2.32; 95\% confidence
interval [CI] 1.25-4.28). Studies with significant results
were also more likely to lead to a greater number of
publications and presentations and to be published in journals
with a high citation impact factor. An increased likelihood of
publication was also associated with a high rating by the
investigator of the importance of the study results, and with
increasing sample size. The tendency towards publication bias
was greater with observational and laboratory-based
experimental studies (OR = 3.79; 95\% CI = 1.47-9.76) than
with randomised clinical trials (OR = 0.84; 95\% CI =
0.34-2.09). We have confirmed the presence of publication bias
in a cohort of clinical research studies. These findings
suggest that conclusions based only on a review of published
data should be interpreted cautiously, especially for
observational studies. Improved strategies are needed to
identify the results of unpublished as well as published
studies.",
journal = "Lancet",
volume = 337,
number = 8746,
pages = "867--872",
month = "13~" # apr,
year = 1991,
keywords = "Biomedical and Behavioral Research; Central Oxford Research
Ethics Committee; Empirical Approach"
}
@ARTICLE{Lowe1997-hq,
title = "{tRNAscan-SE}: a program for improved detection of transfer {RNA}
genes in genomic sequence",
author = "Lowe, T M and Eddy, S R",
journal = "Nucleic Acids Res.",
volume = 25,
number = 5,
pages = "955--964",
month = mar,
year = 1997,
keywords = "paulall.bib"
}
@ARTICLE{Gadbury2004-ga,
title = "Power and sample size estimation in high dimensional biology",
author = "Gadbury, G L and Page, G P and Edwards, J and others",
abstract = "Abstract Genomic scientists often test thousands of hypotheses
in a single experiment. One example is a microarray experiment
that seeks to determine differential gene expression among
experimental groups. Planning such experiments involves a
determination of ...",
journal = "Stat. Methods Med. Res.",
publisher = "smm.sagepub.com",
year = 2004
}
@ARTICLE{Minor2000-dv,
title = "Strategies for macromolecular synchrotron crystallography",
author = "Minor, W and Tomchick, D and Otwinowski, Z",
affiliation = "Department of Molecular Physiology and Biological Physics,
University of Virginia, Charlottesville, VA 22903, USA.",
journal = "Structure",
volume = 8,
number = 5,
pages = "R105--10",
month = "15~" # may,
year = 2000
}
@ARTICLE{Jones1991-ik,
title = "Improved methods for building protein models in electron
density maps and the location of errors in these models",
author = "Jones, T A and Zou, J Y and Cowan, S W and Kjeldgaard, M",
affiliation = "Department of Molecular Biology, BMC, Uppsala, Sweden.",
abstract = "Map interpretation remains a critical step in solving the
structure of a macromolecule. Errors introduced at this early
stage may persist throughout crystallographic refinement and
result in an incorrect structure. The normally quoted
crystallographic residual is often a poor description for the
quality of the model. Strategies and tools are described that
help to alleviate this problem. These simplify the
model-building process, quantify the goodness of fit of the
model on a per-residue basis and locate possible errors in
peptide and side-chain conformations.",
journal = "Acta Crystallogr. A",
volume = "47 ( Pt 2)",
pages = "110--119",
month = "1~" # mar,
year = 1991
}
@ARTICLE{Wallner2005-qi,
title = "All are not equal: a benchmark of different homology modeling
programs",
author = "Wallner, Bj{\"{o}}rn and Elofsson, Arne",
affiliation = "Stockholm Bioinformatics Center, Albanova University Center,
Stockholm University, Stockholm, Sweden. [email protected]",
abstract = "Modeling a protein structure based on a homologous structure
is a standard method in structural biology today. In this
process an alignment of a target protein sequence onto the
structure of a template(s) is used as input to a program that
constructs a 3D model. It has been shown that the most
important factor in this process is the correctness of the
alignment and the choice of the best template structure(s),
while it is generally believed that there are no major
differences between the best modeling programs. Therefore, a
large number of studies to benchmark the alignment qualities
and the selection process have been performed. However, to our
knowledge no large-scale benchmark has been performed to
evaluate the programs used to transform the alignment to a 3D
model. In this study, a benchmark of six different homology
modeling programs- Modeller, SegMod/ENCAD, SWISS-MODEL,
3D-JIGSAW, nest, and Builder-is presented. The performance of
these programs is evaluated using physiochemical correctness
and structural similarity to the correct structure. From our
analysis it can be concluded that no single modeling program
outperform the others in all tests. However, it is quite clear
that three modeling programs, Modeller, nest, and SegMod/
ENCAD, perform better than the others. Interestingly, the
fastest and oldest modeling program, SegMod/ ENCAD, performs
very well, although it was written more than 10 years ago and
has not undergone any development since. It can also be
observed that none of the homology modeling programs builds
side chains as well as a specialized program (SCWRL), and
therefore there should be room for improvement.",
journal = "Protein Sci.",
volume = 14,
number = 5,
pages = "1315--1327",
month = may,
year = 2005
}
@ARTICLE{Merton1968-cb,
title = "The {Matthew Effect in Science}",
author = "Merton, R K",
journal = "Science",
address = "Washington",
volume = 159,
number = 3810,
pages = "56--63",
year = 1968
}
@ARTICLE{Joppa2013-vj,
title = "Troubling Trends in Scientific Software Use",
author = "Joppa, L N and McInerny, G and Harper, R and
Salido, L and Takeda, K and O'Hara, K and Gavaghan,
D and Emmott, S",
abstract = "Software pervades every domain of science ( 1 -- 3 ), perhaps
nowhere more decisively than in modeling. In key scientific
areas of great societal importance, models and the software that
implement them define both how science is done and what science
is done ( 4 , 5 ). Across all science, this dependence has led
to concerns around the need for open access to software ( 6 , 7
), centered on the reproducibility of research ( 1 , 8 -- 10 ).
From fields such as high-performance computing, we learn key
insights and best practices for how to develop, standardize, and
implement software ( 11 ). Open and systematic approaches to the
development of software are essential for all sciences. But for
many scientists this is not sufficient. We describe problems
with the adoption and use of scientific software.",
journal = "Science",
publisher = "American Association for the Advancement of Science",
volume = 340,
number = 6134,
pages = "814--815",
month = "17~" # may,
year = 2013,
language = "en"
}
@ARTICLE{Fourment2008-vl,
title = "A comparison of common programming languages used in
bioinformatics",
author = "Fourment, M and Gillings, M R",
affiliation = "Department of Biological Sciences, Macquarie University,
Sydney, NSW 2109, Australia. [email protected]",
abstract = "BACKGROUND: The performance of different programming languages
has previously been benchmarked using abstract mathematical
algorithms, but not using standard bioinformatics algorithms.
We compared the memory usage and speed of execution for three
standard bioinformatics methods, implemented in programs using
one of six different programming languages. Programs for the
Sellers algorithm, the Neighbor-Joining tree construction
algorithm and an algorithm for parsing BLAST file outputs were
implemented in C, C++, C\#, Java, Perl and Python. RESULTS:
Implementations in C and C++ were fastest and used the least
memory. Programs in these languages generally contained more
lines of code. Java and C\# appeared to be a compromise
between the flexibility of Perl and Python and the fast
performance of C and C++. The relative performance of the
tested languages did not change from Windows to Linux and no
clear evidence of a faster operating system was found. Source
code and additional information are available from
http://www.bioinformatics.org/benchmark/. CONCLUSION: This
benchmark provides a comparison of six commonly used
programming languages under two different operating systems.
The overall comparison shows that a developer should choose an
appropriate language carefully, taking into account the
performance expected and the library availability for each
language.",
journal = "BMC Bioinformatics",
volume = 9,
pages = "82",
month = "5~" # feb,
year = 2008
}
@ARTICLE{Puton2014-hy,
title = "{CompaRNA}: a server for continuous benchmarking of automated
methods for {RNA} secondary structure prediction",
author = "Puton, Tomasz and Kozlowski, Lukasz P and Rother, Kristian M and
Bujnicki, Janusz M",
journal = "Nucleic Acids Res.",
volume = 42,
number = 8,
pages = "5403--5406",
month = apr,
year = 2014
}
@ARTICLE{Swenson2010-un,
title = "A simulation study comparing supertree and combined analysis
methods using {SMIDGen}",
author = "Swenson, M S and Barban{\c{c}}on, F and Warnow, T and others",
abstract = "Abstract Background: Supertree methods comprise one approach to
reconstructing large molecular phylogenies given multi-marker
datasets: trees are estimated on each marker and then combined
into a tree (the `` supertree '') on the entire set of taxa.
Supertrees can be ...",
journal = "Algorithms for Molecular Biology",
publisher = "biomedcentral.com",
year = 2010
}
@BOOK{Carroll2010-fn,
title = "Alice in {Wonderland} \& Through the {Looking Glass}",
author = "Carroll, Lewis",
publisher = "Bibliolis Books",
year = 2010
}
@ARTICLE{Bao2011-lv,
title = "Evaluation of next-generation sequencing software in mapping
and assembly",
author = "Bao, S and Jiang, R and Kwan, W and Wang,
B and Ma, X and Song, Y",
affiliation = "Department of Biochemistry, Center for Reproduction,
Development and Growth, The University of Hong Kong, Hong
Kong, Hong Kong.",
abstract = "Next-generation high-throughput DNA sequencing technologies
have advanced progressively in sequence-based genomic research
and novel biological applications with the promise of
sequencing DNA at unprecedented speed. These new
non-Sanger-based technologies feature several advantages when
compared with traditional sequencing methods in terms of
higher sequencing speed, lower per run cost and higher
accuracy. However, reads from next-generation sequencing (NGS)
platforms, such as 454/Roche, ABI/SOLiD and Illumina/Solexa,
are usually short, thereby restricting the applications of NGS
platforms in genome assembly and annotation. We presented an
overview of the challenges that these novel technologies meet
and particularly illustrated various bioinformatics attempts
on mapping and assembly for problem solving. We then compared
the performance of several programs in these two fields, and
further provided advices on selecting suitable tools for
specific biological applications.",
journal = "J. Hum. Genet.",
volume = 56,
number = 6,
pages = "406--414",
month = jun,
year = 2011
}
@ARTICLE{Tikk2010-qd,
title = "A comprehensive benchmark of kernel methods to extract
protein-protein interactions from literature",
author = "Tikk, Domonkos and Thomas, Philippe and Palaga, Peter and
Hakenberg, J{\"{o}}rg and Leser, Ulf",
affiliation = "Knowledge Management in Bioinformatics, Computer Science
Department, Humboldt-Universit{\"{a}}t zu Berlin, Berlin,
Germany. [email protected]",
abstract = "The most important way of conveying new findings in biomedical
research is scientific publication. Extraction of
protein-protein interactions (PPIs) reported in scientific
publications is one of the core topics of text mining in the
life sciences. Recently, a new class of such methods has been
proposed - convolution kernels that identify PPIs using deep
parses of sentences. However, comparing published results of
different PPI extraction methods is impossible due to the use
of different evaluation corpora, different evaluation metrics,
different tuning procedures, etc. In this paper, we study
whether the reported performance metrics are robust across
different corpora and learning settings and whether the use of
deep parsing actually leads to an increase in extraction
quality. Our ultimate goal is to identify the one method that
performs best in real-life scenarios, where information
extraction is performed on unseen text and not on specifically
prepared evaluation data. We performed a comprehensive
benchmarking of nine different methods for PPI extraction that
use convolution kernels on rich linguistic information.
Methods were evaluated on five different public corpora using
cross-validation, cross-learning, and cross-corpus evaluation.
Our study confirms that kernels using dependency trees
generally outperform kernels based on syntax trees. However,
our study also shows that only the best kernel methods can
compete with a simple rule-based approach when the evaluation
prevents information leakage between training and test
corpora. Our results further reveal that the F-score of many
approaches drops significantly if no corpus-specific parameter
optimization is applied and that methods reaching a good AUC
score often perform much worse in terms of F-score. We
conclude that for most kernels no sensible estimation of PPI
extraction performance on new text is possible, given the
current heterogeneity in evaluation data. Nevertheless, our
study shows that three kernels are clearly superior to the
other methods.",
journal = "PLoS Comput. Biol.",
volume = 6,
number = 7,
pages = "e1000837",
month = "1~" # jul,
year = 2010
}
@ARTICLE{Harzing2008-wb,
title = "Comparing the Google Scholar h-index with the {ISI} journal
impact factor",
author = "Harzing, A W and van der Wal, R",
abstract = "Abstract Publication in academic journals is a key criterion for
appointment, tenure and promotion in universities. Many
universities weigh publications according to the quality or
impact of the journal. Traditionally, journal quality has been
assessed through the ISI Journal Impact Factor (JIF).",
journal = "Research in Int. Management Products",
publisher = "harzing.com",
year = 2008
}
@ARTICLE{Moran2003-ve,
title = "Arguments for Rejecting the Sequential Bonferroni in Ecological
Studies",
author = "Moran, Matthew D",
journal = "Oikos",
publisher = "Nordic Society Oikos, Wiley",
volume = 100,
number = 2,
pages = "403--405",
year = 2003
}
@ARTICLE{Altschul2013-bv,
title = "The anatomy of successful computational biology software",
author = "Altschul, S and Demchak, B and Durbin, R and
Gentleman, R and Krzywinski, M and Li, H and
Nekrutenko, A and Robinson, J and Rasband, W and
Taylor, J and Trapnell, C",
affiliation = "National Center for Biotechnology Information, Bethesda,
Maryland.",
journal = "Nat. Biotechnol.",
volume = 31,
number = 10,
pages = "894--897",
month = oct,