@techreport{Manivannan2020,
  author       = {Manivannan, Iswariya},
  title        = {A Comparative Study of Uncertainty Estimation Methods in Deep Learning Based Classification Models},
  organization = {Bosch Center for Artificial Intelligence; Bonn-Aachen International Center for Information Technology (b-it)},
  isbn         = {978-3-96043-085-8},
  issn         = {1869-5272},
  doi          = {10.18418/978-3-96043-085-8},
  institution  = {Fachbereich Informatik},
  series       = {Technical Report / Hochschule Bonn-Rhein-Sieg University of Applied Sciences, Department of Computer Science},
  number       = {04-2020},
  pages        = {viii, 77},
  year         = {2020},
  abstract     = {Deep learning models produce overconfident predictions even for misclassified data. This work aims to improve the safety guarantees of software-intensive systems that use deep learning based classification models for decision making by performing a comparative evaluation of different uncertainty estimation methods to identify possible misclassifications. Uncertainty estimation methods applicable to deep learning models are reviewed, and those that can be seamlessly integrated into existing deployed deep learning architectures are selected for evaluation. The selected methods, deep ensembles, test-time data augmentation, and Monte Carlo dropout with its variants, are empirically evaluated on two standard datasets (CIFAR-10 and CIFAR-100) and two custom classification datasets (optical inspection and RoboCup@Work). A relative ranking of the methods is provided by evaluating the deep learning classifiers on aspects such as uncertainty quality, classifier performance, and calibration. Standard metrics such as entropy, cross-entropy, mutual information, and variance, combined with a rank-histogram-based method that identifies uncertain predictions by thresholding on these metrics, are used to evaluate uncertainty quality. The results indicate that Monte Carlo dropout combined with test-time data augmentation outperforms all other methods, identifying more than 95\% of the misclassifications and representing uncertainty in the highest number of test-set samples. It also yields better classifier performance and calibration in terms of higher accuracy and lower Expected Calibration Error (ECE), respectively. A Python-based uncertainty estimation library for training and real-time uncertainty estimation of deep learning based classification models is also developed.},
  language     = {en}
}