@phdthesis{Padmanabhan2022,
  type        = {Master Thesis},
  author      = {Deepan Chakravarthi Padmanabhan},
  title       = {DExT: Detector Explanation Toolkit for Explaining Multiple Detections Using Saliency Methods},
  isbn        = {978-3-96043-101-5},
  issn        = {1869-5272},
  doi         = {10.18418/978-3-96043-101-5},
  url         = {https://nbn-resolving.org/urn:nbn:de:hbz:1044-opus-62460},
  institution = {Fachbereich Informatik},
  series      = {Technical Report / University of Applied Sciences Bonn-Rhein-Sieg. Department of Computer Science},
  pages       = {xviii, 159},
  year        = {2022},
  abstract    = {As cameras are ubiquitous in autonomous systems, object detection is a crucial task. Object detectors are widely used in applications such as autonomous driving, healthcare, and robotics. Given an image, an object detector outputs both the bounding box coordinates and the classification probabilities for each detected object. State-of-the-art detectors are treated as black boxes due to their highly non-linear internal computations. Even with unprecedented advancements in detector performance, the inability to explain how their outputs are generated limits their use, particularly in safety-critical applications. It is therefore crucial to explain the reason behind each detector decision in order to gain user trust, enhance detector performance, and analyze detector failures. Previous work fails to explain and evaluate both bounding box and classification decisions individually for various detectors. Moreover, no existing tool explains each detector decision, evaluates the explanations, and identifies the reasons for detector failures. This restricts the flexibility to analyze detectors. The main contribution presented here is an open-source Detector Explanation Toolkit (DExT), which is used to explain detector decisions, evaluate the explanations, and analyze detector errors. The detector decisions are explained visually by highlighting the image pixels that most influence a particular decision. The toolkit implements the proposed approach to generate a holistic explanation for all detector decisions using selected gradient-based explanation methods. To the author's knowledge, this is the first work to conduct extensive qualitative and novel quantitative evaluations of different explanation methods across various detectors. The qualitative evaluation incorporates a visual analysis of the explanations carried out by the author as well as a human-centric evaluation. The human-centric evaluation includes a user study to understand user trust in the explanations generated by various explanation methods for different detectors. Four multi-object visualization methods are provided to merge the explanations of multiple objects detected in an image, together with the corresponding detector outputs, into a single image. Finally, DExT implements the procedure to analyze detector failures using the formulated approach. The visual analysis illustrates that the ability to explain a model depends more on the model itself than on the actual ability of the explanation method. In addition, the explanations are affected by the object explained, the decision explained, the detector architecture, the training data labels, and the model parameters. The results of the quantitative evaluation show that the Single Shot MultiBox Detector (SSD) is explained more faithfully than the other detectors regardless of the explanation method. In addition, no single explanation method generates more faithful explanations than the others for both the bounding box and the classification decision across different detectors. Both the quantitative and the human-centric evaluations identify SmoothGrad with Guided Backpropagation (GBP) as providing the most trustworthy explanations among the selected methods across all detectors. Finally, a convex polygon-based multi-object visualization method provides a more human-understandable visualization than the other methods. The author expects that DExT will motivate practitioners to evaluate object detectors from the interpretability perspective by explaining both bounding box and classification decisions.},
  language    = {en}
}