% Conference paper entry (inproceedings); DOI 10.1007/978-3-031-88711-6_25.
% NOTE(review): the exported booktitle was cut off mid-word ("... Conference on Infor").
%   It is completed to "Information Retrieval" here; any trailing part of the
%   proceedings title (acronym, venue, dates, part number) is still missing -- TODO confirm.
% NOTE(review): the export named the editors only as "Hauff, Macdonald et al. (Eds.)".
%   They now live in the editor field; given names and the remaining editors
%   were not in the export -- TODO complete.
% The "institution" field is not a standard field for this entry type (standard
%   styles ignore it); it is kept as repository metadata.
@inproceedings{HaghighatMetzlerThodukaetal.2025,
  author      = {Haghighat, Sogol and Metzler, Tim Daniel and Thoduka, Santosh and Houben, Sebastian},
  title       = {Visual Latent Captioning - Towards Verbalizing Vision Transformer Encoders},
  editor      = {Hauff and Macdonald and others},
  booktitle   = {Advances in Information Retrieval. 47th European Conference on Information Retrieval},
  pages       = {393--406},
  year        = {2025},
  isbn        = {978-3-031-88710-9},
  doi         = {10.1007/978-3-031-88711-6_25},
  institution = {Fachbereich Informatik},
  language    = {en},
}