% Journal article (IntSys vol. 9, no. 3 and 4, 2016) on SpMV kernel optimization.
% The venue is stored in the journal field, which the article entry type requires;
% the abstract is the published text, kept verbatim.
@article{BerrendorfWeierstallMannuss2016,
  author       = {Berrendorf, Rudolf and Weierstall, Max and Mannuss, Florian},
  title        = {{SpMV} Runtime Improvements with Program Optimization Techniques on Different Abstraction Levels},
  journal      = {International Journal On Advances in Intelligent Systems},
  shortjournal = {IntSys},
  volume       = {9},
  number       = {3\&4},
  pages        = {417--429},
  year         = {2016},
  publisher    = {ThinkMind},
  issn         = {1942-2679},
  abstract     = {The multiplication of a sparse matrix with a dense vector is a performance critical computational kernel in many applications, especially in natural and engineering sciences. To speed up this operation, many optimization techniques have been developed in the past, mainly focusing on the data layout for the sparse matrix. Strongly related to the data layout is the program code for the multiplication. But even for a fixed data layout with an accommodated kernel, there are several alternatives for program optimizations. This paper discusses a spectrum of program optimization techniques on different abstraction layers for six different sparse matrix data format and kernels. At the one end of the spectrum, compiler options can be used that hide from the programmer all optimizations done by the compiler internally. On the other end of the spectrum, a multiplication kernel can be programmed that use highly sophisticated intrinsics on an assembler level that ask for a programmer with a deep understanding of processor architectures. These special instructions can be used to efficiently utilize hardware features in processors like vector units that have the potential to speed up sparse matrix computations. The paper compares the programming effort and required knowledge level for certain program optimizations in relation to the gained runtime improvements.},
  language     = {en},
}