@article{Berrendorf2014,
  author    = {Rudolf Berrendorf},
  title     = {A Technique to Avoid Atomic Operations on Large Shared Memory Parallel Systems},
  journal   = {International Journal on Advances in Software},
  volume    = {7},
  number    = {1\&2},
  publisher = {ThinkMind},
  issn      = {1942-2628},
  pages     = {197--210},
  year      = {2014},
  abstract  = {Updating a shared data structure in a parallel program is usually done with some sort of high-level synchronization operation to ensure correctness and consistency. Such high-level synchronization operations are realized with appropriate low-level atomic synchronization instructions provided by the target processor architecture. These instructions are costly and often limited in their scalability on larger multi-core / multi-processor systems. In this paper, a technique is discussed that replaces atomic updates of a shared data structure with ordinary and cheaper read/write operations. The necessary conditions are specified that must be fulfilled to ensure overall correctness of the program despite the missing synchronization. The advantage of this technique is the reduction of access costs as well as better scalability due to the elided atomic operations. On the other hand, more work may have to be done because of the missing synchronization. Therefore, additional work is traded against costly atomic operations. A practical application is shown with level-synchronous parallel Breadth-First Search on an undirected graph, where two vertex frontiers are accessed in parallel. This application scenario is also used for an evaluation of the technique. Tests were done on four different large parallel systems with up to 64-way parallelism. It is shown that for the graph application examined, the amount of additional work caused by the missing synchronization is negligible and the performance is almost always better than that of the approach with atomic operations.},
  language  = {en}
}