 ! Unrolled accumulation into buffer1 for the current value of i.
 ! (i, imax, kmax and s_offset_a1 are defined outside this excerpt, and the
 ! excerpt may begin or end in the middle of a longer statement sequence.)
 !
 ! Every statement below has the same shape:
 !   buffer1(i+imax*(c-1)) = buffer1(i+imax*(c-1))
 !                           + work(k+(i-1)*kmax) * sphi_a(k,c+s_offset_a1)
 ! with k taking values 1..15 and c taking values 1..9 in the lines shown.
 !
 ! NOTE(review): only selected (k,c) pairs appear; presumably the omitted
 ! pairs correspond to zero entries of sphi_a -- confirm against the generator.
 ! NOTE(review): several statements update the same buffer1 element (e.g. c = 5),
 ! so if these are floating-point arrays, reordering the statements would change
 ! the summation order -- keep the order unless that is known to be acceptable.

 ! k = 1 -> c = 5, 7, 9
 buffer1(i+imax*(5-1)) = buffer1(i+imax*(5-1)) + work(1+(i-1)*kmax) * sphi_a(1,5+s_offset_a1)
 buffer1(i+imax*(7-1)) = buffer1(i+imax*(7-1)) + work(1+(i-1)*kmax) * sphi_a(1,7+s_offset_a1)
 buffer1(i+imax*(9-1)) = buffer1(i+imax*(9-1)) + work(1+(i-1)*kmax) * sphi_a(1,9+s_offset_a1)
 ! k = 2 -> c = 1, 3
 buffer1(i+imax*(1-1)) = buffer1(i+imax*(1-1)) + work(2+(i-1)*kmax) * sphi_a(2,1+s_offset_a1)
 buffer1(i+imax*(3-1)) = buffer1(i+imax*(3-1)) + work(2+(i-1)*kmax) * sphi_a(2,3+s_offset_a1)
 ! k = 3 -> c = 6, 8
 buffer1(i+imax*(6-1)) = buffer1(i+imax*(6-1)) + work(3+(i-1)*kmax) * sphi_a(3,6+s_offset_a1)
 buffer1(i+imax*(8-1)) = buffer1(i+imax*(8-1)) + work(3+(i-1)*kmax) * sphi_a(3,8+s_offset_a1)
 ! k = 4 -> c = 5, 9
 buffer1(i+imax*(5-1)) = buffer1(i+imax*(5-1)) + work(4+(i-1)*kmax) * sphi_a(4,5+s_offset_a1)
 buffer1(i+imax*(9-1)) = buffer1(i+imax*(9-1)) + work(4+(i-1)*kmax) * sphi_a(4,9+s_offset_a1)
 ! k = 5 -> c = 2, 4
 buffer1(i+imax*(2-1)) = buffer1(i+imax*(2-1)) + work(5+(i-1)*kmax) * sphi_a(5,2+s_offset_a1)
 buffer1(i+imax*(4-1)) = buffer1(i+imax*(4-1)) + work(5+(i-1)*kmax) * sphi_a(5,4+s_offset_a1)
 ! k = 6 -> c = 5, 7
 buffer1(i+imax*(5-1)) = buffer1(i+imax*(5-1)) + work(6+(i-1)*kmax) * sphi_a(6,5+s_offset_a1)
 buffer1(i+imax*(7-1)) = buffer1(i+imax*(7-1)) + work(6+(i-1)*kmax) * sphi_a(6,7+s_offset_a1)
 ! k = 7 -> c = 1, 3
 buffer1(i+imax*(1-1)) = buffer1(i+imax*(1-1)) + work(7+(i-1)*kmax) * sphi_a(7,1+s_offset_a1)
 buffer1(i+imax*(3-1)) = buffer1(i+imax*(3-1)) + work(7+(i-1)*kmax) * sphi_a(7,3+s_offset_a1)
 ! k = 8 -> c = 6, 8
 buffer1(i+imax*(6-1)) = buffer1(i+imax*(6-1)) + work(8+(i-1)*kmax) * sphi_a(8,6+s_offset_a1)
 buffer1(i+imax*(8-1)) = buffer1(i+imax*(8-1)) + work(8+(i-1)*kmax) * sphi_a(8,8+s_offset_a1)
 ! k = 9 -> c = 3
 buffer1(i+imax*(3-1)) = buffer1(i+imax*(3-1)) + work(9+(i-1)*kmax) * sphi_a(9,3+s_offset_a1)
 ! k = 10 -> c = 6
 buffer1(i+imax*(6-1)) = buffer1(i+imax*(6-1)) + work(10+(i-1)*kmax) * sphi_a(10,6 +s_offset_a1)
 ! k = 11 -> c = 5, 7, 9
 buffer1(i+imax*(5-1)) = buffer1(i+imax*(5-1)) + work(11+(i-1)*kmax) * sphi_a(11,5 +s_offset_a1)
 buffer1(i+imax*(7-1)) = buffer1(i+imax*(7-1)) + work(11+(i-1)*kmax) * sphi_a(11,7 +s_offset_a1)
 buffer1(i+imax*(9-1)) = buffer1(i+imax*(9-1)) + work(11+(i-1)*kmax) * sphi_a(11,9 +s_offset_a1)
 ! k = 12 -> c = 2, 4
 buffer1(i+imax*(2-1)) = buffer1(i+imax*(2-1)) + work(12+(i-1)*kmax) * sphi_a(12,2 +s_offset_a1)
 buffer1(i+imax*(4-1)) = buffer1(i+imax*(4-1)) + work(12+(i-1)*kmax) * sphi_a(12,4 +s_offset_a1)
 ! k = 13 -> c = 5, 7
 buffer1(i+imax*(5-1)) = buffer1(i+imax*(5-1)) + work(13+(i-1)*kmax) * sphi_a(13,5 +s_offset_a1)
 buffer1(i+imax*(7-1)) = buffer1(i+imax*(7-1)) + work(13+(i-1)*kmax) * sphi_a(13,7 +s_offset_a1)
 ! k = 14 -> c = 4
 buffer1(i+imax*(4-1)) = buffer1(i+imax*(4-1)) + work(14+(i-1)*kmax) * sphi_a(14,4 +s_offset_a1)
 ! k = 15 -> c = 5
 buffer1(i+imax*(5-1)) = buffer1(i+imax*(5-1)) + work(15+(i-1)*kmax) * sphi_a(15,5 +s_offset_a1)
