Intel ARCHITECTURE IA-32 User Manual

Page 282

Advertising
background image

IA-32 Intel® Architecture Optimization

5-20

// START HORIZONTAL ADD

movaps xmm5, xmm0

// xmm5= A1,A2,A3,A4

movlhps xmm5, xmm1

// xmm5= A1,A2,B1,B2

movhlps xmm1, xmm0

// xmm1= A3,A4,B3,B4

addps xmm5, xmm1

// xmm5= A1+A3,A2+A4,B1+B3,B2+B4

movaps xmm4, xmm2

movlhps xmm2, xmm3

// xmm2= C1,C2,D1,D2

movhlps xmm3, xmm4

// xmm3= C3,C4,D3,D4

addps xmm3, xmm2

// xmm3= C1+C3,C2+C4,D1+D3,D2+D4

movaps xmm6, xmm3

// xmm6= C1+C3,C2+C4,D1+D3,D2+D4

shufps xmm3, xmm5, 0xDD

// xmm3= C2+C4,D2+D4,A2+A4,B2+B4 (shufps writes xmm3; xmm6 is unchanged)

shufps xmm5, xmm6, 0x88

// xmm5= A1+A3,B1+B3,C1+C3,D1+D3

addps xmm6, xmm5

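// intended: xmm6= D,C,B,A (the four horizontal sums)
// NOTE(review): xmm6 is still the unshuffled copy C1+C3,C2+C4,D1+D3,D2+D4 here, so this add mixes lanes and does not yield the four sums; the operands of the two shufps above need checking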

// END HORIZONTAL ADD

movaps [edx], xmm6

}

}

Example 5-9

Horizontal Add Using movhlps/movlhps (continued)

Advertising