Intel ARCHITECTURE IA-32 User Manual

Page 273

Advertising
background image

Optimizing for SIMD Floating-point Applications

5

5-11

y1 x1

movhps xmm7, [ecx+16]

// xmm7 = y2 x2 y1 x1

movlps xmm0, [ecx+32]

// xmm0 = -- -- y3 x3

movhps xmm0, [ecx+48]

// xmm0 = y4 x4 y3 x3

movaps xmm6, xmm7

// xmm6 = y2 x2 y1 x1

shufps xmm7, xmm0, 0x88

// xmm7 = x1 x2 x3 x4 => X

shufps xmm6, xmm0, 0xDD

// xmm6 = y1 y2 y3 y4 => Y

movlps xmm2, [ecx+8]

// xmm2 = -- -- w1 z1

movhps xmm2, [ecx+24]

// xmm2 = w2 z2 w1 z1

movlps xmm1, [ecx+40]

// xmm1 = -- -- w3 z3

movhps xmm1, [ecx+56]

// xmm1 = w4 z4 w3 z3

movaps xmm0, xmm2

// xmm0 = w2 z2 w1 z1

shufps xmm2, xmm1, 0x88

// xmm2 = z1 z2 z3 z4 => Z

shufps xmm0, xmm1, 0xDD

// xmm0 = w1 w2 w3 w4 => W

movaps [edx], xmm7

// store X

movaps [edx+16], xmm6

// store Y

movaps [edx+32], xmm2

// store Z

movaps [edx+48], xmm0

// store W

// SWIZZLE XYZ -> XXX

}

}

Example 5-3

Swizzling Data (continued)

Advertising