Example 5-12 – Intel ARCHITECTURE IA-32 User Manual

Page 287

Advertising
background image

Optimizing for SIMD Floating-point Applications

5

5-25

Example 5-12 Division of Two Pairs of Single-precision Complex Numbers

;-----------------------------------------------------------------------
; Division of two pairs of single-precision complex numbers (SSE3):
;
;   (ak + i*bk) / (ck + i*dk)
;       = ((ak*ck + bk*dk) + i*(bk*ck - ak*dk)) / (ck*ck + dk*dk),  k = 0, 1
;
; Lane contents in the comments below are listed from the highest element
; (bits 127:96) down to the lowest (bits 31:0). Each complex value occupies
; two adjacent lanes with the imaginary part in the higher lane:
;   Src1 = b1, a1, b0, a0        src2 = d1, c1, d0, c0
;
; In:    Src1, src2 - the listing's placeholder source operands (defined
;        outside this fragment). NOTE(review): if they are memory operands
;        they must be 16-byte aligned, as movaps/movshdup/movsldup require.
; Out:   xmm0 = quotients, same imag/real lane layout as the inputs
; Clobb: xmm1, xmm2
;-----------------------------------------------------------------------

SWAP_PAIRS  equ 0B1h                    ; shufps selector 10_11_00_01b:
                                        ; swap the two lanes of each pair

        movshdup xmm0, Src1             ; imaginary parts: b1, b1, b0, b0
        movaps   xmm1, src2             ; 2nd operand pair: d1, c1, d0, c0
        mulps    xmm0, xmm1             ; b1d1, b1c1, b0d0, b0c0
        shufps   xmm1, xmm1, SWAP_PAIRS ; swap real/imag: c1, d1, c0, d0
        movsldup xmm2, Src1             ; real parts: a1, a1, a0, a0
        mulps    xmm2, xmm1             ; a1c1, a1d1, a0c0, a0d0

        ; addsubps adds in the odd lanes and subtracts in the even lanes,
        ; yielding the (unswapped) numerators of the quotient.
        addsubps xmm0, xmm2             ; a1c1+b1d1, b1c1-a1d1,
                                        ; a0c0+b0d0, b0c0-a0d0

        ; Denominator |c + i*d|^2, replicated into both lanes of each pair.
        mulps    xmm1, xmm1             ; c1c1, d1d1, c0c0, d0d0
        movaps   xmm2, xmm1             ; copy:  c1c1, d1d1, c0c0, d0d0
        shufps   xmm2, xmm2, SWAP_PAIRS ; d1d1, c1c1, d0d0, c0c0
        addps    xmm2, xmm1             ; c1c1+d1d1, c1c1+d1d1,
                                        ; c0c0+d0d0, c0c0+d0d0

        divps    xmm0, xmm2             ; numerators / denominators
        shufps   xmm0, xmm0, SWAP_PAIRS ; restore imag-high/real-low order:
                                        ; (b1c1-a1d1)/(c1c1+d1d1),
                                        ; (a1c1+b1d1)/(c1c1+d1d1),
                                        ; (b0c0-a0d0)/(c0c0+d0d0),
                                        ; (a0c0+b0d0)/(c0c0+d0d0)

Advertising