Example 2-21, Example 2-22 – Intel ARCHITECTURE IA-32 User Manual

Page 126

Advertising
background image

IA-32 Intel® Architecture Optimization

2-54

Example 2-21 Non-temporal Stores and 64-byte Bus Write Transactions

Example 2-22 Non-temporal Stores and Partial Bus Write Transactions

#define STRIDESIZE 256
; Example 2-21: each pass issues four 16-byte non-temporal stores that
; together cover 64 consecutive bytes, then skips ahead by STRIDESIZE.
;
; Register roles:
;   ecx  = address taken from p64byte_Aligned (presumably 64-byte aligned,
;          per its name -- confirm at the definition)
;   edx  = ARRAY_LEN, the bound the byte offset is compared against
;   eax  = running byte offset from ecx, advanced by STRIDESIZE per pass
;   xmm0 = the 16-byte value stored; it is not initialized in this fragment
;
; The test is at the bottom, so the body always runs at least once.
        lea     ecx, p64byte_Aligned
        mov     edx, ARRAY_LEN
        xor     eax, eax                ; offset = 0
slloop:
        movntps XMMWORD ptr [ecx + eax], xmm0
        movntps XMMWORD ptr [ecx + eax + 16], xmm0
        movntps XMMWORD ptr [ecx + eax + 32], xmm0
        movntps XMMWORD ptr [ecx + eax + 48], xmm0
        ; 64 bytes are written in one bus transaction
        add     eax, STRIDESIZE
        cmp     eax, edx
        jb      slloop                  ; unsigned compare: offsets and lengths are
                                        ; never negative (was jl, which misreads
                                        ; bounds >= 2^31 as negative)

#define STRIDESIZE 256

; Example 2-22: each pass issues only three 16-byte non-temporal stores
; (48 bytes), then advances the offset by STRIDESIZE.
; ecx = address taken from p64byte_Aligned, edx = ARRAY_LEN (loop bound),
; eax = running byte offset. xmm0 is not initialized in this fragment.
Lea ecx, p64byte_Aligned

Mov edx, ARRAY_LEN

Xor eax, eax

slloop:

; Stores cover offsets +0, +16 and +32 from ecx + eax; nothing is stored
; at +48 in this variant.
movntps XMMWORD ptr [ecx + eax], xmm0

movntps XMMWORD ptr [ecx + eax+16], xmm0

movntps XMMWORD ptr [ecx + eax+32], xmm0

; Storing 48 bytes results in 6 partial bus transactions

add eax, STRIDESIZE

; Compare the new offset against the bound held in edx.
; NOTE(review): the conditional branch back to slloop is not visible after
; this compare -- presumably lost when the page was extracted; confirm
; against the original listing before assembling.
cmp eax, edx

Advertising