Intel ARCHITECTURE IA-32 User Manual

Page 157

Advertising
background image

General Optimization Guidelines

2

2-85

Memory routines in the runtime library generated by Intel Compilers are
optimized across a wide range of address alignments, counter values, and
microarchitectures. In most cases, applications should take advantage of
the default memory routines provided by Intel Compilers.

Table 2-5

Using REP STOSD with Arbitrary Count Size and 4-Byte-Aligned
Destination

A ‘C’ example of Memset()

Equivalent Implementation Using REP STOSD

/*
 * Reference byte-at-a-time fill.
 *
 * Writes (char)c into each of the first `size` bytes starting at `dst`.
 * Nothing is returned; a `size` of zero stores nothing.
 */
void memset(void *dst,int c,size_t size)
{
    char *cursor = (char *)dst;
    const char fill = (char)c;
    size_t remaining;

    /* Count down instead of up; each pass stores exactly one byte. */
    for (remaining = size; remaining != 0; --remaining) {
        *cursor = fill;
        ++cursor;
    }
}

;-----------------------------------------------------------------------
; REP STOSD equivalent of the byte-loop memset() example.
; Syntax: Intel/MASM, 32-bit. Arguments are read from the stack; the
; offsets below already account for the push of edi:
;   [esp+8]  = dst   destination, 4-byte aligned
;   [esp+12] = c     fill value, only the low byte is used
;   [esp+16] = size  byte count, any value
; Scratch: eax, ecx, flags. edi is saved and restored.
; Returns with a plain ret, so the arguments stay on the stack for the
; caller to remove.
; NOTE(review): stosd/stosb only advance edi upward when the direction
; flag is clear; this code never sets or clears DF -- confirm the caller
; guarantees DF = 0.
;-----------------------------------------------------------------------
        push    edi                     ; stos* writes through edi; keep the caller's copy

        ; Replicate the fill byte into all four bytes of eax.
        movzx   eax, byte ptr [esp+12]  ; eax = 000000cc
        mov     ecx, eax
        shl     ecx, 8                  ; ecx = 0000cc00
        or      eax, ecx                ; eax = 0000cccc; must merge into eax, ecx is reloaded next
        mov     ecx, eax
        shl     ecx, 16                 ; ecx = cccc0000
        or      eax, ecx                ; eax = cccccccc

        mov     edi, [esp+8]            ; 4-byte aligned
        mov     ecx, [esp+16]           ; byte count
        shr     ecx, 2                  ; do dword
        cmp     ecx, 127
        jle     _main                   ; <= 127 dwords: skip the alignment peel
                                        ; (signed compare is safe: shr cleared the top bits)
        test    edi, 4
        jz      _main                   ; bit 2 clear: already 8-byte aligned
        stosd                           ; peel off one dword
        dec     ecx

_main:                                  ; 8-byte aligned when the count exceeded 127 dwords
        rep     stosd
        mov     ecx, [esp + 16]
        and     ecx, 3                  ; do count <= 3
        rep     stosb                   ; optimal with <= 3; al still holds the fill byte

        pop     edi
        ret

Advertising