From 554fd8c5195424bdbcabf5de30fdc183aba391bd Mon Sep 17 00:00:00 2001 From: upstream source tree Date: Sun, 15 Mar 2015 20:14:05 -0400 Subject: obtained gcc-4.6.4.tar.bz2 from upstream website; verified gcc-4.6.4.tar.bz2.sig; imported gcc-4.6.4 source tree from verified upstream tarball. downloading a git-generated archive based on the 'upstream' tag should provide you with a source tree that is binary identical to the one extracted from the above tarball. if you have obtained the source via the command 'git clone', however, do note that line-endings of files in your working directory might differ from line-endings of the respective files in the upstream repository. --- zlib/contrib/masmx86/gvmat32.asm | 972 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 972 insertions(+) create mode 100644 zlib/contrib/masmx86/gvmat32.asm (limited to 'zlib/contrib/masmx86/gvmat32.asm') diff --git a/zlib/contrib/masmx86/gvmat32.asm b/zlib/contrib/masmx86/gvmat32.asm new file mode 100644 index 000000000..874bb2d48 --- /dev/null +++ b/zlib/contrib/masmx86/gvmat32.asm @@ -0,0 +1,972 @@ +; gvmat32.asm -- Asm portion of the optimized longest_match for 32 bits x86 +; Copyright (C) 1995-1996 Jean-loup Gailly and Gilles Vollant. +; File written by Gilles Vollant, by modifiying the longest_match +; from Jean-loup Gailly in deflate.c +; +; http://www.zlib.net +; http://www.winimage.com/zLibDll +; http://www.muppetlabs.com/~breadbox/software/assembly.html +; +; For Visual C++ 4.x and higher and ML 6.x and higher +; ml.exe is in directory \MASM611C of Win95 DDK +; ml.exe is also distributed in http://www.masm32.com/masmdl.htm +; and in VC++2003 toolkit at http://msdn.microsoft.com/visualc/vctoolkit2003/ +; +; this file contain two implementation of longest_match +; +; longest_match_7fff : written 1996 by Gilles Vollant optimized for +; first Pentium. Assume s->w_mask == 0x7fff +; longest_match_686 : written by Brian raiter (1998), optimized for Pentium Pro +; +; for using an seembly version of longest_match, you need define ASMV in project +; There is two way in using gvmat32.asm +; +; A) Suggested method +; if you want include both longest_match_7fff and longest_match_686 +; compile the asm file running +; ml /coff /Zi /Flgvmat32.lst /c gvmat32.asm +; and include gvmat32c.c in your project +; if you have an old cpu (386,486 or first Pentium) and s->w_mask==0x7fff, +; longest_match_7fff will be used +; if you have a more modern CPU (Pentium Pro, II and higher) +; longest_match_686 will be used +; on old cpu with s->w_mask!=0x7fff, longest_match_686 will be used, +; but this is not a sitation you'll find often +; +; B) Alternative +; if you are not interresed in old cpu performance and want the smaller +; binaries possible +; +; compile the asm file running +; ml /coff /Zi /c /Flgvmat32.lst /DNOOLDPENTIUMCODE gvmat32.asm +; and do not include gvmat32c.c in your project (ou define also +; NOOLDPENTIUMCODE) +; +; note : as I known, longest_match_686 is very faster than longest_match_7fff +; on pentium Pro/II/III, faster (but less) in P4, but it seem +; longest_match_7fff can be faster (very very litte) on AMD Athlon64/K8 +; +; see below : zlib1222add must be adjuster if you use a zlib version < 1.2.2.2 + +;uInt longest_match_7fff(s, cur_match) +; deflate_state *s; +; IPos cur_match; /* current match */ + + NbStack equ 76 + cur_match equ dword ptr[esp+NbStack-0] + str_s equ dword ptr[esp+NbStack-4] +; 5 dword on top (ret,ebp,esi,edi,ebx) + adrret equ dword ptr[esp+NbStack-8] + pushebp equ dword ptr[esp+NbStack-12] + pushedi equ dword ptr[esp+NbStack-16] + pushesi equ dword ptr[esp+NbStack-20] + pushebx equ dword ptr[esp+NbStack-24] + + chain_length equ dword ptr [esp+NbStack-28] + limit equ dword ptr [esp+NbStack-32] + best_len equ dword ptr [esp+NbStack-36] + window equ dword ptr [esp+NbStack-40] + prev equ dword ptr [esp+NbStack-44] + scan_start equ word ptr [esp+NbStack-48] + wmask equ dword ptr [esp+NbStack-52] + match_start_ptr equ dword ptr [esp+NbStack-56] + nice_match equ dword ptr [esp+NbStack-60] + scan equ dword ptr [esp+NbStack-64] + + windowlen equ dword ptr [esp+NbStack-68] + match_start equ dword ptr [esp+NbStack-72] + strend equ dword ptr [esp+NbStack-76] + NbStackAdd equ (NbStack-24) + + .386p + + name gvmatch + .MODEL FLAT + + + +; all the +zlib1222add offsets are due to the addition of fields +; in zlib in the deflate_state structure since the asm code was first written +; (if you compile with zlib 1.0.4 or older, use "zlib1222add equ (-4)"). +; (if you compile with zlib between 1.0.5 and 1.2.2.1, use "zlib1222add equ 0"). +; if you compile with zlib 1.2.2.2 or later , use "zlib1222add equ 8"). + + zlib1222add equ 8 + +; Note : these value are good with a 8 bytes boundary pack structure + dep_chain_length equ 74h+zlib1222add + dep_window equ 30h+zlib1222add + dep_strstart equ 64h+zlib1222add + dep_prev_length equ 70h+zlib1222add + dep_nice_match equ 88h+zlib1222add + dep_w_size equ 24h+zlib1222add + dep_prev equ 38h+zlib1222add + dep_w_mask equ 2ch+zlib1222add + dep_good_match equ 84h+zlib1222add + dep_match_start equ 68h+zlib1222add + dep_lookahead equ 6ch+zlib1222add + + +_TEXT segment + +IFDEF NOUNDERLINE + IFDEF NOOLDPENTIUMCODE + public longest_match + public match_init + ELSE + public longest_match_7fff + public cpudetect32 + public longest_match_686 + ENDIF +ELSE + IFDEF NOOLDPENTIUMCODE + public _longest_match + public _match_init + ELSE + public _longest_match_7fff + public _cpudetect32 + public _longest_match_686 + ENDIF +ENDIF + + MAX_MATCH equ 258 + MIN_MATCH equ 3 + MIN_LOOKAHEAD equ (MAX_MATCH+MIN_MATCH+1) + + + +IFNDEF NOOLDPENTIUMCODE +IFDEF NOUNDERLINE +longest_match_7fff proc near +ELSE +_longest_match_7fff proc near +ENDIF + + mov edx,[esp+4] + + + + push ebp + push edi + push esi + push ebx + + sub esp,NbStackAdd + +; initialize or check the variables used in match.asm. + mov ebp,edx + +; chain_length = s->max_chain_length +; if (prev_length>=good_match) chain_length >>= 2 + mov edx,[ebp+dep_chain_length] + mov ebx,[ebp+dep_prev_length] + cmp [ebp+dep_good_match],ebx + ja noshr + shr edx,2 +noshr: +; we increment chain_length because in the asm, the --chain_lenght is in the beginning of the loop + inc edx + mov edi,[ebp+dep_nice_match] + mov chain_length,edx + mov eax,[ebp+dep_lookahead] + cmp eax,edi +; if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead; + jae nolookaheadnicematch + mov edi,eax +nolookaheadnicematch: +; best_len = s->prev_length + mov best_len,ebx + +; window = s->window + mov esi,[ebp+dep_window] + mov ecx,[ebp+dep_strstart] + mov window,esi + + mov nice_match,edi +; scan = window + strstart + add esi,ecx + mov scan,esi +; dx = *window + mov dx,word ptr [esi] +; bx = *(window+best_len-1) + mov bx,word ptr [esi+ebx-1] + add esi,MAX_MATCH-1 +; scan_start = *scan + mov scan_start,dx +; strend = scan + MAX_MATCH-1 + mov strend,esi +; bx = scan_end = *(window+best_len-1) + +; IPos limit = s->strstart > (IPos)MAX_DIST(s) ? +; s->strstart - (IPos)MAX_DIST(s) : NIL; + + mov esi,[ebp+dep_w_size] + sub esi,MIN_LOOKAHEAD +; here esi = MAX_DIST(s) + sub ecx,esi + ja nodist + xor ecx,ecx +nodist: + mov limit,ecx + +; prev = s->prev + mov edx,[ebp+dep_prev] + mov prev,edx + +; + mov edx,dword ptr [ebp+dep_match_start] + mov bp,scan_start + mov eax,cur_match + mov match_start,edx + + mov edx,window + mov edi,edx + add edi,best_len + mov esi,prev + dec edi +; windowlen = window + best_len -1 + mov windowlen,edi + + jmp beginloop2 + align 4 + +; here, in the loop +; eax = ax = cur_match +; ecx = limit +; bx = scan_end +; bp = scan_start +; edi = windowlen (window + best_len -1) +; esi = prev + + +;// here; chain_length <=16 +normalbeg0add16: + add chain_length,16 + jz exitloop +normalbeg0: + cmp word ptr[edi+eax],bx + je normalbeg2noroll +rcontlabnoroll: +; cur_match = prev[cur_match & wmask] + and eax,7fffh + mov ax,word ptr[esi+eax*2] +; if cur_match > limit, go to exitloop + cmp ecx,eax + jnb exitloop +; if --chain_length != 0, go to exitloop + dec chain_length + jnz normalbeg0 + jmp exitloop + +normalbeg2noroll: +; if (scan_start==*(cur_match+window)) goto normalbeg2 + cmp bp,word ptr[edx+eax] + jne rcontlabnoroll + jmp normalbeg2 + +contloop3: + mov edi,windowlen + +; cur_match = prev[cur_match & wmask] + and eax,7fffh + mov ax,word ptr[esi+eax*2] +; if cur_match > limit, go to exitloop + cmp ecx,eax +jnbexitloopshort1: + jnb exitloop +; if --chain_length != 0, go to exitloop + + +; begin the main loop +beginloop2: + sub chain_length,16+1 +; if chain_length <=16, don't use the unrolled loop + jna normalbeg0add16 + +do16: + cmp word ptr[edi+eax],bx + je normalbeg2dc0 + +maccn MACRO lab + and eax,7fffh + mov ax,word ptr[esi+eax*2] + cmp ecx,eax + jnb exitloop + cmp word ptr[edi+eax],bx + je lab + ENDM + +rcontloop0: + maccn normalbeg2dc1 + +rcontloop1: + maccn normalbeg2dc2 + +rcontloop2: + maccn normalbeg2dc3 + +rcontloop3: + maccn normalbeg2dc4 + +rcontloop4: + maccn normalbeg2dc5 + +rcontloop5: + maccn normalbeg2dc6 + +rcontloop6: + maccn normalbeg2dc7 + +rcontloop7: + maccn normalbeg2dc8 + +rcontloop8: + maccn normalbeg2dc9 + +rcontloop9: + maccn normalbeg2dc10 + +rcontloop10: + maccn short normalbeg2dc11 + +rcontloop11: + maccn short normalbeg2dc12 + +rcontloop12: + maccn short normalbeg2dc13 + +rcontloop13: + maccn short normalbeg2dc14 + +rcontloop14: + maccn short normalbeg2dc15 + +rcontloop15: + and eax,7fffh + mov ax,word ptr[esi+eax*2] + cmp ecx,eax + jnb exitloop + + sub chain_length,16 + ja do16 + jmp normalbeg0add16 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +normbeg MACRO rcontlab,valsub +; if we are here, we know that *(match+best_len-1) == scan_end + cmp bp,word ptr[edx+eax] +; if (match != scan_start) goto rcontlab + jne rcontlab +; calculate the good chain_length, and we'll compare scan and match string + add chain_length,16-valsub + jmp iseq + ENDM + + +normalbeg2dc11: + normbeg rcontloop11,11 + +normalbeg2dc12: + normbeg short rcontloop12,12 + +normalbeg2dc13: + normbeg short rcontloop13,13 + +normalbeg2dc14: + normbeg short rcontloop14,14 + +normalbeg2dc15: + normbeg short rcontloop15,15 + +normalbeg2dc10: + normbeg rcontloop10,10 + +normalbeg2dc9: + normbeg rcontloop9,9 + +normalbeg2dc8: + normbeg rcontloop8,8 + +normalbeg2dc7: + normbeg rcontloop7,7 + +normalbeg2dc6: + normbeg rcontloop6,6 + +normalbeg2dc5: + normbeg rcontloop5,5 + +normalbeg2dc4: + normbeg rcontloop4,4 + +normalbeg2dc3: + normbeg rcontloop3,3 + +normalbeg2dc2: + normbeg rcontloop2,2 + +normalbeg2dc1: + normbeg rcontloop1,1 + +normalbeg2dc0: + normbeg rcontloop0,0 + + +; we go in normalbeg2 because *(ushf*)(match+best_len-1) == scan_end + +normalbeg2: + mov edi,window + + cmp bp,word ptr[edi+eax] + jne contloop3 ; if *(ushf*)match != scan_start, continue + +iseq: +; if we are here, we know that *(match+best_len-1) == scan_end +; and (match == scan_start) + + mov edi,edx + mov esi,scan ; esi = scan + add edi,eax ; edi = window + cur_match = match + + mov edx,[esi+3] ; compare manually dword at match+3 + xor edx,[edi+3] ; and scan +3 + + jz begincompare ; if equal, go to long compare + +; we will determine the unmatch byte and calculate len (in esi) + or dl,dl + je eq1rr + mov esi,3 + jmp trfinval +eq1rr: + or dx,dx + je eq1 + + mov esi,4 + jmp trfinval +eq1: + and edx,0ffffffh + jz eq11 + mov esi,5 + jmp trfinval +eq11: + mov esi,6 + jmp trfinval + +begincompare: + ; here we now scan and match begin same + add edi,6 + add esi,6 + mov ecx,(MAX_MATCH-(2+4))/4 ; scan for at most MAX_MATCH bytes + repe cmpsd ; loop until mismatch + + je trfin ; go to trfin if not unmatch +; we determine the unmatch byte + sub esi,4 + mov edx,[edi-4] + xor edx,[esi] + + or dl,dl + jnz trfin + inc esi + + or dx,dx + jnz trfin + inc esi + + and edx,0ffffffh + jnz trfin + inc esi + +trfin: + sub esi,scan ; esi = len +trfinval: +; here we have finised compare, and esi contain len of equal string + cmp esi,best_len ; if len > best_len, go newbestlen + ja short newbestlen +; now we restore edx, ecx and esi, for the big loop + mov esi,prev + mov ecx,limit + mov edx,window + jmp contloop3 + +newbestlen: + mov best_len,esi ; len become best_len + + mov match_start,eax ; save new position as match_start + cmp esi,nice_match ; if best_len >= nice_match, exit + jae exitloop + mov ecx,scan + mov edx,window ; restore edx=window + add ecx,esi + add esi,edx + + dec esi + mov windowlen,esi ; windowlen = window + best_len-1 + mov bx,[ecx-1] ; bx = *(scan+best_len-1) = scan_end + +; now we restore ecx and esi, for the big loop : + mov esi,prev + mov ecx,limit + jmp contloop3 + +exitloop: +; exit : s->match_start=match_start + mov ebx,match_start + mov ebp,str_s + mov ecx,best_len + mov dword ptr [ebp+dep_match_start],ebx + mov eax,dword ptr [ebp+dep_lookahead] + cmp ecx,eax + ja minexlo + mov eax,ecx +minexlo: +; return min(best_len,s->lookahead) + +; restore stack and register ebx,esi,edi,ebp + add esp,NbStackAdd + + pop ebx + pop esi + pop edi + pop ebp + ret +InfoAuthor: +; please don't remove this string ! +; Your are free use gvmat32 in any fre or commercial apps if you don't remove the string in the binary! + db 0dh,0ah,"GVMat32 optimised assembly code written 1996-98 by Gilles Vollant",0dh,0ah + + + +IFDEF NOUNDERLINE +longest_match_7fff endp +ELSE +_longest_match_7fff endp +ENDIF + + +IFDEF NOUNDERLINE +cpudetect32 proc near +ELSE +_cpudetect32 proc near +ENDIF + + push ebx + + pushfd ; push original EFLAGS + pop eax ; get original EFLAGS + mov ecx, eax ; save original EFLAGS + xor eax, 40000h ; flip AC bit in EFLAGS + push eax ; save new EFLAGS value on stack + popfd ; replace current EFLAGS value + pushfd ; get new EFLAGS + pop eax ; store new EFLAGS in EAX + xor eax, ecx ; can’t toggle AC bit, processor=80386 + jz end_cpu_is_386 ; jump if 80386 processor + push ecx + popfd ; restore AC bit in EFLAGS first + + pushfd + pushfd + pop ecx + + mov eax, ecx ; get original EFLAGS + xor eax, 200000h ; flip ID bit in EFLAGS + push eax ; save new EFLAGS value on stack + popfd ; replace current EFLAGS value + pushfd ; get new EFLAGS + pop eax ; store new EFLAGS in EAX + popfd ; restore original EFLAGS + xor eax, ecx ; can’t toggle ID bit, + je is_old_486 ; processor=old + + mov eax,1 + db 0fh,0a2h ;CPUID + +exitcpudetect: + pop ebx + ret + +end_cpu_is_386: + mov eax,0300h + jmp exitcpudetect + +is_old_486: + mov eax,0400h + jmp exitcpudetect + +IFDEF NOUNDERLINE +cpudetect32 endp +ELSE +_cpudetect32 endp +ENDIF +ENDIF + +MAX_MATCH equ 258 +MIN_MATCH equ 3 +MIN_LOOKAHEAD equ (MAX_MATCH + MIN_MATCH + 1) +MAX_MATCH_8_ equ ((MAX_MATCH + 7) AND 0FFF0h) + + +;;; stack frame offsets + +chainlenwmask equ esp + 0 ; high word: current chain len + ; low word: s->wmask +window equ esp + 4 ; local copy of s->window +windowbestlen equ esp + 8 ; s->window + bestlen +scanstart equ esp + 16 ; first two bytes of string +scanend equ esp + 12 ; last two bytes of string +scanalign equ esp + 20 ; dword-misalignment of string +nicematch equ esp + 24 ; a good enough match size +bestlen equ esp + 28 ; size of best match so far +scan equ esp + 32 ; ptr to string wanting match + +LocalVarsSize equ 36 +; saved ebx byte esp + 36 +; saved edi byte esp + 40 +; saved esi byte esp + 44 +; saved ebp byte esp + 48 +; return address byte esp + 52 +deflatestate equ esp + 56 ; the function arguments +curmatch equ esp + 60 + +;;; Offsets for fields in the deflate_state structure. These numbers +;;; are calculated from the definition of deflate_state, with the +;;; assumption that the compiler will dword-align the fields. (Thus, +;;; changing the definition of deflate_state could easily cause this +;;; program to crash horribly, without so much as a warning at +;;; compile time. Sigh.) + +dsWSize equ 36+zlib1222add +dsWMask equ 44+zlib1222add +dsWindow equ 48+zlib1222add +dsPrev equ 56+zlib1222add +dsMatchLen equ 88+zlib1222add +dsPrevMatch equ 92+zlib1222add +dsStrStart equ 100+zlib1222add +dsMatchStart equ 104+zlib1222add +dsLookahead equ 108+zlib1222add +dsPrevLen equ 112+zlib1222add +dsMaxChainLen equ 116+zlib1222add +dsGoodMatch equ 132+zlib1222add +dsNiceMatch equ 136+zlib1222add + + +;;; match.asm -- Pentium-Pro-optimized version of longest_match() +;;; Written for zlib 1.1.2 +;;; Copyright (C) 1998 Brian Raiter +;;; You can look at http://www.muppetlabs.com/~breadbox/software/assembly.html +;;; +;;; This is free software; you can redistribute it and/or modify it +;;; under the terms of the GNU General Public License. + +;GLOBAL _longest_match, _match_init + + +;SECTION .text + +;;; uInt longest_match(deflate_state *deflatestate, IPos curmatch) + +;_longest_match: +IFDEF NOOLDPENTIUMCODE + IFDEF NOUNDERLINE + longest_match proc near + ELSE + _longest_match proc near + ENDIF +ELSE + IFDEF NOUNDERLINE + longest_match_686 proc near + ELSE + _longest_match_686 proc near + ENDIF +ENDIF + +;;; Save registers that the compiler may be using, and adjust esp to +;;; make room for our stack frame. + + push ebp + push edi + push esi + push ebx + sub esp, LocalVarsSize + +;;; Retrieve the function arguments. ecx will hold cur_match +;;; throughout the entire function. edx will hold the pointer to the +;;; deflate_state structure during the function's setup (before +;;; entering the main loop. + + mov edx, [deflatestate] + mov ecx, [curmatch] + +;;; uInt wmask = s->w_mask; +;;; unsigned chain_length = s->max_chain_length; +;;; if (s->prev_length >= s->good_match) { +;;; chain_length >>= 2; +;;; } + + mov eax, [edx + dsPrevLen] + mov ebx, [edx + dsGoodMatch] + cmp eax, ebx + mov eax, [edx + dsWMask] + mov ebx, [edx + dsMaxChainLen] + jl LastMatchGood + shr ebx, 2 +LastMatchGood: + +;;; chainlen is decremented once beforehand so that the function can +;;; use the sign flag instead of the zero flag for the exit test. +;;; It is then shifted into the high word, to make room for the wmask +;;; value, which it will always accompany. + + dec ebx + shl ebx, 16 + or ebx, eax + mov [chainlenwmask], ebx + +;;; if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead; + + mov eax, [edx + dsNiceMatch] + mov ebx, [edx + dsLookahead] + cmp ebx, eax + jl LookaheadLess + mov ebx, eax +LookaheadLess: mov [nicematch], ebx + +;;; register Bytef *scan = s->window + s->strstart; + + mov esi, [edx + dsWindow] + mov [window], esi + mov ebp, [edx + dsStrStart] + lea edi, [esi + ebp] + mov [scan], edi + +;;; Determine how many bytes the scan ptr is off from being +;;; dword-aligned. + + mov eax, edi + neg eax + and eax, 3 + mov [scanalign], eax + +;;; IPos limit = s->strstart > (IPos)MAX_DIST(s) ? +;;; s->strstart - (IPos)MAX_DIST(s) : NIL; + + mov eax, [edx + dsWSize] + sub eax, MIN_LOOKAHEAD + sub ebp, eax + jg LimitPositive + xor ebp, ebp +LimitPositive: + +;;; int best_len = s->prev_length; + + mov eax, [edx + dsPrevLen] + mov [bestlen], eax + +;;; Store the sum of s->window + best_len in esi locally, and in esi. + + add esi, eax + mov [windowbestlen], esi + +;;; register ush scan_start = *(ushf*)scan; +;;; register ush scan_end = *(ushf*)(scan+best_len-1); +;;; Posf *prev = s->prev; + + movzx ebx, word ptr [edi] + mov [scanstart], ebx + movzx ebx, word ptr [edi + eax - 1] + mov [scanend], ebx + mov edi, [edx + dsPrev] + +;;; Jump into the main loop. + + mov edx, [chainlenwmask] + jmp short LoopEntry + +align 4 + +;;; do { +;;; match = s->window + cur_match; +;;; if (*(ushf*)(match+best_len-1) != scan_end || +;;; *(ushf*)match != scan_start) continue; +;;; [...] +;;; } while ((cur_match = prev[cur_match & wmask]) > limit +;;; && --chain_length != 0); +;;; +;;; Here is the inner loop of the function. The function will spend the +;;; majority of its time in this loop, and majority of that time will +;;; be spent in the first ten instructions. +;;; +;;; Within this loop: +;;; ebx = scanend +;;; ecx = curmatch +;;; edx = chainlenwmask - i.e., ((chainlen << 16) | wmask) +;;; esi = windowbestlen - i.e., (window + bestlen) +;;; edi = prev +;;; ebp = limit + +LookupLoop: + and ecx, edx + movzx ecx, word ptr [edi + ecx*2] + cmp ecx, ebp + jbe LeaveNow + sub edx, 00010000h + js LeaveNow +LoopEntry: movzx eax, word ptr [esi + ecx - 1] + cmp eax, ebx + jnz LookupLoop + mov eax, [window] + movzx eax, word ptr [eax + ecx] + cmp eax, [scanstart] + jnz LookupLoop + +;;; Store the current value of chainlen. + + mov [chainlenwmask], edx + +;;; Point edi to the string under scrutiny, and esi to the string we +;;; are hoping to match it up with. In actuality, esi and edi are +;;; both pointed (MAX_MATCH_8 - scanalign) bytes ahead, and edx is +;;; initialized to -(MAX_MATCH_8 - scanalign). + + mov esi, [window] + mov edi, [scan] + add esi, ecx + mov eax, [scanalign] + mov edx, 0fffffef8h; -(MAX_MATCH_8) + lea edi, [edi + eax + 0108h] ;MAX_MATCH_8] + lea esi, [esi + eax + 0108h] ;MAX_MATCH_8] + +;;; Test the strings for equality, 8 bytes at a time. At the end, +;;; adjust edx so that it is offset to the exact byte that mismatched. +;;; +;;; We already know at this point that the first three bytes of the +;;; strings match each other, and they can be safely passed over before +;;; starting the compare loop. So what this code does is skip over 0-3 +;;; bytes, as much as necessary in order to dword-align the edi +;;; pointer. (esi will still be misaligned three times out of four.) +;;; +;;; It should be confessed that this loop usually does not represent +;;; much of the total running time. Replacing it with a more +;;; straightforward "rep cmpsb" would not drastically degrade +;;; performance. + +LoopCmps: + mov eax, [esi + edx] + xor eax, [edi + edx] + jnz LeaveLoopCmps + mov eax, [esi + edx + 4] + xor eax, [edi + edx + 4] + jnz LeaveLoopCmps4 + add edx, 8 + jnz LoopCmps + jmp short LenMaximum +LeaveLoopCmps4: add edx, 4 +LeaveLoopCmps: test eax, 0000FFFFh + jnz LenLower + add edx, 2 + shr eax, 16 +LenLower: sub al, 1 + adc edx, 0 + +;;; Calculate the length of the match. If it is longer than MAX_MATCH, +;;; then automatically accept it as the best possible match and leave. + + lea eax, [edi + edx] + mov edi, [scan] + sub eax, edi + cmp eax, MAX_MATCH + jge LenMaximum + +;;; If the length of the match is not longer than the best match we +;;; have so far, then forget it and return to the lookup loop. + + mov edx, [deflatestate] + mov ebx, [bestlen] + cmp eax, ebx + jg LongerMatch + mov esi, [windowbestlen] + mov edi, [edx + dsPrev] + mov ebx, [scanend] + mov edx, [chainlenwmask] + jmp LookupLoop + +;;; s->match_start = cur_match; +;;; best_len = len; +;;; if (len >= nice_match) break; +;;; scan_end = *(ushf*)(scan+best_len-1); + +LongerMatch: mov ebx, [nicematch] + mov [bestlen], eax + mov [edx + dsMatchStart], ecx + cmp eax, ebx + jge LeaveNow + mov esi, [window] + add esi, eax + mov [windowbestlen], esi + movzx ebx, word ptr [edi + eax - 1] + mov edi, [edx + dsPrev] + mov [scanend], ebx + mov edx, [chainlenwmask] + jmp LookupLoop + +;;; Accept the current string, with the maximum possible length. + +LenMaximum: mov edx, [deflatestate] + mov dword ptr [bestlen], MAX_MATCH + mov [edx + dsMatchStart], ecx + +;;; if ((uInt)best_len <= s->lookahead) return (uInt)best_len; +;;; return s->lookahead; + +LeaveNow: + mov edx, [deflatestate] + mov ebx, [bestlen] + mov eax, [edx + dsLookahead] + cmp ebx, eax + jg LookaheadRet + mov eax, ebx +LookaheadRet: + +;;; Restore the stack and return from whence we came. + + add esp, LocalVarsSize + pop ebx + pop esi + pop edi + pop ebp + + ret +; please don't remove this string ! +; Your can freely use gvmat32 in any free or commercial app if you don't remove the string in the binary! + db 0dh,0ah,"asm686 with masm, optimised assembly code from Brian Raiter, written 1998",0dh,0ah + + +IFDEF NOOLDPENTIUMCODE + IFDEF NOUNDERLINE + longest_match endp + ELSE + _longest_match endp + ENDIF + + IFDEF NOUNDERLINE + match_init proc near + ret + match_init endp + ELSE + _match_init proc near + ret + _match_init endp + ENDIF +ELSE + IFDEF NOUNDERLINE + longest_match_686 endp + ELSE + _longest_match_686 endp + ENDIF +ENDIF + +_TEXT ends +end -- cgit v1.2.3