; ------------------------------------------------------------------------
; HeavyThing x86_64 assembly language library and showcase programs
; Copyright © 2015-2018 2 Ton Digital
; Homepage: https://2ton.com.au/
; Author: Jeff Marrison <jeff@2ton.com.au>
;
; This file is part of the HeavyThing library.
;
; HeavyThing is free software: you can redistribute it and/or modify
; it under the terms of the GNU General Public License as published by
; the Free Software Foundation, either version 3 of the License, or
; (at your option) any later version.
;
; HeavyThing is distributed in the hope that it will be useful,
; but WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
; GNU General Public License for more details.
;
; You should have received a copy of the GNU General Public License along
; with the HeavyThing library. If not, see <http://www.gnu.org/licenses/>.
; ------------------------------------------------------------------------
;
; zlib_inflate.inc: inflate side of zlib port
; see zlib_deflate.inc for my amusing commentary re: same
; hahah
;
; This is quite literally a hand compilation (and thus interpretation/
; modification) of the "reference zlib."
; As such, the original zlib.h copyright appears below, although I am not
; sure that is really necessary. Cheers to Jean-loup Gailly and the legend
; Mark Adler are definitely in order regardless of whether it is necessary
; or not!
;
; zlib.h copyright notice appears below:
;/* zlib.h -- interface of the 'zlib' general purpose compression library
; version 1.2.8, April 28th, 2013
;
; Copyright (C) 1995-2013 Jean-loup Gailly and Mark Adler
;
; This software is provided 'as-is', without any express or implied
; warranty. In no event will the authors be held liable for any damages
; arising from the use of this software.
;
; Permission is granted to anyone to use this software for any purpose,
; including commercial applications, and to alter it and redistribute it
; freely, subject to the following restrictions:
;
; 1. The origin of this software must not be misrepresented; you must not
; claim that you wrote the original software. If you use this software
; in a product, an acknowledgment in the product documentation would be
; appreciated but is not required.
; 2. Altered source versions must be plainly marked as such, and must not be
; misrepresented as being the original software.
; 3. This notice may not be removed or altered from any source distribution.
;
; Jean-loup Gailly Mark Adler
; jloup@gzip.org madler@alumni.caltech.edu
;
;
; The data format used by the zlib library is described by RFCs (Request for
; Comments) 1950 to 1952 in the files http://tools.ietf.org/html/rfc1950
; (zlib format), rfc1951 (deflate format) and rfc1952 (gzip format).
;*/
; zlib_stream_size/offsets/etc apply from zlib_deflate which is included before this one
zmode_head = 0
zmode_flags = 1
zmode_time = 2
zmode_os = 3
zmode_exlen = 4
zmode_extra = 5
zmode_name = 6
zmode_comment = 7
zmode_hcrc = 8
zmode_dictid = 9
zmode_dict = 10
zmode_type = 11
zmode_typedo = 12
zmode_stored = 13
zmode_copy_ = 14
zmode_copy = 15
zmode_table = 16
zmode_lenlens = 17
zmode_codelens = 18
zmode_len_ = 19
zmode_len = 20
zmode_lenext = 21
zmode_dist = 22
zmode_distext = 23
zmode_match = 24
zmode_lit = 25
zmode_check = 26
zmode_length = 27
zmode_done = 28
zmode_bad = 29
zmode_mem = 30
zmode_sync = 31
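; these values mirror the reference inflate.h inflate_mode enum; the dispatcher
; at .top below hand-compiles the reference's switch (state->mode) into an
; indirect jump through a table of qword label pointers:
;     jmp qword [.modejumps + mode*8]
; so the numbering here has to stay in step with the .modejumps table order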
zlib_istate_mode_ofs = 0 ; dd
zlib_istate_last_ofs = 8 ; dd
zlib_istate_wrap_ofs = 16 ; dd(b)
zlib_istate_havedict_ofs = 24 ; dd(b)
zlib_istate_flags_ofs = 32 ; dd
zlib_istate_dmax_ofs = 40 ; dd
zlib_istate_check_ofs = 48 ; dq
zlib_istate_total_ofs = 56 ; dq
zlib_istate_head_ofs = 64 ; dq->
zlib_istate_wbits_ofs = 72 ; dd
zlib_istate_wsize_ofs = 80 ; dd
zlib_istate_whave_ofs = 88 ; dd
zlib_istate_wnext_ofs = 96 ; dd
zlib_istate_window_ofs = 104 ; dq->
zlib_istate_hold_ofs = 112 ; dq
zlib_istate_bits_ofs = 120 ; dd
zlib_istate_length_ofs = 128 ; dd
zlib_istate_offset_ofs = 136 ; dd
zlib_istate_extra_ofs = 144 ; dd
zlib_istate_lencode_ofs = 152 ; dq->
zlib_istate_distcode_ofs = 160 ; dq->
zlib_istate_lenbits_ofs = 168 ; dd
zlib_istate_distbits_ofs = 176 ; dd
zlib_istate_ncode_ofs = 184 ; dd
zlib_istate_nlen_ofs = 192 ; dd
zlib_istate_ndist_ofs = 200 ; dd
zlib_istate_have_ofs = 208 ; dd
zlib_istate_next_ofs = 216 ; dq->
zlib_istate_lens_ofs = 224 ; array of short[320]
zlib_istate_work_ofs = 864 ; array of short[288]
zlib_istate_codes_ofs = 1440 ; array of code[1444] (ENOUGH), code is 4 bytes each
zlib_istate_sane_ofs = 7216 ; dd
zlib_istate_back_ofs = 7224 ; dd
zlib_istate_was_ofs = 7232 ; dd
zlib_istate_flush_ofs = 7240 ; dd (nonstandard, but we save it here on entry)
zlib_istate_streamp_ofs = 7248 ; dq-> (back to the z_stream pointer)
zlib_istate_lenbitsmask_ofs = 7256 ; dd (nonstandard, but no sense in constantly doing 1 shl lenbits - 1)
zlib_istate_distbitsmask_ofs = 7264 ; dd (nonstandard, but no sense in constantly doing 1 shl distbits - 1)
zlib_istate_beg_ofs = 7272 ; dq (nonstandard, used to store the max distance in output during inflate)
zlib_istate_orig_outlength_ofs = 7280 ; dq (nonstandard, used to store the incoming outbuf length)
zlib_istate_fastlast_ofs = 7288 ; dq (nonstandard, used for the inlined inflate fast)
zlib_istate_realwindow_ofs = 7296 ; 32768 bytes, I loathe doing multiple allocs, and in what seems to be every use case
; that I have, a window does indeed get allocated, so may as well hang it off the end
; here
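; note: every field above sits on an 8 byte boundary even when it only holds a
; dword (the ; dd markers), which keeps the qword fields naturally aligned and
; is why the offsets advance by 8 at a time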
zlib_istate_size = 7296 + 32768
zlib_inflate_window_bits = 15
if zlib_inflate_window_bits < 8 | zlib_inflate_window_bits > 15
display 'bad inflate window bits',10
err
end if
zlib_inftree_codes = 0
zlib_inftree_lens = 1
zlib_inftree_dists = 2
macro zlib_debug preface*, reg* {
local ..continue, ..string
push rax rcx rdx rdi rsi r8 r9 r10 r11
sub rsp, 8
mov rdi, reg
mov esi, 10
call string$from_unsigned
mov [rsp], rax
mov rdi, ..string
call string$to_stdout
mov rdi, [rsp]
call string$to_stdoutln
mov rdi, [rsp]
call heap$free
add rsp, 8
pop r11 r10 r9 r8 rsi rdi rdx rcx rax
jmp ..continue
cleartext ..string, preface
calign
..continue:
}
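; usage sketch: preface is a string literal, reg must be a 64 bit register
; (it lands in rdi for string$from_unsigned), e.g.:
;     zlib_debug 'bits now: ', r13
; the caller-saved integer registers are pushed/popped around the calls, so it
; can be dropped anywhere mid-flow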
if used zlib$inflateEnd | defined include_everything
; single argument in rdi: a zlib_stream pointer
; all we do is heap$free the state block we stored at zlib_state_ofs during init
; we leave everything else well alone
falign
zlib$inflateEnd:
prolog zlib$inflateEnd
mov rdi, [rdi+zlib_state_ofs]
call heap$free
epilog
end if
if used zlib$inflateInit | defined include_everything
; two arguments: rdi == zlib_stream_size memory chunk for our state, esi == wrap
; wrap == 1 == zheaders, wrap == 2 == gzheaders
; we do not mess with inbuf or outbuf
falign
zlib$inflateInit:
prolog zlib$inflateInit
xor ecx, ecx
sub rsp, 24
mov [rsp], rdi
mov [rsp+16], esi
mov [rdi+zlib_totalin_ofs], rcx
mov [rdi+zlib_totalout_ofs], rcx
mov edi, zlib_istate_size
call heap$alloc
mov rcx, [rsp]
mov [rsp+8], rax
mov [rcx+zlib_state_ofs], rax
mov rdi, rax
xor esi, esi
mov edx, zlib_istate_size - 32768 ; we do not need to clear the window itself at the end
call memset
mov rsi, [rsp]
mov rdi, [rsp+8]
mov eax, [rsp+16]
mov [rdi+zlib_istate_streamp_ofs], rsi
mov dword [rdi+zlib_istate_wbits_ofs], zlib_inflate_window_bits
mov dword [rdi+zlib_istate_wrap_ofs], eax
and eax, 1
; inflateResetKeep(strm) next
lea rcx, [rdi+zlib_istate_codes_ofs]
mov dword [rsi+zlib_adler_ofs], eax
; zmode_head == 0 anyway, so this isn't necessary:
; mov dword [rdi+zlib_istate_mode_ofs], zmode_head
mov dword [rdi+zlib_istate_dmax_ofs], 32768
mov [rdi+zlib_istate_lencode_ofs], rcx
mov [rdi+zlib_istate_distcode_ofs], rcx
mov [rdi+zlib_istate_next_ofs], rcx
mov dword [rdi+zlib_istate_sane_ofs], 1
mov dword [rdi+zlib_istate_back_ofs], -1
lea rsi, [rdi+zlib_istate_realwindow_ofs]
mov [rdi+zlib_istate_window_ofs], rsi
mov dword [rdi+zlib_istate_wsize_ofs], 32768
; our memset atop cleared wnext/whave
mov rax, rsi
add rsp, 24
epilog
end if
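; a rough calling-order sketch (the flush constants themselves live in
; zlib_deflate.inc, and the buffer setup shown is my own illustration):
;     mov rdi, rbx ; a zlib_stream_size chunk with inbuf/outbuf already set
;     mov esi, 1 ; wrap: 1 == zlib headers, 2 == gzip headers
;     call zlib$inflateInit
;     ; ... append compressed bytes to the stream's inbuf ...
;     mov rdi, rbx
;     mov esi, 0 ; flush flags; eax comes back 1 == ok, 0 == fail
;     call zlib$inflate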
if used zlib$inflate | defined include_everything
; two arguments: rdi == z_stream pointer, esi == flush_flags
; we return a bool in eax (unlike the actual zlib), 1 == Z_OK, 0 == fail
; and in our implementation, we really don't care WHY it failed, only that it did
falign
zlib$inflate:
prolog zlib$inflate
push rbx r12 r13 r14 r15
test rdi, rdi
jz .error_return
mov rbx, [rdi+zlib_state_ofs]
test rbx, rbx
jz .error_return
mov dword [rbx+zlib_istate_flush_ofs], esi
mov r14, [rdi+zlib_inbuf_ofs]
mov r15, [rdi+zlib_outbuf_ofs]
test r14, r14
jz .error_return
test r15, r15
jz .error_return
mov ecx, [rbx+zlib_istate_mode_ofs]
mov eax, zmode_typedo
cmp ecx, zmode_type
cmove ecx, eax
mov [rbx+zlib_istate_mode_ofs], ecx
; setup our user-space vars inside the inbuf so that we don't have to consume
; from the head of it (expensive for large buffers)
mov rax, [r14+buffer_length_ofs]
mov rcx, [r14+buffer_itself_ofs]
mov [r14+buffer_user_ofs], rcx ; user_ofs == current pointer
mov [r14+buffer_user_ofs+8], rax ; user_ofs+8 == remaining bytes
; left == strm->avail_out ... we don't need, we will always grow our output buffer
; next = strm->next_in ... our input buffer, which we already have pointers for and we'll 'consume' from this
; have == strm->avail_in == our input buffer also, we will consume from here too
mov r12, qword [rbx+zlib_istate_hold_ofs] ; hold
mov r13d, dword [rbx+zlib_istate_bits_ofs] ; bits
mov rax, [r15+buffer_length_ofs]
mov [rbx+zlib_istate_orig_outlength_ofs], rax ; save this so we know how much we really added
; unlike the reference version, I really dislike pulling 8 bits at a time out of the input buffer
; so we do 32 bits at a time or whatever is left
; zlib_inflate_needbits:
; we smash eax, ecx, edx (and r8d in the tail-bytes path)
macro zlib_inflate_needbits n* {
local .getfour, .allgood
cmp r13d, n
jae .allgood
mov rax, [r14+buffer_user_ofs]
cmp qword [r14+buffer_user_ofs+8], 4
jae .getfour
; less than 4 bytes remain, determine whether existing bits + (remaining bytes * 8) >= n, else goto .inf_leave
; it will be safe here to pull a dword even if it is past the end of input
mov r8d, [r14+buffer_user_ofs+8]
mov ecx, r13d
mov edx, dword [rax]
; this needs to be and'd with the right mask corresponding to how much data we have left
and edx, [r8*4+.bytesleft]
shl rdx, cl
add r12, rdx
mov rcx, r8
xor edx, edx
add qword [r14+buffer_user_ofs], rcx ; move pointer forward
shl ecx, 3
add r13d, ecx ; number of bits we really added
mov qword [r14+buffer_user_ofs+8], rdx ; no more bytes left
cmp r13d, n
jae .allgood
jmp .inf_leave
calign
.getfour:
mov ecx, r13d
mov edx, dword [rax]
shl rdx, cl
add r12, rdx
add r13d, 32
add qword [r14+buffer_user_ofs], 4
sub qword [r14+buffer_user_ofs+8], 4
; fallthrough to allgood
calign
.allgood:
}
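; worked example, assuming 7 bits already held and n == 16: the .getfour path
; loads a dword of input, shifts it left by cl == 7 so it lands above the bits
; we already hold (the add into r12 acts as an or, since those bits are zero),
; and r13d becomes 39 -- comfortably >= 16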
macro zlib_inflate_fastcheck {
local .allgood
cmp r13d, 15
jae .allgood
; add 16 more bits to the hold
mov ecx, r13d
mov rax, [r14+buffer_user_ofs]
movzx edx, word [rax]
shl rdx, cl
add r12, rdx
add r13d, 16
add qword [r14+buffer_user_ofs], 2
sub qword [r14+buffer_user_ofs+8], 2
; fallthrough to allgood
calign
.allgood:
}
macro zlib_inflate_fastcheck2 {
; add 8 more bits to the hold
mov ecx, r13d
mov rax, [r14+buffer_user_ofs]
movzx edx, byte [rax]
shl rdx, cl
add r12, rdx
add r13d, 8
add qword [r14+buffer_user_ofs], 1
sub qword [r14+buffer_user_ofs+8], 1
}
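; so: fastcheck tops the hold up by 16 bits only when fewer than 15 are held,
; while fastcheck2 unconditionally adds a single byte -- the latter is used in
; the extra-bits paths below where one byte at a time is all that can be needed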
; this one is different and only used inside inflate_fast
; and the reason for the difference is that if there were 40 bits in the accum
; and the above macro were called needing 48, it would overfill the accumulator
macro zlib_inflate_need6bytes {
local .allgood, .checkfour, .getwhateverisleft
cmp r13d, 48
jae .allgood
cmp r13d, 32
jle .checkfour ; it is okay to pull up to what we are after in dword size
; otherwise, we know we can do at least 2 bytes at a time, because there are at least 16 bits of room left in the accum
; and we know that we have more than 32 bits in our accum
cmp qword [r14+buffer_user_ofs+8], 2
jb .getwhateverisleft
mov ecx, r13d
mov rax, [r14+buffer_user_ofs]
movzx edx, word [rax]
shl rdx, cl
add r12, rdx
add r13d, 16
add qword [r14+buffer_user_ofs], 2
sub qword [r14+buffer_user_ofs+8], 2
jmp .allgood
calign
.getwhateverisleft:
; determine whether existing bits + (remaining bytes * 8) >= 48, else goto .inf_leave
; it will be safe here to pull a dword even if it is past the end of input
mov ecx, r13d
mov rax, [r14+buffer_user_ofs]
mov r8, [r14+buffer_user_ofs+8]
mov edx, dword [rax]
and edx, [r8*4+.bytesleft]
shl rdx, cl
add r12, rdx
mov rcx, r8
xor edx, edx
add qword [r14+buffer_user_ofs], rcx ; move pointer forward
shl ecx, 3
add r13d, ecx ; number of bits we really added
mov qword [r14+buffer_user_ofs+8], rdx ; no more bytes left
cmp r13d, 48
jae .allgood
jmp .inf_leave
calign
.checkfour:
cmp qword [r14+buffer_user_ofs+8], 4
jb .getwhateverisleft
; else, safe to grab 4 bytes
mov ecx, r13d
mov rax, [r14+buffer_user_ofs]
mov edx, dword [rax]
shl rdx, cl
add r12, rdx
add r13d, 32
add qword [r14+buffer_user_ofs], 4
sub qword [r14+buffer_user_ofs+8], 4
; fallthrough to allgood
calign
.allgood:
}
; zlib_inflate_needbits_reg:
; we smash eax, ecx, edx, so n must be a register (32 bit) and not one of the ones we smash
; NOTE: this is the same logic as zlib_inflate_needbits, except r8 is preserved so that n may live in r8d
macro zlib_inflate_needbits_reg n* {
local .getfour, .allgood
cmp r13d, n
jae .allgood
cmp qword [r14+buffer_user_ofs+8], 4
jae .getfour
; less than 4 bytes remain, determine whether existing bits + (remaining bytes * 8) >= n, else goto .inf_leave
; it will be safe here to pull a dword even if it is past the end of input
push r8
mov ecx, r13d
mov rax, [r14+buffer_user_ofs]
mov r8, [r14+buffer_user_ofs+8]
mov edx, dword [rax]
and edx, [r8*4+.bytesleft]
shl rdx, cl
add r12, rdx
mov rcx, r8
xor edx, edx
pop r8
add qword [r14+buffer_user_ofs], rcx ; move pointer forward
shl ecx, 3
add r13d, ecx ; number of bits we really added
mov qword [r14+buffer_user_ofs+8], rdx ; no more bytes left
cmp r13d, n
jae .allgood
jmp .inf_leave
calign
.getfour:
mov ecx, r13d
mov rax, [r14+buffer_user_ofs]
mov edx, dword [rax]
shl rdx, cl
add r12, rdx
add r13d, 32
add qword [r14+buffer_user_ofs], 4
sub qword [r14+buffer_user_ofs+8], 4
; fallthrough to allgood
calign
.allgood:
}
; zlib_inflate_unwindbits:
; _requires_ bits to be byte aligned of course
; but "puts them back"
macro zlib_inflate_unwindbits {
shr r13d, 3
sub qword [r14+buffer_user_ofs], r13 ; move pointer backwards
add qword [r14+buffer_user_ofs+8], r13 ; add bytes back into remaining
xor r13d, r13d
xor r12d, r12d
}
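; e.g. with 24 bits held (the count must be a multiple of 8 here): r13d shr 3
; == 3, so the input pointer moves back 3 bytes, 3 bytes go back into the
; remaining count, and the accumulator and bit count are cleared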
; zlib_inflate_dropbits:
; no smashes
macro zlib_inflate_dropbits n* {
shr r12, n
sub r13d, n
}
; zlib_inflate_dropbits_reg:
; smashes ecx (as it must), n must be a register not a literal (32 bits)
macro zlib_inflate_dropbits_reg n* {
mov ecx, n
shr r12, cl
sub r13d, n
}
; zlib_inflate_bytebits:
; we smash ecx
macro zlib_inflate_bytebits {
mov ecx, r13d
and ecx, 7
shr r12, cl
sub r13d, ecx
}
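; e.g. with 13 bits held: ecx == 13 and 7 == 5, so 5 bits are shifted out and
; the count drops to 8, leaving the accumulator byte aligned again for the
; stored-block and final check paths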
calign
.top:
mov eax, [rbx+zlib_istate_mode_ofs]
shl eax, 3
add rax, .modejumps
jmp qword [rax]
dalign
.bytesleft:
dd 0x00000000, 0x000000ff, 0x0000ffff, 0x00ffffff
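; masks indexed by how many input bytes actually remain (0..3): e.g. with 2
; bytes left we still load a full dword (safe, per the comments above) but
; keep only its low 16 bits via the 0x0000ffff entry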
calign
.mode_head:
xor eax, eax
mov ecx, zmode_typedo
cmp dword [rbx+zlib_istate_wrap_ofs], 0
cmove eax, ecx
mov dword [rbx+zlib_istate_mode_ofs], eax
je .mode_typedo
zlib_inflate_needbits 16
mov eax, r12d
and eax, 0xffff
cmp eax, 0x8b1f
jne .mode_head_nogzhead
test dword [rbx+zlib_istate_wrap_ofs], 2
jz .mode_head_nogzhead
; CRC2(state->check, hold)
xor edi, edi ; crc32(0, null, 0) == 0
sub rsp, 8
mov dword [rsp], eax
mov rsi, rsp
mov edx, 2
call crc$32
add rsp, 8
mov qword [rbx+zlib_istate_check_ofs], rax
zlib_inflate_dropbits 16
mov dword [rbx+zlib_istate_mode_ofs], zmode_flags
jmp .mode_flags
calign
.mode_head_nogzhead:
mov dword [rbx+zlib_istate_flags_ofs], 0
test dword [rbx+zlib_istate_wrap_ofs], 1
jz .mode_bad
; we don't do headers, none of my stream stuff needs it
mov eax, r12d
mov ecx, r12d
and eax, 0xff ; low byte from the accum
and ecx, 0xffff
shl eax, 8 ; << 8
shr ecx, 8 ; high byte from the accum
add eax, ecx
mov ecx, 31
xor edx, edx
div ecx
test edx, edx
jnz .mode_bad
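; i.e. the RFC 1950 header check: (CMF shl 8) + FLG must be a multiple of 31,
; which is exactly what the byte swap + div above verifies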
calign
.mode_head_noheadercheck:
mov eax, r12d
and eax, 0xf
cmp eax, 8 ; BITS(4) != Z_DEFLATED
jne .mode_bad
zlib_inflate_dropbits 4
mov eax, 1
mov ecx, r12d
and ecx, 0xf
add ecx, 8
cmp dword [rbx+zlib_istate_wbits_ofs], 0
je .mode_head_wbits_zero
cmp ecx, dword [rbx+zlib_istate_wbits_ofs]
ja .mode_bad
shl eax, cl
mov rdi, [rbx+zlib_istate_streamp_ofs]
mov qword [rbx+zlib_istate_check_ofs], 1 ; adler32(0,null,0) == 1
mov qword [rdi+zlib_adler_ofs], 1
mov dword [rbx+zlib_istate_dmax_ofs], eax
mov eax, r12d
zlib_inflate_dropbits 12
mov ecx, zmode_dictid
mov edx, zmode_type
test eax, 0x200
cmovz ecx, edx
mov [rbx+zlib_istate_mode_ofs], ecx
jnz .mode_dictid
jmp .mode_type
calign
.mode_head_wbits_zero:
mov dword [rbx+zlib_istate_wbits_ofs], ecx
shl eax, cl
mov rdi, [rbx+zlib_istate_streamp_ofs]
mov qword [rbx+zlib_istate_check_ofs], 1 ; adler32(0,null,0) == 1
mov qword [rdi+zlib_adler_ofs], 1
mov dword [rbx+zlib_istate_dmax_ofs], eax
mov eax, r12d
zlib_inflate_dropbits 12
mov ecx, zmode_dictid
mov edx, zmode_type
test eax, 0x200
cmovz ecx, edx
mov [rbx+zlib_istate_mode_ofs], ecx
jnz .mode_dictid
jmp .mode_type
calign
.mode_flags:
zlib_inflate_needbits 16
mov eax, r12d
and eax, 0xffff
mov dword [rbx+zlib_istate_flags_ofs], eax
mov ecx, eax
and ecx, 0xff
cmp ecx, 8 ; flags & 0xff != Z_DEFLATED
jne .mode_bad
test eax, 0xe000
jnz .mode_bad
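; state->flags carries CM in its low byte and the gzip FLG byte up high, so
; the RFC 1952 flag tests read as: 0x0200 == FHCRC, 0x0400 == FEXTRA,
; 0x0800 == FNAME, 0x1000 == FCOMMENT, with 0xe000 being the reserved bits
; rejected just above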
test eax, 0x0200
jnz .mode_flags_docrc
zlib_inflate_dropbits 16
mov dword [rbx+zlib_istate_mode_ofs], zmode_time
jmp .mode_time
calign
.mode_flags_docrc:
; CRC2(state->check, hold)
mov rdi, qword [rbx+zlib_istate_check_ofs]
sub rsp, 8
mov dword [rsp], r12d
mov rsi, rsp
mov edx, 2
call crc$32
add rsp, 8
mov qword [rbx+zlib_istate_check_ofs], rax
zlib_inflate_dropbits 16
mov dword [rbx+zlib_istate_mode_ofs], zmode_time
; fallthrough to .mode_time
calign
.mode_time:
zlib_inflate_needbits 32
test dword [rbx+zlib_istate_flags_ofs], 0x0200
jnz .mode_time_docrc
zlib_inflate_dropbits 32
mov dword [rbx+zlib_istate_mode_ofs], zmode_os
jmp .mode_os
calign
.mode_time_docrc:
; CRC4(state->check, hold)
mov rdi, qword [rbx+zlib_istate_check_ofs]
sub rsp, 8
mov dword [rsp], r12d
mov rsi, rsp
mov edx, 4
call crc$32
add rsp, 8
mov qword [rbx+zlib_istate_check_ofs], rax
zlib_inflate_dropbits 32
mov dword [rbx+zlib_istate_mode_ofs], zmode_os
; fallthrough to .mode_os
calign
.mode_os:
zlib_inflate_needbits 16
test dword [rbx+zlib_istate_flags_ofs], 0x0200
jnz .mode_os_docrc
zlib_inflate_dropbits 16
mov dword [rbx+zlib_istate_mode_ofs], zmode_exlen
jmp .mode_exlen
calign
.mode_os_docrc:
; CRC2(state->check, hold)
mov rdi, qword [rbx+zlib_istate_check_ofs]
sub rsp, 8
mov dword [rsp], r12d
mov rsi, rsp
mov edx, 2
call crc$32
add rsp, 8
mov qword [rbx+zlib_istate_check_ofs], rax
zlib_inflate_dropbits 16
mov dword [rbx+zlib_istate_mode_ofs], zmode_exlen
; fallthrough to .mode_exlen
calign
.mode_exlen:
test dword [rbx+zlib_istate_flags_ofs], 0x0400
jz .mode_exlen_nolength
zlib_inflate_needbits 16
mov eax, r12d
and eax, 0xffff
mov dword [rbx+zlib_istate_length_ofs], eax
test dword [rbx+zlib_istate_flags_ofs], 0x0200
jnz .mode_exlen_docrc
zlib_inflate_dropbits 16
mov dword [rbx+zlib_istate_mode_ofs], zmode_extra
jmp .mode_extra
calign
.mode_exlen_docrc:
; CRC2(state->check, hold)
mov rdi, qword [rbx+zlib_istate_check_ofs]
sub rsp, 8
mov dword [rsp], r12d
mov rsi, rsp
mov edx, 2
call crc$32
add rsp, 8
mov qword [rbx+zlib_istate_check_ofs], rax
zlib_inflate_dropbits 16
mov dword [rbx+zlib_istate_mode_ofs], zmode_extra
jmp .mode_extra
calign
.mode_exlen_nolength:
mov dword [rbx+zlib_istate_mode_ofs], zmode_extra
; fallthrough to .mode_extra
calign
.mode_extra:
test dword [rbx+zlib_istate_flags_ofs], 0x0400
jz .mode_extra_nolength
; ok so, here my pulling extra 32 bits at a time may not have exactly worked out so well
; BUT, we can do an unwind of whatever is in our accumulator first
; which solves the problem nicely
zlib_inflate_unwindbits
mov edx, dword [rbx+zlib_istate_length_ofs] ; copy
mov rax, qword [r14+buffer_user_ofs+8] ; remaining bytes
cmp rdx, rax ; copy > have?
cmova rdx, rax ; if so, copy = have
test rdx, rdx
jz .mode_extra_nocopy
test dword [rbx+zlib_istate_flags_ofs], 0x0200
jnz .mode_extra_docrc
sub qword [r14+buffer_user_ofs+8], rdx
add qword [r14+buffer_user_ofs], rdx
sub dword [rbx+zlib_istate_length_ofs], edx
cmp dword [rbx+zlib_istate_length_ofs], 0
jne .inf_leave
mov dword [rbx+zlib_istate_length_ofs], 0
mov dword [rbx+zlib_istate_mode_ofs], zmode_name
jmp .mode_name
calign
.mode_extra_docrc:
mov rdi, qword [rbx+zlib_istate_check_ofs]
mov rsi, [r14+buffer_user_ofs]
push rdx
; rdx already set
call crc$32
pop rdx
mov qword [rbx+zlib_istate_check_ofs], rax
sub qword [r14+buffer_user_ofs+8], rdx
add qword [r14+buffer_user_ofs], rdx
sub dword [rbx+zlib_istate_length_ofs], edx
cmp dword [rbx+zlib_istate_length_ofs], 0
jne .inf_leave
mov dword [rbx+zlib_istate_length_ofs], 0
mov dword [rbx+zlib_istate_mode_ofs], zmode_name
jmp .mode_name
calign
.mode_extra_nocopy:
cmp dword [rbx+zlib_istate_length_ofs], 0
jne .inf_leave
mov dword [rbx+zlib_istate_length_ofs], 0
mov dword [rbx+zlib_istate_mode_ofs], zmode_name
jmp .mode_name
calign
.mode_extra_nolength:
mov dword [rbx+zlib_istate_length_ofs], 0
mov dword [rbx+zlib_istate_mode_ofs], zmode_name
; fallthrough to .mode_name
calign
.mode_name:
; the bit accumulator has to be empty at this stage
test dword [rbx+zlib_istate_flags_ofs], 0x0800
jz .mode_name_noname
; yuck, a null terminated name?
cmp qword [r14+buffer_user_ofs+8], 0
je .inf_leave
xor edx, edx ; copy = 0
mov rax, [r14+buffer_user_ofs]
calign
.mode_name_findnull:
movzx ecx, byte [rax]
add rax, 1
add edx, 1
test ecx, ecx
jz .mode_name_nullfound
cmp rdx, qword [r14+buffer_user_ofs+8]
jb .mode_name_findnull
calign
.mode_name_nullfound:
; could have fallen through due to running out of bytes
test dword [rbx+zlib_istate_flags_ofs], 0x0200
jnz .mode_name_nullfound_docrc
sub qword [r14+buffer_user_ofs+8], rdx ; have -= copy
add qword [r14+buffer_user_ofs], rdx ; ptr += copy
test ecx, ecx
jnz .inf_leave
mov dword [rbx+zlib_istate_length_ofs], 0
mov dword [rbx+zlib_istate_mode_ofs], zmode_comment
jmp .mode_comment
calign
.mode_name_nullfound_docrc:
mov rdi, qword [rbx+zlib_istate_check_ofs]
mov rsi, [r14+buffer_user_ofs]
; rdx already set, preserve it and rcx
push rcx rdx
call crc$32
pop rdx rcx
mov qword [rbx+zlib_istate_check_ofs], rax
sub qword [r14+buffer_user_ofs+8], rdx ; have -= copy
add qword [r14+buffer_user_ofs], rdx ; ptr += copy
test ecx, ecx
jnz .inf_leave
mov dword [rbx+zlib_istate_length_ofs], 0
mov dword [rbx+zlib_istate_mode_ofs], zmode_comment
jmp .mode_comment
calign
.mode_name_noname:
mov dword [rbx+zlib_istate_length_ofs], 0
mov dword [rbx+zlib_istate_mode_ofs], zmode_comment
; fallthrough to .mode_comment
calign
.mode_comment:
test dword [rbx+zlib_istate_flags_ofs], 0x1000
jz .mode_comment_nocomment
; yuck, a null terminated comment too?
cmp qword [r14+buffer_user_ofs+8], 0
je .inf_leave
xor edx, edx ; copy = 0
mov rax, [r14+buffer_user_ofs]
calign
.mode_comment_findnull:
movzx ecx, byte [rax]
add rax, 1
add edx, 1
test ecx, ecx
jz .mode_comment_nullfound
cmp rdx, qword [r14+buffer_user_ofs+8]
jb .mode_comment_findnull
calign
.mode_comment_nullfound:
; could have fallen through due to running out of bytes
test dword [rbx+zlib_istate_flags_ofs], 0x0200
jnz .mode_comment_nullfound_docrc
sub qword [r14+buffer_user_ofs+8], rdx ; have -= copy
add qword [r14+buffer_user_ofs], rdx ; ptr += copy
test ecx, ecx
jnz .inf_leave
mov dword [rbx+zlib_istate_mode_ofs], zmode_hcrc
jmp .mode_hcrc
calign
.mode_comment_nullfound_docrc:
mov rdi, qword [rbx+zlib_istate_check_ofs]
mov rsi, [r14+buffer_user_ofs]
; rdx already set, preserve it and rcx
push rcx rdx
call crc$32
pop rdx rcx
mov qword [rbx+zlib_istate_check_ofs], rax
sub qword [r14+buffer_user_ofs+8], rdx ; have -= copy
add qword [r14+buffer_user_ofs], rdx ; ptr += copy
test ecx, ecx
jnz .inf_leave
mov dword [rbx+zlib_istate_mode_ofs], zmode_hcrc
jmp .mode_hcrc
calign
.mode_comment_nocomment:
mov dword [rbx+zlib_istate_mode_ofs], zmode_hcrc
; fallthrough to .mode_hcrc
calign
.mode_hcrc:
test dword [rbx+zlib_istate_flags_ofs], 0x0200
jnz .mode_hcrc_checkit
mov rsi, [rbx+zlib_istate_streamp_ofs]
mov qword [rsi+zlib_adler_ofs], 0
mov qword [rbx+zlib_istate_check_ofs], 0 ; crc32(0, null, 0) == 0
mov dword [rbx+zlib_istate_mode_ofs], zmode_type
jmp .mode_type
calign
.mode_hcrc_checkit:
zlib_inflate_needbits 16
mov eax, r12d
and eax, 0xffff
mov rcx, qword [rbx+zlib_istate_check_ofs]
and ecx, 0xffff
cmp eax, ecx
jne .mode_bad ; header crc mismatch
zlib_inflate_dropbits 16
mov rsi, [rbx+zlib_istate_streamp_ofs]
mov qword [rsi+zlib_adler_ofs], 0
mov qword [rbx+zlib_istate_check_ofs], 0 ; crc32(0, null, 0) == 0
mov dword [rbx+zlib_istate_mode_ofs], zmode_type
jmp .mode_type
calign
.mode_dictid:
zlib_inflate_needbits 32
mov rsi, [rbx+zlib_istate_streamp_ofs]
mov eax, r12d
bswap eax
mov qword [rsi+zlib_adler_ofs], rax
mov qword [rbx+zlib_istate_check_ofs], rax
zlib_inflate_dropbits 32
mov dword [rbx+zlib_istate_mode_ofs], zmode_dict
; fallthrough to .mode_dict
calign
.mode_dict:
; we don't support adding a custom user dictionary here... maybe someday when I am bored
; we can come back and add it, for now, add a breakpoint
cmp dword [rbx+zlib_istate_havedict_ofs], 0
je .mode_dict_todo
mov rsi, [rbx+zlib_istate_streamp_ofs]
mov qword [rbx+zlib_istate_check_ofs], 1
mov qword [rsi+zlib_adler_ofs], 1 ; adler32(0, null, 0) == 1
mov dword [rbx+zlib_istate_mode_ofs], zmode_type
jmp .mode_type
calign
.mode_dict_todo:
; in the reference version, this would be a Z_NEED_DICT return
; which, none of my ssl/streaming/gzip goods use
; TODO: someday when I am bored and have nothing better to do, add custom dictionary support to this
; and NOTE: since zlib_istate_havedict_ofs is set to 0, this won't happen during normal runtime
; unless you are playing with it :-)
breakpoint
calign
.mode_type:
cmp dword [rbx+zlib_istate_flush_ofs], zlib_block
je .inf_leave
cmp dword [rbx+zlib_istate_flush_ofs], zlib_trees ; zlib_trees is defined with the other flush constants in deflate
je .inf_leave
; fall through to .mode_typedo
calign
.mode_typedo:
cmp dword [rbx+zlib_istate_last_ofs], 0
jne .mode_typedo_last
zlib_inflate_needbits 3
mov eax, r12d
and eax, 1
mov dword [rbx+zlib_istate_last_ofs], eax
zlib_inflate_dropbits 1
mov eax, r12d
and eax, 3
cmp eax, 0
je .mode_typedo_stored
cmp eax, 1
je .mode_typedo_fixed
cmp eax, 2
je .mode_typedo_dynamic
; invalid block type
zlib_inflate_dropbits 2
jmp .mode_bad
calign
.mode_typedo_stored:
mov dword [rbx+zlib_istate_mode_ofs], zmode_stored
zlib_inflate_dropbits 2
jmp .mode_stored
calign
.mode_typedo_fixed:
; fixedtables(state)
mov qword [rbx+zlib_istate_lencode_ofs], .lenfix
mov dword [rbx+zlib_istate_lenbits_ofs], 9
mov dword [rbx+zlib_istate_lenbitsmask_ofs], 511 ; 1 shl 9 - 1
mov qword [rbx+zlib_istate_distcode_ofs], .distfix
mov dword [rbx+zlib_istate_distbits_ofs], 5
mov dword [rbx+zlib_istate_distbitsmask_ofs], 31 ; 1 shl 5 - 1
; end of fixedtables
mov dword [rbx+zlib_istate_mode_ofs], zmode_len_
cmp dword [rbx+zlib_istate_flush_ofs], zlib_trees
je .mode_typedo_fixed_treesflush
zlib_inflate_dropbits 2
jmp .mode_len_
calign
.mode_typedo_fixed_treesflush:
zlib_inflate_dropbits 2
jmp .inf_leave
calign
.mode_typedo_dynamic:
mov dword [rbx+zlib_istate_mode_ofs], zmode_table
zlib_inflate_dropbits 2
jmp .mode_table
calign
.mode_typedo_last:
zlib_inflate_bytebits
mov dword [rbx+zlib_istate_mode_ofs], zmode_check
jmp .mode_check
calign
.mode_stored:
zlib_inflate_bytebits
zlib_inflate_needbits 32
mov eax, r12d
mov ecx, r12d
shr ecx, 16
xor ecx, 0xffff
and eax, 0xffff
cmp eax, ecx
jne .mode_bad ; invalid stored block lengths
mov dword [rbx+zlib_istate_length_ofs], eax
zlib_inflate_dropbits 32
zlib_inflate_unwindbits ; put back whatever remains
mov dword [rbx+zlib_istate_mode_ofs], zmode_copy_
cmp dword [rbx+zlib_istate_flush_ofs], zlib_trees
je .inf_leave
; fallthrough to .mode_copy_
calign
.mode_copy_:
mov dword [rbx+zlib_istate_mode_ofs], zmode_copy
; fallthrough to .mode_copy
calign
.mode_copy:
; do a buffer append
cmp dword [rbx+zlib_istate_length_ofs], 0
je .mode_copy_nolength
mov edx, [rbx+zlib_istate_length_ofs]
mov rcx, [r14+buffer_user_ofs+8] ; remaining bytes left
cmp rdx, rcx
cmova rdx, rcx
test rdx, rdx
jz .inf_leave
; our next pointer is the buffer current pointer itself
mov rsi, [r14+buffer_user_ofs]
mov rdi, r15
; before we call buffer$append, modify our remaining pointers and update length
sub dword [rbx+zlib_istate_length_ofs], edx
sub qword [r14+buffer_user_ofs+8], rdx
add qword [r14+buffer_user_ofs], rdx
call buffer$append
cmp dword [rbx+zlib_istate_length_ofs], 0
je .mode_copy_nolength
; otherwise, for there still to be a length remaining, it means we _must_ have run out of input bytes
jmp .inf_leave
calign
.mode_copy_nolength:
mov dword [rbx+zlib_istate_mode_ofs], zmode_type
jmp .mode_type
calign
.mode_table:
zlib_inflate_needbits 14
mov eax, r12d
and eax, 31
add eax, 257
mov dword [rbx+zlib_istate_nlen_ofs], eax
cmp eax, 286
ja .mode_bad ; too many length or distance symbols
zlib_inflate_dropbits 5
mov eax, r12d
and eax, 31
add eax, 1
mov dword [rbx+zlib_istate_ndist_ofs], eax
cmp eax, 30
ja .mode_bad ; too many length or distance symbols
zlib_inflate_dropbits 5
mov eax, r12d
and eax, 15
add eax, 4
mov dword [rbx+zlib_istate_ncode_ofs], eax
zlib_inflate_dropbits 4
mov dword [rbx+zlib_istate_have_ofs], 0
mov dword [rbx+zlib_istate_mode_ofs], zmode_lenlens
; fallthrough to .mode_lenlens
calign
.mode_lenlens:
mov ecx, [rbx+zlib_istate_have_ofs]
cmp ecx, dword [rbx+zlib_istate_ncode_ofs]
jae .mode_lenlens_ncodesdone
zlib_inflate_needbits 3
; lens is array of unsigned short, inline with state
; order is array of dd
lea rdx, [rbx+zlib_istate_lens_ofs]
mov ecx, [rbx+zlib_istate_have_ofs]
mov r8d, dword [rcx*4+.order]
add ecx, 1
mov [rbx+zlib_istate_have_ofs], ecx
mov eax, r12d
and eax, 7 ; BITS(3)
mov word [rdx+r8*2], ax
zlib_inflate_dropbits 3
jmp .mode_lenlens
calign
.mode_lenlens_ncodesdone:
cmp dword [rbx+zlib_istate_have_ofs], 19
jae .mode_lenlens_ncodefilldone
lea rdx, [rbx+zlib_istate_lens_ofs]
mov ecx, [rbx+zlib_istate_have_ofs]
mov r8d, dword [rcx*4+.order]
add ecx, 1
mov [rbx+zlib_istate_have_ofs], ecx
xor eax, eax
mov word [rdx+r8*2], ax
jmp .mode_lenlens_ncodesdone
calign
.mode_lenlens_ncodefilldone:
lea rax, [rbx+zlib_istate_codes_ofs]
mov [rbx+zlib_istate_next_ofs], rax
mov [rbx+zlib_istate_lencode_ofs], rax
mov dword [rbx+zlib_istate_lenbits_ofs], 7
mov dword [rbx+zlib_istate_lenbitsmask_ofs], 127 ; 1 shl 7 - 1
mov edi, zlib_inftree_codes
lea rsi, [rbx+zlib_istate_lens_ofs]
mov edx, 19
lea rcx, [rbx+zlib_istate_next_ofs]
lea r8, [rbx+zlib_istate_lenbits_ofs]
lea r9, [rbx+zlib_istate_work_ofs]
call .inflate_table
; do the calculation of lenbitsmask once
mov edx, 1
mov ecx, [rbx+zlib_istate_lenbits_ofs]
shl edx, cl
sub edx, 1
mov [rbx+zlib_istate_lenbitsmask_ofs], edx
test eax, eax
jz .mode_bad ; invalid code lengths set
mov dword [rbx+zlib_istate_have_ofs], 0
mov dword [rbx+zlib_istate_mode_ofs], zmode_codelens
; fallthrough to .mode_codelens
calign
.mode_codelens:
mov eax, dword [rbx+zlib_istate_nlen_ofs]
add eax, dword [rbx+zlib_istate_ndist_ofs]
cmp dword [rbx+zlib_istate_have_ofs], eax
jae .mode_codelens_whiledone
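; each 4 byte table entry mirrors the reference's code struct:
;     { unsigned char op; unsigned char bits; unsigned short val; }
; so after the dword load below, al == here.op, ah == here.bits, and the high
; word (shr 16) == here.val -- a layout relied on throughout the decode loops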
calign
.mode_codelens_getbits:
; accum in r12, bits in r13d
mov rsi, [rbx+zlib_istate_lencode_ofs]
mov edx, dword [rbx+zlib_istate_lenbitsmask_ofs]
mov ecx, r12d
and ecx, edx
mov eax, [rsi+rcx*4] ; here = state->lencode[BITS(state->lenbits)]
movzx ecx, ah ; here.bits
cmp ecx, r13d
jbe .mode_codelens_gotbits
; else, we need MORE bits... but we don't want to necessarily assume that a dword would do the trick
; hmmm... TODO: see how many times this thing actually gets called, and monitor the bit accumulator
; ... the REASON we can't just say "hey gimme 32 bits" is because this code might not actually need that many
; and we'd be falsely stating that we did and prematurely bailing out if in fact there were enough here
; already ... hmmm
mov r8d, r13d
add r8d, 8
zlib_inflate_needbits_reg r8d
jmp .mode_codelens_getbits
calign
.mode_codelens_gotbits:
mov edx, eax
shr edx, 16
; eax contains here
; ecx contains here.bits
; edx == here.val
cmp edx, 16
jb .mode_codelens_lowval
je .mode_codelens_16
cmp edx, 17
je .mode_codelens_17
; final else
mov r8d, ecx
add r8d, 7
mov r9d, ecx ; save here.bits
zlib_inflate_needbits_reg r8d ; NEEDBITS(here.bits + 7)
zlib_inflate_dropbits_reg r9d ; DROPBITS(here.bits)
xor r10d, r10d ; len = 0
mov r11d, r12d
and r11d, 127
add r11d, 11 ; copy = 11 + BITS(7)
zlib_inflate_dropbits 7
jmp .mode_codelens_lencopyset
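; per RFC 1951: code 16 copies the previous length 3-6 times (2 extra bits),
; 17 repeats zero 3-10 times (3 extra bits), and 18 (the final else above)
; repeats zero 11-138 times (7 extra bits)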
calign
.mode_codelens_lowval:
; state->lens[state->have++] = here.val
mov esi, dword [rbx+zlib_istate_have_ofs]
lea rdi, [rbx+zlib_istate_lens_ofs]
mov word [rdi+rsi*2], dx
add esi, 1
mov dword [rbx+zlib_istate_have_ofs], esi
mov r8d, ecx
zlib_inflate_dropbits_reg r8d
jmp .mode_codelens ; back to the outermost while
calign
.mode_codelens_16:
mov r8d, ecx
add r8d, 2
mov r9d, ecx ; save here.bits
zlib_inflate_needbits_reg r8d ; NEEDBITS(here.bits + 2)
zlib_inflate_dropbits_reg r9d ; DROPBITS(here.bits)
lea rdi, [rbx+zlib_istate_lens_ofs]
mov esi, dword [rbx+zlib_istate_have_ofs]
test esi, esi
jz .mode_bad ; invalid bit length repeat
sub esi, 1
movzx r10d, word [rdi+rsi*2] ; len = state->lens[state->have - 1]
mov r11d, r12d
and r11d, 3
add r11d, 3 ; copy = 3 + BITS(2)
zlib_inflate_dropbits 2
jmp .mode_codelens_lencopyset
calign
.mode_codelens_17:
mov r8d, ecx
add r8d, 3
mov r9d, ecx ; save here.bits
zlib_inflate_needbits_reg r8d ; NEEDBITS(here.bits + 3)
zlib_inflate_dropbits_reg r9d ; DROPBITS(here.bits)
xor r10d, r10d ; len = 0
mov r11d, r12d
and r11d, 7
add r11d, 3 ; copy = 3 + BITS(3)
zlib_inflate_dropbits 3
jmp .mode_codelens_lencopyset
calign
.mode_codelens_lencopyset:
mov esi, dword [rbx+zlib_istate_have_ofs]
lea rdi, [rbx+zlib_istate_lens_ofs]
mov ecx, esi
add ecx, r11d
mov eax, dword [rbx+zlib_istate_nlen_ofs]
add eax, dword [rbx+zlib_istate_ndist_ofs]
cmp ecx, eax
ja .mode_bad ; invalid bit length repeat
; otherwise, while (copy--) state->lens[state->have++] = (unsigned short)len
calign
.mode_codelens_lenloop:
mov word [rdi+rsi*2], r10w
add esi, 1
sub r11d, 1
jnz .mode_codelens_lenloop
mov dword [rbx+zlib_istate_have_ofs], esi
jmp .mode_codelens ; back to the outermost while
calign
.mode_codelens_whiledone:
; check for end-of-block code
lea rdi, [rbx+zlib_istate_lens_ofs]
cmp word [rdi+512], 0
je .mode_bad ; invalid code -- missing end-of-block
; build code tables
lea rax, [rbx+zlib_istate_codes_ofs]
mov [rbx+zlib_istate_next_ofs], rax
mov [rbx+zlib_istate_lencode_ofs], rax
mov dword [rbx+zlib_istate_lenbits_ofs], 9
mov dword [rbx+zlib_istate_lenbitsmask_ofs], 511 ; 1 shl 9 - 1
mov edi, zlib_inftree_lens
lea rsi, [rbx+zlib_istate_lens_ofs]
mov edx, dword [rbx+zlib_istate_nlen_ofs]
lea rcx, [rbx+zlib_istate_next_ofs]
lea r8, [rbx+zlib_istate_lenbits_ofs]
lea r9, [rbx+zlib_istate_work_ofs]
call .inflate_table
; do the calculation of lenbitsmask once
mov edx, 1
mov ecx, [rbx+zlib_istate_lenbits_ofs]
shl edx, cl
sub edx, 1
mov [rbx+zlib_istate_lenbitsmask_ofs], edx
test eax, eax
jz .mode_bad ; invalid literal/lengths set
mov rax, [rbx+zlib_istate_next_ofs]
mov [rbx+zlib_istate_distcode_ofs], rax
mov dword [rbx+zlib_istate_distbits_ofs], 6
mov dword [rbx+zlib_istate_distbitsmask_ofs], 63 ; 1 shl 6 - 1
mov edi, zlib_inftree_dists
lea rsi, [rbx+zlib_istate_lens_ofs]
mov eax, dword [rbx+zlib_istate_nlen_ofs]
shl eax, 1
add rsi, rax ; state->lens + state->nlen
mov edx, dword [rbx+zlib_istate_ndist_ofs]
lea rcx, [rbx+zlib_istate_next_ofs]
lea r8, [rbx+zlib_istate_distbits_ofs]
lea r9, [rbx+zlib_istate_work_ofs]
call .inflate_table
; do the calculation of distbitsmask
mov edx, 1
mov ecx, [rbx+zlib_istate_distbits_ofs]
shl edx, cl
sub edx, 1
mov [rbx+zlib_istate_distbitsmask_ofs], edx
test eax, eax
jz .mode_bad ; invalid distances set
mov dword [rbx+zlib_istate_mode_ofs], zmode_len_
cmp dword [rbx+zlib_istate_flush_ofs], zlib_trees
je .inf_leave
; fallthrough to .mode_len_
calign
.mode_len_:
; the reference keeps LEN_ distinct from LEN so that a zlib_trees flush can
; stop right after the tables are built (see the trees checks above) and then
; resume here; all it does is become zmode_len
mov dword [rbx+zlib_istate_mode_ofs], zmode_len
; fallthrough to .mode_len
calign
.mode_len:
; uggh, HACKOLA, TODO: fix this properly, ughgh
; ok so, inflate_fast and the have/bailout logic assume that fewer than 8 bits
; sit in our hold, so we effectively have to unwind the whole bytes _the first_
; time this happens
mov eax, r13d
shr eax, 3
; subtract that from our user pointer/length
sub qword [r14+buffer_user_ofs], rax
add qword [r14+buffer_user_ofs+8], rax
shl eax, 3
sub r13d, eax
mov ecx, r13d
mov edx, 1
shl edx, cl
sub edx, 1
and r12, rdx
cmp qword [r14+buffer_user_ofs+8], 6
; cmp dword [rbx+zlib_istate_have_ofs], 6 would be incorrect here, since we track the reference's have via the buffer user vars directly
jb .mode_len_noinflate
; else, we have >= 6, so inflate_fast(strm, out) inlined here, with mods to use the pre-existing accumulator
mov rax, [rbx+zlib_istate_orig_outlength_ofs]
mov [rbx+zlib_istate_beg_ofs], rax
; set fastlast to whatever we actually have left - 5
mov rax, [r14+buffer_user_ofs] ; our current pointer
add rax, [r14+buffer_user_ofs+8] ; + how many bytes we have left
sub rax, 5
mov [rbx+zlib_istate_fastlast_ofs], rax
; TODO: zlib_istate_beg_ofs and zlib_istate_orig_outlength_ofs do the same thing, but didn't when I first translated all this
; so it will be safe to remove one of them
calign
.mode_len_inflate_fast_top:
; hmmm, zlib_inflate_need6bytes is not quite right for this spot, so:
zlib_inflate_fastcheck
mov rdi, r15
mov esi, 258
call buffer$reserve ; this is not an expensive call if there is room available
mov r8, [rbx+zlib_istate_lencode_ofs]
mov eax, r12d
and eax, dword [rbx+zlib_istate_lenbitsmask_ofs] ; hold & lmask
mov eax, dword [r8+rax*4] ; here = lcode[hold & lmask]
calign
.mode_len_inflate_fast_dolen:
mov r8d, eax
shr r8d, 8
and r8d, 0xff ; here.bits (op)
zlib_inflate_dropbits_reg r8d ; this doesn't blast eax, but does blast ecx for the shr
movzx r8d, al ; here.op
test r8d, r8d
jz .mode_len_inflate_fast_literal
test r8d, 16
jnz .mode_len_inflate_fast_length_base
test r8d, 64
jz .mode_len_inflate_fast_2ndlevellength
test r8d, 32
jz .mode_bad ; invalid literal/length code
; otherwise, end of block
mov dword [rbx+zlib_istate_mode_ofs], zmode_type
mov dword [rbx+zlib_istate_back_ofs], -1
jmp .mode_type
calign
.mode_len_inflate_fast_literal:
mov rdi, r15
mov esi, eax
shr esi, 16 ; here.val
call buffer$append_byte_noreserve
jmp .mode_len_inflate_fast_nextiteration ; check our while condition
calign
.mode_len_inflate_fast_length_base:
mov r10d, eax
shr r10d, 16 ; len = here.val
and r8d, 15 ; op &= 15 (extra bits)
test r8d, r8d
jz .mode_len_inflate_fast_length_base_noextra
cmp r13d, r8d
jae .mode_len_inflate_fast_noadd
zlib_inflate_fastcheck2
calign
.mode_len_inflate_fast_noadd:
; we need to do len += hold & ((1U << op) - 1)
mov edx, 1
mov ecx, r8d
shl edx, cl
sub edx, 1
mov ecx, r12d
and ecx, edx ; hold & ((1 << op) - 1)
add r10d, ecx ; len += ""
zlib_inflate_dropbits_reg r8d ; hold >>= op, bits -= op
calign
.mode_len_inflate_fast_length_base_noextra:
zlib_inflate_fastcheck
; here = dcode[hold & dmask]
mov r8, [rbx+zlib_istate_distcode_ofs]
mov eax, r12d
and eax, dword [rbx+zlib_istate_distbitsmask_ofs] ; hold & dmask
mov eax, dword [r8+rax*4] ; here = dcode[hold & dmask]
calign
.mode_len_inflate_fast_dodist:
mov r8d, eax
shr r8d, 8
and r8d, 0xff ; here.bits (op)
zlib_inflate_dropbits_reg r8d ; this doesn't blast eax, but does blast ecx for the shr
movzx r8d, al ; here.op
test r8d, 16
jnz .mode_len_inflate_fast_distbase
test r8d, 64
jnz .mode_bad ; invalid distance code
; else, 2nd level distance code
; so, here = dcode[here.val + (hold & ((1U << op) - 1))]
mov r9, [rbx+zlib_istate_distcode_ofs]
; here.val == high word in eax
; op is in r8b
mov edx, 1
mov ecx, r8d
shl edx, cl ; 1 << op
sub edx, 1 ; - 1
mov ecx, r12d
and ecx, edx ; hold & ((1 << op) - 1)
shr eax, 16 ; here.val
add eax, ecx
mov eax, dword [r9+rax*4] ; here = dcode[here.val + (hold & ((1U << op) - 1))]
jmp .mode_len_inflate_fast_dodist
calign
.mode_len_inflate_fast_distbase:
mov r11d, eax
shr r11d, 16 ; dist = here.val
and r8d, 15 ; op &= 15 (extra bits)
cmp r13d, r8d
jae .mode_len_inflate_fast_distbase_noadd
zlib_inflate_fastcheck2
cmp r13d, r8d
jae .mode_len_inflate_fast_distbase_noadd
zlib_inflate_fastcheck2
calign
.mode_len_inflate_fast_distbase_noadd:
; we need to do dist += hold & ((1U << op) - 1)
mov edx, 1
mov ecx, r8d
shl edx, cl
sub edx, 1
mov ecx, r12d
and ecx, edx ; hold & ((1 << op) - 1)
add r11d, ecx ; dist += ""
zlib_inflate_dropbits_reg r8d ; hold >>= op, bits -= op
mov r8, [r15+buffer_length_ofs]
sub r8, qword [rbx+zlib_istate_beg_ofs] ; max distance in output
cmp r11, r8 ; see if copy from window
ja .mode_len_inflate_fast_fromwindow
; else, copy direct from output
; note: since we did a reserve atop for the output buffer, this should be okay since the buffer won't expand
mov rdi, r15
mov rsi, [r15+buffer_length_ofs]
sub rsi, r11
add rsi, [r15+buffer_itself_ofs] ; out - dist
mov edx, r10d ; length
; _wow_ I am gobsmacked by this little gem...
; SO, it happens that distance can be _less_ than length, which means that the reference zlib
; _relies_ on byte for byte copies.... that was not my idea of fun figuring out.
; so if distance is less than length, then we didn't go as far BACK in the output as the length of our new goods
; and thus _replicate_ the byte(s) at the end, what a mess
; so, everywhere we do out - dist, we have to determine whether this condition happens
; and fallback to byte by byte copies like the reference version does
cmp r11, r10
jb .mode_len_inflate_fast_distbase_bytebybyte
call buffer$append
jmp .mode_len_inflate_fast_nextiteration ; check our while condition
calign
.mode_len_inflate_fast_distbase_bytebybyte:
; see above commentary about why this is necessary
mov rdi, [r15+buffer_endptr_ofs]
mov rcx, rdx
calign
.mode_len_inflate_fast_distbase_byteloop:
movzx eax, byte [rsi]
mov byte [rdi], al
add rsi, 1
add rdi, 1
sub rcx, 1
jnz .mode_len_inflate_fast_distbase_byteloop
; update endptr/length
add qword [r15+buffer_endptr_ofs], rdx
add qword [r15+buffer_length_ofs], rdx
jmp .mode_len_inflate_fast_nextiteration ; check our while condition
calign
.mode_len_inflate_fast_fromwindow:
mov eax, r11d ; dist
sub eax, r8d ; - op
mov r8d, eax ; op=
cmp eax, dword [rbx+zlib_istate_whave_ofs]
ja .mode_len_inflate_fast_fromwindow_case1
calign
.mode_len_inflate_fast_fromwindow_case2:
mov r9d, dword [rbx+zlib_istate_wnext_ofs]
test r9d, r9d
jz .mode_len_inflate_fast_fromwindow_case3
cmp r9d, r8d
jb .mode_len_inflate_fast_fromwindow_case4
; else, contiguous in window
mov esi, r9d ; from = wnext
sub esi, r8d ; - op
add rsi, qword [rbx+zlib_istate_window_ofs] ; + window
cmp r8d, r10d ; op < len?
jae .mode_len_inflate_fast_restfromoutput
; some from window
sub r10d, r8d ; len -= op
mov edx, r8d ; amount to copy from window
mov rdi, r15
push r10 r11 ; save dist and len
call buffer$append
pop r11 r10
mov rsi, [r15+buffer_length_ofs]
sub rsi, r11
add rsi, [r15+buffer_itself_ofs] ; from = out - dist
jmp .mode_len_inflate_fast_restfromoutput
calign
.mode_len_inflate_fast_fromwindow_case4:
; wrap around window
mov esi, dword [rbx+zlib_istate_wsize_ofs] ; from = wsize
add esi, dword [rbx+zlib_istate_wnext_ofs] ; + wnext
sub esi, r8d ; - op
add rsi, qword [rbx+zlib_istate_window_ofs] ; + window
sub r8d, dword [rbx+zlib_istate_wnext_ofs] ; op -= wnext
cmp r8d, r10d ; op < len?
jae .mode_len_inflate_fast_restfromoutput
; else, some from end of window
sub r10d, r8d ; len -= op
mov edx, r8d ; amount to copy from window
mov rdi, r15
push r8 r10 r11 ; save op, dist and len
call buffer$append
pop r11 r10 r8
mov rsi, qword [rbx+zlib_istate_window_ofs]
cmp dword [rbx+zlib_istate_wnext_ofs], r10d ; wnext < len?
jae .mode_len_inflate_fast_restfromoutput
; some from start of the window
mov r8d, dword [rbx+zlib_istate_wnext_ofs] ; op = wnext
sub r10d, r8d ; len -= op
mov edx, r8d
mov rdi, r15
push r10 r11
call buffer$append
pop r11 r10
mov rsi, [r15+buffer_length_ofs]
sub rsi, r11
add rsi, [r15+buffer_itself_ofs] ; from = out - dist
jmp .mode_len_inflate_fast_restfromoutput
calign
.mode_len_inflate_fast_fromwindow_case3:
; very common case
mov esi, dword [rbx+zlib_istate_wsize_ofs] ; from = wsize
sub esi, r8d ; - op
add rsi, qword [rbx+zlib_istate_window_ofs] ; + window
cmp r8d, r10d
jae .mode_len_inflate_fast_restfromoutput
; some from window
sub r10d, r8d ; len -= op
mov edx, r8d ; amount to copy from window
mov rdi, r15
push r10 r11 ; save dist and len
call buffer$append
pop r11 r10
mov rsi, [r15+buffer_length_ofs]
sub rsi, r11
add rsi, [r15+buffer_itself_ofs] ; from = out - dist
jmp .mode_len_inflate_fast_restfromoutput
calign
.mode_len_inflate_fast_fromwindow_case1:
cmp dword [rbx+zlib_istate_sane_ofs], 0
je .mode_len_inflate_fast_fromwindow_case2
jmp .mode_bad ; invalid distance too far back
calign
.mode_len_inflate_fast_restfromoutput:
; from in rsi already setup, and len in r10d should be valid
mov rdi, r15
mov edx, r10d
call buffer$append
jmp .mode_len_inflate_fast_nextiteration ; check our while condition
calign
.mode_len_inflate_fast_2ndlevellength:
; here = lcode[here.val + (hold & ((1U << op) - 1))]
mov r9, [rbx+zlib_istate_lencode_ofs] ; get lcode ready
; here.val == high word in eax
; op is in r8b
mov edx, 1
mov ecx, r8d
shl edx, cl
sub edx, 1
mov ecx, r12d
and ecx, edx ; hold & ((1 << op) - 1)
shr eax, 16 ; here.val
add eax, ecx
mov eax, dword [r9+rax*4] ; here = lcode[here.val + (hold & ((1U << op) - 1))]
jmp .mode_len_inflate_fast_dolen
calign
.mode_len_inflate_fast_nextiteration:
; the reference's inflate_fast uses a do ... while loop here
; we set fastlast to the current buffer pointer - 5, so we can compare what is sitting in the user buffer now against it
mov rax, [r14+buffer_user_ofs]
cmp rax, [rbx+zlib_istate_fastlast_ofs]
jb .mode_len_inflate_fast_top
; otherwise, fall back out to mode_len
mov eax, r13d
shr eax, 3
; subtract that from our user pointer/length
sub qword [r14+buffer_user_ofs], rax
add qword [r14+buffer_user_ofs+8], rax
shl eax, 3
sub r13d, eax
mov ecx, r13d
mov edx, 1
shl edx, cl
sub edx, 1
and r12, rdx
jmp .mode_len
calign
.mode_len_noinflate:
; so if have < 6 and mode == zmode_len, we end up here
mov dword [rbx+zlib_istate_back_ofs], 0
calign
.mode_len_getbits:
; accum in r12, bits in r13d
mov rsi, [rbx+zlib_istate_lencode_ofs]
mov edx, dword [rbx+zlib_istate_lenbitsmask_ofs]
mov ecx, r12d
and ecx, edx
mov eax, [rsi+rcx*4] ; here = state->lencode[BITS(state->lenbits)]
movzx ecx, ah ; here.bits
cmp ecx, r13d
jbe .mode_len_gotbits
; else, we need MORE bits... but we don't want to necessarily assume that a dword would do the trick
; hmmm... TODO: see how many times this thing actually gets called, and monitor the bit accumulator
; ... the REASON we can't just say "hey gimme 32 bits" is because this code might not actually need that many
; and we'd be falsely stating that we did and prematurely bailing out if in fact there were enough here
; already ... hmmm
mov r8d, r13d
add r8d, 8
zlib_inflate_needbits_reg r8d
jmp .mode_len_getbits
calign
.mode_len_gotbits:
; if (here.op && (here.op & 0xf0) == 0)
test al, al ; here.op ?
jz .mode_len_nolast
test al, 0xf0
jnz .mode_len_nolast
; similar to the above getbits, but we need: here = state->lencode[last.val + (BITS(last.bits + last.op) >> last.bits)]
; and the enough check to be last.bits + here.bits
mov r11d, eax
shr r11d, 16 ; r11d == last.val
movzx ecx, al
movzx edx, ah
mov r9d, edx ; last.bits
add ecx, edx
mov r10d, 1
shl r10d, cl
sub r10d, 1 ; r10d == mask for BITS(last.bits + last.op)
calign
.mode_len_getlastbits:
mov rsi, [rbx+zlib_istate_lencode_ofs]
mov ecx, r9d
mov edx, r12d
and edx, r10d
shr edx, cl
add edx, r11d
mov eax, [rsi+rdx*4] ; here = state->lencode[last.val + (BITS(last.bits + last.op) >> last.bits)]
movzx ecx, ah ; here.bits
add ecx, r9d ; + last.bits
cmp ecx, r13d
jbe .mode_len_gotlastbits
; else, we need MORE bits
mov r8d, r13d
add r8d, 8
zlib_inflate_needbits_reg r8d
jmp .mode_len_getlastbits
calign
.mode_len_dolit:
mov dword [rbx+zlib_istate_mode_ofs], zmode_lit
jmp .mode_lit
calign
.mode_len_eob:
mov dword [rbx+zlib_istate_back_ofs], -1
mov dword [rbx+zlib_istate_mode_ofs], zmode_type
jmp .mode_type
calign
.mode_len_gotlastbits:
zlib_inflate_dropbits_reg r9d ; DROPBITS(last.bits)
add dword [rbx+zlib_istate_back_ofs], r9d
calign
.mode_len_nolast:
movzx edx, ah
mov r8d, edx ; here.bits
zlib_inflate_dropbits_reg r8d ; DROPBITS(here.bits)
add dword [rbx+zlib_istate_back_ofs], r8d
mov r9d, eax
shr r9d, 16
; wow, I was tired or something when I typed this: add dword [rbx+zlib_istate_back_ofs], r9d ; state->length += here.val
; should be:
mov dword [rbx+zlib_istate_length_ofs], r9d ; state->length = (unsigned)here.val
test al, al
jz .mode_len_dolit
test al, 32
jnz .mode_len_eob
test al, 64
jnz .mode_bad ; invalid literal/length code
; otherwise, set extra to op & 15
; then mode to lenext, and fallthrough
movzx r9d, al
and r9d, 15
mov dword [rbx+zlib_istate_extra_ofs], r9d
mov dword [rbx+zlib_istate_mode_ofs], zmode_lenext
; fallthrough to .mode_lenext
calign
.mode_lenext:
mov r8d, dword [rbx+zlib_istate_extra_ofs]
test r8d, r8d
jnz .mode_lenext_extrabits
mov r9d, dword [rbx+zlib_istate_length_ofs]
mov dword [rbx+zlib_istate_was_ofs], r9d ; state->was = state->length
mov dword [rbx+zlib_istate_mode_ofs], zmode_dist
jmp .mode_dist
calign
.mode_lenext_extrabits:
zlib_inflate_needbits_reg r8d ; NEEDBITS(state->extra)
mov edx, 1
mov ecx, r8d
shl edx, cl
sub edx, 1
mov eax, r12d
and eax, edx ; BITS(state->extra)
add dword [rbx+zlib_istate_length_ofs], eax
zlib_inflate_dropbits_reg r8d ; DROPBITS(state->extra)
add dword [rbx+zlib_istate_back_ofs], r8d ; state->back += state->extra
mov r9d, dword [rbx+zlib_istate_length_ofs]
mov dword [rbx+zlib_istate_was_ofs], r9d
mov dword [rbx+zlib_istate_mode_ofs], zmode_dist
; fallthrough to .mode_dist
calign
.mode_dist:
; accum in r12, bits in r13d
mov rsi, [rbx+zlib_istate_distcode_ofs]
mov edx, dword [rbx+zlib_istate_distbitsmask_ofs]
mov ecx, r12d
and ecx, edx
mov eax, [rsi+rcx*4] ; here = state->distcode[BITS(state->distbits)]
movzx ecx, ah ; here.bits
cmp ecx, r13d
jbe .mode_dist_gotbits
; else, we need MORE bits... but we don't want to necessarily assume that a dword would do the trick
; hmmm... TODO: see how many times this thing actually gets called, and monitor the bit accumulator
; ... the REASON we can't just say "hey gimme 32 bits" is because this code might not actually need that many
; and we'd be falsely stating that we did and prematurely bailing out if in fact there were enough here
; already ... hmmm
mov r8d, r13d
add r8d, 8
zlib_inflate_needbits_reg r8d
jmp .mode_dist
calign
.mode_dist_gotbits:
; if (here.op && (here.op & 0xf0) == 0)
test al, 0xf0
jnz .mode_dist_nolast
; similar to the above getbits, but we need: here = state->distcode[last.val + (BITS(last.bits + last.op) >> last.bits)]
; and the enough check to be last.bits + here.bits
mov r11d, eax
shr r11d, 16 ; r11d == last.val
movzx ecx, al
movzx edx, ah
mov r9d, edx ; last.bits
add ecx, edx
mov r10d, 1
shl r10d, cl
sub r10d, 1 ; r10d == mask for BITS(last.bits + last.op)
calign
.mode_dist_getlastbits:
mov rsi, [rbx+zlib_istate_distcode_ofs]
mov ecx, r9d
mov edx, r12d
and edx, r10d
shr edx, cl
add edx, r11d
mov eax, [rsi+rdx*4] ; here = state->distcode[last.val + (BITS(last.bits + last.op) >> last.bits)]
movzx ecx, ah ; here.bits
add ecx, r9d ; + last.bits
cmp ecx, r13d
jbe .mode_dist_gotlastbits
; else, we need MORE bits
mov r8d, r13d
add r8d, 8
zlib_inflate_needbits_reg r8d
jmp .mode_dist_getlastbits
calign
.mode_dist_gotlastbits:
zlib_inflate_dropbits_reg r9d ; DROPBITS(last.bits)
add dword [rbx+zlib_istate_back_ofs], r9d ; state->back += last.bits
calign
.mode_dist_nolast:
movzx edx, ah
mov r8d, edx ; here.bits
zlib_inflate_dropbits_reg r8d ; DROPBITS(here.bits)
add dword [rbx+zlib_istate_back_ofs], r8d ; state->back += here.bits
test al, 64
jnz .mode_bad ; invalid distance code
mov ecx, eax
shr ecx, 16
mov dword [rbx+zlib_istate_offset_ofs], ecx ; state->offset = here.val
and eax, 15
mov dword [rbx+zlib_istate_extra_ofs], eax
mov dword [rbx+zlib_istate_mode_ofs], zmode_distext
; fallthrough to .mode_distext
calign
.mode_distext:
mov r8d, dword [rbx+zlib_istate_extra_ofs]
test r8d, r8d
jnz .mode_distext_extrabits
mov dword [rbx+zlib_istate_mode_ofs], zmode_match
jmp .mode_match
calign
.mode_distext_extrabits:
zlib_inflate_needbits_reg r8d ; NEEDBITS(state->extra)
mov edx, 1
mov ecx, r8d
shl edx, cl
sub edx, 1
mov eax, r12d
and eax, edx ; BITS(state->extra)
add dword [rbx+zlib_istate_offset_ofs], eax ; state->offset += BITS(state->extra)
zlib_inflate_dropbits_reg r8d ; DROPBITS(state->extra)
add dword [rbx+zlib_istate_back_ofs], r8d ; state->back += state->extra
mov dword [rbx+zlib_istate_mode_ofs], zmode_match
; fallthrough to .mode_match
calign
.mode_match:
; make sure we reserve at least 258 bytes in our output buffer so that it doesn't relocate on us when we append
mov rdi, r15
mov esi, 258
call buffer$reserve
; copy == number of bytes occupied in our output buffer
mov r10, [r15+buffer_length_ofs] ; copy == output buffer occupied
mov r11d, [rbx+zlib_istate_offset_ofs]
cmp r11, r10 ; 64 bit compares here might be important (only if output buffer is _huge_)
ja .mode_match_fromwindow ; offset > copy, if so, from window it is
; else, we copy from the output
mov rsi, [r15+buffer_itself_ofs]
add rsi, r10
sub rsi, r11 ; from = "put" - offset
mov edx, dword [rbx+zlib_istate_length_ofs]
jmp .mode_match_docopy
calign
.mode_match_fromwindow:
sub r11, r10 ; copy = offset - copy
cmp r11d, dword [rbx+zlib_istate_whave_ofs]
jbe .mode_match_fromwindow_okay
cmp dword [rbx+zlib_istate_sane_ofs], 0
jne .mode_bad ; invalid distance too far back
calign
.mode_match_fromwindow_okay:
cmp r11d, dword [rbx+zlib_istate_wnext_ofs]
ja .mode_match_fromwindow_copyadjust
; else, from = state->window + (state->wnext - copy)
mov esi, dword [rbx+zlib_istate_wnext_ofs]
sub esi, r11d
add rsi, qword [rbx+zlib_istate_window_ofs]
mov edx, r11d
mov ecx, dword [rbx+zlib_istate_length_ofs]
cmp edx, ecx
cmova edx, ecx
jmp .mode_match_docopy
calign
.mode_match_fromwindow_copyadjust:
sub r11d, dword [rbx+zlib_istate_wnext_ofs] ; copy -= state->wnext
mov esi, dword [rbx+zlib_istate_wsize_ofs]
sub esi, r11d
add rsi, qword [rbx+zlib_istate_window_ofs] ; from = window + (wsize - copy)
mov edx, r11d
mov ecx, dword [rbx+zlib_istate_length_ofs]
cmp edx, ecx
cmova edx, ecx
; fallthrough to .mode_match_docopy
calign
.mode_match_docopy:
; copy is in edx, from is in rsi
sub dword [rbx+zlib_istate_length_ofs], edx ; state->length -= copy
; well, isn't that pleasant... so in the reference, they do a byte by byte forward walk to copy it
; turns out, this is quite important: if we do a normal buffer$append/memcpy/etc, we can walk off
; the end of the buffer
; so, like the reference version, we too need to do a byte by byte walk
; the old way here would have been considerably faster
; old way: mov rdi, r15
; old way: call buffer$append
test edx, edx
jz .mode_match_nocopy
mov rdi, [r15+buffer_endptr_ofs]
add qword [r15+buffer_length_ofs], rdx
add qword [r15+buffer_endptr_ofs], rdx
xor ecx, ecx
calign
.mode_match_copyloop:
movzx eax, byte [rsi+rcx]
mov [rdi+rcx], al
add ecx, 1
sub edx, 1
jnz .mode_match_copyloop
calign
.mode_match_nocopy:
mov edx, [rbx+zlib_istate_mode_ofs]
mov ecx, zmode_len
cmp dword [rbx+zlib_istate_length_ofs], 0
cmove edx, ecx
mov [rbx+zlib_istate_mode_ofs], edx
je .mode_len
jmp .mode_match
calign
.mode_lit:
mov esi, dword [rbx+zlib_istate_length_ofs]
mov rdi, r15
call buffer$append_byte_noreserve
mov dword [rbx+zlib_istate_mode_ofs], zmode_len
jmp .mode_len
calign
.mode_check:
cmp dword [rbx+zlib_istate_wrap_ofs], 0
je .mode_check_nowrap
zlib_inflate_needbits 32
mov rsi, [rbx+zlib_istate_streamp_ofs]
mov rdx, [r15+buffer_length_ofs]
sub rdx, qword [rbx+zlib_istate_orig_outlength_ofs]
add qword [rsi+zlib_totalout_ofs], rdx
add qword [rbx+zlib_istate_total_ofs], rdx
mov rdi, [rbx+zlib_istate_check_ofs]
mov rsi, [r15+buffer_itself_ofs]
add rsi, qword [rbx+zlib_istate_orig_outlength_ofs]
cmp dword [rbx+zlib_istate_flags_ofs], 0
je .mode_check_adler
call crc$32
mov [rbx+zlib_istate_check_ofs], rax ; store the result
cmp r12d, eax
jne .mode_bad ; incorrect data check
jmp .mode_check_updatedone
calign
.mode_check_adler:
call adler32
mov dword [rbx+zlib_istate_check_ofs], eax ; store the result
mov eax, r12d
bswap eax
cmp eax, dword [rbx+zlib_istate_check_ofs]
jne .mode_bad ; incorrect data check
; fallthrough to mode_check_updatedone
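; note: the zlib trailer stores Adler-32 big endian (hence the bswap above),
; while the gzip trailer stores its CRC-32 little endian, which is why the crc
; path compares the raw accumulator directly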
calign
.mode_check_updatedone:
zlib_inflate_dropbits 32
mov dword [rbx+zlib_istate_mode_ofs], zmode_length
jmp .mode_length
calign
.mode_check_nowrap:
mov dword [rbx+zlib_istate_mode_ofs], zmode_length
; fallthrough to .mode_length
calign
.mode_length:
mov dword [rbx+zlib_istate_mode_ofs], zmode_done
cmp dword [rbx+zlib_istate_wrap_ofs], 0
je .mode_done
cmp dword [rbx+zlib_istate_flags_ofs], 0
je .mode_done
zlib_inflate_needbits 32
mov rcx, [rbx+zlib_istate_total_ofs]
cmp ecx, r12d
jne .mode_bad ; incorrect length check
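; i.e. the reference LENGTH state, approximately:
;   if (state->wrap && state->flags) {
;       NEEDBITS(32);
;       if (hold != (state->total & 0xffffffff)) -> BAD ("incorrect length check");
;       INITBITS();
;   }
;   state->mode = DONE;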
; fallthrough to .mode_done
calign
.mode_done:
xor r12d, r12d
xor r13d, r13d
; we want to return a 1 in eax for successful return, but clean up all our state information as well
; the reference version jumps straight to inf_leave here... hmmm
mov [rbx+zlib_istate_hold_ofs], r12
mov [rbx+zlib_istate_bits_ofs], r13d
mov rdi, r14
call buffer$reset ; clear our input buffer, cuz presumably we exhausted it
; all our goods are completed, so we really don't have to do much else
mov eax, 1
pop r15 r14 r13 r12 rbx
epilog
calign
.mode_bad:
.mode_mem:
.mode_sync:
; it is presumed that we won't be called again with this (now invalid) state
; so we don't have to do much cleanup
; we want to return a 0 in eax for failed return
mov [rbx+zlib_istate_hold_ofs], r12
mov [rbx+zlib_istate_bits_ofs], r13d
mov rdi, r14
call buffer$reset ; clear our input buffer, cuz presumably we exhausted it
xor eax, eax
pop r15 r14 r13 r12 rbx
epilog
calign
.inf_leave:
; this means we ran out of input
mov [rbx+zlib_istate_hold_ofs], r12
mov [rbx+zlib_istate_bits_ofs], r13d
mov rdi, r14
call buffer$reset ; clear our input buffer, cuz presumably we exhausted it
; we know mode < bad, and can't be check
; updatewindow inlined here:
; note: at our init time, we go ahead and create a window and init wsize/wnext/whave
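; a paraphrase of the reference updatewindow() that the code below inlines
; (wsize is fixed at 32768 here since the window is created at init time):
;   copy = bytes written this call;
;   if (copy >= wsize) {
;       memcpy(window, end - wsize, wsize);
;       wnext = 0;  whave = wsize;
;   } else {
;       dist = wsize - wnext;  if (dist > copy) dist = copy;
;       memcpy(window + wnext, end - copy, dist);
;       copy -= dist;
;       if (copy) {                             /* wrapped: second piece at window start */
;           memcpy(window, end - copy, copy);
;           wnext = copy;  whave = wsize;
;       } else {
;           wnext += dist;  if (wnext == wsize) wnext = 0;
;           if (whave < wsize) whave += dist;
;       }
;   }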
mov rdx, [r15+buffer_length_ofs]
mov rsi, [r15+buffer_itself_ofs]
add rsi, rdx
sub rdx, qword [rbx+zlib_istate_orig_outlength_ofs]
; rdx == copy
; rsi is the end of our output
cmp rdx, 32768
jae .inf_leave_wsize_or_better
mov r10d, 32768
sub r10d, dword [rbx+zlib_istate_wnext_ofs] ; dist = wsize - wnext
cmp r10d, edx
cmova r10d, edx
mov r11d, dword [rbx+zlib_istate_wnext_ofs]
mov rdi, [rbx+zlib_istate_window_ofs]
add rdi, r11
sub rsi, rdx ; end - copy
push rdx r10
mov edx, r10d ; dist is the amount
call memcpy
pop r10 rdx
sub rdx, r10 ; copy -= dist
test rdx, rdx
jnz .inf_leave_window_copytwo
add dword [rbx+zlib_istate_wnext_ofs], r10d ; state->wnext += dist
xor ecx, ecx
mov edx, dword [rbx+zlib_istate_wnext_ofs]
cmp edx, 32768
cmove edx, ecx
mov dword [rbx+zlib_istate_wnext_ofs], edx
mov edx, dword [rbx+zlib_istate_whave_ofs]
mov ecx, edx
add ecx, r10d
cmp edx, 32768
cmovb edx, ecx
mov dword [rbx+zlib_istate_whave_ofs], edx
jmp .inf_leave_windowsweet
calign
.inf_leave_window_copytwo:
mov rsi, [r15+buffer_itself_ofs]
add rsi, qword [r15+buffer_length_ofs]
sub rsi, rdx ; end - copy == source
; rdx == copy amount
mov rdi, [rbx+zlib_istate_window_ofs]
push rdx
call memcpy
pop rdx
mov dword [rbx+zlib_istate_wnext_ofs], edx ; wnext = copy
mov dword [rbx+zlib_istate_whave_ofs], 32768 ; whave = wsize
jmp .inf_leave_windowsweet
calign
.inf_leave_wsize_or_better:
sub rsi, 32768
mov edx, 32768
mov rdi, [rbx+zlib_istate_window_ofs]
call memcpy
mov dword [rbx+zlib_istate_wnext_ofs], 0
mov dword [rbx+zlib_istate_whave_ofs], 32768
; fallthrough to .inf_leave_windowsweet
calign
.inf_leave_windowsweet:
; end of updatewindow, back to inf_leave goods
mov rsi, [rbx+zlib_istate_streamp_ofs]
mov rdx, [r15+buffer_length_ofs]
sub rdx, qword [rbx+zlib_istate_orig_outlength_ofs]
add qword [rsi+zlib_totalout_ofs], rdx
add qword [rbx+zlib_istate_total_ofs], rdx
cmp dword [rbx+zlib_istate_wrap_ofs], 0
je .inf_leave_nowrap
mov rdi, [rbx+zlib_istate_check_ofs]
mov rsi, [r15+buffer_itself_ofs]
add rsi, qword [rbx+zlib_istate_orig_outlength_ofs]
cmp dword [rbx+zlib_istate_flags_ofs], 0
je .inf_leave_adler
call crc$32
mov dword [rbx+zlib_istate_check_ofs], eax
jmp .inf_leave_nowrap
calign
.inf_leave_adler:
call adler32
mov [rbx+zlib_istate_check_ofs], eax
calign
.inf_leave_nowrap:
mov eax, 1
pop r15 r14 r13 r12 rbx
epilog
dalign
.modejumps:
dq .mode_head, .mode_flags, .mode_time, .mode_os, .mode_exlen, .mode_extra, .mode_name, .mode_comment
dq .mode_hcrc, .mode_dictid, .mode_dict, .mode_type, .mode_typedo, .mode_stored, .mode_copy_, .mode_copy
dq .mode_table, .mode_lenlens, .mode_codelens, .mode_len_, .mode_len, .mode_lenext, .mode_dist
dq .mode_distext, .mode_match, .mode_lit, .mode_check, .mode_length, .mode_done, .mode_bad, .mode_mem
dq .mode_sync
calign
.error_return:
xor eax, eax
pop r15 r14 r13 r12 rbx
epilog
falign
.inflate_table:
; edi == type, rsi == lens (ushort *), edx == codes, rcx == table (code **), r8 == bits (uint *), r9 == work (ushort *)
; unlike the reference version, we return eax == 1 on success, eax == 0 on fail
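; for orientation, the reference signature in inftrees.c is approximately:
;   int inflate_table(codetype type, unsigned short *lens, unsigned codes,
;                     code **table, unsigned *bits, unsigned short *work);
; and each code entry is handled here as a packed dword: (val << 16) | (bits << 8) | op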
push rbp rbx r12 r13 r14 r15
sub rsp, 168
xor r10d, r10d
mov [rsp], rdi
mov [rsp+8], rsi
mov [rsp+16], rdx
mov [rsp+24], rcx
mov [rsp+32], r8
mov [rsp+40], r9
; count at [rsp+48] for 32 bytes
; offs at [rsp+80] for 32 bytes
; 112+ holds root/max/table/end/used/mask/used_limit once the main loop is set up
mov [rsp+48], r10
mov [rsp+56], r10
mov [rsp+64], r10
mov [rsp+72], r10
mov [rsp+80], r10 ; preemptively set offs[1] = 0 (though we clear offs[0..3] of course)
calign
.inflate_table_acclens:
movzx eax, word [rsi+r10*2]
add r10d, 1
add word [rsp+rax*2+48], 1
cmp r10d, edx
jb .inflate_table_acclens
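; the loop above is the reference's frequency count (the qword stores already cleared count[]):
;   for (sym = 0; sym < codes; sym++)
;       count[lens[sym]]++;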
; we'll keep root in ebp, max in ebx
mov ebp, dword [r8]
mov ebx, 15
calign
.inflate_table_boundcodelens:
cmp word [rsp+rbx*2+48], 0
jne .inflate_table_boundcodelens_maxdone
sub ebx, 1
jnz .inflate_table_boundcodelens
calign
.inflate_table_boundcodelens_maxdone:
cmp ebp, ebx
cmova ebp, ebx
test ebx, ebx
jz .inflate_table_nosymbols
; we'll keep min in r12d
mov r12d, 1
calign
.inflate_table_boundcodelens_min:
cmp r12d, ebx
jae .inflate_table_boundcodelens_mindone
cmp word [rsp+r12*2+48], 0
jne .inflate_table_boundcodelens_mindone
add r12d, 1
jmp .inflate_table_boundcodelens_min
calign
.inflate_table_boundcodelens_mindone:
cmp ebp, r12d
cmovb ebp, r12d
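; i.e. the reference's bounding of the root table size:
;   root = *bits;
;   if (root > max) root = max;
;   if (root < min) root = min;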
; check for an over-subscribed or incomplete set of lengths
mov r10d, 1 ; len
mov r11d, 1 ; left
calign
.inflate_table_checkoversub:
cmp r10d, 15
ja .inflate_table_checkoversubdone
shl r11d, 1
movzx eax, word [rsp+r10*2+48]
sub r11d, eax
cmp r11d, 0
jl .inflate_table_error ; over-subscribed
add r10d, 1
jmp .inflate_table_checkoversub
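; reference equivalent of the loop above:
;   for (left = 1, len = 1; len <= 15; len++) {
;       left <<= 1;
;       left -= count[len];
;       if (left < 0) return over-subscribed;
;   }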
calign
.inflate_table_checkoversubdone:
mov r10d, 1 ; get ready for next loop
cmp r11d, 0
jle .inflate_table_genoffs ; complete set
; else, left > 0
cmp edi, zlib_inftree_codes ; type == CODES
je .inflate_table_error ; incomplete set
cmp ebx, 1 ; max != 1
jne .inflate_table_error ; incomplete set
calign
.inflate_table_genoffs:
; generate offsets into symbol table for each length for sorting
xor r11d, r11d ; we'll use this for sym for the loop
cmp r10d, 15
jae .inflate_table_symsort
movzx eax, word [rsp+r10*2+80] ; offs[len]
add ax, word [rsp+r10*2+48] ; + count[len]
mov word [rsp+r10*2+82], ax ; offs[len+1] =
add r10d, 1
jmp .inflate_table_genoffs
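; reference equivalent of the offset generation above:
;   offs[1] = 0;
;   for (len = 1; len < 15; len++)
;       offs[len + 1] = offs[len] + count[len];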
calign
.inflate_table_symsort:
cmp r11d, edx ; sym
jae .inflate_table_typecheck
mov r10d, r11d
add r10d, 1
cmp word [rsi+r11*2], 0
cmove r11d, r10d
je .inflate_table_symsort
; otherwise, lens[sym] != 0, so we need to do:
; work[offs[lens[sym]]++] = (ushort)sym;
movzx eax, word [rsi+r11*2] ; lens[sym]
movzx r10d, word [rsp+rax*2+80] ; offs[lens[sym]]
add word [rsp+rax*2+80], 1 ; ++
mov word [r9+r10*2], r11w ; work[offs[lens[sym]]++] = sym
add r11d, 1
jmp .inflate_table_symsort
calign
.inflate_table_typecheck:
mov [rsp+112], rbp ; root
mov [rsp+120], rbx ; max
mov [rsp+128], rcx ; table
cmp edi, zlib_inftree_codes
je .inflate_table_codes
cmp edi, zlib_inftree_lens
je .inflate_table_lens
; dists
mov r13d, ebp ; curr = root
mov r11, [rcx] ; next = *table
mov ecx, ebp
mov rbp, .inflate_table_dbase ; base
mov rbx, .inflate_table_dext ; extra
mov dword [rsp+136], -1 ; end
xor edx, edx ; huff = 0
xor r8d, r8d ; sym = 0
mov r10d, r12d ; len = min
xor r14d, r14d ; drop = 0
mov r15d, -1 ; low = -1
mov eax, 1
shl eax, cl
cmp eax, 592
ja .inflate_table_error ; type == DISTS && used > ENOUGH_DISTS (592) == insufficient space
mov dword [rsp+144], eax ; used = 1 << root
sub eax, 1
mov dword [rsp+152], eax ; mask = used - 1
; next we need used limit
mov dword [rsp+160], 592 ; used_limit = ENOUGH_DISTS = 592
jmp .inflate_table_mainloop
calign
.inflate_table_lens:
mov r13d, ebp ; curr = root
mov r11, [rcx] ; next = *table
mov ecx, ebp
mov rbp, .inflate_table_lbase ; base
sub rbp, 257 shl 1 ; base -= 257 (ushort-sized entries; the LENS symbols that index it are >= 257)
mov rbx, .inflate_table_lext ; extra
sub rbx, 257 shl 1 ; extra -= 257, same deal
mov dword [rsp+136], 256 ; end
xor edx, edx ; huff = 0
xor r8d, r8d ; sym = 0
mov r10d, r12d ; len = min
xor r14d, r14d ; drop = 0
mov r15d, -1 ; low = -1
mov eax, 1
shl eax, cl
cmp eax, 852
ja .inflate_table_error ; type == LENS && used > ENOUGH_LENS (852) == insufficient space
mov dword [rsp+144], eax ; used = 1 << root
sub eax, 1
mov dword [rsp+152], eax ; mask = used - 1
; next we need used limit
mov dword [rsp+160], 852 ; used_limit = ENOUGH_LENS = 852
jmp .inflate_table_mainloop
calign
.inflate_table_codes:
mov r13d, ebp ; curr = root
mov r11, [rcx] ; next = *table
mov ecx, ebp
mov rbp, r9 ; base
mov rbx, r9 ; extra
mov dword [rsp+136], 19 ; end
xor edx, edx ; huff = 0
xor r8d, r8d ; sym = 0
mov r10d, r12d ; len = min
xor r14d, r14d ; drop = 0
mov r15d, -1 ; low = -1
mov eax, 1
shl eax, cl
mov dword [rsp+144], eax ; used = 1 << root
sub eax, 1
mov dword [rsp+152], eax ; mask = used - 1
; next we need used limit
; because there really is no limit for CODES, let's just set it to 0xffff
mov dword [rsp+160], 0xffff ; used_limit is an arbitrarily large number
; fallthrough to .inflate_table_mainloop
; so at this point:
; rbp base
; rbx extra
; edx huff
; r8d sym
; r9 work
; r10d len
; r11 next
; r12d min
; r13d curr
; r14d drop
; r15d low
; dword [rsp] type
; [rsp+8] lens
; [rsp+112] root
; [rsp+120] max
; [rsp+128] table
; [rsp+136] end
; [rsp+144] used
; [rsp+152] mask
; [rsp+160] used_limit
; edi/esi are more or less free
; eax, ecx are free
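; the three setup blocks above correspond to the reference's switch (paraphrased):
;   switch (type) {
;   case CODES: base = extra = work;                    end = 19;  break;
;   case LENS:  base = lbase - 257; extra = lext - 257; end = 256; break;
;   default:    base = dbase; extra = dext;             end = -1;  break;  /* DISTS */
;   }
;   used = 1U << root;  mask = used - 1;
; with the ENOUGH_LENS (852) / ENOUGH_DISTS (592) space checks done up front, and
; used_limit kept around for the incremental check when sub-tables are added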
calign
.inflate_table_mainloop:
; op:8, bits:8, val:16
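; the entry built into eax below is the reference's 'here' (paraphrased):
;   here.bits = (unsigned char)(len - drop);
;   if ((int)(work[sym]) < end)      { here.op = 0;                here.val = work[sym]; }
;   else if ((int)(work[sym]) > end) { here.op = extra[work[sym]]; here.val = base[work[sym]]; }
;   else                             { here.op = 32 + 64;          here.val = 0; }   /* end of block */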
movzx ecx, word [r9+r8*2] ; work[sym]
cmp ecx, dword [rsp+136]
jl .inflate_table_mainloop_case1
jg .inflate_table_mainloop_case2
; case3
; low order byte == 96 (end of block), next byte: (len - drop), next word: 0
mov eax, r10d
sub eax, r14d
shl eax, 8
or eax, 0x60
jmp .inflate_table_mainloop_hereset
calign
.inflate_table_mainloop_case1:
; low order byte == 0, next byte: (len - drop), next word: cx (work[sym])
shl ecx, 16
mov eax, r10d
sub eax, r14d
shl eax, 8
or eax, ecx
jmp .inflate_table_mainloop_hereset
calign
.inflate_table_mainloop_case2:
; low order byte == extra[work[sym]] (extra[ecx]), next byte: (len - drop), next word: base[work[sym]] (base[ecx])
movzx eax, word [rbp+rcx*2]
shl eax, 16 ; base[work[sym]]
movzx esi, word [rbx+rcx*2] ; extra[work[sym]]
mov ecx, r10d
sub ecx, r14d
and ecx, 0xff
shl ecx, 8
or eax, esi
or eax, ecx
; fallthrough to .inflate_table_mainloop_hereset
calign
.inflate_table_mainloop_hereset:
; here in eax is set
; replicate for those indices with low len bits equal to huff
mov ecx, r10d
sub ecx, r14d ; len - drop
; edi == incr, esi == fill
mov edi, 1
mov esi, 1
shl edi, cl ; incr = 1 shl (len - drop)
mov ecx, r13d ; curr
shl esi, cl ; fill = 1 shl (curr)
mov r12d, esi ; min = fill (save offset to next table)
; we need an extra var for this loop
push r8
mov ecx, r14d ; save drop in ecx for our shr
calign
.inflate_table_reploop:
sub esi, edi
mov r8d, edx
shr r8d, cl ; huff >> drop
add r8d, esi ; + fill
mov dword [r11+r8*4], eax ; next[(huff >> drop)+fill] = here
test esi, esi
jnz .inflate_table_reploop
pop r8
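; the replication loop above, in reference terms:
;   incr = 1U << (len - drop);
;   fill = 1U << curr;
;   min = fill;                      /* save offset to next table */
;   do {
;       fill -= incr;
;       next[(huff >> drop) + fill] = here;
;   } while (fill != 0);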
; next up: backwards increment the len-bit code huff
mov edi, 1
mov ecx, r10d
sub ecx, 1
shl edi, cl ; incr = 1 << (len - 1)
calign
.inflate_table_incrmod:
test edx, edi
jz .inflate_table_incrmod_done
shr edi, 1
jmp .inflate_table_incrmod
calign
.inflate_table_incrmod_done:
; depending on whether incr != 0 or not, need to modify huff (edx)
mov esi, edi
mov ecx, edx ; ecx = huff
sub esi, 1
and ecx, esi ; ecx = huff & (incr - 1)
add ecx, edi ; ecx = (huff & (incr - 1)) + incr
xor esi, esi
test edi, edi
cmovz edx, esi ; if (!incr) huff = 0
cmovnz edx, ecx ; else huff = (huff & (incr - 1)) + incr
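; which together implement the reference's backwards increment of the len-bit code:
;   incr = 1U << (len - 1);
;   while (huff & incr) incr >>= 1;
;   if (incr != 0) { huff &= incr - 1; huff += incr; }
;   else huff = 0;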
add r8d, 1 ; sym++
; count at [rsp+48] for 32 bytes
; offs at [rsp+80] for 32 bytes
sub word [rsp+r10*2+48], 1 ; --(count[len])
jnz .inflate_table_countdecnz
cmp r10d, dword [rsp+120] ; len == max?
je .inflate_table_mainloop_done
mov rsi, [rsp+8] ; get lens back
movzx ecx, word [r9+r8*2] ; work[sym]
movzx r10d, word [rsi+rcx*2] ; len = lens[work[sym]]
calign
.inflate_table_countdecnz:
; create new sub-table if needed
cmp r10d, dword [rsp+112]
jbe .inflate_table_mainloop
mov eax, edx
and eax, dword [rsp+152]
cmp eax, r15d
je .inflate_table_mainloop
test r14d, r14d
cmovz r14d, dword [rsp+112] ; if (!drop) drop = root
mov eax, r12d
shl eax, 2
add r11, rax ; next += min (next is meant to be code*, so shl 2 for byte offset)
mov r13d, r10d
mov edi, 1
sub r13d, r14d ; curr = len - drop
mov ecx, r13d
shl edi, cl ; left = 1 << curr
calign
.inflate_table_nextlength:
mov eax, r13d
add eax, r14d ; curr + drop
cmp eax, dword [rsp+120]
jae .inflate_table_nextlength_done
movzx ecx, word [rsp+rax*2+48] ; count[curr + drop]
sub edi, ecx
cmp edi, 0
jle .inflate_table_nextlength_done
add r13d, 1 ; curr++
shl edi, 1 ; left <<= 1
jmp .inflate_table_nextlength
calign
.inflate_table_nextlength_done:
mov esi, dword [rsp+144] ; used
mov ecx, r13d
mov eax, 1
shl eax, cl
add esi, eax
mov dword [rsp+144], esi
cmp esi, dword [rsp+160] ; used > used_limit ?
ja .inflate_table_error ; not enough space
; point entry in root table to sub-table
mov r15d, edx
and r15d, dword [rsp+152] ; low = huff & mask
mov rdi, r11 ; next
mov rcx, [rsp+128] ; get the table pointer back
mov rsi, [rcx] ; and the pointer it is pointing to
sub rdi, rsi
; we want next - *table in offset not bytes, shl'd into the high word, which is shr 2 then shl 16
shl rdi, 14
; low order byte == op, next byte is bits, then val
mov eax, r13d ; curr
mov ecx, dword [rsp+112] ; root
and eax, 0xff
and ecx, 0xff
shl ecx, 8
or eax, ecx
or eax, edi
; rsi is still pointing at our table, we need to offset it by r15d (low)
mov dword [rsi+r15*4], eax
jmp .inflate_table_mainloop
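; reference equivalent of the sub-table creation above (paraphrased, using the
; port's used_limit in place of the per-type ENOUGH checks):
;   if (drop == 0) drop = root;
;   next += min;                     /* min == 1 << curr from the last replicate */
;   curr = len - drop;
;   left = (int)(1 << curr);
;   while (curr + drop < max) {
;       left -= count[curr + drop];
;       if (left <= 0) break;
;       curr++;  left <<= 1;
;   }
;   used += 1U << curr;
;   if (used > used_limit) return failure;
;   low = huff & mask;
;   (*table)[low].op = (unsigned char)curr;
;   (*table)[low].bits = (unsigned char)root;
;   (*table)[low].val = (unsigned short)(next - *table);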
calign
.inflate_table_mainloop_done:
; fill in remaining table entry if code is incomplete
mov rcx, [rsp+128] ; get the table pointer back
mov rsi, [rcx] ; and the pointer it is pointing to
mov edi, dword [rsp+144] ; used
shl edi, 2
add rsi, rdi
mov [rcx], rsi ; *table += used
; next up we need to set *bits = root
mov rdi, [rsp+32] ; bits
mov ecx, [rsp+112] ; root
mov dword [rdi], ecx ; *bits = root
test edx, edx ; huff != 0 ?
jz .inflate_table_success
; low order byte == 64 (invalid code marker), next byte: (len - drop), next word: 0
mov eax, r10d
sub eax, r14d
shl eax, 8
or eax, 0x40
; next[huff] = eax
mov dword [r11+rdx*4], eax
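; reference equivalent of the wrap-up above:
;   *table += used;  *bits = root;
;   if (huff != 0)                   /* at most one entry remains for an incomplete code */
;       next[huff >> drop] = (code){ 64, (unsigned char)(len - drop), 0 };
; drop is necessarily 0 here (incomplete codes only pass the earlier check when
; max == 1), so indexing next by huff directly, as above, is equivalent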
; success return
add rsp, 168
pop r15 r14 r13 r12 rbx rbp
mov eax, 1
ret
calign
.inflate_table_success:
add rsp, 168
pop r15 r14 r13 r12 rbx rbp
mov eax, 1
ret
calign
.inflate_table_nosymbols:
mov eax, 0x140 ; packed here: op = 64 (invalid code marker), bits = 1, val = 0
mov rdx, [rcx] ; get the pointer at table
mov dword [rdx], eax ; set that pointer to 0x140 (invalid code marker)
add rdx, 4
mov dword [rdx], eax ; set the next to 0x140 also
add rdx, 4
mov [rcx], rdx ; set the pointer at table again to our updated value
mov dword [r8], 1 ; *bits = 1
add rsp, 168
pop r15 r14 r13 r12 rbx rbp
mov eax, 1 ; per the reference, no symbols at all is not an error; the invalid code markers above force any error to be reported at decode time instead
ret
calign
.inflate_table_error:
add rsp, 168
pop r15 r14 r13 r12 rbx rbp
xor eax, eax
ret
dalign
.inflate_table_lbase:
dw 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0
dalign
.inflate_table_lext:
dw 16, 16, 16, 16, 16, 16, 16, 16, 17, 17, 17, 17, 18, 18, 18, 18, 19, 19, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 16, 72, 78
dalign
.inflate_table_dbase:
dw 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, 257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577, 0, 0
dalign
.inflate_table_dext:
dw 16, 16, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20, 21, 21, 22, 22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29, 64, 64
dalign
.order:
dd 16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15
dalign
.lenfix:
dd 0x760, 0x500800, 0x100800, 0x730814, 0x1f0712, 0x700800, 0x300800, 0xc00900
dd 0xa0710, 0x600800, 0x200800, 0xa00900, 0x800, 0x800800, 0x400800, 0xe00900
dd 0x60710, 0x580800, 0x180800, 0x900900, 0x3b0713, 0x780800, 0x380800, 0xd00900
dd 0x110711, 0x680800, 0x280800, 0xb00900, 0x80800, 0x880800, 0x480800, 0xf00900
dd 0x40710, 0x540800, 0x140800, 0xe30815, 0x2b0713, 0x740800, 0x340800, 0xc80900
dd 0xd0711, 0x640800, 0x240800, 0xa80900, 0x40800, 0x840800, 0x440800, 0xe80900
dd 0x80710, 0x5c0800, 0x1c0800, 0x980900, 0x530714, 0x7c0800, 0x3c0800, 0xd80900
dd 0x170712, 0x6c0800, 0x2c0800, 0xb80900, 0xc0800, 0x8c0800, 0x4c0800, 0xf80900
dd 0x30710, 0x520800, 0x120800, 0xa30815, 0x230713, 0x720800, 0x320800, 0xc40900
dd 0xb0711, 0x620800, 0x220800, 0xa40900, 0x20800, 0x820800, 0x420800, 0xe40900
dd 0x70710, 0x5a0800, 0x1a0800, 0x940900, 0x430714, 0x7a0800, 0x3a0800, 0xd40900
dd 0x130712, 0x6a0800, 0x2a0800, 0xb40900, 0xa0800, 0x8a0800, 0x4a0800, 0xf40900
dd 0x50710, 0x560800, 0x160800, 0x840, 0x330713, 0x760800, 0x360800, 0xcc0900
dd 0xf0711, 0x660800, 0x260800, 0xac0900, 0x60800, 0x860800, 0x460800, 0xec0900
dd 0x90710, 0x5e0800, 0x1e0800, 0x9c0900, 0x630714, 0x7e0800, 0x3e0800, 0xdc0900
dd 0x1b0712, 0x6e0800, 0x2e0800, 0xbc0900, 0xe0800, 0x8e0800, 0x4e0800, 0xfc0900
dd 0x760, 0x510800, 0x110800, 0x830815, 0x1f0712, 0x710800, 0x310800, 0xc20900
dd 0xa0710, 0x610800, 0x210800, 0xa20900, 0x10800, 0x810800, 0x410800, 0xe20900
dd 0x60710, 0x590800, 0x190800, 0x920900, 0x3b0713, 0x790800, 0x390800, 0xd20900
dd 0x110711, 0x690800, 0x290800, 0xb20900, 0x90800, 0x890800, 0x490800, 0xf20900
dd 0x40710, 0x550800, 0x150800, 0x1020810, 0x2b0713, 0x750800, 0x350800, 0xca0900
dd 0xd0711, 0x650800, 0x250800, 0xaa0900, 0x50800, 0x850800, 0x450800, 0xea0900
dd 0x80710, 0x5d0800, 0x1d0800, 0x9a0900, 0x530714, 0x7d0800, 0x3d0800, 0xda0900
dd 0x170712, 0x6d0800, 0x2d0800, 0xba0900, 0xd0800, 0x8d0800, 0x4d0800, 0xfa0900
dd 0x30710, 0x530800, 0x130800, 0xc30815, 0x230713, 0x730800, 0x330800, 0xc60900
dd 0xb0711, 0x630800, 0x230800, 0xa60900, 0x30800, 0x830800, 0x430800, 0xe60900
dd 0x70710, 0x5b0800, 0x1b0800, 0x960900, 0x430714, 0x7b0800, 0x3b0800, 0xd60900
dd 0x130712, 0x6b0800, 0x2b0800, 0xb60900, 0xb0800, 0x8b0800, 0x4b0800, 0xf60900
dd 0x50710, 0x570800, 0x170800, 0x840, 0x330713, 0x770800, 0x370800, 0xce0900
dd 0xf0711, 0x670800, 0x270800, 0xae0900, 0x70800, 0x870800, 0x470800, 0xee0900
dd 0x90710, 0x5f0800, 0x1f0800, 0x9e0900, 0x630714, 0x7f0800, 0x3f0800, 0xde0900
dd 0x1b0712, 0x6f0800, 0x2f0800, 0xbe0900, 0xf0800, 0x8f0800, 0x4f0800, 0xfe0900
dd 0x760, 0x500800, 0x100800, 0x730814, 0x1f0712, 0x700800, 0x300800, 0xc10900
dd 0xa0710, 0x600800, 0x200800, 0xa10900, 0x800, 0x800800, 0x400800, 0xe10900
dd 0x60710, 0x580800, 0x180800, 0x910900, 0x3b0713, 0x780800, 0x380800, 0xd10900
dd 0x110711, 0x680800, 0x280800, 0xb10900, 0x80800, 0x880800, 0x480800, 0xf10900
dd 0x40710, 0x540800, 0x140800, 0xe30815, 0x2b0713, 0x740800, 0x340800, 0xc90900
dd 0xd0711, 0x640800, 0x240800, 0xa90900, 0x40800, 0x840800, 0x440800, 0xe90900
dd 0x80710, 0x5c0800, 0x1c0800, 0x990900, 0x530714, 0x7c0800, 0x3c0800, 0xd90900
dd 0x170712, 0x6c0800, 0x2c0800, 0xb90900, 0xc0800, 0x8c0800, 0x4c0800, 0xf90900
dd 0x30710, 0x520800, 0x120800, 0xa30815, 0x230713, 0x720800, 0x320800, 0xc50900
dd 0xb0711, 0x620800, 0x220800, 0xa50900, 0x20800, 0x820800, 0x420800, 0xe50900
dd 0x70710, 0x5a0800, 0x1a0800, 0x950900, 0x430714, 0x7a0800, 0x3a0800, 0xd50900
dd 0x130712, 0x6a0800, 0x2a0800, 0xb50900, 0xa0800, 0x8a0800, 0x4a0800, 0xf50900
dd 0x50710, 0x560800, 0x160800, 0x840, 0x330713, 0x760800, 0x360800, 0xcd0900
dd 0xf0711, 0x660800, 0x260800, 0xad0900, 0x60800, 0x860800, 0x460800, 0xed0900
dd 0x90710, 0x5e0800, 0x1e0800, 0x9d0900, 0x630714, 0x7e0800, 0x3e0800, 0xdd0900
dd 0x1b0712, 0x6e0800, 0x2e0800, 0xbd0900, 0xe0800, 0x8e0800, 0x4e0800, 0xfd0900
dd 0x760, 0x510800, 0x110800, 0x830815, 0x1f0712, 0x710800, 0x310800, 0xc30900
dd 0xa0710, 0x610800, 0x210800, 0xa30900, 0x10800, 0x810800, 0x410800, 0xe30900
dd 0x60710, 0x590800, 0x190800, 0x930900, 0x3b0713, 0x790800, 0x390800, 0xd30900
dd 0x110711, 0x690800, 0x290800, 0xb30900, 0x90800, 0x890800, 0x490800, 0xf30900
dd 0x40710, 0x550800, 0x150800, 0x1020810, 0x2b0713, 0x750800, 0x350800, 0xcb0900
dd 0xd0711, 0x650800, 0x250800, 0xab0900, 0x50800, 0x850800, 0x450800, 0xeb0900
dd 0x80710, 0x5d0800, 0x1d0800, 0x9b0900, 0x530714, 0x7d0800, 0x3d0800, 0xdb0900
dd 0x170712, 0x6d0800, 0x2d0800, 0xbb0900, 0xd0800, 0x8d0800, 0x4d0800, 0xfb0900
dd 0x30710, 0x530800, 0x130800, 0xc30815, 0x230713, 0x730800, 0x330800, 0xc70900
dd 0xb0711, 0x630800, 0x230800, 0xa70900, 0x30800, 0x830800, 0x430800, 0xe70900
dd 0x70710, 0x5b0800, 0x1b0800, 0x970900, 0x430714, 0x7b0800, 0x3b0800, 0xd70900
dd 0x130712, 0x6b0800, 0x2b0800, 0xb70900, 0xb0800, 0x8b0800, 0x4b0800, 0xf70900
dd 0x50710, 0x570800, 0x170800, 0x840, 0x330713, 0x770800, 0x370800, 0xcf0900
dd 0xf0711, 0x670800, 0x270800, 0xaf0900, 0x70800, 0x870800, 0x470800, 0xef0900
dd 0x90710, 0x5f0800, 0x1f0800, 0x9f0900, 0x630714, 0x7f0800, 0x3f0800, 0xdf0900
dd 0x1b0712, 0x6f0800, 0x2f0800, 0xbf0900, 0xf0800, 0x8f0800, 0x4f0800, 0xff0900
dalign
.distfix:
dd 0x10510, 0x1010517, 0x110513, 0x1001051b, 0x50511, 0x4010519, 0x410515, 0x4001051d
dd 0x30510, 0x2010518, 0x210514, 0x2001051c, 0x90512, 0x801051a, 0x810516, 0x540
dd 0x20510, 0x1810517, 0x190513, 0x1801051b, 0x70511, 0x6010519, 0x610515, 0x6001051d
dd 0x40510, 0x3010518, 0x310514, 0x3001051c, 0xd0512, 0xc01051a, 0xc10516, 0x540
end if