; ------------------------------------------------------------------------
; HeavyThing x86_64 assembly language library and showcase programs
; Copyright © 2015-2018 2 Ton Digital
; Homepage: https://2ton.com.au/
; Author: Jeff Marrison <jeff@2ton.com.au>
;
; This file is part of the HeavyThing library.
;
; HeavyThing is free software: you can redistribute it and/or modify
; it under the terms of the GNU General Public License, or
; (at your option) any later version.
;
; HeavyThing is distributed in the hope that it will be useful,
; but WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
; GNU General Public License for more details.
;
; You should have received a copy of the GNU General Public License along
; with the HeavyThing library. If not, see <http://www.gnu.org/licenses/>.
; ------------------------------------------------------------------------
;
; string_math.inc: double to/fro string conversion math helpers
;
if used string$frexp | defined include_everything
; string$frexp: split a double into an integer mantissa and a base 2 exponent
; in:  xmm0 == value, rdi == expptr (dword slot that receives the exponent)
; out: rax == the frexp mantissa scaled by (double)(1 << 53) and converted to an integer
;      dword [expptr] == the frexp exponent minus 53
; xmm0 is smashed
falign
string$frexp:
prolog_silent string$frexp
; rdi is deliberately not saved around the call: frexp is relied upon to leave it alone
call frexp
mulsd xmm0, [_math_1shl53] ; m * (double)(1 << 53)
sub dword [rdi], 53 ; ...and take those same 53 bits back out of *expptr
cvtsd2si rax, xmm0 ; integer mantissa is the return
epilog
end if
if used string$qp2 | defined include_everything
; string$qp2: 2.0 ** exp as a double
; in:  rdi == exp
; out: xmm0 (smashes xmm1 on the pow path; rcx and rdi on the shift path)
; exponents 1..62 are built with an integer shift, everything else is handed to pow
; NOTE: 63 must NOT take the shift path: 1 shl 63 has its sign bit set and cvtsi2sd is a
; signed conversion, so the result would come back as -2**63 instead of +2**63
falign
string$qp2:
prolog_silent string$qp2
cmp rdi, 63
jge .doublepow
cmp rdi, 0
jle .doublepow
mov ecx, edi ; shift count
mov edi, 1
shl rdi, cl ; 1 << exp, always positive as a signed 64 bit value here
cvtsi2sd xmm0, rdi
epilog
calign
.doublepow:
movq xmm0, [_math_two]
cvtsi2sd xmm1, rdi ; (double)exp
call pow
epilog
end if
if used string$qp10 | defined include_everything
; string$qp10: 10.0 ** exp as a double
; in:  rdi == exp
; out: xmm0 (smashes xmm1 on the pow path)
; exponents 1..22 are read straight out of the table at the bottom, anything else goes to pow
falign
string$qp10:
prolog_silent string$qp10
cmp rdi, 0
jle .viapow
cmp rdi, 23
jge .viapow
lea rdi, [rdi*8+.kpowten] ; address of the 8 byte table entry for this exponent
movq xmm0, [rdi]
epilog
calign
.viapow:
movq xmm0, [_math_ten]
cvtsi2sd xmm1, rdi ; (double)exp
call pow
epilog
dalign
.kpowten:
dq 1.0f, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19, 1e20, 1e21, 1e22
end if
if used string$bqp10 | defined include_everything
; string$bqp10: stringbi flavour of string$qp10
; rdi == exp, rsi == bint result
; computes string$qp10(exp) and then sets the stringbi at rsi from that double via stringbi$sfd
falign
string$bqp10:
prolog_silent string$bqp10
push rsi ; keep the result buffer across the call
call string$qp10 ; xmm0 = 10 ** exp
pop rdi ; result buffer becomes the first argument of stringbi$sfd
call stringbi$sfd
epilog
end if
; next up: stringbi goods, intentionally 32bit word sizes here (TODO: someday when I am bored, replace all this)
; buffer layout as used by the routines below: a dword word count at offset 0, followed by
; the 32 bit words themselves starting at offset 4, least significant word first
; stringbi_size is the size in bytes of one such buffer (all stack temporaries below use it)
stringbi_size = 64
if used stringbi$init | defined include_everything
; single argument: rdi == buffer (which should be stringbi_size bytes in length)
; sets the word count of the buffer to zero, nothing else is touched
; note: do not use this one-liner, haha, reference only
falign
stringbi$init: ; init
prolog_silent stringbi$init
mov dword [rdi], 0 ; length = 0
epilog
end if
if used stringbi$sfi | defined include_everything
; stringbi$sfi: make the stringbi a single word value
; in: rdi == buffer, esi == the 32 bit integer to load
falign
stringbi$sfi: ; set from int
prolog_silent stringbi$sfi
mov dword [rdi+4], esi ; word 0 = value
mov dword [rdi], 1 ; exactly one word in use
epilog
end if
if used stringbi$sfbi | defined include_everything
; stringbi$sfbi: load this stringbi with a run of words taken from another stringbi
; four arguments: rdi == buffer, rsi == o (source stringbi), edx == offset (in words), ecx == amount (in words)
; the destination word count becomes amount, and amount words are copied starting at o's word[offset]
; smashes rcx, rdx, rsi, rdi
falign
stringbi$sfbi: ; set from other stringbi
prolog_silent stringbi$sfbi
mov dword [rdi], ecx ; length = amount
add rdi, 4 ; destination words start past the length dword
test ecx, ecx
jz .alldone
shl edx, 2 ; word offset -> byte offset
add rsi, rdx
; o is a stringbi as well, so its words also begin past its length dword; without this
; skip the copy was one dword early and offset 0 picked up o's length as the first word
add rsi, 4
calign
.top:
mov edx, dword [rsi]
mov dword [rdi], edx
add rsi, 4
add rdi, 4
sub ecx, 1
jnz .top
epilog
calign
.alldone:
epilog
end if
if used stringbi$cf | defined include_everything
; two arguments: rdi == buffer, rsi == source
; copies the entire stringbi_size byte buffer (length dword and all words) from source
; dont call this, do the memcpy yourself
falign
stringbi$cf: ; copy
prolog_silent stringbi$cf
mov edx, stringbi_size ; rdi/rsi are already in place as memcpy's first two arguments
call memcpy
epilog
end if
if used stringbi$sfd | defined include_everything
; stringbi$sfd: load the stringbi from a double
; in: rdi == buffer, xmm0 == value (xmm0 gets smashed)
; string$frexp hands back a 64 bit integer mantissa plus a base 2 exponent; the mantissa is
; stored as the low two words and the buffer is then shifted left (exponent >= 0) or right
; (exponent < 0) by the magnitude of that exponent
falign
stringbi$sfd: ; set from double
prolog_silent stringbi$sfd
push r12
sub rsp, 8 ; dword slot for the exponent
mov r12, rdi
mov rdi, rsp ; expptr for string$frexp
call string$frexp
mov esi, dword [rsp] ; exponent, doubles as the shift count further down
mov rdi, r12 ; buffer back into the first argument
add rsp, 8
pop r12
; rax == mantissa
mov dword [rdi+4], eax ; word 0 = low 32 bits
shr rax, 32
mov dword [rdi+8], eax ; word 1 = high 32 bits (written even when it is not counted)
mov dword [rdi], 1 ; assume one word...
test eax, eax
jle .lengthset
mov dword [rdi], 2 ; ...unless the high word is in use
calign
.lengthset:
test esi, esi
js .rshift
call stringbi$lsb ; rdi == buffer, esi == shiftcount
epilog
calign
.rshift:
neg esi ; shift right by the magnitude of the negative exponent
call stringbi$rsb ; rdi == buffer, esi == shiftcount
epilog
end if
if used stringbi$subtract | defined include_everything
; stringbi$subtract: result = difference between the two operands
; three arguments: rdi == buffer, rsi == smaller, rdx == result
; the operands are compared first (stringbi$c) and swapped when rsi turns out to be the
; larger one, so the word loops always take the smaller value away from the bigger one
; equal operands short-circuit to a one word zero result
; r12 is preserved; rax, rcx, rsi, rdi and r8-r11 are used as scratch
falign
stringbi$subtract:
; c == rax
; x == rcx
; bigger = rdi
; r8 == sbuf
; r9 == bbuf
; r10 == rbuf
; r11 == borrow
; r12 == idx
prolog_silent stringbi$subtract
push r12
push rdi rsi rdx
call stringbi$c ; eax = compare(buffer, smaller)
pop rdx rsi rdi
cmp eax, 0
je .zerores ; equal: the answer is zero
jg .noswap ; buffer really is the bigger one
mov rcx, rsi
mov rsi, rdi
mov rdi, rcx ; bigger/smaller swapped
calign
.noswap:
mov ecx, dword [rdi] ; bigger.length
add ecx, 1
; rdx.snw(ecx, 1)
mov r10d, dword [rdx] ; oldlength == rdx.length
mov dword [rdx], ecx ; newlength == total words
cmp r10d, ecx
jge .firstsnwclear ; result already had at least that many words, nothing to clear
; zero-fill starting at result word index oldlength-1 until the counter reaches newlength
sub r10d, 1
mov eax, r10d
shl eax, 2
add rax, rdx
add rax, 4 ; rax = address of result word[oldlength-1]
calign
@@:
mov dword [rax], 0
add rax, 4
add r10d, 1
cmp r10d, ecx
jb @b
calign
.firstsnwclear:
mov r8, rsi
add r8, 4 ; sbuf
mov r9, rdi
add r9, 4 ; bbuf
mov r10, rdx
add r10, 4 ; rbuf
mov eax, dword [rsi] ; smaller.length into c
xor r11d, r11d ; borrow = 0
xor r12d, r12d ; idx = 0
calign
.sloop:
; one word of bigger minus one word of smaller minus the running borrow, done in 64 bits
mov ecx, dword [r9]
mov esi, dword [r8] ; rsi is free to smash here, sbuf lives in r8
sub rcx, rsi
sub rcx, r11
mov r11, rcx
shr r11, 32
and r11, 1 ; borrow for the next word = bit 32 of the 64 bit difference
mov dword [r10], ecx
add r9, 4
add r8, 4
add r10, 4
add r12, 1
sub eax, 1
jnz .sloop
mov eax, dword [rdi] ; bigger.length into c
cmp r12d, eax
jge .chopidx ; both operands had the same number of words
sub rax, r12 ; words of bigger still to process
calign
.bloop:
; remaining words of bigger, only the borrow left to propagate
mov ecx, dword [r9]
sub rcx, r11
mov r11, rcx
shr r11, 32
and r11, 1
mov dword [r10], ecx
add r9, 4
add r10, 4
add r12, 1
sub eax, 1
jnz .bloop
calign
.chopidx:
; walk back down from the last word written, dropping zero words off the top
sub r10, 4
sub r12, 1
cmp dword [r10], 0
je .chopidx
add r12, 1 ; idx = number of words up to and including the highest nonzero one
mov dword [rdx], r12d ; result.snw(idx, 0)
pop r12
epilog
calign
.zerores:
mov dword [rdx], 1
mov dword [rdx+4], 0 ; result.sfi(0)
pop r12
epilog
end if
if used stringbi$mbi | defined include_everything
; two arguments: rdi == buffer, rsi == factor
; multiplies the stringbi in place by factor: this is stringbi$maib with an addition of zero
falign
stringbi$mbi: ; multiply by integer
prolog_silent stringbi$mbi
xor edx, edx ; clear the add
call stringbi$maib
epilog
end if
if used stringbi$mbd | defined include_everything
; stringbi$mbd: multiply the stringbi in place by a double
; in: rdi == buffer, xmm0 == factor
; the factor is first turned into a stack temporary stringbi (stringbi$sfd), and the
; buffer is then multiplied by that temporary (stringbi$mb)
falign
stringbi$mbd: ; multiply by double
prolog_silent stringbi$mbd
push r12 r13
sub rsp, stringbi_size ; temporary that receives the converted factor
mov r13, rdi ; r13 == our buffer
mov r12, rsp ; r12 == temporary
mov rdi, r12
call stringbi$sfd ; temporary.sfd(factor)
mov rsi, r12
mov rdi, r13
call stringbi$mb ; buffer.mb(temporary)
add rsp, stringbi_size
pop r13 r12
epilog
end if
if used stringbi$dvo | defined include_everything
; stringbi$dvo: convert the stringbi to a double
; one arg: rdi == buffer, return in xmm0
; single word values are converted directly; for longer values the top 53 bits are
; gathered into a mantissa, rounded using the bits immediately below, and then scaled by
; 2 ** (lg2 + 1 - 53)
; r12-r15 are preserved; rax, rcx, rdx, rsi, rdi, r8-r11 and xmm1 are scratch
; NOTE: cvtsi2sd is a SIGNED conversion, so every integer handed to it below must have
; its top bit clear in the operand size used
falign
stringbi$dvo: ; double value of
prolog_silent stringbi$dvo
mov edx, dword [rdi]
cmp dword [rdi], 1
je .cvtreturn
sub edx, 1 ; nextWord = numwords - 1
push r12 r13 r14 r15
; no function callouts here... but we need rcx for shift ops
mov eax, 1 ; bits = 1
mov ecx, dword [rdi+rdx*4+4] ; top word
calign
.bitsloop:
; count the bits in use in the top word
cmp ecx, 1
jbe .bitsset
shr ecx, 1
add eax, 1
jmp .bitsloop
calign
.bitsset:
xor r8d, r8d ; bit53 = 0
xor r9d, r9d ; bit54 = 0
xor r10d, r10d ; rest = 0
xor r11d, r11d ; resultMantissa = 0
xor r12d, r12d ; w = 0
mov r13d, 53 ; pos = 53
xor r14d, r14d ; wshift = 0
mov r15d, 1 ; for our cmovs
calign
.posloop:
cmp r13d, 0
jle .checkpos
mov r12d, dword [rdi+rdx*4+4] ; w = word[nextWord]
mov ecx, r14d ; wshift
shr r12, cl
or r11, r12 ; resultMantissa |= (w >> wshift)
; put w back:
mov r12d, dword [rdi+rdx*4+4]
sub edx, 1 ; nextWord--
sub r13d, eax ; pos -= bits
cmp r13d, 0
jle .checkpos
cmp edx, -1
jle .checkpos ; ran out of words before 53 bits were collected
cmp r13d, 31
jg .posloopbig
mov eax, r13d ; bits = pos
mov r14d, 32
sub r14d, eax ; wshift = 32 - bits
mov ecx, eax
shl r11, cl ; resultMantissa <<= bits
jmp .posloop
calign
.posloopbig:
mov eax, 32 ; bits = 32
xor r14d, r14d ; wshift = 0
shl r11, 32 ; resultMantissa <<= 32
jmp .posloop
calign
.checkpos:
cmp r13d, 0
jg .nearlythere ; pos > 0 don't do squat.
test r11, 1
cmovnz r8d, r15d ; bit53 = (resultmantissa & 1)
cmp eax, 32
jne .bitsnot32
; the last extract was a whole word, so the rounding bits live in the next word (if any)
cmp edx, -1
jle .nearlythere
mov r12d, dword [rdi+rdx*4+4]
sub edx, 1
test r12d, 2147483648 ; 1 << 31
cmovnz r9d, r15d ; bit54
test r12d, 2147483647 ; (1 << 31) - 1
cmovnz r10d, r15d ; rest
jmp .nearlythere
calign
.bitsnot32:
; the rounding bits are in the word we extracted last (still in r12)
; use r15 temporarily
mov ecx, r14d
sub ecx, 1
shl r15, cl ; r15 == 1 << (wshift - 1)
mov ecx, 1
test r12, r15
cmovnz r9d, ecx ; bit54 = (w & (1<<(wshift-1)))
cmp r14d, 1
jle .bitsnot32_1
sub r15, 1
test r12, r15
cmovnz r10d, ecx ; rest = (w & ((1<<(wshift-1))-1))
calign
.bitsnot32_1:
cmp edx, -1
jle .nearlythere
test r10d, r10d
jnz .nearlythere
cmp dword [rdi+rdx*4+4], 0
cmovne r10d, ecx ; a nonzero next word also counts as rest
calign
.nearlythere:
; round up when bit54 && (bit53 || rest)
test r9d, r9d
jz .nearlythere_1
or r8d, r10d
; or sets the zero flag for us so we don't need a subsequent test r8d, r8d
jz .nearlythere_1
add r11, 1
calign
.nearlythere_1:
mov r12, r11 ; mantissa survives in r12 from here on
; lg2 gets inlined: (rdi is still valid)
mov eax, dword [rdi]
sub eax, 1
mov edx, eax ; setup our index
mov ecx, 32
mul ecx ; eax = (numwords - 1) * 32, edx gets smashed
mov edx, dword [rdi]
sub edx, 1
mov ecx, dword [rdi+rdx*4+4] ; top word again
calign
.bitsloop2:
cmp ecx, 1
jbe .bitsset2
shr ecx, 1
add eax, 1
jmp .bitsloop2
calign
.bitsset2:
; eax == lg2
sub eax, 52 ; + 1 - 53
cmp eax, 0
jle .alldone_cvt
; the integer scale path is only good for exponents 1..62: 1 shl 63 has its sign bit set
; and the signed cvtsi2sd below would turn it into -2**63, so 63 and up go through pow
cmp eax, 63
jl .useint
mov edi, 2
mov esi, eax
cvtsi2sd xmm0, edi ; 2.0
cvtsi2sd xmm1, eax ; (double)exponent
call pow
cvtsi2sd xmm1, r12
mulsd xmm0, xmm1 ; mantissa * 2 ** exponent
pop r15 r14 r13 r12
epilog
calign
.useint:
mov ecx, eax
mov eax, 1
shl rax, cl ; 1 << exponent
cvtsi2sd xmm1, rax
cvtsi2sd xmm0, r12
mulsd xmm0, xmm1
pop r15 r14 r13 r12
epilog
calign
.alldone_cvt:
cvtsi2sd xmm0, r12 ; exponent <= 0: the mantissa is the value
pop r15 r14 r13 r12
epilog
calign
.alldone:
; NOTE(review): nothing in this routine jumps here
pop r15 r14 r13 r12
epilog
calign
.cvtreturn:
; single word: the word is unsigned, so convert it through the zero extended 64 bit
; register; a 32 bit cvtsi2sd would turn words >= 0x80000000 into negative doubles
mov eax, dword [rdi+4]
cvtsi2sd xmm0, rax
epilog
end if
if used stringbi$m | defined include_everything
; stringbi$m: schoolbook multiply, result = buffer * smaller
; three arguments: rdi == buffer, rsi == smaller, rdx == result
; the operands are swapped unless rsi has strictly fewer words than rdi, so the outer loop
; runs over the operand left in rsi; the result is zero-filled to the combined word count
; up front and trimmed with stringbi$tlz at the end
; register roles:
; rdi == bigger, r8 == sbuf, r9 == bbuf, r10 == rbuf
; r11 == f (current factor word), r12 == p (partial product), r15 == c (carry)
; r13 == olc (outer loop counter), r14 == result, rcx == x (inner loop counter)
; r12-r15 are preserved; rax, rdx and rsi get smashed along the way
falign
stringbi$m: ; multiply
prolog_silent stringbi$m
push r12 r13 r14 r15
mov r14, rdx ; mul below smashes rdx, so result lives in r14
mov ecx, dword [rsi] ; smaller.length
mov r11d, dword [rdi] ; bigger.length
cmp ecx, r11d
jl .ordered
xchg rdi, rsi ; not strictly smaller: trade places
calign
.ordered:
add ecx, r11d ; maximum number of new words we'll have
mov dword [r14], ecx ; result.snw(x, 0)
test ecx, ecx
jz .cleared
lea r10, [r14+4] ; rbuf
calign
.zerofill:
mov dword [r10], 0
add r10, 4
sub ecx, 1
jnz .zerofill
calign
.cleared:
lea r8, [rsi+4] ; smaller.buffer into sbuf
mov r13d, dword [rsi] ; smaller.length into olc
test r13d, r13d
jz .finish
lea r10, [r14+4] ; result.buffer into rbuf
calign
.outerloop:
mov r11d, dword [r8] ; f = [sbuf]
test r11d, r11d
jz .advance ; a zero factor word contributes nothing, just step along
xor r15d, r15d ; c = 0
push r10 ; remember where this row starts in the result
lea r9, [rdi+4] ; bigger.buffer into bbuf
mov ecx, dword [rdi] ; bigger.length into x
calign
.innerloop:
mov eax, dword [r9] ; next word of bigger, zero extended for the 64 bit mul
add r9, 4 ; bbuf++
mul r11 ; rdx:rax = word * f (both under 2**32, so it all fits in rax)
mov r12, rax ; p = word * f
mov esi, dword [r10]
add r12, rsi ; p += [rbuf]
add r12, r15 ; p += c
mov r15, r12
shr r15, 32 ; c = p >> 32
mov dword [r10], r12d ; [rbuf] = (p & 0xffffffff)
add r10, 4 ; rbuf++
sub ecx, 1 ; x--
jnz .innerloop
mov dword [r10], r15d ; leftover carry goes into the next word up
pop r10 ; back to the start of this row
calign
.advance:
add r10, 4 ; the next row starts one word further up
add r8, 4 ; sbuf++
sub r13d, 1 ; olc--
jnz .outerloop
calign
.finish:
; result.tlz(), then return
mov rdi, r14
call stringbi$tlz
pop r15 r14 r13 r12
epilog
end if
if used stringbi$a | defined include_everything
; stringbi$a: result = buffer + smaller
; three arguments: rdi == buffer, rsi == smaller, rdx == result
; the operands are compared first (stringbi$c) and swapped when rsi turns out to be the
; larger one, so the first loop covers the words both operands have and the second loop
; carries through whatever words only the bigger one has
; r12 is preserved; rax, rcx, rsi, rdi and r8-r11 are used as scratch
falign
stringbi$a: ; add
; c == rax
; x == rcx
; bigger = rdi
; r8 == sbuf
; r9 == bbuf
; r10 == rbuf
; r11 == borrow
; r12 == idx
prolog_silent stringbi$a
push r12
push rdi rsi rdx
call stringbi$c ; eax = compare(buffer, smaller)
pop rdx rsi rdi
cmp eax, 0
je .zerores ; equal: see if this is 0 + 0
jg .noswap ; buffer really is the bigger one
mov rcx, rsi
mov rsi, rdi
mov rdi, rcx ; bigger/smaller swapped
calign
.noswap:
mov ecx, dword [rdi] ; bigger.length
add ecx, 1
; rdx.snw(ecx, 1)
mov r10d, dword [rdx] ; oldlength == rdx.length
mov dword [rdx], ecx ; newlength == total words
cmp r10d, ecx
jge .firstsnwclear ; result already had at least that many words, nothing to clear
; zero-fill starting at result word index oldlength-1 until the counter reaches newlength
sub r10d, 1
mov eax, r10d
shl eax, 2
add rax, rdx
add rax, 4 ; rax = address of result word[oldlength-1]
calign
@@:
mov dword [rax], 0
add rax, 4
add r10d, 1
cmp r10d, ecx
jb @b
calign
.firstsnwclear:
mov r8, rsi
add r8, 4 ; sbuf
mov r9, rdi
add r9, 4 ; bbuf
mov r10, rdx
add r10, 4 ; rbuf
mov eax, dword [rsi] ; smaller.length into c
xor r11d, r11d ; borrow = 0 (this is the carry for the add)
xor r12d, r12d ; idx = 0
calign
.sloop:
; one word of each operand plus the running carry, done in 64 bits
mov ecx, dword [r9]
mov esi, dword [r8] ; rsi is free to smash here, sbuf lives in r8
add rcx, rsi
add rcx, r11
mov r11, rcx
shr r11, 32
and r11, 1 ; carry for the next word = bit 32 of the 64 bit sum
mov dword [r10], ecx
add r9, 4
add r8, 4
add r10, 4
add r12, 1
sub eax, 1
jnz .sloop
mov eax, dword [rdi] ; bigger.length into c
cmp r12d, eax
jge .chopidx ; both operands had the same number of words
sub rax, r12 ; words of bigger still to process
calign
.bloop:
; remaining words of bigger, only the carry left to propagate
mov ecx, dword [r9]
add rcx, r11
mov r11, rcx
shr r11, 32
and r11, 1
mov dword [r10], ecx
add r9, 4
add r10, 4
add r12, 1
sub eax, 1
jnz .bloop
calign
.chopidx:
; a carry out of the top word becomes a brand new top word
test r11, r11
jz .chopidxtwo
mov dword [r10], r11d
add r10, 4
add r12, 1
calign
.chopidxtwo:
; walk back down from the last word written, dropping zero words off the top
sub r10, 4
sub r12, 1
cmp dword [r10], 0
je .chopidx
add r12, 1 ; idx = number of words up to and including the highest nonzero one
mov dword [rdx], r12d ; result.snw(idx, 0)
pop r12
epilog
calign
.zerores:
; we compared the two and they are equal
; check to see if it is one word only and said word is equal to zero for
; quick[er] return from integer 0
cmp dword [rdi], 1
jne .noswap
cmp dword [rdi+4], 0
jne .noswap
mov dword [rdx], 1
mov dword [rdx+4], 0 ; result.sfi(0)
pop r12
epilog
end if
if used stringbi$co | defined include_everything
; stringbi$co: compare (buffer + offset) against other
; in: rdi == buffer, rsi == other (buffer), rdx == offset (also a buffer)
; out: eax == whatever stringbi$c returns for (buffer + offset) versus other
; the sum is built in a stack temporary, none of the three inputs are modified
falign
stringbi$co: ; compare offset
prolog_silent stringbi$co
sub rsp, stringbi_size
mov dword [rsp], 1
mov dword [rsp+4], 0 ; temp.sfi(0)
push rsi ; other is needed again after the add
mov rsi, rdx ; offset is the second operand of the add
lea rdx, [rsp+8] ; temp (sits just above the saved rsi) receives the sum
call stringbi$a ; temp = buffer + offset
pop rsi
mov rdi, rsp
call stringbi$c ; compare(temp, other)
add rsp, stringbi_size
epilog
end if
if used stringbi$mb | defined include_everything
; stringbi$mb: in-place multiply, buffer = buffer * other
; in: rdi == buffer, rsi == other (buffer)
; the product is built in a stack temporary by stringbi$m and then copied over buffer
falign
stringbi$mb: ; multiply by
prolog_silent stringbi$mb
sub rsp, stringbi_size
mov dword [rsp], 1
mov dword [rsp+4], 0 ; temp.sfi(0)
push rdi ; destination of the copy further down
lea rdx, [rsp+8] ; temp (just above the saved rdi) is the result argument
call stringbi$m ; temp = buffer * other
pop rdi
mov edx, stringbi_size
mov rsi, rsp
call memcpy ; buffer.cf(temp)
add rsp, stringbi_size
epilog
end if
if used stringbi$decby | defined include_everything
; stringbi$decby: in-place subtract, buffer = stringbi$subtract(buffer, other)
; in: rdi == buffer, rsi == other
; the difference is built in a stack temporary and then copied over buffer
falign
stringbi$decby: ; decrement by
prolog_silent stringbi$decby
sub rsp, stringbi_size
mov dword [rsp], 1
mov dword [rsp+4], 0 ; temp.sfi(0)
push rdi ; destination of the copy further down
lea rdx, [rsp+8] ; temp (just above the saved rdi) is the result argument
call stringbi$subtract ; temp = buffer - other
pop rdi
mov rdx, stringbi_size
mov rsi, rsp
call memcpy ; buffer.cf(temp)
add rsp, stringbi_size
epilog
end if
if used stringbi$lsb | defined include_everything
; stringbi$lsb: in-place left shift, buffer = buffer << shift count
; in: rdi == buffer, esi == shift count (in bits)
; stringbi$ls does the work into a stack temporary which is then copied over buffer
falign
stringbi$lsb: ; left shift by
prolog_silent stringbi$lsb
push r12 r13
sub rsp, stringbi_size
mov dword [rsp], 1
mov dword [rsp+4], 0 ; temp.sfi(0)
mov r12, rdi ; r12 == buffer
mov r13, rsp ; r13 == temp
mov rdx, r13 ; third argument: the result buffer (rdi and esi pass straight through)
call stringbi$ls
; copy our result:
mov edx, stringbi_size
mov rsi, r13
mov rdi, r12
call memcpy
add rsp, stringbi_size
pop r13 r12
epilog
end if
if used stringbi$rsb | defined include_everything
; stringbi$rsb: in-place right shift, buffer = buffer >> shift count
; in: rdi == buffer, esi == shift count (in bits)
; stringbi$rs does the work into a stack temporary which is then copied over buffer
falign
stringbi$rsb: ; right shift by
prolog_silent stringbi$rsb
push r12 r13
sub rsp, stringbi_size
mov dword [rsp], 1
mov dword [rsp+4], 0 ; temp.sfi(0)
mov r12, rdi ; r12 == buffer
mov r13, rsp ; r13 == temp
mov rdx, r13 ; third argument: the result buffer (rdi and esi pass straight through)
call stringbi$rs
; copy our result:
mov edx, stringbi_size
mov rsi, r13
mov rdi, r12
call memcpy
add rsp, stringbi_size
pop r13 r12
epilog
end if
if used stringbi$db | defined include_everything
; stringbi$db: divide via stringbi$dm
; in: rdi == buffer, rsi == divisor (buffer), rdx == result (buffer)
; result receives what stringbi$dm writes to its result argument, and buffer itself is
; overwritten with the residual stringbi$dm leaves behind (built in a stack temporary)
falign
stringbi$db: ; divide by
prolog_silent stringbi$db
mov rcx, rdx ; stringbi$dm wants result as its fourth argument
sub rsp, stringbi_size
mov dword [rsp], 1
mov dword [rsp+4], 0 ; temp.sfi(0)
push rdi ; destination of the copy further down
lea rdx, [rsp+8] ; temp (just above the saved rdi) is the residual argument
call stringbi$dm
pop rdi
mov edx, stringbi_size
mov rsi, rsp
call memcpy ; buffer.cf(residual)
add rsp, stringbi_size
epilog
end if
if used stringbi$c | defined include_everything
; stringbi$c: three way compare of two stringbi values
; in: rdi == buffer, rsi == other (buffer)
; out: eax == 1 if buffer is greater, -1 if it is less, 0 if they are equal
; word counts are compared first (signed); when those match the words are compared as
; unsigned values from the most significant end downward
; smashes rcx, rdx, rsi, rdi
falign
stringbi$c: ; compare
prolog_silent stringbi$c
mov eax, dword [rsi] ; other.length
mov ecx, dword [rdi] ; our length
cmp ecx, eax
jg .greater
jl .less
mov edx, ecx
sub edx, 1
shl edx, 2 ; byte offset of the top word
lea rsi, [rsi+rdx+4] ; rsi -> other's top word
lea rdi, [rdi+rdx+4] ; rdi -> our top word
mov edx, eax ; words left to look at
calign
.loop:
mov eax, dword [rsi]
cmp dword [rdi], eax ; ours versus theirs
jb .less
ja .greater
sub rdi, 4
sub rsi, 4
sub edx, 1
jnz .loop
xor eax, eax ; every word matched
epilog
calign
.greater:
mov eax, 1
epilog
calign
.less:
mov eax, -1
epilog
end if
if used stringbi$maib | defined include_everything
; stringbi$maib: multiply and increment by, buffer = buffer * factor + addition (in place)
; three arguments: rdi == buffer, rsi == factor, edx == addition
; each word is multiplied by factor in 64 bits, the addition/carry is added in, the low 32
; bits go back into the word and the high bits carry into the next one; a carry left over
; at the end is appended as a new top word
; NOTE: the carry arithmetic uses all of rdx, so callers load the addition with a 32 bit
; move (or xor edx, edx) that zero extends it
; smashes rax, rcx, rdx, r8
falign
stringbi$maib:
prolog_silent stringbi$maib
; eax == x
; edx == carry
mov eax, dword [rdi] ; length
test eax, eax
jz .noloop
mov r8, rdi
add r8, 4 ; ib
calign
.loop:
mov ecx, dword [r8]
imul rcx, rsi ; word * factor
add rcx, rdx ; + carry
mov rdx, rcx
shr rdx, 32 ; carry = high half
mov dword [r8], ecx ; word = low half
add r8, 4
sub eax, 1
jnz .loop
test edx, edx
jz .nocarry
mov eax, dword [rdi] ; length
add eax, 1
; rdi.snw(eax, 0)
mov dword [rdi], eax
mov dword [r8], edx ; r8 is already sitting on the new top word
epilog
calign
.noloop:
; no words at all: the value is just the addition (if there is one)
test edx, edx
jz .nocarry
add eax, 1
; rdi.snw(eax, 0)
mov dword [rdi], eax
; words start 4 bytes in; storing at +1 would land on top of the length dword just written
mov dword [rdi+4], edx
epilog
calign
.nocarry:
epilog
end if
if used stringbi$dm | defined include_everything
; stringbi$dm: single step divide/modulus
; four arguments: rdi == buffer, rsi == divisor (buffer), rdx == residual (buffer), rcx == result (buffer)
; buffer < divisor: residual = buffer, result = 0
; buffer == divisor: residual = 0, result = 1
; otherwise a single quotient word (factor) is estimated from the top words of both,
; corrected downward while factor*divisor exceeds the residual, and stored as word 0 of
; result; residual ends up as buffer - factor*divisor (less one more divisor if the final
; compare says residual is still greater than divisor)
; NOTE(review): the estimate is clamped to 9 after a refactor, so this looks like it
; expects quotients that fit in one decimal digit -- confirm against the callers
; rbx and r12-r15 are preserved
falign
stringbi$dm: ; divmod
prolog_silent stringbi$dm
; rdi (it) is only used initially, and we'll replace it with residual
push rbx r12 r13 r14 r15
mov r12, rdi ; r12 == this
mov r13, rsi ; r13 == divisor
mov r14, rdx ; r14 == residual
mov r15, rcx ; r15 == result
call stringbi$c ; compare(this, divisor)
cmp eax, 0
jl .cnegone
je .cone
mov rdi, r14 ; set residual to first arg
mov rsi, r12
mov edx, stringbi_size
call memcpy
; result.snw(divisor.length, 1)
mov r9d, dword [r13] ; divisor.length
mov r10d, dword [r15] ; oldlength == rdx.length
mov dword [r15], r9d ; newlength == total words
cmp r10d, r9d
jge .firstsnwclear ; result already had at least that many words, nothing to clear
; zero-fill starting at result word index oldlength-1 until the counter reaches newlength
sub r10d, 1
mov eax, r10d
shl eax, 2
add rax, r15
add rax, 4
calign
@@:
mov dword [rax], 0
add rax, 4
add r10d, 1
cmp r10d, r9d
jb @b
calign
.firstsnwclear:
; locate the top word of residual
mov edx, dword [r14] ; residual.length
sub edx, 1
shl edx, 2
mov rax, r14
add rax, 4
add rdx, rax
mov eax, dword [rdx] ; top word of residual
mov r8, rdx
sub r8, 4 ; r8 -> the word just below the top one
mov r9, rax ; save these two morsels for later
; locate the top word of divisor
mov edx, dword [r13] ; divisor.length
sub edx, 1
shl edx, 2
add rdx, r13
add rdx, 4
mov ecx, dword [rdx] ; top word of divisor
mov r10, rcx ; save it
xor edx, edx ; clear rdx for div
div rcx ; first estimate: top residual word / top divisor word
; rax now has quotient
mov rbx, rax ; factor set
test rax, rax
jz .refactor ; factor == 0
; NOTE(review): .fchecktwo further down compares against 10 with jle (i.e. refactor only
; when > 10), this one refactors at 10 as well -- confirm which one is intended
cmp rax, 10
jge .refactor ; factor >= 10
jmp .fcheckthree ; else, skip refactor bit
calign
.refactor:
; second estimate from the top TWO residual words, only possible when both have > 1 word
cmp dword [r14], 1
jle .fcheckthree ; residual.length <= 1
cmp dword [r13], 1
jle .fcheckthree ; divisor.length <= 1
mov eax, dword [r8]
shl r9, 32
or rax, r9
; rax == br
xor edx, edx
div r10
mov rbx, rax ; factor=
cmp rax, 9
jle .fcheckthree
mov rbx, 9 ; clamp
calign
.fcheckthree:
; if (factor)
test rbx, rbx
jz .ccheck
; we need a stack temporary for this section
sub rsp, stringbi_size
mov dword [rsp], 1
mov dword [rsp+4], 0 ; decr.sfi(0)
mov rdi, rsp
mov r12, rsp ; we can safely blast this and use it
mov rsi, r13
mov edx, stringbi_size
call memcpy ; decr.cf(divisor)
mov rdi, r12
mov rsi, rbx
xor edx, edx
call stringbi$maib ; decr.maib(factor, 0)
calign
.fcheckloop:
; back the estimate off while decr (== factor * divisor) is still bigger than residual
mov rdi, r12
mov rsi, r14
call stringbi$c ; decr.c(residual)
cmp eax, 1
jne .fcheckloopdone
cmp rbx, 0
jle .fcheckloopdone
mov rdi, r12
mov rsi, r13
call stringbi$decby ; decr.decby(divisor)
sub rbx, 1
jmp .fcheckloop
calign
.fcheckloopdone:
mov rdi, r14
mov rsi, r12
call stringbi$decby ; residual.decby(decr)
add rsp, stringbi_size ; done with our temporary, which also freed up r12 to use again
calign
.ccheck:
; one more divisor comes off if residual is still greater than divisor
mov rdi, r14 ; residual
mov rsi, r13 ; divisor
call stringbi$c
cmp eax, 1
jne .cchecknotone
mov rdi, r14
mov rsi, r13
call stringbi$decby ; residual.decby(divisor)
add rbx, 1 ; factor++
calign
.cchecknotone:
; result->buffer[0] = (u32)factor
mov rdx, r15
add rdx, 4
mov dword [rdx], ebx
; done
mov rdi, r15
call stringbi$tlz ; result.tlz()
pop r15 r14 r13 r12 rbx
epilog
calign
.fchecktwo:
; NOTE(review): nothing in this routine jumps here
; if we made it here, factor >= 1
cmp rbx, 10
jle .fcheckthree ; no refactor necessary
jmp .refactor
calign
.cnegone:
mov rdi, r14
mov rsi, r12
mov edx, stringbi_size
call memcpy ; residual.cf(this)
mov dword [r15], 1
mov dword [r15+4], 0 ; result = 0
; done
pop r15 r14 r13 r12 rbx
epilog
calign
.cone:
mov dword [r14], 1
mov dword [r15], 1
mov dword [r14+4], 0 ; residual = 0
mov dword [r15+4], 1 ; result = 1
; done
pop r15 r14 r13 r12 rbx
epilog
end if
if used stringbi$ls | defined include_everything
; stringbi$ls: left shift into a separate result, result = buffer << shift count
; three arguments: rdi == buffer, esi == shift count, rdx == result buffer
; shift count / 32 whole zero words go in at the bottom of result, and the source words
; are then copied above them shifted by the remaining shift count % 32 bits
; a source of exactly one zero word short-circuits to a one word zero result
; smashes rax, rcx, rsi, rdi, r8-r11
falign
stringbi$ls:
prolog_silent stringbi$ls
mov r8d, esi
shr r8d, 5 ; number of new words
mov r9d, dword [rdi]
add r9d, r8d
add r9d, 1 ; total words
; rdx.snw(tw, 1)
mov r10d, dword [rdx] ; oldlength == rdx.length
mov dword [rdx], r9d ; newlength == total words
cmp r10d, r9d
jge .firstsnwclear ; result already had at least that many words, nothing to clear
; zero-fill starting at result word index oldlength-1 until the counter reaches newlength
sub r10d, 1
mov eax, r10d
shl eax, 2
add rax, rdx
add rax, 4
calign
@@:
mov dword [rax], 0
add rax, 4
add r10d, 1
cmp r10d, r9d
jb @b
calign
.firstsnwclear:
; is the source a single zero word?
cmp dword [rdi], 1
jne .notzero
cmp dword [rdi+4], 0
jne .notzero
; result.sfi(0)
mov dword [rdx], 1
mov dword [rdx+4], 0
epilog
calign
.notzero:
mov r10, rdx
add r10, 4 ; dbuf
test r8d, r8d
jz .noclear
calign
.clearloop:
; the whole words shifted in at the bottom are zero
mov dword [r10], 0
add r10, 4
sub r8d, 1
jnz .clearloop
calign
.noclear:
mov r11d, dword [rdi]
add rdi, 4
mov ecx, esi
and ecx, 0x1f ; bits left to shift within a word
; now:
; rdi == sbuf [ this (buffer) ]
; rsi == ?
; rdx == result (buffer)
; r8d == x# (zero at this point)
; r9d == tw
; r10 == dbuf
; r11d == l
; ecx == shiftBy
; rax == ?
test ecx, ecx
jz .nocarry
; eax == sc
; esi == c
xor esi, esi
mov eax, 32
sub eax, ecx ; sc = 32 - shiftBy
calign
.cloop:
mov r8d, dword [rdi]
shl r8d, cl
or r8d, esi ; bring in the bits carried out of the previous word
mov dword [r10], r8d
add r10, 4
mov esi, dword [rdi]
xchg eax, ecx ; swap them for the shr
shr esi, cl ; c = the bits that fall off the top of this word
add rdi, 4
xchg eax, ecx ; swap them back for the next loop iteration
sub r11d, 1
jnz .cloop
mov dword [r10], esi ; final carry goes into the extra top word
test esi, esi
jz .noextraword
add r9d, 1 ; cancels the sub below: the extra top word stays
calign
.noextraword:
sub r9d, 1
; rdx.snw(tw, 0)
mov dword [rdx], r9d
epilog
calign
.nocarry:
; whole word shift only: straight copy
mov r8d, dword [rdi]
mov dword [r10], r8d
add rdi, 4
add r10, 4
sub r11d, 1
jnz .nocarry
sub r9d, 1 ; the extra top word is not needed
; rdx.snw(tw, 0)
mov dword [rdx], r9d
epilog
end if
if used stringbi$rs | defined include_everything
; stringbi$rs: right shift into a separate result, result = buffer >> shift count
; three arguments: rdi == buffer, esi == shift count, rdx == result buffer
; shift count / 32 whole words are dropped off the bottom, the remaining words are copied
; from the top down shifted by shift count % 32 bits, and the result is trimmed with
; stringbi$tlz
; when the whole words dropped are at least as many as the source has, the result is a
; one word zero
; smashes rax, rcx, rsi, rdi, r8-r10
falign
stringbi$rs:
prolog_silent stringbi$rs
mov r8d, esi
shr r8d, 5 ; total fewer words
mov r9d, dword [rdi]
sub r9d, r8d ; new total words
; rdx.snw(tw, 1)
mov r10d, dword [rdx] ; oldlength == rdx.length
mov dword [rdx], r9d ; newlength == total words
cmp r10d, r9d
jge .firstsnwclear ; result already had at least that many words, nothing to clear
; zero-fill starting at result word index oldlength-1 until the counter reaches newlength
sub r10d, 1
mov eax, r10d
shl eax, 2
add rax, rdx
add rax, 4
calign
@@:
mov dword [rax], 0
add rax, 4
add r10d, 1
cmp r10d, r9d
jb @b
calign
.firstsnwclear:
; removed >= length means nothing is left. The == case has to go to .zero as well: with
; zero words remaining and a shift that is an exact multiple of 32, the do-while in
; .nocarry would otherwise be entered with a zero counter and bogus pointers, and would
; not stop until the counter wrapped all the way around
cmp r8d, dword [rdi]
jge .zero
mov r8d, r9d
sub r8d, 1
shl r8d, 2
mov r10, rdx
add r10, 4
add r10, r8 ; r10 == dbuf
mov r8d, dword [rdi]
sub r8d, 1
shl r8d, 2
add rdi, 4
add rdi, r8 ; rdi == sbuf
mov ecx, esi
and ecx, 0x1f ; bits left to shift within a word
test ecx, ecx
jz .nocarry
mov eax, 32
sub eax, ecx ; counter shift = 32 - shiftBy
xor esi, esi ; carry = 0
test r9d, r9d
jz .nocloop
calign
.cloop:
mov r8d, dword [rdi]
shr r8d, cl
or r8d, esi ; bring in the bits carried down from the word above
mov dword [r10], r8d
sub r10, 4
mov esi, dword [rdi]
xchg eax, ecx ; swap them for the shl
shl esi, cl ; carry = the bits that fall off the bottom of this word
xchg eax, ecx ; swap them back for the next iteration
sub rdi, 4
sub r9d, 1
jnz .cloop
mov rdi, rdx
call stringbi$tlz
epilog
calign
.nocarry:
; whole word shift only: straight copy from the top down
mov r8d, dword [rdi]
mov dword [r10], r8d
sub rdi, 4
sub r10, 4
sub r9d, 1
jnz .nocarry
mov rdi, rdx
call stringbi$tlz
epilog
calign
.zero:
mov dword [rdx], 1
mov dword [rdx+4], 0 ; result.sfi(0)
epilog
calign
.nocloop:
mov dword [rdx], 0
mov rdi, rdx
call stringbi$tlz
epilog
end if
if used stringbi$tlz | defined include_everything
; stringbi$tlz: trim leading (most significant) zero words
; in: rdi == buffer
; scans down from the top word and lowers the word count so that the top counted word is
; nonzero; if every word turns out to be zero the count is set to 1
; smashes rax, rsi
falign
stringbi$tlz: ; trim leading zeroes
prolog_silent stringbi$tlz
mov eax, dword [rdi]
sub eax, 1
shl eax, 2 ; byte offset of the top word
lea rsi, [rdi+rax+4] ; rsi -> top word
mov eax, dword [rdi]
sub eax, 1 ; eax == index of the word rsi is looking at
calign
.scan:
test eax, eax
js .stop ; ran off the bottom
cmp dword [rsi], 0
jne .stop ; found the highest nonzero word
sub eax, 1
sub rsi, 4
jmp .scan
calign
.stop:
cmp eax, -1
je .oneword
add eax, 1 ; index -> count
mov dword [rdi], eax
epilog
calign
.oneword:
mov dword [rdi], 1
epilog
end if
; stringdc object header, addressed through rdi (the _dc_ names) or through r12 (the
; _dc12_ names): both virtual blocks describe the exact same layout
; field notes below reflect how stringdc$new fills them in
virtual at rdi
_dc_value dq ? ; the double being converted
_dc_e dd ? ; exponent as returned by string$frexp
_dc_mantissa dq ? ; integer mantissa as returned by string$frexp
_dc_mantprec dd ? ; mantissa precision, starts at 53 and is reduced by stringdc$new
_dc_base10exp dd ? ; return value of stringdc$s
_dc_finished dd ? ; initialized to 0
_dc_fastestok dd ? ; 0 initially, 1 when the double based fast path is used
_dc_minprec dd ? ; the min digits argument
_dc_lowok dd ? ; 0 or 1
_dc_highok dd ? ; 0 or 1
_dc_r dq ? ; pointer to stringbi (storage follows the header)
_dc_s dq ? ; pointer to stringbi
_dc_plus dq ? ; pointer to stringbi
_dc_minus dq ? ; pointer to stringbi
_dc_dr dq ? ; double, fast path counterpart of r
_dc_ds dq ? ; double, fast path counterpart of s
_dc_dplus dq ? ; double, fast path counterpart of plus
_dc_dminus dq ? ; double, fast path counterpart of minus
_dc_amode dd ? ; the mode argument
_dc_bi dq ? ; pointer to scratch stringbi
_dc_bi1 dq ? ; pointer to scratch stringbi
end virtual ; 132 bytes
virtual at r12
_dc12_value dq ?
_dc12_e dd ?
_dc12_mantissa dq ?
_dc12_mantprec dd ?
_dc12_base10exp dd ?
_dc12_finished dd ?
_dc12_fastestok dd ?
_dc12_minprec dd ?
_dc12_lowok dd ?
_dc12_highok dd ?
_dc12_r dq ?
_dc12_s dq ?
_dc12_plus dq ?
_dc12_minus dq ?
_dc12_dr dq ?
_dc12_ds dq ?
_dc12_dplus dq ?
_dc12_dminus dq ?
_dc12_amode dd ?
_dc12_bi dq ?
_dc12_bi1 dq ?
end virtual ; 132 bytes
; one allocation holds the 132 byte header plus the six stringbi buffers it points at
; (r, s, plus, minus, bi, bi1)
stringdc_size = 132 + (stringbi_size * 6)
if used stringdc$new | defined include_everything
; stringdc$new: allocate and initialize a stringdc (double conversion state) object
; three arguments: xmm0 == value, edi == mode, esi == min digits
; mode == 0 == normal, 1 == fixed, 2 == precision, 3 == exponential
; returns new stringdc pointer (allocated via heap$alloc) in rax
; the value is split into mantissa/exponent with string$frexp, and the r/s/plus/minus
; state is then set up either as doubles (fast path, taken when |e| + mantprec - 1 < 50)
; or as stringbi values (slow path); finally stringdc$s supplies the base 10 exponent
; r12 is preserved for the caller
; NOTE(review): the fast path keeps using the rdi based _dc_ names right after call pow in
; .fastnege, i.e. it relies on pow leaving rdi alone -- confirm
; TODO: get rid of mov rdi, r12 and replace with dc12 goods
falign
stringdc$new:
prolog_silent stringdc$new
; heap funcs do not screw with xmm0, so preserve our other two args
push r12 ; preserve this one for the duration
push rdi rsi
mov edi, stringdc_size
call heap$alloc
mov rdi, rax
mov r12, rax
pop rdx rsi
; so now, rdi is our pointer, esi == mode, edx == min digits
mov dword [_dc_amode], esi
xor ecx, ecx
; carve the six stringbi buffers out of the space following the header
add rax, 132 ; header size
mov [_dc_r], rax
add rax, stringbi_size
mov [_dc_s], rax
add rax, stringbi_size
mov [_dc_plus], rax
add rax, stringbi_size
mov [_dc_minus], rax
add rax, stringbi_size
mov [_dc_bi], rax
add rax, stringbi_size
mov [_dc_bi1], rax
movq [_dc_value], xmm0
mov dword [_dc_finished], ecx
mov dword [_dc_fastestok], ecx
mov dword [_dc_lowok], ecx
mov dword [_dc_highok], ecx
mov dword [_dc_minprec], edx
mov dword [_dc_mantprec], 53
sub rsp, 8 ; dword slot for the exponent
mov rdi, rsp
call string$frexp
mov ecx, dword [rsp]
add rsp, 8
mov rdi, r12
mov dword [_dc_e], ecx
mov [_dc_mantissa], rax
; lowok/highok: always set outside of normal mode, in normal mode only for an even mantissa
cmp dword [_dc_amode], 0
jne .notnormal
test eax, 1
jnz .lowhighset
mov dword [_dc_lowok], 1
mov dword [_dc_highok], 1
jmp .lowhighset
calign
.notnormal:
mov dword [_dc_lowok], 1
mov dword [_dc_highok], 1
calign
.lowhighset:
; walk mantprec down until it lands on the highest set bit of the mantissa (or hits zero)
mov ecx, dword [_dc_mantprec]
test ecx, ecx
jz .doneleadingzeroes
mov rax, [_dc_mantissa]
sub ecx, 1
mov dword [_dc_mantprec], ecx
shr rax, cl
test rax, 1
jz .lowhighset
calign
.doneleadingzeroes:
add dword [_dc_mantprec], 1
; fast path test: |e| + mantprec - 1 < 50
mov eax, [_dc_e]
cmp eax, 0
jg .noneg
neg eax
calign
.noneg:
add eax, dword [_dc_mantprec]
sub eax, 1
cmp eax, 50
jge .nofastestimate
mov dword [_dc_fastestok], 1
cmp dword [_dc_e], 0
jl .fastnege
mov rax, 1
shl rax, 52
cmp rax, [_dc_mantissa]
je .fastposetwo
; e >= 0, mantissa != 1 << 52:
; dr = mantissa * 2**e * 2, ds = 2, dplus = dminus = 2**e
mov edi, dword [_dc_e]
call string$qp2
mov rdi, r12
movq [_dc_dplus], xmm0
movq [_dc_dminus], xmm0
movq xmm1, [_math_two]
mulsd xmm0, xmm1
mov rax, [_dc_mantissa]
cvtsi2sd xmm2, rax
mulsd xmm0, xmm2
movq [_dc_dr], xmm0
movq [_dc_ds], xmm1
jmp .fastposemodecheck
calign
.fastposetwo:
; e >= 0, mantissa == 1 << 52:
; dr = mantissa * 2**(e+1) * 2, ds = 4, dplus = 2**(e+1), dminus = 2**e
mov edi, dword [_dc_e]
call string$qp2
mov rdi, r12
movq [_dc_dminus], xmm0
movq xmm2, [_math_two]
movsd xmm1, xmm0
mulsd xmm1, xmm2
movq [_dc_dplus], xmm1
mulsd xmm1, xmm2
addsd xmm2, xmm2
movq [_dc_ds], xmm2
mov rax, [_dc_mantissa]
cvtsi2sd xmm2, rax
mulsd xmm1, xmm2
movq [_dc_dr], xmm1
jmp .fastposemodecheck
calign
.fastnege:
movq xmm0, [_math_two]
movq xmm1, [_math_fiftytwo]
call pow
cvtsd2si rax, xmm0
cmp rax, [_dc_mantissa]
je .fastnegepowequal
; e < 0, mantissa != 2**52:
; dr = mantissa * 2, ds = 2**(1-e), dplus = dminus = 1
mov rax, [_dc_mantissa]
cvtsi2sd xmm0, rax
movsd xmm1, [_math_two]
mulsd xmm0, xmm1
movq [_dc_dr], xmm0
mov eax, 1
sub eax, dword [_dc_e]
mov rdi, rax
call string$qp2
mov rdi, r12
movq [_dc_ds], xmm0
movq xmm1, [_math_one]
movq [_dc_dplus], xmm1
movq [_dc_dminus], xmm1
jmp .fastposemodecheck
calign
.fastnegepowequal:
; e < 0, mantissa == 2**52:
; dr = mantissa * 4, ds = 2**(2-e), dplus = 2, dminus = 1
mov rax, [_dc_mantissa]
cvtsi2sd xmm0, rax
movq xmm1, [_math_two]
movq [_dc_dplus], xmm1
addsd xmm1, xmm1
mulsd xmm0, xmm1 ; mantissa * 4.0
movq [_dc_dr], xmm0
movq xmm1, [_math_one]
movq [_dc_dminus], xmm1
mov eax, 2
sub eax, dword [_dc_e]
mov rdi, rax
call string$qp2
mov rdi, r12
; the power of two is the scale (ds) exactly like in .fastnege above; storing it into dr
; would throw away mantissa * 4.0 and leave ds uninitialized
movq [_dc_ds], xmm0
calign
.fastposemodecheck:
; any mode other than normal: scale ds and dr by 10 ** mindigits
cmp dword [_dc_amode], 0
je .setbase10
mov edi, [_dc_minprec] ; mindigits
call string$qp10
mov rdi, r12
movsd xmm2, xmm0
movq xmm0, [_dc_ds]
movq xmm1, [_dc_dr]
mulsd xmm0, xmm2
mulsd xmm1, xmm2
movq [_dc_ds], xmm0
movq [_dc_dr], xmm1
jmp .setbase10
calign
.nofastestimate:
; slow path: same setup as above, but in stringbi arithmetic
cmp dword [_dc12_e], 0
jl .slownege
mov rax, [_dc12_bi]
mov dword [rax], 1
mov dword [rax+4], 1 ; bi.sfi(1)
mov rdi, rax
mov esi, dword [_dc12_e]
call stringbi$lsb ; bi = 1 << e
mov rax, 1
shl rax, 52
cmp rax, [_dc12_mantissa]
je .slowposetwo
; e >= 0, mantissa != 1 << 52:
; r = value * 2, s = 2, plus = minus = 1 << e
movq xmm0, [_dc12_value]
mov rdi, [_dc12_r]
call stringbi$sfd ; r.sfd(avalue)
mov rdi, [_dc12_r]
mov esi, 1
call stringbi$lsb ; r.lsb(1)
mov rdi, [_dc12_s]
mov dword [rdi], 1
mov dword [rdi+4], 2 ; s.sfi(2)
mov rdi, [_dc12_plus]
mov rsi, [_dc12_bi]
mov edx, stringbi_size
call memcpy ; mplus.cf(bi)
mov rdi, [_dc12_minus]
mov rsi, [_dc12_bi]
mov edx, stringbi_size
call memcpy ; mminus.cf(bi)
jmp .slowmodecheck
calign
.slowposetwo:
; e >= 0, mantissa == 1 << 52:
; r = value * 4, s = 4, plus = 1 << (e+1), minus = 1 << e
mov rax, [_dc12_bi1]
mov dword [rax], 1
mov dword [rax+4], 0 ; bi1.sfi(0)
mov rdi, [_dc12_bi]
mov esi, 1
mov rdx, rax
call stringbi$ls ; bi.ls(1, bi1)
movq xmm2, [_dc12_value]
movq xmm0, [_math_two]
addsd xmm0, xmm0
mulsd xmm0, xmm2
mov rdi, [_dc12_r]
call stringbi$sfd ; r.sfd(4*value)
mov rdi, [_dc12_s]
mov dword [rdi], 1
mov dword [rdi+4], 4 ; s.sfi(4)
mov rdi, [_dc12_plus]
mov rsi, [_dc12_bi1]
mov edx, stringbi_size
call memcpy ; plus.cf(bi1)
; the destination has to go into rdi: this used to load minus into rsi, which left the
; copy destination as whatever the previous memcpy happened to leave behind in rdi
mov rdi, [_dc12_minus]
mov rsi, [_dc12_bi]
mov edx, stringbi_size
call memcpy ; minus.cf(bi)
jmp .slowmodecheck
calign
.slownege:
mov rax, 2
shl rax, 51
; WHY are we calling pow with two fixed arguments?
; rax 0x10000000000000 4503599627370496
; movq xmm0, [_math_two]
; movq xmm1, [_math_fiftytwo]
; call pow
; cvtsd2si rax, xmm0
cmp rax, [_dc12_mantissa]
je .slownegepowequal
; e < 0, mantissa != 2**52:
; r = mantissa * 2, s = 2 << -e, plus = minus = 1
mov rax, [_dc12_mantissa]
shl rax, 1
cvtsi2sd xmm0, rax
; movq xmm1, [_math_two]
; mulsd xmm0, xmm1 ; shl above does this for us
mov rdi, [_dc12_r]
call stringbi$sfd ; r.sfd(xmm0)
mov rdi, [_dc12_s]
mov dword [rdi], 1
mov dword [rdi+4], 2 ; s.sfi(2)
mov esi, dword [_dc12_e]
neg esi
call stringbi$lsb ; s.lsb(x)
mov rdi, [_dc12_plus]
mov dword [rdi], 1
mov dword [rdi+4], 1 ; plus.sfi(1)
mov rdi, [_dc12_minus]
mov dword [rdi], 1
mov dword [rdi+4], 1 ; minus.sfi(1)
jmp .slowmodecheck
calign
.slownegepowequal:
; e < 0, mantissa == 2**52:
; r = mantissa * 4, s = 2 << (1-e), plus = 2, minus = 1
movq xmm1, [_math_four]
mov rax, [_dc12_mantissa]
cvtsi2sd xmm0, rax
mulsd xmm0, xmm1 ; xmm0 now equals mantissa * 4.0
mov rdi, [_dc12_r]
call stringbi$sfd ; r.sfd(mantissa*4.0)
mov rdi, [_dc12_s]
mov dword [rdi], 1
mov dword [rdi+4], 2 ; s.sfi(2)
mov esi, 1
sub esi, dword [_dc12_e]
call stringbi$lsb ; s.lsb(x)
mov rdi, [_dc12_plus]
mov dword [rdi], 1
mov dword [rdi+4], 2 ; plus.sfi(2)
mov rdi, [_dc12_minus]
mov dword [rdi], 1
mov dword [rdi+4], 1 ; minus.sfi(1)
calign
.slowmodecheck:
; any mode other than normal: scale s and r by 10 ** mindigits
cmp dword [_dc12_amode], 0
je .setbase10
mov rsi, [_dc12_bi]
mov dword [rsi], 1
mov dword [rsi+4], 0 ; bi.sfi(0)
mov edi, dword [_dc12_minprec]
call string$bqp10 ; bi = 10 ** mindigits
mov rdi, [_dc12_s]
mov rsi, [_dc12_bi]
call stringbi$mb ; s.mb(bi)
mov rdi, [_dc12_r]
mov rsi, [_dc12_bi]
call stringbi$mb ; r.mb(bi)
calign
.setbase10:
mov rdi, r12 ; this
call stringdc$s
mov dword [_dc12_base10exp], eax
mov rax, r12 ; the new object is our return
; this pop pairs with the push r12 at the top; popping into rdi left the caller's r12
; pointing at our object instead of restoring it
pop r12
epilog
end if
if used stringdc$s | defined include_everything
	; stringdc$s: estimate the base-10 exponent of the value being converted
	; and pre-scale the conversion state accordingly.
	;
	; single argument: rdi (this)
	; return in eax: the estimate k = ceil((e + mantprec - 1) * log10(2) - 1e-10),
	; or k + 1 when the fixup test (r + plus reaches s) says the estimate was one too low.
	;
	; Side effects on the object:
	;   fast mode (fastestok != 0): dr/dplus/dminus (k < 0) or ds (k >= 0) are
	;     multiplied by 10^|k|; when k is returned unchanged, dr/dplus/dminus are
	;     additionally multiplied by 10.
	;   slow mode: same steps on the r/plus/minus/s bigints via the stringbi$ helpers.
	;
	; r12 = this and r13d = k are kept in callee-saved registers because they must
	; survive the calls made here; xmm0-xmm4 are used as scratch.
falign
stringdc$s:
	prolog_silent	stringdc$s
	push	r12 r13
	movq	xmm1, [.kl2_10]			; log10(2)
	movq	xmm2, [.ch]			; small bias subtracted before ceil
	mov	r12, rdi			; this, needed after the calls below
	mov	eax, [_dc_e]
	mov	ecx, [_dc_mantprec]
	sub	ecx, 1
	add	eax, ecx			; b2exp = e + mantprec - 1
	cvtsi2sd	xmm0, eax
	mulsd	xmm0, xmm1			; b2exp * log10(2)
	subsd	xmm0, xmm2
	call	ceil
	; result is in xmm0 (integer-valued after ceil, so the rounding mode is irrelevant here)
	cvtsd2si	r13d, xmm0		; r13d = k
	cmp	dword [_dc12_fastestok], 0
	je	.slow
	cmp	r13d, 0
	jge	.fastpos
	; fastneg: k < 0, so scale the numerator side up by 10^-k
	mov	edi, r13d
	neg	edi
	call	string$qp10			; xmm0 = 10^-k
	movq	xmm1, [_dc12_dr]
	movq	xmm2, [_dc12_dplus]
	movq	xmm3, [_dc12_dminus]
	mulsd	xmm1, xmm0
	mulsd	xmm2, xmm0
	mulsd	xmm3, xmm0
	movq	[_dc12_dr], xmm1
	movq	[_dc12_dplus], xmm2
	movq	[_dc12_dminus], xmm3
	jmp	.fastfee
calign
.fastpos:
	; k >= 0, so scale the denominator up by 10^k
	mov	edi, r13d
	call	string$qp10			; xmm0 = 10^k
	movq	xmm1, [_dc12_ds]
	mulsd	xmm1, xmm0
	; store back into ds (the double denominator that .fastfee reads), NOT into
	; the s field, which the slow path hands to the stringbi$ routines.
	movq	[_dc12_ds], xmm1
calign
.fastfee:
	; fixup of the estimate: does r + plus reach s?
	movq	xmm0, [_dc12_dr]
	movq	xmm1, [_dc12_dplus]
	movq	xmm2, [_dc12_ds]
	movq	xmm3, [_dc12_dminus]
	movq	xmm4, [_math_ten]
	addsd	xmm0, xmm1			; xmm0 = dr + dplus (dr itself is reloaded below if needed)
	cmp	dword [_dc12_highok], 0
	je	.fasthighnotok
	comisd	xmm0, xmm2
	jae	.fastplusonedone		; highok: dr + dplus >= ds
	jmp	.fastmult10
calign
.fasthighnotok:
	comisd	xmm0, xmm2
	ja	.fastplusonedone		; !highok: dr + dplus > ds
calign
.fastmult10:
	; estimate stands: dr *= 10, dplus *= 10, dminus *= 10
	; xmm0 currently holds dr + dplus, so reload dr before scaling it; this
	; keeps the fast path in agreement with .slowmult10, which scales r alone.
	movq	xmm0, [_dc12_dr]
	mulsd	xmm0, xmm4
	mulsd	xmm1, xmm4
	mulsd	xmm3, xmm4
	movq	[_dc12_dr], xmm0
	movq	[_dc12_dplus], xmm1
	movq	[_dc12_dminus], xmm3
	mov	eax, r13d
	pop	r13 r12
	epilog
calign
.fastplusonedone:
	mov	eax, r13d
	add	eax, 1				; corrected ee
	pop	r13 r12
	epilog
calign
.slow:
	; bigint variant: build the power-of-ten scale in a temporary stringbi on the stack
	sub	rsp, stringbi_size
	mov	dword [rsp], 1
	mov	dword [rsp+4], 0		; scale.sfi(0)
	cmp	r13d, 0
	jge	.slowpos
	mov	edi, r13d
	neg	edi
	mov	rsi, rsp
	call	string$bqp10			; scale = 10^-k
	mov	rdi, [_dc12_r]
	mov	rsi, rsp
	call	stringbi$mb			; r scaled by the temporary
	mov	rdi, [_dc12_plus]
	mov	rsi, rsp
	call	stringbi$mb			; plus scaled by the temporary
	mov	rdi, [_dc12_minus]
	mov	rsi, rsp
	call	stringbi$mb			; minus scaled by the temporary
	add	rsp, stringbi_size
	jmp	.slowfee
calign
.slowpos:
	mov	edi, r13d
	mov	rsi, rsp
	call	string$bqp10			; scale = 10^k
	mov	rdi, [_dc12_s]
	mov	rsi, rsp
	call	stringbi$mb			; s scaled by the temporary
	add	rsp, stringbi_size
calign
.slowfee:
	; same fixup test as .fastfee; stringbi$co(r, s, plus) presumably compares
	; r + plus against s and yields -1/0/1 -- confirm against stringbi$co
	mov	rdi, [_dc12_r]
	mov	rsi, [_dc12_s]
	mov	rdx, [_dc12_plus]
	call	stringbi$co
	cmp	dword [_dc12_highok], 0
	je	.slowhighnotok
	cmp	eax, -1
	jne	.slowplusonedone		; highok: result is not "less than"
	jmp	.slowmult10
calign
.slowhighnotok:
	cmp	eax, 1
	je	.slowplusonedone		; !highok: result is strictly "greater than"
calign
.slowmult10:
	; estimate stands: r, plus and minus each go through stringbi$mbi with 10
	mov	rdi, [_dc12_r]
	mov	esi, 10
	call	stringbi$mbi
	mov	rdi, [_dc12_plus]
	mov	esi, 10
	call	stringbi$mbi
	mov	rdi, [_dc12_minus]
	mov	esi, 10
	call	stringbi$mbi
	mov	eax, r13d
	pop	r13 r12
	epilog
calign
.slowplusonedone:
	mov	eax, r13d
	add	eax, 1
	pop	r13 r12
	epilog
dalign
.kl2_10	dq	0.30102999566398119521373889472449f	; log10(2)
dalign
.ch	dq	0.0000000001f				; 1e-10 bias applied before ceil
end if
if used stringdc$nd | defined include_everything
	; stringdc$nd: produce the next decimal digit of the conversion.
	;
	; single argument in rdi (this), return in eax
	; Returns 0 immediately if the finished flag is already set. Otherwise
	; returns the quotient of r / s (forced to 0 when it falls outside 0..9),
	; incremented by one when the final digit has to be rounded up.
	;
	; Decision table (low = remainder within the low-end rounding range,
	; high = remainder + plus within the high-end rounding range):
	;   !low && !high : r, plus, minus *= 10; return quotient (more digits follow)
	;    low && !high : finished = 1; return quotient
	;   !low &&  high : finished = 1; return quotient + 1
	;    low &&  high : finished = 1; return quotient if 2*r < s, else quotient + 1
	;
	; r12 = this, r13 = quotient, r14 = scratch/high flag across the calls below.
falign
stringdc$nd:
	prolog_silent	stringdc$nd
	cmp	dword [_dc_finished], 0
	jne	.alreadyfinished
	push	r12 r13 r14
	mov	r12, rdi
	cmp	dword [_dc_fastestok], 0
	je	.slow
	; fast path: everything is done in doubles
	movq	xmm0, [_dc_dr]
	movq	xmm1, [_dc_ds]
	movsd	xmm4, xmm0			; save dr
	movsd	xmm5, xmm1			; save ds
	; NOTE(review): the code after this call relies on fmod leaving xmm4/xmm5
	; untouched -- confirm against the fmod implementation
	call	fmod				; xmm0 now has mod
	divsd	xmm4, xmm5			; quotient into xmm4
	; truncate, do not round: cvtsd2si would follow the MXCSR rounding mode
	; (round-to-nearest by default) and turn e.g. 7.6 into 8, which disagrees
	; with the remainder fmod just produced.
	cvttsd2si	r13, xmm4		; quotient as integer into r13
	xor	eax, eax			; within low end round range?
	xor	r14d, r14d			; within high end round range?
	mov	ecx, 1				; positive value for the cmov
	movq	[_dc12_dr], xmm0		; dr = remainder
	movsd	xmm4, xmm0			; save it again
	movq	xmm1, [_dc12_dplus]
	movq	xmm2, [_dc12_dminus]
	cmp	dword [_dc12_lowok], 0
	je	.fastlownotokay
	comisd	xmm0, xmm2			; r <= minus?
	cmovbe	eax, ecx			; set eax to 1 if r <= minus, otherwise leave it at zero
	jmp	.fastcheckhighend
calign
.fastlownotokay:
	comisd	xmm0, xmm2
	cmovb	eax, ecx			; r < minus? set to 1 if so
calign
.fastcheckhighend:
	addsd	xmm4, xmm1			; r + plus
	cmp	dword [_dc12_highok], 0
	je	.fasthighnotokay
	comisd	xmm4, xmm5			; >= s?
	cmovae	r14d, ecx			; set to 1 if so
	jmp	.fastcheckquotient
calign
.fasthighnotokay:
	comisd	xmm4, xmm5
	cmova	r14d, ecx			; > s ?
calign
.fastcheckquotient:
	; a quotient outside 0..9 is not a digit; .fastbadquotient zeroes it
	cmp	r13, 0
	jl	.fastbadquotient
	cmp	r13, 9
	jg	.fastbadquotient
calign
.fastdoit:
	; eax is our bool for within low end round range
	; r14d is our bool for within high end round range
	test	eax, eax
	jnz	.fastwithinlowend
	; else, !fast within low end range
	; so now we have to check highend:
	test	r14d, r14d
	jnz	.fastquoteplusdone
	; else, !fast within low end range and !fastwithinhighendrange
	; so, dr *= 10, dplus *= 10, dminus *= 10, then return quotient
	movq	xmm6, [_math_ten]
	mulsd	xmm0, xmm6			; r *= 10
	mulsd	xmm1, xmm6			; plus *= 10
	mulsd	xmm2, xmm6			; minus *= 10
	movq	[_dc12_dr], xmm0
	movq	[_dc12_dplus], xmm1
	movq	[_dc12_dminus], xmm2
	mov	rax, r13
	pop	r14 r13 r12
	epilog
calign
.fastwithinlowend:
	test	r14d, r14d
	jz	.fastnotwithinhighendouter
	; else, if (dr * 2 < ds) finished = true else quotient++ && finished = true; done
	addsd	xmm0, xmm0
	comisd	xmm0, xmm5			; dr * 2 < ds?
	jae	.fastquoteplusdone
	mov	dword [_dc12_finished], 1
	mov	rax, r13
	pop	r14 r13 r12
	epilog
calign
.fastquoteplusdone:
	mov	dword [_dc12_finished], 1
	add	r13, 1				; round the last digit up
	mov	rax, r13
	pop	r14 r13 r12
	epilog
calign
.fastnotwithinhighendouter:
	mov	dword [_dc12_finished], 1
	mov	rax, r13
	pop	r14 r13 r12
	epilog
calign
.fastbadquotient:
	xor	r13d, r13d
	jmp	.fastdoit
calign
.slow:
	; slow path: same algorithm on the r/s/plus/minus bigints
	sub	rsp, stringbi_size
	mov	dword [rsp], 1
	mov	dword [rsp+4], 0		; sfi(0)
	mov	rdi, [_dc12_r]
	mov	rsi, [_dc12_s]
	mov	rdx, rsp
	call	stringbi$db			; presumably leaves the remainder in r -- confirm
	mov	r13d, dword [rsp+4]		; quotient == bi.buffer[0]
	add	rsp, stringbi_size
	mov	rdi, [_dc12_r]
	mov	rsi, [_dc12_minus]
	call	stringbi$c			; low end: r versus minus
	mov	r14, rax			; save it temporarily
	mov	rdi, [_dc12_r]
	mov	rsi, [_dc12_s]
	mov	rdx, [_dc12_plus]
	call	stringbi$co			; high end: r, s and plus
	; so at this point, r14 has the low end comparison result, and rax has the high end comparison result
	; put them somewhere else
	mov	r8, r14				; low end comparison result
	mov	r9, rax				; high end comparison result
	xor	eax, eax			; within low end round range?
	xor	r14d, r14d			; within high end round range?
	mov	ecx, 1				; positive value for the cmov
	cmp	dword [_dc12_lowok], 0
	je	.slowlownotokay
	cmp	r8, 1
	cmovne	eax, ecx			; set eax to 1 if low end comparison != 1
	jmp	.slowcheckhighend
calign
.slowlownotokay:
	cmp	r8d, -1
	cmove	eax, ecx			; set eax to 1 if low end comparison == -1
calign
.slowcheckhighend:
	cmp	dword [_dc12_highok], 0
	je	.slowhighnotokay
	cmp	r9d, -1
	cmovne	r14d, ecx			; set r14d to 1 if high end comparison != -1
	jmp	.slowcheckquotient
calign
.slowhighnotokay:
	cmp	r9, 1
	cmove	r14d, ecx			; set r14d to 1 if high end comparison == 1
calign
.slowcheckquotient:
	; a quotient outside 0..9 is not a digit; .slowbadquotient zeroes it
	cmp	r13, 0
	jl	.slowbadquotient
	cmp	r13, 9
	jg	.slowbadquotient
calign
.slowdoit:
	; eax is our bool for within low end round range
	; r14d is our bool for within high end round range
	test	eax, eax
	jnz	.slowwithinlowend
	; else, !slow within low end range
	; so now we have to check highend:
	test	r14d, r14d
	jnz	.slowquoteplusdone
	; else, !slow within low end range and !slowwithinhighendrange
	; so, r *= 10, plus *= 10, minus *= 10, then return quotient
	mov	rdi, [_dc12_r]
	mov	esi, 10
	call	stringbi$mbi
	mov	rdi, [_dc12_plus]
	mov	esi, 10
	call	stringbi$mbi
	mov	rdi, [_dc12_minus]
	mov	esi, 10
	call	stringbi$mbi
	mov	rax, r13
	pop	r14 r13 r12
	epilog
calign
.slowwithinlowend:
	test	r14d, r14d
	jz	.slownotwithinhighendouter
	; else, if stringbi$co(r, s, r) == -1 finished = true else quotient++ && finished = true; done
	; (the bigint counterpart of the fast path's dr * 2 < ds test)
	mov	rdi, [_dc12_r]
	mov	rsi, [_dc12_s]
	mov	rdx, rdi
	call	stringbi$co
	cmp	eax, -1
	jne	.slowquoteplusdone
	mov	dword [_dc12_finished], 1
	mov	rax, r13
	pop	r14 r13 r12
	epilog
calign
.slowquoteplusdone:
	mov	dword [_dc12_finished], 1
	add	r13, 1				; round the last digit up
	mov	rax, r13
	pop	r14 r13 r12
	epilog
calign
.slownotwithinhighendouter:
	mov	dword [_dc12_finished], 1
	mov	rax, r13
	pop	r14 r13 r12
	epilog
calign
.slowbadquotient:
	xor	r13d, r13d
	jmp	.slowdoit
calign
.alreadyfinished:
	; nothing was pushed on this path, so return directly
	xor	eax, eax
	epilog
end if