; ------------------------------------------------------------------------
; HeavyThing x86_64 assembly language library and showcase programs
; Copyright © 2015-2018 2 Ton Digital
; Homepage: https://2ton.com.au/
; Author: Jeff Marrison <jeff@2ton.com.au>
;
; This file is part of the HeavyThing library.
;
; HeavyThing is free software: you can redistribute it and/or modify
; it under the terms of the GNU General Public License, or
; (at your option) any later version.
;
; HeavyThing is distributed in the hope that it will be useful,
; but WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
; GNU General Public License for more details.
;
; You should have received a copy of the GNU General Public License along
; with the HeavyThing library. If not, see <http://www.gnu.org/licenses/>.
; ------------------------------------------------------------------------
;
; heap.inc: memory management goodies
;
;
; some notes here on the choice of memory management strategy
;
; this is a hotly debated matter of personal preference imo
; ... HOWEVER, one-size-fits-all for every single programmer
; isn't really that cool.
;
; that being said, most general purpose allocators work well
; enough, and are generally fast enough for the average
; coding requirement.
;
; Bin based allocation that is never returned to the kernel
; is how this particular variety works. (Noting of course that
; virtual memory isn't quite so simple.)
;
; several reasons for this: 1) hands down the fastest for
; alloc/free obviously due to its simplistic design
; 2) most every server-based system I have ever built
; manages a good and decent number of inflight allocations
; and during the course of running, frees and reallocates
; similarly sized blocks depending on workload, etc.
; consequently, we don't need some uber-conservative
; tree-based freelist or coalescing strategies, we simply
; place them in as waste-free-bin-sizes as practical
; and call it good.
;
; This is not intended to run inside 64KB of memory, so:
; cry me a river if you don't like it, or code up one that is
; uber-conservative with its waste/overhead.
;
; now then...
;
; up to 2048 byte allocs, granularity for bins is 64 bytes (32 bins)
; from 2048 to 16384, granularity for bins is 1024 bytes (14 bins)
; and from 16384 to 131072, granularity is 4096 bytes (28 bins)
; and from 131072 to 1048576, granularity is 64k (14 bins)
; beyond this: straight unadulterated mmap calls for the lucky caller who wants >1MB alloc
; (similar to how ptmalloc does it anyway for the bigger ones)
;
; TODO: compact our header size down from wasting 139272 bytes
; for my purposes, wasting said is no dramas. (128GB ram on my dev
; machine).
heapdebug = 0	; compile-time switch: nonzero assembles the debug_* helpers and verbose alloc/free tracing to stdout
;
; we need two global variables that are writeable for our heap:
if used heap$init | defined include_everything
globals
{
_heap_base dq 0
_heap_base_size dq 0
}
initial_heap_shiftcount = 3
falign
heap$init:
	; no args; reserves our heap address space and shrinks it down to just
	; the bin header (139272 bytes, +8 if heap_bincheck); dies on failure
	prolog_silent heap$init
	; mmap(NULL, 2GB << initial_heap_shiftcount == 16GB, PROT_READ|PROT_WRITE,
	;      MAP_PRIVATE|MAP_ANONYMOUS, -1, 0)
	mov eax, syscall_mmap
	xor edi, edi
	mov rsi, 2147483648
	mov edx, 0x3
	mov r10d, 0x22
	mov r8, -1
	xor r9d, r9d
	shl rsi, initial_heap_shiftcount
	syscall
	mov [_heap_base], rax
	; raw syscalls return -errno in [-4095..-1] on failure (ENOMEM == -12 being
	; the interesting one here), so catch the whole range, not just -12/-1
	cmp rax, -4095
	jae .trysmaller_loop
	; shrink the reservation down to just our bin header via mremap (no
	; MREMAP_MAYMOVE, so the base address stays put for later expansions)
	mov rdi, rax ; setup call to mremap
	mov rsi, 2147483648 ; old size
	mov edx, 139272 + (heap_bincheck * 8) ; new size
	xor r10d, r10d
	mov eax, syscall_mremap
	shl rsi, initial_heap_shiftcount
	syscall
	mov [_heap_base], rax
	cmp rax, -4095
	jae .die
	mov qword [_heap_base_size], 139272 + (heap_bincheck * 8)
	epilog
calign
.trysmaller_loop:
	; the full 16GB reservation failed; retry, walking down 256MB at a time
	push rbx
	mov rbx, 2147483648
	shl rbx, initial_heap_shiftcount
	sub rbx, 256 * 1048576
calign
.trysmaller_attempt:
	mov eax, syscall_mmap
	xor edi, edi
	mov rsi, rbx
	mov edx, 0x3
	mov r10d, 0x22
	mov r8, -1
	xor r9d, r9d
	syscall
	mov [_heap_base], rax
	cmp rax, -4095
	jae .trysmaller_again
	mov rdi, rax ; setup call to mremap
	mov rsi, rbx ; old size
	mov edx, 139272 + (heap_bincheck * 8) ; new size
	xor r10d, r10d
	mov eax, syscall_mremap
	syscall
	mov [_heap_base], rax
	cmp rax, -4095
	jae .die
	mov qword [_heap_base_size], 139272 + (heap_bincheck * 8)
	pop rbx
	epilog
calign
.trysmaller_again:
	sub rbx, 256 * 1048576
	jnz .trysmaller_attempt
	jmp .die
	; syscall # into rax, args: rdi, rsi, rdx, r10, r8, r9
calign
.die:
	; friendly output on stderr instead of a silent exit
	mov eax, syscall_write
	mov edi, 2
	mov rsi, .deathmsg
	mov edx, .deathmsglen
	syscall
	mov eax, syscall_exit
	mov edi, 99
	syscall
dalign
.deathmsg:
	db 'Insufficient memory.',10
.deathmsglen = $ - .deathmsg
end if
if used heap$alloc_permanent | defined include_everything
	; single argument in rdi for count, returns in rax
	; this is intended for permanent allocations (ones that aren't ever going to be heap$free'd)
	; all it does is call mmap for a new segment (size kept in the 8 byte prefix)
falign
heap$alloc_permanent:
	prolog heap$alloc_permanent
	add rdi, 8			; room for our size prefix
	push rdi
	mov eax, syscall_mmap
	mov rsi, rdi
	xor edi, edi
	mov edx, 0x3			; PROT_READ|PROT_WRITE
	mov r10d, 0x22			; MAP_PRIVATE|MAP_ANONYMOUS
	mov r8, -1
	xor r9d, r9d
	syscall
	pop rdi
	; raw mmap failure is -errno in [-4095..-1], never just -1; checking only
	; -1 would let e.g. -ENOMEM through and we'd write to a bogus pointer
	cmp rax, -4095
	jae .die
	mov [rax], rdi			; stash the size, even though we never free these
	add rax, 8			; return pointer past the prefix
	epilog
calign
.die:
	mov eax, syscall_exit
	mov edi, 99
	syscall
end if
if heapdebug
falign
debug_heap:
	; debug-only tracing helper
	; three args: rdi == db ptr, esi == length of same, rdx == number to display
	; writes "<label>0x<16 hex digits>\r\n" to stdout
	; clobbers volatile registers only (callers here preserve just rbx)
	sub rsp, 128
	mov [rsp], rdx			; keep the number while we emit the label
	mov eax, syscall_write
	mov rdx, rsi
	mov rsi, rdi
	mov edi, 1
	syscall
	mov eax, syscall_write
	lea rsi, [rsp+8]
	mov edx, 2
	mov edi, 1
	mov dword [rsi], '0x'		; 4 byte store, but edx == 2 so only "0x" is written
	syscall
	; render all 16 nibbles, most significant digit first
	mov rax, [rsp]
	bswap rax			; most significant byte now comes out of al first
	mov rsi, rsp			; reuse the scratch slot as our 16 char buffer
	mov r9d, 8			; 8 bytes == 16 hex digits
.nibloop:
	movzx ecx, al
	shr ecx, 4			; high nibble of this byte
	movzx r8d, byte [rcx+.hexchars]
	mov byte [rsi], r8b
	movzx ecx, al
	and ecx, 0xf			; low nibble
	movzx r8d, byte [rcx+.hexchars]
	mov byte [rsi+1], r8b
	shr rax, 8
	add rsi, 2
	sub r9d, 1
	jnz .nibloop
	mov eax, syscall_write
	mov edi, 1
	mov rsi, rsp
	mov edx, 16
	syscall
	mov eax, syscall_write
	mov edi, 1
	mov rsi, rsp
	mov edx, 2
	mov dword [rsi], 0x0a0d		; CR LF, only 2 bytes written
	syscall
	add rsp, 128
	ret
dalign
.hexchars:
	db '0123456789abcdef'
end if
if heapdebug
falign
debug_nodebin:
	; debug-only: dump the map-node bin freelist (heap_base[0]) to stdout
	; no args; preserves rbx, clobbers volatile registers via debug_heap
	mov eax, syscall_write
	mov edi, 1
	mov rsi, .debug1
	mov edx, .debug1len
	syscall
	push rbx
	mov rbx, [_heap_base]
	mov rbx, [rbx]			; head of the node freelist
calign
.loop:
	test rbx, rbx
	jz .done
	mov rdi, .debug2
	mov esi, .debug2len
	mov rdx, rbx
	call debug_heap
	mov rbx, [rbx]			; follow the singly linked next pointer
	jmp .loop			; was commented out, which printed only the head node
calign
.done:
	pop rbx
	ret
dalign
.debug1 db ' node bin freelist:',10
.debug1len = $ - .debug1
.debug2 db ' node:'
.debug2len = $ - .debug2
end if
if heapdebug
falign
debug_firstbin:
	; debug-only: dump the node bin, then the first 64-byte bin freelist
	; (heap_base[8]) to stdout; preserves rbx
	call debug_nodebin
	mov eax, syscall_write
	mov edi, 1
	mov rsi, .debug1
	mov edx, .debug1len
	syscall
	push rbx
	mov rbx, [_heap_base]
	mov rbx, [rbx+8]		; head of the first 64-byte bin freelist
calign
.loop:
	test rbx, rbx
	jz .done
	mov rdi, .debug2
	mov esi, .debug2len
	mov rdx, rbx
	call debug_heap
	mov rbx, [rbx]			; follow the singly linked next pointer
	jmp .loop			; was commented out, which printed only the head entry
calign
.done:
	pop rbx
	ret
dalign
.debug1 db ' first bin freelist:',10
.debug1len = $ - .debug1
.debug2 db ' ptr:'
.debug2len = $ - .debug2
end if
if used heap$alloc | defined include_everything
	; single argument in rdi for count, returns in rax
	; binned allocations carry an 8 byte prefix == bin offset (blocksize >> 3);
	; >1MB allocations are straight mmap and carry their full size instead
falign
heap$alloc:
	prolog heap$alloc
if heapdebug
	push rdi
	mov rdi, .debug3
	mov esi, .debug3len
	mov rcx, [_heap_base]
	mov rdx, [rcx+0x8]
	call debug_heap
	mov rdx, [rsp]
	mov rdi, .debug1
	mov esi, .debug1len
	call debug_heap
	pop rdi
end if
	; route the request to the right bin granularity
	cmp rdi, 68 ; aka avlnode_size, we make a special allowance for map nodes
	je .getnode
	cmp rdi, 2048
	jle .get64
	cmp rdi, 16384
	jle .get1024
	cmp rdi, 131072
	jle .get4096
	cmp rdi, 1048576
	jle .get65536
	; else, straight mmap return
	add rdi, 8 + (heap_barriers * 8)
	push rdi
	mov eax, syscall_mmap
	mov rsi, rdi
	xor edi, edi
	mov edx, 0x3
	mov r10d, 0x22
	mov r8, -1
	xor r9d, r9d
	syscall
	pop rdi
	; raw mmap failure is -errno in [-4095..-1], not just -1
	cmp rax, -4095
	jae .die
	mov [rax], rdi
if heap_barriers
	mov qword [rax+8], 0x46464646
	add rax, 16
else
	add rax, 8
end if
	epilog ; includes our ret for us
if heapdebug
dalign
.debug1 db 'heap$alloc for: '
.debug1len = $ - .debug1
.debug2 db ' bin is: '
.debug2len = $ - .debug2
.debug3 db '-------- heap_base[8] is: '
.debug3len = $ - .debug3
.debug4 db 'NEW BIN NEW BIN NEW BIN NEW BIN NEW BIN: '
.debug4len = $ - .debug4
.debug5 db ' heap_base[8] now: '
.debug5len = $ - .debug5
end if
calign
.get64:
	add rdi, 0x47 ; size prefix + rounding
	mov rdx, [_heap_base]
	mov ecx, 6 ; * 64 if we have to get a new bin
	and rdi, not 0x3f
	mov rsi, rdi
	shr rsi, 3
	mov r8, rsi
if heap_barriers
	add rdi, 8
end if
if heapdebug
	push rdi rdx rcx rsi r8
	mov rdx, rsi
	mov rdi, .debug2
	mov esi, .debug2len
	call debug_heap
	pop r8 rsi rcx rdx rdi
end if
	add rsi, rdx
	; rsi is now a valid pointer into our heap_base for our bin freelist start
	; so rdi >> 6 == / 64, which is the real bin #, but we actually want that # times 8
	; so shl'd again by 3, so the real shr is >> 3 hence above
	mov rax, [rsi]
	test rax, rax
	jz .newbin
	; else, we have a valid offset for this bin
	mov rcx, [rax] ; get its next into rcx
	mov [rsi], rcx ; store the new next for this bin into the head of the freelist
	mov [rax], r8 ; to get the actual blocksize, this value << 3 == size of block itself, - 8
if heap_barriers
	cmp qword [rax+8], 0x46004600
	jne .barrierfail
	mov qword [rax+8], 0x46464646
	add rax, 16
else
	add rax, 8
end if
if heapdebug
	push rax
	mov rcx, [_heap_base]
	mov rdx, [rcx+8]
	mov rdi, .debug5
	mov esi, .debug5len
	call debug_heap
	call debug_firstbin
	pop rax
end if
	epilog ; all good
calign
.getnode:
	; special size allowance for map nodes
	mov edi, 80 + (heap_barriers * 8) ; avlnode_size == 68, which has a partial dd hanging off the end, so 72 == align 8, + 8 for prefix
	mov rdx, [_heap_base]
	mov ecx, 9 ; * 512 if we have to get a new bin
	; we'll use bin #0 for map nodes if !heap_bincheck, otherwise, the final spot in our header
if heap_bincheck
	mov esi, 139272
else
	xor esi, esi
end if
	mov r8, rsi
	add rsi, rdx
	mov rax, [rsi]
	test rax, rax
	jz .newbin
	mov rcx, [rax]
	mov [rsi], rcx
	mov [rax], r8
if heap_barriers
	cmp qword [rax+8], 0x46004600
	jne .barrierfail
	mov qword [rax+8], 0x46464646
	add rax, 16
else
	add rax, 8
end if
if heapdebug
	push rax
	mov rcx, [_heap_base]
	mov rdx, [rcx+8]
	mov rdi, .debug5
	mov esi, .debug5len
	call debug_heap
	call debug_firstbin
	pop rax
end if
	epilog
	; these are all variants of .get64, only diff is the alignment modifier
calign
.get1024:
	add rdi, 0x407 ; size prefix + rounding
	mov rdx, [_heap_base]
	mov ecx, 4 ; * 16 if we have to get a new bin
	and rdi, not 0x3ff
	mov rsi, rdi
	shr rsi, 3
	mov r8, rsi
if heap_barriers
	add rdi, 8
end if
	add rsi, rdx
	mov rax, [rsi]
	test rax, rax
	jz .newbin
	mov rcx, [rax] ; get its next into rcx
	mov [rsi], rcx ; store the new next for this bin into the head of the freelist
	mov [rax], r8
if heap_barriers
	cmp qword [rax+8], 0x46004600
	jne .barrierfail
	mov qword [rax+8], 0x46464646
	add rax, 16
else
	add rax, 8
end if
	epilog ; all good
calign
.get4096:
	add rdi, 0x1007 ; size prefix + rounding
	mov rdx, [_heap_base]
	mov ecx, 2 ; * 4 if we have to get a new bin
	and rdi, not 0xfff
	mov rsi, rdi
	shr rsi, 3
	mov r8, rsi
if heap_barriers
	add rdi, 8
end if
	add rsi, rdx
	mov rax, [rsi]
	test rax, rax
	jz .newbin
	mov rcx, [rax] ; get its next into rcx
	mov [rsi], rcx ; store the new next for this bin into the head of the freelist
	mov [rax], r8
if heap_barriers
	cmp qword [rax+8], 0x46004600
	jne .barrierfail
	mov qword [rax+8], 0x46464646
	add rax, 16
else
	add rax, 8
end if
	epilog ; all good
calign
.get65536:
	add rdi, 0x10007 ; size prefix + rounding
	mov rdx, [_heap_base]
	mov ecx, 1 ; * 2 if we have to get a new bin
	and rdi, not 0xffff
	mov rsi, rdi
	shr rsi, 3
	mov r8, rsi
if heap_barriers
	add rdi, 8
end if
	add rsi, rdx
	mov rax, [rsi]
	test rax, rax
	jz .newbin
	mov rcx, [rax] ; get its next into rcx
	mov [rsi], rcx ; store the new next for this bin into the head of the freelist
	mov [rax], r8
if heap_barriers
	cmp qword [rax+8], 0x46004600
	jne .barrierfail
	mov qword [rax+8], 0x46464646
	add rax, 16
else
	add rax, 8
end if
	epilog ; all good
	; rdi == count
	; rsi == x
	; rax == y
	; rcx == z
calign
.newbin:
if heapdebug
	push rdi rsi rax rcx r8 rdx
	mov rdi, .debug4
	mov esi, .debug4len
	mov rdx, rcx
	call debug_heap
	pop rdx r8 rcx rax rsi rdi
end if
	; rdi == our aligned upward to whatever boundary SIZE of our get
	; rsi == that >> 3 + heap_base
	; qword [rsi] == 0, hence no freelist for this bin.
	; no freelist for this bin exists, do a heap expand (mremap) for 64 items worth
	; then prelink them into the freelist, and copy the return code
	push rcx ; save our bincount
	mov rdx, rdi
	shl rdi, cl ; * whatever was set above depending on how big the block is
	; count now contains the size of our heap expansion
	add rdi, [_heap_base_size]
	; save rdi, rsi, rcx
	push rdi
	push rsi
	push rdx
	mov eax, syscall_mremap
	mov rdx, rdi ; new size
	mov rdi, [_heap_base]
	mov rsi, [_heap_base_size]
	xor r10d, r10d ; no MREMAP_MAYMOVE, so rsi stays valid on success
	syscall
	; raw mremap failure is -errno in [-4095..-1], not just -1
	cmp rax, -4095
	jae .die
	mov [_heap_base], rax
	add rax, [_heap_base_size]
	pop rcx
	pop rsi
	pop rdi
	mov [_heap_base_size], rdi
	mov rdi, rcx
	; so at this point, rsi is still our freelist offset
	; rdi is still our alloc size
	; rax == end of the previous block (start of our newly acquired block)
	;
	; ok so rsi is still our freelist head offset
	; rax == pointer into heap_base + old heap base size (which is the offset to our new block)
	; rdi == actual item size, which is our individual increase size
	;
	pop rcx
	mov r8d, 1
	shl r8d, cl
	sub r8d, 1 ; how many items - 1
calign
.newloop:
	mov rcx, [rsi] ; get the pointer that _was_ at freelist head
	mov [rax], rcx ; store that into the first 8 bytes of our new alloc seg
if heap_barriers
	mov qword [rax+8], 0x46004600 ; make sure our barrier is set to free
end if
	mov [rsi], rax ; store our new alloc seg into the freelist head
	add rax, rdi ; move forward by our alloc size
	sub r8d, 1 ; dec loop
	jnz .newloop
	; now, rax is sitting on the LAST item, instead of linking it to the list, we will return it
	sub rsi, [_heap_base]
	mov [rax], rsi ; store the real offset to our freelist into the first 8 bytes of our alloc seg
if heap_barriers
	mov qword [rax+8], 0x46464646
	add rax, 16
else
	add rax, 8 ; add 8 to skip said prefix
end if
	epilog
if heap_barriers
.barrierfail:
	breakpoint
end if
calign
.die:
	mov eax, syscall_exit
	mov edi, 99
	syscall
end if
if used heap$alloc_clear | defined include_everything
	; single argument in rdi == size of pointer we are allocating
	; NOTE: we memset rounded up to nearest 8 of size before returning the pointer
falign
heap$alloc_clear:
	prolog heap$alloc_clear
	push rdi			; keep the requested size across the alloc
	call heap$alloc
	pop rdx				; rdx == requested size
	push rax			; keep the new block for our return
	mov rdi, rax
	add rdx, 7
	and rdx, not 7			; round the clear length up to whole qwords
	xor esi, esi
	call memset32 ; avoids the imul
	pop rax
	epilog
end if
if used heap$alloc_blockcopy | defined include_everything
	; two arguments: rdi == buffer to copy, rsi == length of same
falign
heap$alloc_blockcopy:
	prolog heap$alloc_blockcopy
	push rsi rdi			; save length, then source buffer
	mov rdi, rsi
	call heap$alloc
	pop rsi				; rsi == source buffer
	mov rdx, [rsp]			; rdx == byte count
	mov [rsp], rax			; keep the new block for our return
	mov rdi, rax
	call memcpy
	pop rax
	epilog
end if
if used heap$alloc_random | defined include_everything
	; single argument in rdi == size of pointer we are allocating
	; NOTE: we rng$block rounded up to nearest 8 of size before returning the pointer
falign
heap$alloc_random:
	prolog heap$alloc_random
	push rdi			; keep the requested size across the alloc
	call heap$alloc
	pop rsi				; rsi == requested size
	push rax			; keep the new block for our return
	mov rdi, rax
	add rsi, 7
	and rsi, not 7			; round the fill length up to whole qwords
	call rng$block
	pop rax
	epilog
end if
if used heap$free | defined include_everything
	; single argument in rdi == pointer we are freeing
	; the qword prefix in front of the pointer holds either the bin freelist
	; offset (binned allocs, always <= header size) or the full mmap size
	; (large allocs, always > 1MB), which is how we tell them apart below
falign
heap$free:
	prolog heap$free
if heap_barriers
	; barrier word must still say "allocated", else someone scribbled past
	; their block (or this is a double free)
	cmp qword [rdi-8], 0x46464646
	jne .barrierfail
	mov rsi, [rdi-16]
else
	mov rsi, [rdi-8]
end if
if heap_bincheck
	; bin offsets start at 8; anything smaller is a corrupt prefix
	cmp rsi, 8
	jb .binfail
end if
if heapdebug
	push rdi rsi
	mov rdi, .debug3
	mov esi, .debug3len
	mov rcx, [_heap_base]
	mov rdx, [rcx+8]
	call debug_heap
	mov rdx, [rsp]
	test rdx, rdx
	mov rdi, .debug1
	mov esi, .debug1len
	call debug_heap
	pop rsi rdi
end if
	mov rdx, [_heap_base]
	; > 1MB means heap$alloc got this straight from mmap
	cmp rsi, 1048576
	jg .largeput
if heap_bincheck
	; binned, so the prefix must be a valid offset into our header
	cmp rsi, 139272
	ja .binfail
end if
if heap_barriers
	; flip the barrier back to its "free" state before relinking
	mov qword [rdi-8], 0x46004600
	sub rdi, 16
else
	sub rdi, 8
end if
	; push the block onto the head of its bin's singly linked freelist
	mov rax, [rdx+rsi] ; get whatever value is currently in our heap of the freelist head
	mov [rsi+rdx], rdi ; put our pointer into the freelist head position
	mov [rdi], rax ; put the old freelist head into our item
if heapdebug
	mov rdi, .debug4
	mov esi, .debug4len
	mov rcx, [_heap_base]
	mov rdx, [rcx+8]
	call debug_heap
	call debug_firstbin
end if
	epilog
if heapdebug
dalign
.debug1 db 'HEAP$FREE FOR BIN: '
.debug1len = $ - .debug1
.debug3 db '-------- heap_base[8] is: '
.debug3len = $ - .debug3
.debug4 db ' heap_base[8] now: '
.debug4len = $ - .debug4
end if
calign
.largeput:
	; straight-mmap allocation: hand the whole segment back to the kernel
if heap_barriers
	sub rdi, 16
else
	sub rdi, 8
end if
	mov eax, syscall_munmap
	; rdi is our pointer
	; rsi is already the size
	syscall
	epilog
if heap_barriers
.barrierfail:
	breakpoint
end if
if heap_bincheck
.binfail:
	breakpoint
end if
end if
if used heap$free_clear | defined include_everything
	; single argument in rdi == pointer we are freeing
	; NOTE: we memset 0 the space prior to calling the real free
falign
heap$free_clear:
	prolog heap$free_clear
if heap_barriers
	; barrier word must still say "allocated" or this block was corrupted
	cmp qword [rdi-8], 0x46464646
	jne .barrierfail
	mov rsi, [rdi-16]
else
	mov rsi, [rdi-8]
end if
if heap_bincheck
	cmp rsi, 8
	jb .binfail
end if
if heapdebug
	push rdi rsi
	mov rdi, .debug3
	mov esi, .debug3len
	mov rcx, [_heap_base]
	mov rdx, [rcx+8]
	call debug_heap
	mov rdx, [rsp]
	mov rdi, .debug1
	mov esi, .debug1len
	call debug_heap
	pop rsi rdi
end if
	mov rdx, [_heap_base]
	; > 1MB prefix means it was a straight mmap allocation
	cmp rsi, 1048576
	jg .largeput
if heap_bincheck
	cmp rsi, 139272
	ja .binfail
end if
	; we have to infer the size of our pointer so we can clear it before we put it back
	; (the prefix is blocksize >> 3, so usable bytes == (prefix << 3) - 8; the
	; map-node bin is special-cased to 72 usable bytes)
if heap_bincheck
	mov rdx, rsi
	shl rdx, 3
	sub rdx, 8
	mov ecx, 72
	cmp esi, 139272			; node bin lives at the final header slot
	cmove rdx, rcx
else
	mov ecx, 72
	mov rdx, rsi
	shl rdx, 3
	sub rdx, 8
	test rsi, rsi			; node bin lives at offset 0 in this config
	cmovz rdx, rcx
end if
	push rdi rsi
	xor esi, esi
	call memset32
	pop rsi rdi
	mov rdx, [_heap_base]
if heap_barriers
	mov qword [rdi-8], 0x46004600
	sub rdi, 16
else
	sub rdi, 8
end if
	; push the (now cleared) block onto the head of its bin freelist
	mov rax, [rdx+rsi]
	mov [rsi+rdx], rdi
	mov [rdi], rax
if heapdebug
	mov rdi, .debug4
	mov esi, .debug4len
	mov rcx, [_heap_base]
	mov rdx, [rcx+8]
	call debug_heap
	call debug_firstbin
end if
	epilog
if heapdebug
dalign
.debug1 db 'HEAP$FREE_CLEAR FOR BIN: '
.debug1len = $ - .debug1
.debug3 db '-------- heap_base[8] is: '
.debug3len = $ - .debug3
.debug4 db ' heap_base[8] now: '
.debug4len = $ - .debug4
end if
calign
.largeput:
if heap_barriers
	sub rdi, 16
else
	sub rdi, 8
end if
	push rdi rsi
	mov rdx, rsi
	xor esi, esi
	call memset32 ; note: our size may not be exact, but mmap won't give us a partial word return
	pop rsi rdi
	mov eax, syscall_munmap
	syscall
	epilog
if heap_barriers
.barrierfail:
	breakpoint
end if
if heap_bincheck
.binfail:
	breakpoint
end if
end if
if used heap$free_random | defined include_everything
	; single argument in rdi == pointer we are freeing
	; NOTE: we put random bytes from rng$u64 into the space prior to calling the real free
	; this is _expensive_, hahaha, but useful in rare cases
falign
heap$free_random:
	prolog heap$free_random
if heap_barriers
	; barrier word must still say "allocated" or this block was corrupted
	cmp qword [rdi-8], 0x46464646
	jne .barrierfail
	mov rsi, [rdi-16]
else
	mov rsi, [rdi-8]
end if
if heap_bincheck
	cmp rsi, 8
	jb .binfail
end if
	mov rdx, [_heap_base]
	; > 1MB prefix means it was a straight mmap allocation
	cmp rsi, 1048576
	jg .largeput
if heap_bincheck
	cmp rsi, 139272
	ja .binfail
end if
	; we have to infer the size of our pointer so we can clear it before we put it back
	; (the prefix is blocksize >> 3, so usable bytes == (prefix << 3) - 8; the
	; map-node bin is special-cased to 72 usable bytes)
if heap_bincheck
	mov rdx, rsi
	shl rdx, 3
	sub rdx, 8
	mov ecx, 72
	cmp esi, 139272			; node bin lives at the final header slot
	cmove rdx, rcx
else
	mov ecx, 72
	mov rdx, rsi
	shl rdx, 3
	sub rdx, 8
	test rsi, rsi			; node bin lives at offset 0 in this config
	cmovz rdx, rcx
end if
	push rdi rsi
	mov rsi, rdx
	call rng$block
	pop rsi rdi
	mov rdx, [_heap_base]
if heap_barriers
	mov qword [rdi-8], 0x46004600
	sub rdi, 16
else
	sub rdi, 8
end if
	; push the (now randomized) block onto the head of its bin freelist
	mov rax, [rdx+rsi]
	mov [rsi+rdx], rdi
	mov [rdi], rax
	epilog
calign
.largeput:
if heap_barriers
	sub rdi, 16
else
	sub rdi, 8
end if
	push rdi rsi
	call rng$block
	pop rsi rdi
	mov eax, syscall_munmap
	syscall
	epilog
if heap_barriers
.barrierfail:
	breakpoint
end if
if heap_bincheck
.binfail:
	breakpoint
end if
end if