HeavyThing - htcrypt.inc

Jeff Marrison

Table of functions

	; ------------------------------------------------------------------------
	; HeavyThing x86_64 assembly language library and showcase programs
	; Copyright © 2015-2018 2 Ton Digital 
	; Homepage: https://2ton.com.au/
	; Author: Jeff Marrison <jeff@2ton.com.au>
	;       
	; This file is part of the HeavyThing library.
	;       
	; HeavyThing is free software: you can redistribute it and/or modify
	; it under the terms of the GNU General Public License, or
	; (at your option) any later version.
	;       
	; HeavyThing is distributed in the hope that it will be useful, 
	; but WITHOUT ANY WARRANTY; without even the implied warranty of
	; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
	; GNU General Public License for more details.
	;       
	; You should have received a copy of the GNU General Public License along
	; with the HeavyThing library. If not, see <http://www.gnu.org/licenses/>.
	; ------------------------------------------------------------------------
	;       
	; htcrypt.inc: my Heavy Thing password-based crypt/decrypt
	;
	; After having built all of the rest of the Heavy Thing crypto goods to support
	; TLS/SSH, I rather like the idea of cascading AES256, so what this
	; does is:
	; 1) start with a user-supplied passphrase (or 8kb worth of key material, and skip step 2)
	; 2) using our hmac-sha512 modified scrypt, expand that out to 8kb worth of preliminary key material
	; 3) break the 8kb key material into 256 AES256 contexts (32 byte keys x 256 == 8kb), initialised sequentially
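	; 4) keep a copy of the first 64 bytes of the (not yet scrambled) key material inside the object;
	;    these 64 bytes are the sequence of byte indexes that encrypt/decrypt walk later on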
	; 5) repeat 1024 times:
	;    using the byte indexes from the last 64 bytes of key material, AES256 the entire key block
	;    repeatedly in sequence to finally arrive at 8kb of key material that took a bit more
	;    grunt to arrive at. See commentary below for the reason why I decided to do this.
	; 6) using our newly crunched 8kb worth of key material, re-initialize all 256 AES256 encryption contexts
	; 7) using the same 8kb worth of key material, initialize 256 AES256 _decryption_ contexts as well
	;
	; to encrypt an AES-sized block (16 bytes)
	; using the byte indexes from step 4, encrypt the block successively 64 times
	; 
	; to decrypt an AES-sized block (16 bytes)
	; using the reversed byte indexes from step 4, decrypt the block successively 64 times
	;
	;
	; This is _not_ meant to be crazy-fast, and is certainly not meant to be memory efficient.
	; 
	; So, the "cascading" means: using the 64 byte index, we walk forward for encryption through
	; each and every 64 byte index, and use that AES256 context to re-encrypt what is there.
	; 
	; We provide a useless key material generator, such that many of these contexts can be created
	; without doing any expensive work (such that a coredump/inspection renders a lot of objects all
	; that look/appear valid, but won't necessarily be).
	;
	; Further to this, we provide additional functions to encrypt/decrypt all our encrypt/decrypt states
	; such that what sits in memory isn't required to be valid at any given time.
	;
	; While the two aforementioned efforts do nothing other than obfuscate and/or make difficult the
	; process of memory analysis, they are still nice to have in the event there are concerns about
	; coredumps of your running/valid process.
	;
	; In Summary: we initialise 256 different AES256 encrypt and decrypt states, all of which are valid.
	; We then maintain a 64 byte sequence index inside a randomized block that is our "real" sequencer
	; such that when we want to encrypt something, we walk through AES encrypt contents at each of the
	; byte indexes contained in the 64 byte sequence, which may contain duplicates, etc.
	; For decryption, we reverse the 64 byte sequence and walk backward through them.
	;
	; NOTE re: cascaded AES256: There is ongoing debate about whether or not cascaded AES256 (using
	; unique keys for each) actually provides a security benefit over a single AES256. The point here
	; is to mandate the use of 8192 bytes of key material, and the cascaded AES256 is a desirable
	; side effect. Whether this really increases security beyond a single key AES256 or not isn't
	; really important to the code contained herein.
	;
	; NOTE re: new_keymaterial doing its one-way AES256 "grinding": The only reason we do this is
	; to make brute-force password recoveries ridiculously difficult. We could have done that
	; exclusively with other key derivation functions, but it is assumed that they were used
	; (as is done with new_passphrase) already, so all we are really doing is "scrambling" the
	; key material further, using AES256 as a CSPRNG as it were. The method chosen requires a fair
	; bit of time/effort.
	;

; The layout constants below are needed by every constructor, including
; htcrypt$new_useless (which references htcrypt_size, the *_ofs offsets and
; aes_alloc_size), so that one has to be part of the guard as well.
if used htcrypt$new_passphrase | used htcrypt$new_keymaterial | used htcrypt$new_raw_keymaterial | used htcrypt$new_useless | defined include_everything


; layout of an htcrypt object (all offsets in bytes from the start of the object):
htcrypt_user_ofs = 0		; not used in here, 16 bytes
htcrypt_x_ofs = 16		; lowest byte value that does not occur in the 64 byte sequence (stored as 8 bytes);
				; htcrypt$encrypt/decrypt treat a value of 255 here as "use context 255 only"
htcrypt_enc_ofs = 24		; table of 256 pointers to AES256 encryption contexts (256 * 8 == 2048 bytes)
htcrypt_dec_ofs = 2072		; table of 256 pointers to AES256 decryption contexts; it directly follows the
				; enc table, and htcrypt$destroy relies on that by walking 512 pointers in one go
htcrypt_seq_ofs = 4120		; pointer to the 64 byte sequence, which lives somewhere inside the r area
htcrypt_r_ofs = 4128		; start of the filler area that hides the sequence

; the sequence is placed at an 8 byte aligned offset of at most 4016 into the r area,
; so 4016 + 64 == 4080 bytes are enough to hold it wherever it lands
htcrypt_size = htcrypt_r_ofs + 4080


; number of bytes requested from heap$alloc_random for every AES context
aes_alloc_size = 312


; if this is set, new_keymaterial will dump its sequence and key material to stderr
htcrypt_debug_keymaterial = 0


end if


if used htcrypt$new_passphrase | defined include_everything

	; htcrypt$new_passphrase: build an htcrypt object from a passphrase
	; two arguments: rdi == pointer to passphrase, esi == length of same
	; returns whatever htcrypt$new_keymaterial leaves in rax (the new htcrypt object)
	;
	; scrypt is asked to fill an 8192 byte stack buffer; the passphrase pointer/length
	; pair is handed to it twice (rdx/ecx and r8/r9d).
	; NOTE(review): presumably the second pair is the salt -- confirm against scrypt's header.
falign
htcrypt$new_passphrase:
	prolog	htcrypt$new_passphrase
	; both pointer/length pairs come from the incoming arguments, so take them
	; before rdi/esi get reused for the output buffer
	mov	r8, rdi
	mov	r9d, esi
	mov	rdx, rdi
	mov	ecx, esi
	; 8kb of scratch space on the stack for the expanded key material
	sub	rsp, 8192
	mov	esi, 8192
	mov	rdi, rsp
	call	scrypt
	; new_keymaterial grinds the buffer in place and overwrites it with rng output
	; before it returns, so nothing of it is left behind on our stack
	mov	rdi, rsp
	call	htcrypt$new_keymaterial
	add	rsp, 8192
	epilog

end if



if used htcrypt$new_keymaterial | defined include_everything
	; htcrypt$new_keymaterial: build an htcrypt object from 8kb of key material, grinding it first
	; single argument: rdi == pointer to 8kb block of key material
	; returns: rax == new htcrypt object (allocated with heap$alloc_random)
	; NOTE: the caller's 8kb block is modified in place (it gets AES-scrambled) and is
	; overwritten via rng$block before we return.
	;
	; register use throughout:
	;   rbx == the htcrypt object
	;   r12 == start of the caller's key material (briefly reused as a counter while grinding)
	;   r13 == walking pointer into the key material
	;   r14d == loop counter
	;   r15 == walking pointer into the enc/dec pointer tables (or into the temporary sequence)
falign
htcrypt$new_keymaterial:
	prolog	htcrypt$new_keymaterial
	push	rbx r12 r13 r14 r15
	mov	r12, rdi
	mov	r13, rdi
	mov	edi, htcrypt_size
	call	heap$alloc_random
	mov	rbx, rax
	; first up: determine a random location in our htcrypt_r_ofs space
	; to store our first 64 bytes of sequencing material
	xor	edi, edi
	mov	esi, 4016
	call	rng$int
	; make sure it falls on an 8 byte boundary so that it at least looks like
	; every other pointer we have
	and	rax, not 7
	lea	rdi, [rbx+rax+htcrypt_r_ofs]
	mov	rsi, r12
	mov	edx, 64
	; remember where the sequence lives, then copy the first 64 bytes of the
	; (still unscrambled) key material there
	mov	[rbx+htcrypt_seq_ofs], rdi
	call	memcpy
	; find the index of the first unused in the sequence
	; (64 bytes can hold at most 64 distinct values, so a candidate in 0..64 always wins)
	xor	edx, edx
calign
.findx_outer:
	; see if edx exists in the sequence
	mov	rsi, [rbx+htcrypt_seq_ofs]
	mov	ecx, 64
calign
.findx_inner:
	cmp	byte [rsi], dl
	je	.findx_next	; this one exists in the sequence
	add	rsi, 1
	sub	ecx, 1
	jnz	.findx_inner
	; if we made it to here, edx is not in the sequence
	mov	[rbx+htcrypt_x_ofs], rdx
	jmp	.foundx
calign
.findx_next:
	add	edx, 1
	jmp	.findx_outer
calign
.foundx:
if htcrypt_debug_keymaterial
	; debug only: print the 64 byte sequence as hex to stderr
	mov	rdi, .debug3
	call	string$to_stderr
	mov	rdi, [rbx+htcrypt_seq_ofs]
	mov	esi, 64
	call	string$from_bintohex
	push	rax
	mov	rdi, rax
	call	string$to_stderrln
	pop	rdi
	call	heap$free
end if

	; next up: do our preliminary 256 aes256 contexts
	; (context n is keyed with key material bytes n*32 .. n*32+31)
	mov	r14d, 256
	lea	r15, [rbx+htcrypt_enc_ofs]
calign
.prelim:
	mov	edi, aes_alloc_size
	call	heap$alloc_random
	mov	[r15], rax
	mov	rdi, rax
	add	r15, 8
	mov	rsi, r13
	mov	edx, 32
	add	r13, 32
	call	aes$init_encrypt
	sub	r14d, 1
	jnz	.prelim
	; next up: the grind. We do 1024 passes; for each pass the last 64 bytes of
	; the key material (as they stand when the pass starts) are the sequence, and
	; for every byte of that sequence the entire 8kb gets encrypted once with the
	; preliminary context that byte selects (so 64 full sweeps per pass).
	; r13 sits at the end of the key material after the loop above:
	sub	r13, 64
	; we can store our 64 byte sequence temporarily in our dec area
	; because it will be overwritten as soon as we are done with this step
	mov	eax, 1024
	lea	rdi, [rbx+htcrypt_dec_ofs]
	mov	rsi, r13
	mov	edx, 64
	mov	r15, rdi
	push	rax
	call	memcpy
	push	r12
	; stack from here until the matching pop below:
	;   [rsp]   == start of the key material
	;   [rsp+8] == number of passes remaining (starts at 1024)
	mov	r12d, 64
calign
.keyscramble_outer:
	; for each sequence, encrypt the entire 8192 byte space
	mov	r13, [rsp]
	mov	r14d, 512	; 8192 / 16 AES blocks
calign
.keyscramble_inner:
	; get our aes position:
	movzx	eax, byte [r15]
	mov	rdi, [rbx+rax*8+htcrypt_enc_ofs]
	mov	rsi, r13
	add	r13, 16
	call	aes$encrypt
	sub	r14d, 1
	jnz	.keyscramble_inner
	; move to our next index
	add	r15, 1
	sub	r12d, 1
	jnz	.keyscramble_outer

	; setup for our repeat: take a fresh temporary sequence from the (now changed)
	; last 64 bytes of the key material and count one pass off
	lea	rdi, [rbx+htcrypt_dec_ofs]
	mov	rsi, [rsp]
	mov	edx, 64
	mov	r15, rdi
	add	rsi, 8192 - 64
	call	memcpy
	mov	r12d, 64
	sub	dword [rsp+8], 1
	jnz	.keyscramble_outer

	; r12 is the key material start again, rax just receives the spent pass counter
	pop	r12 rax
	; so now, we have done a LOT of work on the key material
	; next up, reinit all our aes256 encryption contexts
	; (the context allocations from .prelim are reused, only rekeyed)
	mov	r14d, 256
	lea	r15, [rbx+htcrypt_enc_ofs]
	mov	r13, r12
calign
.final_enc:
if htcrypt_debug_keymaterial
	; debug only: print "Key #<index as hex>: <32 key bytes as hex>" to stderr
	mov	rdi, .debug1
	call	string$to_stderr
	mov	edi, 256
	sub	edi, r14d
	push	rdi
	mov	rdi, rsp
	mov	esi, 1
	call	string$from_bintohex
	mov	[rsp], rax
	mov	rdi, rax
	call	string$to_stderr
	pop	rdi
	call	heap$free
	mov	rdi, .debug2
	call	string$to_stderr
	mov	rdi, r13
	mov	esi, 32
	call	string$from_bintohex
	push	rax
	mov	rdi, rax
	call	string$to_stderrln
	pop	rdi
	call	heap$free
end if

	mov	rdi, [r15]
	add	r15, 8
	mov	rsi, r13
	mov	edx, 32
	add	r13, 32
	call	aes$init_encrypt
	sub	r14d, 1
	jnz	.final_enc
	; next up, same for our decryption goods as well
	; (this overwrites the temporary sequence that sat at the start of the dec table)
	mov	r14d, 256
	lea	r15, [rbx+htcrypt_dec_ofs]
	mov	r13, r12
calign
.final_dec:
	mov	edi, aes_alloc_size
	call	heap$alloc_random
	mov	[r15], rax
	mov	rdi, rax
	add	r15, 8
	mov	rsi, r13
	mov	edx, 32
	add	r13, 32
	call	aes$init_decrypt
	sub	r14d, 1
	jnz	.final_dec
	; last but not least, randomize the block we used
	mov	rdi, r12
	mov	esi, 8192
	call	rng$block
	; done, dusted
	mov	rax, rbx
	pop	r15 r14 r13 r12 rbx
	epilog

if htcrypt_debug_keymaterial
	; strings used by the debug output above
cleartext .debug1, 'Key #'
cleartext .debug2, ': '
cleartext .debug3, 'Sequence: '
end if

end if


if used htcrypt$new_raw_keymaterial | defined include_everything
	; htcrypt$new_raw_keymaterial: build an htcrypt object straight from 8kb of key material
	; single argument: rdi == pointer to 8kb block of key material
	; returns: rax == new htcrypt object
	; This variant is intended for key material that is already cryptographically
	; secure: no grinding is done, and the caller's block is only read, never written.
	;
	; register use: rbx == htcrypt object, r12 == key material, r13d == context index
falign
htcrypt$new_raw_keymaterial:
	prolog	htcrypt$new_raw_keymaterial
	push	rbx r12 r13
	mov	r12, rdi
	mov	edi, htcrypt_size
	call	heap$alloc_random
	mov	rbx, rax
	; choose where in the r area the 64 byte sequence goes; the offset is rounded
	; down to a multiple of 8 so the stored pointer looks like all the others
	xor	edi, edi
	mov	esi, 4016
	call	rng$int
	and	rax, not 7
	lea	rdi, [rbx+rax+htcrypt_r_ofs]
	mov	[rbx+htcrypt_seq_ofs], rdi
	; the sequence is the first 64 bytes of the key material
	mov	rsi, r12
	mov	edx, 64
	call	memcpy
	; determine the lowest byte value that the sequence does not contain:
	; edx is the candidate, and each hit restarts the scan with the next candidate
	xor	edx, edx
	jmp	.scan_begin
calign
.taken:
	add	edx, 1
.scan_begin:
	mov	rsi, [rbx+htcrypt_seq_ofs]
	xor	ecx, ecx
calign
.scan:
	cmp	dl, [rsi+rcx]
	je	.taken
	add	ecx, 1
	cmp	ecx, 64
	jb	.scan
	; all 64 bytes checked without a hit
	mov	[rbx+htcrypt_x_ofs], rdx
	; 256 encryption contexts, context n keyed with bytes n*32 .. n*32+31
	xor	r13d, r13d
calign
.enc_ctx:
	mov	edi, aes_alloc_size
	call	heap$alloc_random
	mov	[rbx+r13*8+htcrypt_enc_ofs], rax
	mov	esi, r13d
	shl	esi, 5
	add	rsi, r12
	mov	rdi, rax
	mov	edx, 32
	call	aes$init_encrypt
	add	r13d, 1
	cmp	r13d, 256
	jb	.enc_ctx
	; and the matching 256 decryption contexts from the very same keys
	xor	r13d, r13d
calign
.dec_ctx:
	mov	edi, aes_alloc_size
	call	heap$alloc_random
	mov	[rbx+r13*8+htcrypt_dec_ofs], rax
	mov	esi, r13d
	shl	esi, 5
	add	rsi, r12
	mov	rdi, rax
	mov	edx, 32
	call	aes$init_decrypt
	add	r13d, 1
	cmp	r13d, 256
	jb	.dec_ctx
	; hand the finished object back
	mov	rax, rbx
	pop	r13 r12 rbx
	epilog

end if

if used htcrypt$new_useless | defined include_everything
	; htcrypt$new_useless: build a decoy htcrypt object
	; no arguments
	; returns: rax == new htcrypt object keyed from 8kb of rng$block output
	; The object is put together exactly like a raw-key-material one, the only
	; difference being that the key material is random and discarded afterward.
	;
	; register use: rbx == htcrypt object, r12 == key material (on our stack),
	;               r13d == context index
falign
htcrypt$new_useless:
	prolog	htcrypt$new_useless
	push	rbx r12 r13
	; 8kb of throwaway key material on the stack
	sub	rsp, 8192
	mov	r12, rsp
	mov	rdi, r12
	mov	esi, 8192
	call	rng$block
	mov	edi, htcrypt_size
	call	heap$alloc_random
	mov	rbx, rax
	; choose where in the r area the 64 byte sequence goes; the offset is rounded
	; down to a multiple of 8 so the stored pointer looks like all the others
	xor	edi, edi
	mov	esi, 4016
	call	rng$int
	and	rax, not 7
	lea	rdi, [rbx+rax+htcrypt_r_ofs]
	mov	[rbx+htcrypt_seq_ofs], rdi
	; the sequence is the first 64 bytes of the key material
	mov	rsi, r12
	mov	edx, 64
	call	memcpy
	; determine the lowest byte value that the sequence does not contain:
	; edx is the candidate, and each hit restarts the scan with the next candidate
	xor	edx, edx
	jmp	.scan_begin
calign
.taken:
	add	edx, 1
.scan_begin:
	mov	rsi, [rbx+htcrypt_seq_ofs]
	xor	ecx, ecx
calign
.scan:
	cmp	dl, [rsi+rcx]
	je	.taken
	add	ecx, 1
	cmp	ecx, 64
	jb	.scan
	; all 64 bytes checked without a hit
	mov	[rbx+htcrypt_x_ofs], rdx
	; 256 encryption contexts, context n keyed with bytes n*32 .. n*32+31
	xor	r13d, r13d
calign
.enc_ctx:
	mov	edi, aes_alloc_size
	call	heap$alloc_random
	mov	[rbx+r13*8+htcrypt_enc_ofs], rax
	mov	esi, r13d
	shl	esi, 5
	add	rsi, r12
	mov	rdi, rax
	mov	edx, 32
	call	aes$init_encrypt
	add	r13d, 1
	cmp	r13d, 256
	jb	.enc_ctx

	; and the matching 256 decryption contexts from the very same keys
	xor	r13d, r13d
calign
.dec_ctx:
	mov	edi, aes_alloc_size
	call	heap$alloc_random
	mov	[rbx+r13*8+htcrypt_dec_ofs], rax
	mov	esi, r13d
	shl	esi, 5
	add	rsi, r12
	mov	rdi, rax
	mov	edx, 32
	call	aes$init_decrypt
	add	r13d, 1
	cmp	r13d, 256
	jb	.dec_ctx
	; overwrite the stack copy of the key material before giving the space back
	mov	rdi, r12
	mov	esi, 8192
	call	rng$block
	mov	rax, rbx
	add	rsp, 8192
	pop	r13 r12 rbx
	epilog

end if


if used htcrypt$destroy | defined include_everything
	; htcrypt$destroy: wipe and release an htcrypt object
	; single argument in rdi: the htcrypt object
	; Every AES context and then the object itself is overwritten via rng$block
	; before being handed to heap$free.
	;
	; register use: rbx == htcrypt object, r12d == table index, r13 == current context
falign
htcrypt$destroy:
	prolog	htcrypt$destroy
	push	rbx r12 r13
	mov	rbx, rdi
	; the enc and dec pointer tables sit back to back, so treat them as a single
	; table of 512 context pointers
	xor	r12d, r12d
calign
.wipe_ctx:
	mov	r13, [rbx+r12*8+htcrypt_enc_ofs]
	mov	rdi, r13
	mov	esi, aes_alloc_size
	call	rng$block
	mov	rdi, r13
	call	heap$free
	add	r12d, 1
	cmp	r12d, 512
	jb	.wipe_ctx
	; now the object itself: scribble over all of it, then let it go
	mov	rdi, rbx
	mov	esi, htcrypt_size
	call	rng$block
	mov	rdi, rbx
	call	heap$free
	pop	r13 r12 rbx
	epilog

end if

if used htcrypt$hide | defined include_everything
	; htcrypt$hide: encrypt the sensitive parts of an htcrypt object in place
	; two arguments: rdi == htcrypt object, rsi == aes encryption context
	; What gets encrypted (16 bytes at a time with aes$encrypt and the supplied context):
	;   - the 64 byte sequence (4 blocks)
	;   - bytes 16..255 of every enc and dec AES context (15 blocks each), which per
	;     the original author's note are the roundkeys; the rest of each context is
	;     left alone so it keeps looking like any other context
	; htcrypt$show with the matching decryption context undoes this.
	;
	; register use: rbx == htcrypt object, r12 == supplied aes context,
	;               r13d == context index, r14 == current 16 byte block, r15d == block counter
falign
htcrypt$hide:
	prolog	htcrypt$hide
	push	rbx r12 r13 r14 r15
	mov	rbx, rdi
	mov	r12, rsi
	; the sequence first: four consecutive blocks
	mov	r14, [rbx+htcrypt_seq_ofs]
	repeat 4
		mov	rdi, r12
		mov	rsi, r14
		call	aes$encrypt
		add	r14, 16
	end repeat
	; then walk the enc and dec tables side by side: enc context n, then dec context n
	xor	r13d, r13d
calign
.each_pair:
	mov	r14, [rbx+r13*8+htcrypt_enc_ofs]
	add	r14, 16
	mov	r15d, 15
calign
.enc_keys:
	mov	rdi, r12
	mov	rsi, r14
	call	aes$encrypt
	add	r14, 16
	sub	r15d, 1
	jnz	.enc_keys
	mov	r14, [rbx+r13*8+htcrypt_dec_ofs]
	add	r14, 16
	mov	r15d, 15
calign
.dec_keys:
	mov	rdi, r12
	mov	rsi, r14
	call	aes$encrypt
	add	r14, 16
	sub	r15d, 1
	jnz	.dec_keys
	add	r13d, 1
	cmp	r13d, 256
	jb	.each_pair
	pop	r15 r14 r13 r12 rbx
	epilog

end if



if used htcrypt$show | defined include_everything
	; htcrypt$show: decrypt the sensitive parts of an htcrypt object in place
	; two arguments: rdi == htcrypt object, rsi == aes decryption context
	; Reverses htcrypt$hide. What gets decrypted (16 bytes at a time with aes$decrypt
	; and the supplied context):
	;   - the 64 byte sequence (4 blocks)
	;   - bytes 16..255 of every enc and dec AES context (15 blocks each), which per
	;     the original author's note are the roundkeys
	;
	; register use: rbx == htcrypt object, r12 == supplied aes context,
	;               r13d == context index, r14 == current 16 byte block, r15d == block counter
falign
htcrypt$show:
	prolog	htcrypt$show
	push	rbx r12 r13 r14 r15
	mov	rbx, rdi
	mov	r12, rsi
	; the sequence first: four consecutive blocks
	mov	r14, [rbx+htcrypt_seq_ofs]
	repeat 4
		mov	rdi, r12
		mov	rsi, r14
		call	aes$decrypt
		add	r14, 16
	end repeat
	; then walk the enc and dec tables side by side: enc context n, then dec context n
	xor	r13d, r13d
calign
.each_pair:
	mov	r14, [rbx+r13*8+htcrypt_enc_ofs]
	add	r14, 16
	mov	r15d, 15
calign
.enc_keys:
	mov	rdi, r12
	mov	rsi, r14
	call	aes$decrypt
	add	r14, 16
	sub	r15d, 1
	jnz	.enc_keys
	mov	r14, [rbx+r13*8+htcrypt_dec_ofs]
	add	r14, 16
	mov	r15d, 15
calign
.dec_keys:
	mov	rdi, r12
	mov	rsi, r14
	call	aes$decrypt
	add	r14, 16
	sub	r15d, 1
	jnz	.dec_keys
	add	r13d, 1
	cmp	r13d, 256
	jb	.each_pair
	pop	r15 r14 r13 r12 rbx
	epilog

end if



if used htcrypt$encrypt | defined include_everything
	; htcrypt$encrypt: cascade-encrypt one 16 byte block in place
	; two arguments: rdi == htcrypt object, rsi == ptr to block to encrypt in place
	; Normal operation: the block is run through aes$encrypt 64 times, each time with
	; the encryption context selected by the next byte of the object's sequence
	; (walking the sequence front to back).
	; Special case: if the dword at htcrypt_x_ofs is 255, only context 255 is applied, once.
	;
	; register use (cascade path): rbx == htcrypt object, r12 == block,
	;               r13d == position in the sequence, r14 == sequence
falign
htcrypt$encrypt:
	prolog	htcrypt$encrypt
	cmp	dword [rdi+htcrypt_x_ofs], 255
	jne	.cascade
	; single AES: rsi still holds the block pointer
	mov	rdi, [rdi+255*8+htcrypt_enc_ofs]
	call	aes$encrypt
	epilog
calign
.cascade:
	push	rbx r12 r13 r14
	mov	rbx, rdi
	mov	r12, rsi
	mov	r14, [rbx+htcrypt_seq_ofs]
	xor	r13d, r13d
calign
.step:
	; the sequence byte is the index into the enc pointer table
	movzx	eax, byte [r14+r13]
	mov	rsi, r12
	mov	rdi, [rbx+rax*8+htcrypt_enc_ofs]
	call	aes$encrypt
	add	r13d, 1
	cmp	r13d, 64
	jb	.step
	pop	r14 r13 r12 rbx
	epilog

end if

if used htcrypt$decrypt | defined include_everything
	; htcrypt$decrypt: cascade-decrypt one 16 byte block in place
	; two arguments: rdi == htcrypt object, rsi == ptr to block to decrypt in place
	; Normal operation: the block is run through aes$decrypt 64 times, each time with
	; the decryption context selected by a byte of the object's sequence, walking the
	; sequence back to front (the reverse of htcrypt$encrypt).
	; Special case: if the dword at htcrypt_x_ofs is 255, only context 255 is applied, once.
	;
	; register use (cascade path): rbx == htcrypt object, r12 == block,
	;               r13d == position in the sequence, r14 == sequence
falign
htcrypt$decrypt:
	prolog	htcrypt$decrypt
	cmp	dword [rdi+htcrypt_x_ofs], 255
	jne	.cascade
	; single AES: rsi still holds the block pointer
	mov	rdi, [rdi+255*8+htcrypt_dec_ofs]
	call	aes$decrypt
	epilog
calign
.cascade:
	push	rbx r12 r13 r14
	mov	rbx, rdi
	mov	r12, rsi
	mov	r14, [rbx+htcrypt_seq_ofs]
	mov	r13d, 64
calign
.step:
	; positions 63 down to 0; the sequence byte is the index into the dec pointer table
	sub	r13d, 1
	movzx	eax, byte [r14+r13]
	mov	rsi, r12
	mov	rdi, [rbx+rax*8+htcrypt_dec_ofs]
	call	aes$decrypt
	test	r13d, r13d
	jnz	.step
	pop	r14 r13 r12 rbx
	epilog


end if