HeavyThing - dhtool/dhtool.asm

Jeff Marrison

	; ------------------------------------------------------------------------
	; HeavyThing x86_64 assembly language library and showcase programs
	; Copyright © 2015-2018 2 Ton Digital 
	; Homepage: https://2ton.com.au/
	; Author: Jeff Marrison <jeff@2ton.com.au>
	;       
	; This file is part of the HeavyThing library.
	;       
	; HeavyThing is free software: you can redistribute it and/or modify
	; it under the terms of the GNU General Public License, or
	; (at your option) any later version.
	;       
	; HeavyThing is distributed in the hope that it will be useful, 
	; but WITHOUT ANY WARRANTY; without even the implied warranty of
	; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
	; GNU General Public License for more details.
	;       
	; You should have received a copy of the GNU General Public License along
	; with the HeavyThing library. If not, see <http://www.gnu.org/licenses/>.
	; ------------------------------------------------------------------------
	;
	; dhtool.asm: Diffie-Hellman parameter tool, used to verify existing
	; parameters or generate new safe prime and generator (using multiple CPU).
	;
	; If verifying: We accept a filename input for either a standard dhparam
	; PEM file, or /etc/ssh/moduli (or similar file), and we then perform our
	; "hardcore" verification. If it is an SSH moduli file, this will verify
	; each and every one of the parameters in it.
	;
	; If creating new: Similar to the "openssl dhparam ..." command, this will
	; send to STDERR a compatible PEM file (see dh_pem2ssh to convert it to
	; /etc/ssh/moduli format). The reason we send to stderr is because stdout
	; is filled with spaces, periods, plusses and $ for visual indication it
	; is not actually stuck. (And so for automated job running, stdout can
	; safely be redirected to /dev/null without affecting operation).
	; 
	; Because much of the huge prime sieve operations are what I like to think of as "luck
	; of the draw", if no CPU count argument is specified, we fire up as many 
	; execution threads as there are cores available. For every sieve candidate,
	; we write a ' ', for every q that passes trial division, we write a '.', for
	; every q that is probably prime, we write a '+', and for every p that is
	; probably prime, we write a '$', at which point hardcore verification begins.
	; (Hardcore verification being ~192 Miller-Rabin iterations for both primes).
	;
	; command line argument is size in bits you want of the DH safe prime, and optional
	; -CPUCOUNT setting to control the number of processes we fire up.
	; 
	; when we have found and verified one, we output a PKCS#3 DH Parameter file to
	; stderr in PEM format.
	; (The stderr output versus stdout is mainly because the HeavyThing library
	; spews its character progress outputs to stdout, so instead of modifying that,
	; we just chose stderr for the final output stage here to assist in scripting/
	; automation)
	;
	;
	; SOME FURTHER NOTES ABOUT HOW/WHY/WHERE/WHAT:
	;
	; This produces safe primes, and is verified with 192 Miller-Rabin rounds for >=2048
	; bit safe primes. Both the safe prime and its Sophie-Germain counterpart are
	; verified the same.
	;
	; A note here on generator selection: Unlike OpenSSL and OpenSSH, we generate
	; g such that g is a quadratic residue mod p, and is always of order q.
	; OpenSSL/OpenSSH make sure g is always of order 2q, and there are conflicting
	; ideas about which is the "correct" way to go about this. Specifically, see
	; https://groups.google.com/forum/#!topic/sci.crypt/fcfusEoJ8M4
	; and http://crypto.stackexchange.com/questions/12961/diffie-hellman-parameter-check-when-g-2-must-p-mod-24-11
	; and Wei Dai's page: http://www.cryptopp.com/wiki/Diffie-Hellman#Validating_Parameters
	;
	; IMO: The stackexchange commentary from poncho hit it in one, do we leak a bit
	; from private exponents, or do we halve the solution space of the shared secret...
	; Private exponents are usually always smaller than the modulus/prime, so I say
	; Wei Dai's preference (and as is also required by DDH/ElGamal) to ensure g is a
	; quadratic residue mod p is better. Modifications to this code to make it work like
	; OpenSSL/OpenSSH would be trivial in any case if you don't like Wei Dai, poncho, or
	; my decision re: same :-)
	;
	; THAT BEING SAID: If you use openssl dhparam -check to verify the outputs of this
	; program, you'll notice it doesn't like our generators.
	; 


	include 'dhtool_settings.inc'
	include '../ht.inc'

; Upper bound, in bits, on the safe prime size that _start will agree to
; generate; anything above it is routed to .yourenuts.
insane_primesize = 131072


	; parent_receive: installed in the receive slot of parent_vtable.
	; Whatever arrives in rsi/rdx is handed to write(2) on stderr untouched
	; (presumably the received data pointer and its length, as supplied by
	; the epoll layer -- confirm against the library); the incoming rdi is
	; overwritten with the file descriptor.
	; Afterwards every pid stored in [childlist] is sent SIGTERM and the
	; process exits with status 0, so this never returns to its caller.
calign
parent_receive:
	prolog	parent_receive
	mov	edi, 2			; stderr
	mov	eax, syscall_write
	syscall

	; list$clear is given .killkids as its per-item callback
	mov	rsi, .killkids
	mov	rdi, [childlist]
	call	list$clear

	xor	edi, edi		; exit status 0
	mov	eax, syscall_exit
	syscall

	epilog

falign
.killkids:
	; rdi == one entry of childlist (a child pid pushed by _start)
	mov	esi, 0xf		; SIGTERM
	mov	eax, syscall_kill
	syscall
	ret

dalign
parent_vtable:
	; handler table passed to epoll$new by _start for each parent-side
	; socket; every slot is the library routine except the fifth, which is
	; our own parent_receive.
	dq	epoll$destroy
	dq	epoll$clone
	dq	io$connected
	dq	epoll$send
	dq	parent_receive
	dq	io$error
	dq	io$timeout

; childlist: list object created by _start (list$new); the parent pushes the
; pid of every forked child into it, and parent_receive walks it to signal
; the children once a result has arrived.
globals
{
	childlist	dq	0
}


public _start
falign
_start:
	; Program entry point: initialise the library, then decide between
	; "generate" and "verify/convert" from the LAST command line argument.
	;   - last argument is a number: it is the safe prime size in bits
	;     (kept in rbx), optionally preceded by -XX selecting the number of
	;     worker processes (kept in r12, then control goes to .doit)
	;   - otherwise: the argument is treated as a filename (.maybeverify)
	; Every argument string popped off [argv] is released with heap$free
	; once it has been consumed.
	call	ht$init

	cmp	qword [argc], 1
	jbe	.usage

	call	list$new
	mov	[childlist], rax

	mov	rdi, [argv]
	call	list$pop_back
	mov	rbx, rax
	mov	rdi, rax
	call	string$isnumber
	test	eax, eax
	jz	.maybeverify
	mov	rdi, rbx
	call	string$to_unsigned
	mov	rdi, rbx
	mov	rbx, rax		; rbx == requested size in bits from here on
	call	heap$free
	test	rbx, rbx
	jz	.usage
	cmp	rbx, insane_primesize
	ja	.yourenuts
	cmp	rbx, 1536
	jb	.toosmall

	; see if a cpucount argument was passed (-XX)
	mov	rdi, [argv]
	call	list$pop_back
	test	rax, rax
	jz	.nocpuarg
	mov	r12, rax
	mov	rdi, rax
	mov	esi, '-'
	call	string$indexof_charcode
	; only a '-' at index 0 makes this a -XX option
	test	rax, rax
	jnz	.notcpuarg
	mov	rdi, r12
	mov	esi, 1
	mov	rdx, -1
	call	string$substr
	mov	r13, rax		; r13 == the argument without its leading '-'
	mov	rdi, r12
	call	heap$free
	mov	rdi, r13
	call	string$isnumber
	test	eax, eax
	jz	.usage
	mov	rdi, r13
	call	string$to_unsigned
	; the digit substring is no longer needed once converted; release it the
	; same way the SIZE argument was released above
	mov	rdi, r13
	mov	r12, rax		; r12 == requested process count
	call	heap$free
	test	r12, r12
	jz	.usage
	call	sysinfo$cpucount
	cmp	r12, rax
	ja	.cputoomany
	jmp	.doit
.notcpuarg:
	; the popped argument is not a -XX option: release it, then fall through
	; into .nocpuarg so the process count is taken from sysinfo$cpucount
	mov	rdi, r12
	call	heap$free
calign
.nocpuarg:
	; No usable -XX argument: take the process count from the library and
	; clamp it into 1..16384 before parking it in r12d.
	call	sysinfo$cpucount
	mov	edx, 16384		; ceiling
	mov	ecx, 1			; floor, in case the reported count is unusable
	cmp	eax, ecx
	cmovl	eax, ecx
	cmp	eax, edx
	cmova	eax, edx

	mov	r12d, eax
calign
.doit:
	; r12d == number of children to start, rbx == requested size in bits
	mov	rdi, .banner
	call	string$to_stdout
	; one 8 byte stack slot holds the two 32 bit descriptors that
	; socketpair fills in: [rsp] and [rsp+4]
	sub	rsp, 8
calign
.children:
	; once per child: socketpair, fork, and in the parent hand the second
	; descriptor to a fresh epoll object built on parent_vtable
	mov	r10, rsp		; where the descriptor pair gets written
	xor	edx, edx		; protocol 0
	mov	esi, 0x801		; SOCK_STREAM | SOCK_NONBLOCK
	mov	edi, 1			; AF_UNIX
	mov	eax, syscall_socketpair
	syscall
	test	rax, rax
	jl	.socketpairdeath
	; rbx and r12 are saved around the fork syscall and restored on both
	; sides (here for the parent, at .inchild for the child)
	push	rbx r12
	mov	eax, syscall_fork
	syscall
	test	rax, rax
	jl	.forkdeath
	je	.inchild
	pop	r12 rbx

	; parent side: rax is the new child's pid, remember it
	mov	rsi, rax
	mov	rdi, [childlist]
	call	list$push_back

	; the first descriptor belongs to the child, drop our copy of it
	mov	edi, [rsp]
	mov	eax, syscall_close
	syscall
	xor	esi, esi
	mov	rdi, parent_vtable
	call	epoll$new
	; attach the second descriptor to the epoll object just created
	mov	rsi, rax
	mov	edi, [rsp+4]
	call	epoll$established

	sub	r12d, 1
	jnz	.children

	call	epoll$run		; won't come back

falign
.inchild:
	; Child side of the fork in .children. Generates one set of DH
	; parameters of rbx bits via bigint$dh_params, wraps them as a PEM
	; "DH PARAMETERS" block and writes that block to its end of the
	; socketpair, then exits.
	; Register use below:
	;   rbx  == requested size in bits (restored from the pre-fork push)
	;   r13d == our socketpair descriptor
	;   r14  == bigint that is encoded as the first INTEGER (DH p)
	;   r15  == bigint whose lowest word is encoded as the second INTEGER (DH g)
	;   rbp  == encoded byte count of p, later the base64 string
	;   r12  == running output pointer while the PEM text is assembled
	pop	r12 rbx
	; post-fork child entrypoint
	; close the other half of our socketpair
	mov	eax, syscall_close
	mov	edi, [rsp+4]
	syscall

	; our side is blocking on the socketpair, not epoll$run based
	mov	r13d, [rsp]		; our side of the socketpair

	call	bigint$new
	mov	r14, rax
	call	bigint$new
	mov	r15, rax

	; we need to reinit our rng otherwise all children have the same seed:
	call	rng$init

	; no looping required, dh params will return us with one when it finds one
	mov	rdi, r14
	mov	rsi, r15
	mov	edx, ebx		; our desired safe prime bits
	call	bigint$dh_params

	; now, all we need to do is construct a return and send it back to the parent
	; 64KB of stack is used first for the raw DER bytes and then, once those
	; have been turned into a base64 string, reused for the PEM text
	sub	rsp, 65536

	; we need to compute our safe prime bytecount so we can encode the PKCS#3 sequence (and subsequent integer) lengths
	; smash ebp for the task
	mov	rdi, r14
	call	bigint$bytecount
	lea	ebp, [eax+1]		; our encoded bytecount for dh_p
	
	; DER layout built at rsp:
	;   +0  30 82 hh ll      SEQUENCE, length == ebp+7
	;   +4  02 82 hh ll      INTEGER,  length == ebp
	;   +8  <ebp bytes of p>
	;   +8+ebp  02 01 gg     INTEGER,  length 1, the generator
	; NOTE(review): both lengths always use the two byte long form (0x82),
	; even when the value would fit a shorter form -- confirm that is
	; acceptable for every size this tool allows.
	mov	word [rsp], 0x8230	; 0x30 == sequence, 0x82 == length bytecount, 2
	lea	edx, [ebp+7]		; our sequence length is encoded bytecount + 7
	xchg	dh, dl
	mov	word [rsp+2], dx	; sequence length 2 bytes, big endian
	mov	word [rsp+4], 0x8202	; 0x02 == integer, 0x82 == length bytecount, 2
	mov	ecx, ebp
	xchg	ch, cl
	mov	word [rsp+6], cx	; integer length 2 bytes, big endian
	; now we can ssh_encode (which adds the leading zero byte)
	mov	rdi, r14
	lea	rsi, [rsp+8]
	call	bigint$ssh_encode
	; that returns the number of bytes it wrote in eax, which should == ebp
	; (the return value is not checked; the offsets below rely on ebp)
	lea	rdi, [rsp+rbp+8]
	mov	word [rdi], 0x0102	; 0x02 == integer, 0x01 == length bytecount, 1
	; we need to get the dh_g integer value from the bottommost word
	mov	rsi, [r15+bigint_words_ofs]
	mov	rax, [rsi]		; bottommost word is the byte we are after
	mov	byte [rdi+2], al	; we know it will be small
	; so now our total length is rsp+rbp+11
	; we need a base64 encoded version of that
	mov	rdi, rsp
	lea	rsi, [rbp+11]
	xor	edx, edx
	call	string$from_bintobase64
	; we no longer need ebp
	mov	rbp, rax

	; OUTPUT GOODS:
	; the DER bytes at rsp are no longer needed, so the same stack area is
	; overwritten with preface + base64 text + postface
	mov	rdi, rsp
	mov	rsi, .pem_preface
	mov	edx, .pem_prefacelen
	call	memcpy
	lea	r12, [rsp+.pem_prefacelen]	; use this as our running pointer
	; next up, utf8 output our string in rbp
	mov	rdi, rbp
	mov	rsi, r12
	call	string$to_utf8
	; that returns the number of bytes it wrote, so add that to r12
	add	r12, rax
	mov	rdi, r12
	mov	rsi, .pem_postface
	mov	rdx, .pem_postfacelen
	call	memcpy
	; get rid of our base64 string
	mov	rdi, rbp
	call	heap$free
	; add postfacelen to r12
	add	r12, .pem_postfacelen

	; so now, r12-rsp is the length of what we just built
	; send it out to r13d
	mov	eax, syscall_write
	mov	edi, r13d
	mov	rsi, rsp
	mov	rdx, r12
	sub	rdx, rsp
	syscall

	; pause before exiting (presumably so the parent gets to read the data
	; before this end of the socket goes away -- confirm)
	sleep 1

	; note: the child's exit status is 1 even though it succeeded
	mov	eax, syscall_exit
	mov	edi, 1
	syscall

	; raw byte strings copied around the base64 text above
dalign
.pem_preface db '-----BEGIN DH PARAMETERS-----',10
.pem_prefacelen = $ - .pem_preface
dalign
.pem_postface db '-----END DH PARAMETERS-----',10
.pem_postfacelen = $ - .pem_postface
	; startup banner, shared by the generate, verify and convert paths
cleartext .banner, 'This is dhtool v1.24 ',0xc2,0xa9,' 2015-2018 2 Ton Digital. Author: Jeff Marrison',10,'A showcase piece for the HeavyThing library. Commercial support available',10,'Proudly made in Cooroy, Australia. More info: https://2ton.com.au/dhtool',10


falign
.forkdeath:
	; reached from .children when the fork syscall returns a negative
	; value: report on stderr and exit with status 1
	mov	edx, .forkmsglen
	mov	rsi, .forkmsg
	mov	edi, 2			; stderr
	mov	eax, syscall_write
	syscall
	mov	edi, 1
	mov	eax, syscall_exit
	syscall
dalign
.forkmsg db 'fork syscall failed?!',10
.forkmsglen = $ - .forkmsg

falign
.socketpairdeath:
	; reached from .children when the socketpair syscall returns a negative
	; value: report on stderr and exit with status 1
	mov	edx, .socketpairmsglen
	mov	rsi, .socketpairmsg
	mov	edi, 2			; stderr
	mov	eax, syscall_write
	syscall
	mov	edi, 1
	mov	eax, syscall_exit
	syscall
dalign
.socketpairmsg db 'socketpair syscall failed?!',10
.socketpairmsglen = $ - .socketpairmsg

falign
.usage:
	; missing or unusable arguments: print the usage text on stderr and
	; exit with status 1
	mov	edx, .usagestrlen
	mov	rsi, .usagestr
	mov	edi, 2			; stderr
	mov	eax, syscall_write
	syscall
	mov	edi, 1
	mov	eax, syscall_exit
	syscall
dalign
.usagestr db 'Usage:',10
	db 'To create a new DH parameter file (similar to openssl dhparam):',10
	db './dhtool [-XX] SIZE',10,'Where SIZE is size in bits of the safe prime you want, XX specifies how many cores to use',10,10
	db 'To verify an existing dhparam file, -or- an OpenSSH moduli file:',10
	db './dhtool filename',10,10
	db 'To convert an existing dhparam file to an OpenSSH moduli compatible line:',10
	db './dhtool -convert filename',10
.usagestrlen = $ - .usagestr
falign
.cputoomany:
	; reached from _start when the -XX value exceeds sysinfo$cpucount:
	; explain on stderr and exit with status 1.
	; The message names dhtool.asm, the file that actually contains the
	; check (the previous text pointed at make_dh_pem.asm).
	mov	eax, syscall_write
	mov	edi, 2
	mov	rsi, .cpustr
	mov	edx, .cpustrlen
	syscall	
	mov	eax, syscall_exit
	mov	edi, 1
	syscall
dalign
.cpustr db 'You requested more CPUs than we have available. If you REALLY want that, edit dhtool.asm and remove the check.',10
.cpustrlen = $ - .cpustr

falign
.yourenuts:
	; reached from _start when the requested size exceeds
	; insane_primesize: explain on stderr and exit with status 1.
	; The message names dhtool.asm, the file that actually defines the
	; limit (the previous text pointed at make_dh_pem.asm).
	mov	eax, syscall_write
	mov	edi, 2
	mov	rsi, .nutsstr
	mov	edx, .nutsstrlen
	syscall
	mov	eax, syscall_exit
	mov	edi, 1
	syscall
dalign
.nutsstr db 'You have requested an insane safe prime size. If you REALLY want that, edit dhtool.asm and up the limit.',10
.nutsstrlen = $ - .nutsstr

falign
.toosmall:
	; reached from _start when the requested size is below 1536 bits:
	; explain on stderr and exit with status 1.
	; The message names dhtool.asm, the file that actually contains the
	; lower limit (the previous text pointed at make_dh_pem.asm).
	mov	eax, syscall_write
	mov	edi, 2
	mov	rsi, .smallstr
	mov	edx, .smallstrlen
	syscall
	mov	eax, syscall_exit
	mov	edi, 1
	syscall
dalign
.smallstr db 'You have requested a safe prime size that is insecure/too small. If you REALLY want that, edit dhtool.asm and lower the limit.',10
.smallstrlen = $ - .smallstr

calign
.noinput:
	; reached from .maybeverify when file$mtime returns zero for the
	; filename argument: report on stderr and exit with status 1
	mov	edx, .noinputstrlen
	mov	rsi, .noinputstr
	mov	edi, 2			; stderr
	mov	eax, syscall_write
	syscall
	mov	edi, 1
	mov	eax, syscall_exit
	syscall
dalign
.noinputstr db 'Unable to read input file.',10
.noinputstrlen = $ - .noinputstr

calign
.error:
	; generic fatal exit: the jumper loads rdi with one of the message
	; strings below beforehand; it goes out through string$to_stderrln and
	; the process exits with status 1
	call	string$to_stderrln
	mov	edi, 1
	mov	eax, syscall_exit
	syscall

	; message strings for the PEM/DER path (.nofilename and .nodhparam are
	; not referenced anywhere in the visible source)
cleartext .nofilename, 'Filename from openssl dhparam output required.'
cleartext .dhparameters, '-----BEGIN DH PARAMETERS-----'
cleartext .dhparametersend, '-----END DH PARAMETERS-----'
cleartext .nodhparam, 'Missing BEGIN DH PARAMETERS in input file.'
cleartext .nodhparamend, 'Missing END DH PARAMETERS in input file.'
cleartext .derbad, 'Invalid DER encoding.'

calign
.maybesshmoduli:
	; Reached from .maybeverify when the input file contains no
	; "BEGIN DH PARAMETERS" marker: treat it as an OpenSSH moduli file and
	; verify every line that has the expected seven space separated fields
	; (time, type, tests, tries, size, generator, modulus).
	; On entry: rbx == filename string, r15 == file contents string,
	; r12 == buffer created by .maybeverify (both are released here).
	; In the loop: rbx == line buffer, r12 == list of fields for the current
	; line, r13 == size field, r14 == generator, r15 == modulus.
	; Exits the process with status 0 when the file is exhausted.
	mov	rdi, .banner
	call	string$to_stdout
	mov	rdi, r15
	call	heap$free
	mov	rdi, r12
	call	buffer$destroy
	mov	rdi, rbx
	call	file$to_buffer
	; loop through lines looking for ssh moduli
	mov	rdi, rbx
	mov	rbx, rax
	call	heap$free
calign
.outer:
	mov	rdi, rbx
	mov	esi, 1
	call	buffer$has_more_lines
	test	eax, eax
	jz	.alldone
	mov	rdi, rbx
	call	buffer$nextline
	mov	r12, rax
	mov	rdi, rax
	mov	esi, ' '
	call	string$split
	mov	rdi, r12
	mov	r12, rax
	call	heap$free
	; first one is time or #comment, but make sure we have the right count
	cmp	qword [r12+_list_size_ofs], 7
	jne	.outer_skip
	mov	rdi, r12
	call	list$pop_front
	mov	rdi, rax
	; a first field of length 1 is skipped outright
	cmp	qword [rax], 1
	je	.outer_skip_free
	mov	r13, rax
	mov	rdi, rax
	call	string$isnumber
	mov	rdi, r13
	test	eax, eax
	jz	.outer_skip_free
	; so far so good, skip this
	call	heap$free
	; next is type, which in mine are always 2
	mov	rdi, r12
	call	list$pop_front
	mov	rdi, rax
	call	heap$free
	; next is tests, which in mine are always 6
	mov	rdi, r12
	call	list$pop_front
	mov	rdi, rax
	call	heap$free
	; next is tries, which in mine are always 100 (Miller-Rabin count?)
	mov	rdi, r12
	call	list$pop_front
	mov	rdi, rax
	call	heap$free
	; next is size, which we need to hangon to
	mov	rdi, r12
	call	list$pop_front
	mov	r13, rax
	mov	rdi, rax
	call	string$to_unsigned
	mov	rdi, r13
	mov	r13, rax
	call	heap$free
	; next is generator, 2, 3, or 5
	mov	rdi, r12
	call	list$pop_front
	mov	r14, rax
	mov	rdi, rax
	call	string$to_unsigned
	mov	rdi, r14
	mov	r14, rax
	call	heap$free
	; next is the bigint itself
	mov	rdi, r12
	call	list$pop_front
	mov	r15, rax
	; The hex text is decoded into a 16384 byte stack buffer below, and the
	; file contents are not under our control: refuse any modulus whose hex
	; text is longer than 2 * 16384 characters rather than writing past the
	; buffer. The first qword of a string is its length (the same field the
	; first-field test above looks at). The line is skipped like any other
	; malformed line.
	mov	rdi, rax
	cmp	qword [rax], 32768
	ja	.outer_skip_free
	; tolower it first
	mov	rdi, r15
	call	string$to_lower_inplace
	; create space on our stack for the hex decoded number
	sub	rsp, 16384
	mov	rdi, r15
	mov	rsi, rsp
	call	string$hexdecode
	; that returned the number of bytes we wrote
	mov	rdi, r15
	mov	r15, rax
	call	heap$free
	; next up, bigint from that
	mov	rdi, rsp
	mov	rsi, r15
	call	bigint$new_encoded
	mov	r15, rax
	; we are done with our temporary stack
	add	rsp, 16384
	; output the size that the file said it is + 1
	mov	rdi, .filesize
	call	string$to_stdout
	mov	rdi, r13
	add	rdi, 1
	mov	esi, 10
	call	string$from_unsigned
	mov	rdi, rax
	push	rax
	call	string$to_stdoutln
	pop	rdi
	call	heap$free
	; see if they match
	mov	rdi, .filesizematch
	call	string$to_stdout
	mov	rdi, r15
	call	bigint$bitcount
	; "Yes." only when size field + 1 equals the modulus' actual bit count
	mov	rdi, .yes
	mov	rsi, .no
	mov	rdx, r13
	add	rdx, 1
	cmp	rdx, rax
	cmovne	rdi, rsi
	call	string$to_stdoutln
	; turn the generator value into a bigint for .dh_verify
	mov	rdi, r14
	call	bigint$new_unsigned
	mov	r14, rax
	; verify those
	mov	rdi, r15
	mov	rsi, r14
	call	.dh_verify
	mov	rdi, r14
	call	bigint$destroy
	mov	rdi, r15
	call	bigint$destroy
	jmp	.outer_skip
cleartext .filesize, 'Prime Size: '
cleartext .filesizematch, 'Size Match: '
calign
.outer_skip_free:
	; rdi == a field string that still has to be released before skipping
	call	heap$free
.outer_skip:
	; release whatever fields remain in the list, then the list itself
	mov	rdi, r12
	mov	rsi, heap$free
	call	list$clear
	mov	rdi, r12
	call	heap$free
	jmp	.outer
calign
.alldone:
	mov	rdi, rbx
	call	buffer$destroy
	mov	eax, syscall_exit
	xor	edi, edi
	syscall

calign
.maybeverify:
	; Reached from _start when the last argument is not a number.
	; Reads the named file; if it contains a "BEGIN DH PARAMETERS" marker
	; the base64 payload between the markers is decoded and parsed as
	; SEQUENCE { INTEGER p, INTEGER g }, otherwise control passes to
	; .maybesshmoduli. After parsing, the next argument popped from [argv]
	; selects between .convert ('-convert') and verification (.dh_verify).
	; Register use: rbx == filename, later the base64 substring;
	; r15 == file contents string, later DH g; r12 == decode buffer, later
	; the read pointer into it; r13 == bytes remaining; r14 == DH p.
	; rbx is our filename argument that string$isnumber did not return happily
	; it is either a PEM file or an /etc/ssh/moduli file
	mov	rdi, rbx
	call	file$mtime
	test	rax, rax
	jz	.noinput
	mov	rdi, rbx
	call	file$to_string
	mov	r15, rax
	call	buffer$new
	mov	r12, rax
	; see if it is a DH PARAMETER file:
	mov	rdi, r15
	mov	rsi, .dhparameters
	xor	edx, edx
	call	string$indexof_ofs
	cmp	rax, -1
	je	.maybesshmoduli
	mov	r14, rax		; r14 == index of the BEGIN marker
	; look for the END marker, starting the search at the BEGIN marker
	mov	rdi, r15
	mov	rsi, .dhparametersend
	mov	rdx, rax
	call	string$indexof_ofs
	mov	rdi, .nodhparamend
	cmp	rax, -1
	je	.error
	mov	r13, rax
	; take the text from just past the BEGIN marker (its index plus the
	; marker's own length, stored in the first qword of the string) up to
	; the END marker
	mov	rdi, r15
	mov	rsi, r14
	add	rsi, qword [.dhparameters]
	mov	rdx, rax
	call	string$substring
	; the filename is no longer needed; rbx becomes the base64 text
	mov	rdi, rbx
	mov	rbx, rax
	call	heap$free
	mov	rdi, r12
	mov	rsi, rbx
	xor	edx, edx
	call	buffer$append_base64decode
	mov	rdi, rbx
	call	heap$free
	; so now our buffer in r12 contains the base64 decoded DER
	; the buffer object itself stays on the stack until .cleanup_exit pops
	; and destroys it; r12/r13 become data pointer and remaining length
	push	r12
	mov	r13, [r12+buffer_length_ofs]
	mov	r12, [r12+buffer_itself_ofs]

	; .gettag wraps the asn1_tag macro (defined outside this file); the
	; code below reads the tag from eax and the element length from r8,
	; presumably with r12/r13 advanced past the header -- confirm against
	; the macro
	call	.gettag
	mov	rdi, .derbad
	cmp	eax, 0x10		; SEQUENCE or puke
	jne	.error
	; p is first, integer:
	call	.gettag
	mov	rdi, .derbad
	cmp	eax, 0x2		; INTEGER or puke
	jne	.error
	test	r8d, r8d		; nonzero length or puke
	jz	.error
	cmp	r13, r8			; not enough data left == puke
	jb	.error
	; build p from the r8 bytes at r12 and step the cursor past them
	mov	rdi, r12
	mov	rsi, r8
	add	r12, r8
	sub	r13, r8
	call	bigint$new_encoded
	mov	r14, rax		; DH p
	
	; g is next
	call	.gettag
	mov	rdi, .derbad
	cmp	eax, 0x2		; INTEGER or puke
	jne	.error
	test	r8d, r8d		; nonzero length or puke
	jz	.error
	cmp	r13, r8			; not enough data left == puke
	jb	.error
	mov	rdi, r12
	mov	rsi, r8
	add	r12, r8
	sub	r13, r8
	call	bigint$new_encoded
	mov	r15, rax		; DH g

	; pop the argument that preceded the filename and compare it with
	; '-convert'; the popped string is freed while the comparison result
	; is kept on the stack across the heap$free call
	mov	rdi, [argv]
	call	list$pop_back
	push	rax
	mov	rdi, rax
	mov	rsi, .dashconvert
	call	string$equals
	pop	rdi
	push	rax
	call	heap$free
	pop	rax
	test	rax, rax
	jnz	.convert

	mov	rdi, .banner
	call	string$to_stdout
	
	; verify both
	mov	rdi, r14
	mov	rsi, r15
	call	.dh_verify
.cleanup_exit:
	; shared tail for the verify and .convert paths: expects r14 == p,
	; r15 == g and the decode buffer object on top of the stack

	; cleanup and exit
	mov	rdi, r14
	call	bigint$destroy
	mov	rdi, r15
	call	bigint$destroy
	pop	rdi
	call	buffer$destroy
	
	mov	eax, syscall_exit
	xor	edi, edi
	syscall
falign
.gettag:
	; callable wrapper so the asn1_tag macro body is emitted only once
	asn1_tag
	ret
	; not reached, but for callgraph optimisation need it here (see fasmpp.asm)
	call	X509$dsaprivate_destroy
cleartext .dashconvert, '-convert'
calign
.convert:
	; Reached from .maybeverify when the argument before the filename is
	; '-convert'. Prints one OpenSSH moduli style line for the parsed
	; parameters on stdout (the banner goes to stderr), then leaves through
	; .cleanup_exit.
	; Line fields, in order: datetime, type, tests, tries, size, generator,
	; modulus in upper case hex.
	; Register use: r14 == DH p, r15 == DH g, r12 == formatter and later
	; the formatted string, r13 == hex string of p.
	mov	rdi, .banner
	call	string$to_stderr
	; DH p == r14, DH g == r15
	; describe the line layout to the formatter first; the actual values
	; are supplied in the formatter$doit call further down
	mov	edi, 1			; space between items please
	call	formatter$new
	mov	r12, rax
	mov	rdi, rax
	xor	esi, esi
	call	formatter$add_datetime
	mov	rdi, r12
	xor	esi, esi
	xor	edx, edx
	call	formatter$add_unsigned	; type
	mov	rdi, r12
	xor	esi, esi
	xor	edx, edx
	call	formatter$add_unsigned	; tests
	mov	rdi, r12
	xor	esi, esi
	xor	edx, edx
	call	formatter$add_unsigned	; tries
	mov	rdi, r12
	xor	esi, esi
	xor	edx, edx
	call	formatter$add_unsigned	; size
	mov	rdi, r12
	xor	esi, esi
	xor	edx, edx
	call	formatter$add_unsigned	; generator
	mov	rdi, r12
	xor	esi, esi
	call	formatter$add_string	; bigint hex
	; turn our r14 into the necessary goods
	; NOTE(review): p is encoded into a fixed 16384 byte stack buffer; this
	; presumably relies on the library's bigint capacity never exceeding
	; that -- confirm
	sub	rsp, 16384
	mov	rdi, r14
	mov	rsi, rsp
	call	bigint$encode
	mov	rdi, rsp
	mov	rsi, rax
	call	string$from_bintohex
	add	rsp, 16384
	mov	r13, rax
	mov	rdi, rax
	call	string$to_upper_inplace
	; bit count of p, parked on the stack across the timestamp call
	mov	rdi, r14
	call	bigint$bitcount
	push	rax
	; get the current datetime (its result is not read from an integer
	; register here; presumably formatter$doit picks it up from where
	; timestamp leaves it -- confirm)
	call	timestamp
	mov	r9, [r15+bigint_words_ofs]
	mov	rdi, r12
	mov	esi, 2			; type
	mov	edx, 6			; tests
	mov	ecx, 192		; tries
	pop	r8
	sub	r8, 1			; size
	mov	r9, [r9]		; generator
	mov	r10, r13		; bigint string
	call	formatter$doit
	; the formatter is finished with; r12 now tracks the resulting string
	mov	rdi, r12
	mov	r12, rax
	call	formatter$destroy
	; our formatter puts YYYY-MM-DDTHH:MI:SSZ
	; so we need to strip all that.
	; each string$replace returns a new string, so the previous one is
	; freed after every step
	mov	rdi, r12
	mov	rsi, .dash
	mov	rdx, .emptystr
	call	string$replace
	mov	rdi, r12
	mov	r12, rax
	call	heap$free
	mov	rdi, r12
	mov	rsi, .t
	mov	rdx, .emptystr
	call	string$replace
	mov	rdi, r12
	mov	r12, rax
	call	heap$free
	mov	rdi, r12
	mov	rsi, .colon
	mov	rdx, .emptystr
	call	string$replace
	mov	rdi, r12
	mov	r12, rax
	call	heap$free
	mov	rdi, r12
	mov	rsi, .z
	mov	rdx, .emptystr
	call	string$replace
	mov	rdi, r12
	mov	r12, rax
	call	heap$free

	; print the finished line and release both strings
	mov	rdi, r12
	call	string$to_stdoutln
	mov	rdi, r12
	call	heap$free
	mov	rdi, r13
	call	heap$free

	jmp	.cleanup_exit
	; patterns removed from the formatted line, and their replacement
cleartext .dash, '-'
cleartext .t, 'T'
cleartext .colon, ':'
cleartext .z, 'Z'
cleartext .emptystr, ''

	; .dh_verify: print a verification report for one (p, g) pair on stdout.
	; rdi == DH p, rsi == DH g
	; Reported, in order: both numbers (bigint$debug), bigint$verifyprime
	; on p, bigint$verifyprime on (p-1)/2, whether g^q == 1 mod p, whether
	; g^(2q) == 1 mod p, whether bigint$jacobi(g, p) == 1, and p modulo
	; 8, 7, 12 and 24.
	; rbx, r12-r15 are saved and restored. Side effects on the arguments:
	; a monty object is stored into p's bigint_monty_powmod_ofs field, and
	; when the jacobi test fails the bigint passed as g is OVERWRITTEN
	; while searching for a replacement generator.
falign
.dh_verify:
	push	rbx r12 r13 r14 r15
	mov	r14, rdi
	mov	r15, rsi
	
	mov	rdi, .dhp
	call	string$to_stdout
	mov	rdi, r14
	call	bigint$debug
	mov	rdi, .dhg
	call	string$to_stdout
	mov	rdi, r15
	call	bigint$debug

	mov	rdi, .verifyp
	call	string$to_stdout

	; "Good." when bigint$verifyprime returns nonzero, "Bad." otherwise
	mov	rdi, r14
	call	bigint$verifyprime
	mov	rdi, .goodp
	mov	rsi, .badp
	test	eax, eax
	cmovz	rdi, rsi
	call	string$to_stdoutln

	mov	rdi, .verifysafe
	call	string$to_stdout

	; rbx = (p - 1) >> 1, i.e. q
	mov	rdi, r14
	call	bigint$new_copy
	mov	rbx, rax
	mov	rdi, rax
	mov	rsi, bigint$one
	call	bigint$subtract
	mov	rdi, rbx
	mov	esi, 1
	call	bigint$shr
	
	mov	rdi, rbx
	call	bigint$verifyprime
	mov	rdi, .goodp
	mov	rsi, .badp
	test	eax, eax
	cmovz	rdi, rsi
	call	string$to_stdoutln

	; verify the order of the subgroup
	mov	rdi, .verifysubgroup
	call	string$to_stdout

	mov	rdi, rbx		; exponent == q
	mov	rsi, r14		; modulus == p
	call	monty$new
	; the monty object is parked in p's powmod slot (presumably so that it
	; is released together with p -- confirm against bigint$destroy)
	mov	[r14+bigint_monty_powmod_ofs], rax

	; reuse rbx for our destination
	mov	rdi, rax
	mov	rsi, rbx		; destination for monty exponentation
	mov	rdx, r15		; source == DH g
	call	monty$doit

	; rbx now holds g**q mod p; "Good." when it is one
	mov	rdi, rbx
	call	bigint$is_one
	mov	rdi, .goodp
	mov	rsi, .badp
	test	eax, eax
	cmovz	rdi, rsi
	call	string$to_stdoutln

	mov	rdi, .verifysubgroup2
	call	string$to_stdout

	; see if it is order of 2q instead of q like above
	mov	rdi, rbx		; set it to (p-1) >> 1 again
	mov	rsi, r14
	call	bigint$assign
	mov	rdi, rbx
	mov	rsi, bigint$one
	call	bigint$subtract
	mov	rdi, rbx
	mov	esi, 1
	call	bigint$shr

	; verify the order of the subgroup 2q instead of q
	mov	rdi, rbx
	mov	esi, 1
	call	bigint$shl

	mov	rdi, rbx		; exponent == 2q
	mov	rsi, r14		; modulus == p
	call	monty$new
	; this second monty object goes into rbx's slot rather than p's, which
	; still holds the first one
	mov	[rbx+bigint_monty_powmod_ofs], rax

	; reuse rbx again for destination
	mov	rdi, rax
	mov	rsi, rbx
	mov	rdx, r15		; rbx = g**2q mod p
	call	monty$doit

	mov	rdi, rbx
	call	bigint$is_one
	mov	rdi, .goodp
	mov	rsi, .badp
	test	eax, eax
	cmovz	rdi, rsi
	call	string$to_stdoutln

	; the scratch bigint is finished with
	mov	rdi, rbx
	call	bigint$destroy

	mov	rdi, .verifyg
	call	string$to_stdout

	; "Yes." only when bigint$jacobi(g, p) returns exactly 1; the result is
	; kept across the print so it can be tested again below
	mov	rdi, r15
	mov	rsi, r14
	call	bigint$jacobi
	mov	rdi, .yes
	mov	rsi, .no
	cmp	eax, 1
	cmovne	rdi, rsi
	push	rax
	call	string$to_stdoutln
	pop	rax
	
	cmp	eax, 1
	je	.skip_newg

	; g is not a quadratic residue mod p, so figure out
	; which one is the goods and output that for information/headsup purposes
	; (this overwrites the caller's g bigint, starting the search at 2)
	mov	rdi, r15
	mov	rsi, bigint$two
	call	bigint$assign

	; walk g upward til we find one that is a quadratic residue mod p
calign
.gloop:
	mov	rdi, r15
	mov	rsi, r14
	call	bigint$jacobi
	cmp	eax, 1
	je	.gfound
	mov	rdi, r15
	mov	rsi, bigint$one
	call	bigint$add
	jmp	.gloop
calign
.gfound:
	mov	rdi, .newg
	call	string$to_stdout
	mov	rdi, r15
	call	bigint$debug
	
calign
.skip_newg:
	; four identical stanzas follow, one per modulus (8, 7, 12, 24):
	; print the label, compute p mod N with bigint$modword, print it in
	; base 10 and free the temporary string
	mov	rdi, .mod8
	call	string$to_stdout
	mov	rdi, r14
	mov	esi, 8
	call	bigint$modword
	mov	rdi, rax
	mov	esi, 10
	call	string$from_unsigned
	push	rax
	mov	rdi, rax
	call	string$to_stdoutln
	pop	rdi
	call	heap$free

	mov	rdi, .mod7
	call	string$to_stdout
	mov	rdi, r14
	mov	esi, 7
	call	bigint$modword
	mov	rdi, rax
	mov	esi, 10
	call	string$from_unsigned
	push	rax
	mov	rdi, rax
	call	string$to_stdoutln
	pop	rdi
	call	heap$free

	mov	rdi, .mod12
	call	string$to_stdout
	mov	rdi, r14
	mov	esi, 12
	call	bigint$modword
	mov	rdi, rax
	mov	esi, 10
	call	string$from_unsigned
	push	rax
	mov	rdi, rax
	call	string$to_stdoutln
	pop	rdi
	call	heap$free

	mov	rdi, .mod24
	call	string$to_stdout
	mov	rdi, r14
	mov	esi, 24
	call	bigint$modword
	mov	rdi, rax
	mov	esi, 10
	call	string$from_unsigned
	push	rax
	mov	rdi, rax
	call	string$to_stdoutln
	pop	rdi
	call	heap$free

	; done, dusted

	pop	r15 r14 r13 r12 rbx
	ret
	; report labels used by .dh_verify
cleartext .dhp, 'DH p (LE): '
cleartext .dhg, 'DH g (LE): '
cleartext .verifyp, 'Ridiculous p verification (MR=192)...'
cleartext .verifysafe, 'Ridiculous Sophie Germain counterpart verification (MR=192)...'
cleartext .verifysubgroup, 'Verifying the order of the subgroup (g has order q)...'
cleartext .verifysubgroup2, 'Verifying the order of the subgroup (g has order 2q)...'
cleartext .verifyg, 10,'Wei Dai states "find g such that g is a quadratic residue mod p, then g has order q"', 10, 'A quick Google for "DH_NOT_SUITABLE_GENERATOR" and "DH_check()" provides some useful',10,'information about how OpenSSL chooses these, but Wei Dai chooses g differently.',10,10,'So, is g a quadratic residue mod p? '
cleartext .newg, 'Appropriate g such that g is a quadratic residue mod p: '

	; verdict strings (.yes/.no are also used by the moduli path in _start)
cleartext .goodp, 'Good.'
cleartext .badp, 'Bad.'
cleartext .yes, 'Yes.'
cleartext .no, 'No.'

	; labels for the p mod N lines
cleartext .mod8, ' p%8 is: '
cleartext .mod7, ' p%7 is: '
cleartext .mod12, 'p%12 is: '
cleartext .mod24, 'p%24 is: '


	include '../ht_data.inc'