; HeavyThing - base64url_latin1.inc
;
; Jeff Marrison
;
; Table of functions:
;	base64url$encode_length
;	base64url$encode_latin1
;	base64url$decode_latin1

	; ------------------------------------------------------------------------
	; HeavyThing x86_64 assembly language library and showcase programs
	; Copyright © 2015-2018 2 Ton Digital 
	; Homepage: https://2ton.com.au/
	; Author: Jeff Marrison <jeff@2ton.com.au>
	; This file is part of the HeavyThing library.
	; HeavyThing is free software: you can redistribute it and/or modify
	; it under the terms of the GNU General Public License as published by
	; the Free Software Foundation, either version 3 of the License, or
	; (at your option) any later version.
	; HeavyThing is distributed in the hope that it will be useful, 
	; but WITHOUT ANY WARRANTY; without even the implied warranty of
	; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
	; GNU General Public License for more details.
	; You should have received a copy of the GNU General Public License along
	; with the HeavyThing library. If not, see <http://www.gnu.org/licenses/>.
	; ------------------------------------------------------------------------
	; base64url_latin1.inc: non-string versions of base64url encode/decode
	; because most base64 operations do happen in latin1, there is no benefit/reason
	; to first convert the text to native string{16,32}
	; native string versions are here too, so I suppose it depends on
	; the various use scenarios whether these make sense or the string natives do

if used base64url$encode_length | defined include_everything
	; base64url$encode_length
	; single argument: rdi == byte length
	; returns in rax the # of _characters_ that an encoding to base64 would take:
	;   4 * ceil(rdi / 3), and when base64_linebreaks is enabled an additional
	;   2 characters (CRLF) for every started line of base64_maxline characters
	; clobbers: rcx, rdx, r8 (plus r9 when base64_linebreaks is enabled), flags
	; NOTE(review): the entry label below assumes the prolog macro does not define
	; it itself, and epilog is assumed to be prolog's returning counterpart; both
	; macros live outside this file -- confirm against their definitions.
base64url$encode_length:
	prolog	base64url$encode_length
	mov	ecx, 3
	mov	rax, rdi
	xor	edx, edx		; div takes rdx:rax, so the upper half must be clear
	div	rcx			; rax = whole 3 byte groups, rdx = leftover bytes
	lea	r8, [rax+1]		; was there a remainder? if so, we need one more group of 4 chars
	test	edx, edx
	cmovnz	rax, r8
	shl	rax, 2			; 4 output characters per group
if base64_linebreaks
	mov	ecx, base64_maxline
	mov	r8, rax			; keep the raw character count
	xor	edx, edx
	div	rcx			; rax = full lines, rdx = characters on a partial last line
	lea	r9, [rax+1]
	test	edx, edx
	cmovnz	rax, r9			; was there a remainder? if so, we need one more line
	shl	rax, 1			; two characters (CRLF) per line
	add	rax, r8
end if
	epilog

end if

if used base64url$encode_latin1 | defined include_everything

	; base64url$encode_latin1
	; four arguments: rdi: byte buffer we are encoding, rsi: length of same, rdx: destination buffer, rcx == 0 == default base64 table, else, STRING of base64 table
	; it is up to the caller to determine the correct amount of space to reserve and make sure the pointer
	; in rdx is large enough
	; returns # of latin1 characters we wrote into rdx
	; returns 0 if rsi is zero, or if the table's leading length qword is below 64
	;
	; register roles inside the routine:
	;   rdi  == read position in the source (advanced by 3 per full group)
	;   r9   == source bytes still to encode
	;   r10  == table in use: length qword at +0, characters from +8
	;   r11  == number of characters written so far, also the write offset into rdx
	;   ebx  == the four output characters of the current group, first one in the low byte
	;   r12d == characters emitted on the current line (base64_linebreaks only)
	; rbx (and r12 when base64_linebreaks is enabled) are saved and restored here
	;
	; NOTE(review): the entry label below assumes the prolog macro does not define
	; it itself, and epilog is assumed to be prolog's returning counterpart; both
	; macros live outside this file -- confirm against their definitions.
base64url$encode_latin1:
	prolog	base64url$encode_latin1
	test	rsi, rsi
	jz	.nothingtodo

	push	rbx
if base64_linebreaks
	push	r12
	xor	r12d, r12d
end if
	mov	r9, rsi		; bytes we are processing
	mov	r8, .default_table
	test	rcx, rcx
	cmovz	r10, r8
	cmovnz	r10, rcx	; base64table
	cmp	qword [r10], 64
	jb	.error_return	; table supplied must contain 64 characters
	xor	r11d, r11d	; use this as our reference/byte count
.doit:
	cmp	r9, 3
	jae	.doit_allthree
	cmp	r9, 2
	je	.doit_two
	; else, only one byte left
	movzx	r8d, byte [rdi]
	mov	esi, r8d
	shr	esi, 2
	and	esi, 0x3f
	; get the character out of our table
if string_bits = 32
	mov	ebx, dword [r10+rsi*4+8]
else
	movzx	ebx, word [r10+rsi*2+8]
end if
	; get the next character (partial): the low two bits of the byte, zero filled
	shl	r8d, 4
	and	r8d, 0x3f
	; get the character out of our table
if string_bits = 32
	mov	eax, dword [r10+r8*4+8]
else
	movzx	eax, word [r10+r8*2+8]
end if
	shl	eax, 8
	or	ebx, eax
	or	ebx, 0x3d3d0000		; add our two padding chars
	mov	dword [rdx+r11], ebx
	add	r11, 4
if base64_linebreaks
	mov	word [rdx+r11], 0x0a0d	; stored little endian: CR then LF
	add	r11, 2
end if
	; done
	mov	rax, r11
if base64_linebreaks
	pop	r12
end if
	pop	rbx
	epilog
.doit_two:
	; only two bytes left
	movzx	r8d, word [rdi]

	mov	esi, r8d
	shr	esi, 2
	and	esi, 0x3f
	; get the character out of our table
if string_bits = 32
	mov	ebx, dword [r10+rsi*4+8]
else
	movzx	ebx, word [r10+rsi*2+8]
end if
	; the next character: low two bits of byte 0, high four bits of byte 1
	mov	esi, r8d
	mov	ecx, r8d
	shl	esi, 4
	shr	ecx, 12
	shr	r8d, 8			; swallow the first byte that is all done now
	and	esi, 0x3f
	and	ecx, 0xf
	or	esi, ecx
	; get the character out of our table
if string_bits = 32
	mov	eax, dword [r10+rsi*4+8]
else
	movzx	eax, word [r10+rsi*2+8]
end if
	shl	eax, 8
	or	ebx, eax
	; get the next character (partial): the low four bits of byte 1, zero filled
	shl	r8d, 2
	and	r8d, 0x3f
	; get the character out of our table
if string_bits = 32
	mov	eax, dword [r10+r8*4+8]
else
	movzx	eax, word [r10+r8*2+8]
end if
	shl	eax, 16
	or	ebx, eax
	or	ebx, 0x3d000000		; add our padding char
	mov	dword [rdx+r11], ebx
	add	r11, 4
if base64_linebreaks
	mov	word [rdx+r11], 0x0a0d	; stored little endian: CR then LF
	add	r11, 2
end if
	; done
	mov	rax, r11
if base64_linebreaks
	pop	r12
end if
	pop	rbx
	epilog
.doit_allthree:
	; at least three bytes left: encode one full group of four characters
	; NOTE: this loads four bytes although only three are consumed, so for the
	; last full group it touches one byte past the data being encoded
	mov	r8d, dword [rdi]
	add	rdi, 3		; all these unaligned accesses is probably bad... TODO: make this an aligned accumulator

	mov	esi, r8d
	shr	esi, 2
	and	esi, 0x3f
	; get the character out of our table
if string_bits = 32
	mov	ebx, dword [r10+rsi*4+8]
else
	movzx	ebx, word [r10+rsi*2+8]
end if
	; the next character: low two bits of byte 0, high four bits of byte 1
	mov	esi, r8d
	mov	ecx, r8d
	shl	esi, 4
	shr	ecx, 12
	shr	r8d, 8			; swallow the first byte that is all done now
	and	esi, 0x3f
	and	ecx, 0xf
	or	esi, ecx
	; get the character out of our table
if string_bits = 32
	mov	eax, dword [r10+rsi*4+8]
else
	movzx	eax, word [r10+rsi*2+8]
end if
	shl	eax, 8
	or	ebx, eax
	; get the next character: low four bits of byte 1, high two bits of byte 2
	mov	esi, r8d
	mov	ecx, r8d
	shl	esi, 2
	shr	ecx, 8
	and	esi, 0x3f
	shr	ecx, 6
	and	ecx, 0x3
	or	esi, ecx
	shr	r8d, 8			; swallow the second byte that is all done now
	; get the character out of our table
if string_bits = 32
	mov	eax, dword [r10+rsi*4+8]
else
	movzx	eax, word [r10+rsi*2+8]
end if
	shl	eax, 16
	or	ebx, eax
	; and last but not least, final character
	and	r8d, 0x3f
	; get the character out of our table
if string_bits = 32
	mov	eax, dword [r10+r8*4+8]
else
	movzx	eax, word [r10+r8*2+8]
end if
	shl	eax, 24
	or	ebx, eax
	mov	dword [rdx+r11], ebx
	add	r11, 4
if base64_linebreaks
	add	r12d, 4
	cmp	r12d, base64_maxline
	jb	.doit_allthree_next
	; else, maxline reached, add a crlf here and reset the counter
	mov	word [rdx+r11], 0x0a0d
	add	r11, 2
	xor	r12d, r12d
	sub	r9, 3
	jnz	.doit
	; else, all done (the line was just terminated, so no trailing crlf is added)
	mov	rax, r11
	pop	r12
	pop	rbx
	epilog
.doit_allthree_next:
end if
	sub	r9, 3
	jnz	.doit
if base64_linebreaks
	mov	word [rdx+r11], 0x0a0d	; terminate the final, partially filled line
	add	r11, 2
end if
	; done
	mov	rax, r11
if base64_linebreaks
	pop	r12
end if
	pop	rbx
	epilog
.error_return:
	; the supplied table was too short: undo our pushes, then return 0 below
if base64_linebreaks
	pop	r12
end if
	pop	rbx
.nothingtodo:
	xor	eax, eax
	epilog
cleartext .default_table, 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_'

end if

if used base64url$decode_latin1 | defined include_everything

	; base64url$decode_latin1
	; four arguments: rdi: buffer containing latin1 base64 chars we are decoding, rsi: length of same, rdx: destination buffer, rcx == 0 == default base64 table, else, array of dword (128 of them) of the table to use

	; it is up to the caller to determine the correct amount of space to reserve and make sure the pointer
	; in rdx is large enough
	; NOTE: every store to rdx is a full dword, so up to four bytes at the current
	; write position are touched even when fewer are counted in the return value
	; returns # of bytes we wrote into rdx
	;
	; behaviour by input byte:
	;   space, CR, LF, TAB:                skipped
	;   any other byte <= 32:              decoding stops, pending characters are dropped
	;   byte >= 128 or table entry == -1:  decoding stops, pending characters are flushed
	;
	; register roles inside the routine:
	;   r8   == lookup table in use (dword per input byte value)
	;   r9   == accumulator, one 6 bit value per byte lane, first character lowest
	;   r10d == 8 * number of values currently held in r9
	;   [rsp] == original rdx, used to compute the return value

	; TODO: speed this up a bit by two things:
	; 1) reading dwords at a time instead of byte by byte
	; 2) accumulating the output instead of constantly writing unaligned 3 bytes at a time
	;
	; NOTE(review): the entry label below assumes the prolog macro does not define
	; it itself, and epilog is assumed to be prolog's returning counterpart; both
	; macros live outside this file -- confirm against their definitions.
base64url$decode_latin1:
	prolog	base64url$decode_latin1
	xor	eax, eax
	test	rsi, rsi
	jz	.nothingtodo
	mov	r8, .default_table
	test	rcx, rcx
	cmovnz	r8, rcx
	xor	r9d, r9d
	xor	r10d, r10d
	push	rdx			; save the start of our destination so we can compute how many we wrote to it
.doit:
	movzx	ecx, byte [rdi]
	add	rdi, 1
	cmp	ecx, 32
	jbe	.whitespaceordie
	; not whitespace, do our table lookup
	cmp	ecx, 128
	jae	.doret			; invalid
	mov	eax, dword [r8+rcx*4]	; the character
	cmp	eax, -1
	je	.doret			; invalid
	mov	ecx, r10d
	and	eax, 0x3f
	shl	rax, cl			; drop the value into the next free byte lane
	add	r9, rax
	add	r10d, 8
	cmp	r10d, 32
	jae	.next_dowrite		; four values collected == three output bytes
	sub	rsi, 1
	jnz	.doit
	jmp	.doret
.next_dowrite:
	; repack the four 6 bit values in r9d into three bytes in eax
	; output byte 0: all of value 0, top two bits of value 1
	mov	eax, r9d
	mov	ecx, r9d
	mov	r11d, r9d
	and	eax, 0xff	
	shl	eax, 2
	shr	ecx, 12
	and	ecx, 3
	or	eax, ecx
	; output byte 1: low four bits of value 1, top four bits of value 2
	mov	ecx, r9d
	shr	ecx, 4
	and	ecx, 0xf0
	shr	r11d, 18
	and	r11d, 0xf
	or	ecx, r11d

	; output byte 2: low two bits of value 2, all of value 3
	mov	r11d, r9d
	shr	r11d, 10
	and	r11d, 0xc0
	shr	r9d, 24
	or	r11d, r9d
	shl	ecx, 8
	shl	r11d, 16
	or	eax, ecx
	or	eax, r11d

	mov	dword [rdx], eax
	add	rdx, 3

	shr	r9, 32
	sub	r10d, 32
	sub	rsi, 1
	jnz	.doit
	; fallthrough to doret
.doret:
	; flush whatever is still pending in the accumulator (same repacking as above)
	mov	eax, r9d
	mov	ecx, r9d
	mov	r11d, r9d
	and	eax, 0xff	
	shl	eax, 2
	shr	ecx, 12
	and	ecx, 3
	or	eax, ecx
	mov	ecx, r9d
	shr	ecx, 4
	and	ecx, 0xf0
	shr	r11d, 18
	and	r11d, 0xf
	or	ecx, r11d

	mov	r11d, r9d
	shr	r11d, 10
	and	r11d, 0xc0
	shr	r9d, 24
	or	r11d, r9d
	shl	ecx, 8
	shl	r11d, 16
	or	eax, ecx
	or	eax, r11d

	mov	dword [rdx], eax

	shr	r10d, 3			; r10d = number of pending values
	test	r10d, r10d
	jz	.noextra

	; count (pending values - 1) extra bytes, but never less than one
	mov	eax, 1
	sub	r10d, 1
	cmp	r10d, 1
	cmovb	r10d, eax

	add	rdx, r10
.noextra:
	mov	rax, rdx
	pop	rsi			; the destination start we saved on entry
	sub	rax, rsi
	epilog
.whitespaceordie:
	cmp	ecx, 32
	je	.isspace
	cmp	ecx, 13
	je	.isspace
	cmp	ecx, 10
	je	.isspace
	cmp	ecx, 9
	je	.isspace
	; otherwise, puke
	mov	rax, rdx
	pop	rsi
	sub	rax, rsi
	epilog
.isspace:
	sub	rsi, 1
	jnz	.doit
	jmp	.doret
.nothingtodo:
	; empty input: eax was zeroed on entry and nothing was pushed
	epilog
	; the <128 character position map for the default base64 table:
	; ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_
.default_table:
	dd	-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,62,-1,-1,52,53,54,55,56,57,58,59,60,61,-1,-1,-1,-1,-1,-1,-1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,-1,-1,-1,-1,63,-1,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,-1,-1,-1,-1,-1

end if