HeavyThing - unicodecase.inc

Jeff Marrison

Table of functions

	; ------------------------------------------------------------------------
	; HeavyThing x86_64 assembly language library and showcase programs
	; Copyright © 2015-2018 2 Ton Digital 
	; Homepage: https://2ton.com.au/
	; Author: Jeff Marrison <jeff@2ton.com.au>
	;       
	; This file is part of the HeavyThing library.
	;       
	; HeavyThing is free software: you can redistribute it and/or modify
	; it under the terms of the GNU General Public License, or
	; (at your option) any later version.
	;       
	; HeavyThing is distributed in the hope that it will be useful, 
	; but WITHOUT ANY WARRANTY; without even the implied warranty of
	; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
	; GNU General Public License for more details.
	;       
	; You should have received a copy of the GNU General Public License along
	; with the HeavyThing library. If not, see <http://www.gnu.org/licenses/>.
	; ------------------------------------------------------------------------
	;       
	; unicodecase.inc: unicode case conversion functions
	;
	;
	; NOTE Re: charToUpper/charToLower and extendedcase setting...
	;
	; if extendedcase is disabled, we don't bother dealing with brute force lookups. this has
	; worked well for me in every multinational project i have had to deal with, but I am sure
	; there are lots of jurisdictions where this is not acceptable.
	;
	; if extendedcase is enabled, then we go through the full whack to hopefully cover the
	; rest of the unicode standard sheet

if extendedcase = 0


if used utf16$upper | defined include_everything
	; utf16$upper (non-extended build): uppercase one UTF-16 code unit
	; single argument in edi == char, return in eax (separate function for arg in eax return in eax)
	; non-extended versions do not mash other regs
	; values below 0xff are converted by xoring them with their .toupper_map entry
	; (0x00 or 0x20); everything else goes through the range checks at .notsinglebyte
falign
utf16$upper:
	prolog	utf16$upper
	mov	eax, edi		; 32 bit move, so rax == zero-extended argument
	cmp	eax, 0xff
	; unsigned on purpose: with a signed jge an argument that has bit 31 set compared
	; as "less than 0xff" and was then used as a huge index into .toupper_map
	jae	.notsinglebyte
	xor	eax, [.toupper_map+rax*4]
	epilog
dalign
.toupper_map:
	; note, this could certainly be a db, but seems to go faster with 32 bit xors
	dd	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20
calign
.notsinglebyte:
	; Everything from here down is a chain of range checks. Each check covers one
	; run of lowercase values that share a fixed adjustment; a miss jumps forward
	; to a later check and the chain ends at .notfound with eax unchanged.
	; All compares are signed (jl/jg). Labels are named after the size of the
	; adjustment that is tested underneath them.
	; offset 80
	; 0x0450..0x045f (Cyrillic) -> 0x0400..0x040f
	cmp	eax, 0x0450
	jl	.offset_48
	cmp	eax, 0x045F
	jg	.offset_48
	sub	eax, 0x50
	epilog
calign
.offset_48:
	; 0x0561..0x0586 (Armenian) -> 0x0531..0x0556
	cmp	eax, 0x0561
	jl	.offset_neg32
	cmp	eax, 0x0586
	jg	.offset_neg32
	sub	eax, 0x30
	epilog
calign
.offset_neg32:
	; 0x03b1..0x03cb (Greek) -> 0x0391..0x03ab
	cmp	eax, 0x03b1
	jl	.offset_26
	cmp	eax, 0x03cb
	jg	.offset_neg32_2
	; final sigma (0x03c2) is the one exception inside this run: subtracting 0x20
	; would produce the unassigned 0x03a2, its uppercase is capital sigma (0x03a3)
	cmp	eax, 0x03c2
	je	.finalsigma
	sub	eax, 0x20
	epilog
calign
.finalsigma:
	mov	eax, 0x03a3
	epilog
calign
.offset_neg32_2:
	; eax > 0x03cb here
	; 0x0430..0x044f (Cyrillic) -> 0x0410..0x042f
	cmp	eax, 0x0430
	jl	.offset_26
	cmp	eax, 0x044f
	jg	.offset_neg32_3
	sub	eax, 0x20
	epilog
calign
.offset_neg32_3:
	; eax > 0x044f here
	; 0xff41..0xff5a (fullwidth Latin) -> 0xff21..0xff3a
	cmp	eax, 0xff41
	jl	.offset_26
	cmp	eax, 0xff5a
	jg	.offset_26
	sub	eax, 0x20
	epilog
calign
.offset_26:
	; 0x24d0..0x24e9 (circled Latin letters) -> 0x24b6..0x24cf
	cmp	eax, 0x24d0
	jl	.offset_16
	cmp	eax, 0x24e9
	jg	.offset_16
	sub	eax, 0x1a
	epilog
calign
.offset_16:
	; 0x2170..0x217f (small Roman numerals) -> 0x2160..0x216f
	cmp	eax, 0x2170
	jl	.offset_8
	cmp	eax, 0x217f
	jg	.offset_8
	sub	eax, 0x10
	epilog
calign
.offset_8:
	; Greek Extended: the lowercase value sits 8 below its uppercase partner.
	; 0x1f00..0x1f07, 0x1f10..0x1f15, 0x1f20..0x1f27 and 0x1f30..0x1f37 get + 8,
	; the gaps in between fall through to .offset_neg1
	cmp	eax, 0x1f00
	jl	.offset_neg1
	cmp	eax, 0x1f37
	jg	.offset_neg1
	cmp	eax, 0x1f07
	jg	.offset_8_2
	add	eax, 0x8
	epilog
calign
.offset_8_2:
	; 0x1f10..0x1f15
	cmp	eax, 0x1f10
	jl	.offset_neg1
	cmp	eax, 0x1f15
	jg	.offset_8_3
	add	eax, 0x8
	epilog
calign
.offset_8_3:
	; 0x1f20..0x1f27
	cmp	eax, 0x1f20
	jl	.offset_neg1
	cmp	eax, 0x1f27
	jg	.offset_8_4
	add	eax, 0x8
	epilog
calign
.offset_8_4:
	; 0x1f30..0x1f37
	cmp	eax, 0x1f30
	jl	.offset_neg1
	cmp	eax, 0x1f37
	jg	.offset_neg1
	add	eax, 0x8
	epilog
calign
.offset_neg1:
	; 0x0101..0x0233: upper/lower pairs are adjacent, the lowercase one is upper + 1
	; 0x0101..0x012f, odd values only -> subtract 1
	cmp	eax, 0x0101
	jl	.oddchars
	cmp	eax, 0x0233
	jg	.oddchars
	cmp	eax, 0x012f
	jg	.offset_neg1_2
	test	eax, 1
	jz	.offset_neg1_2
	sub	eax, 1
	epilog
calign
.offset_neg1_2:
	; 0x013a..0x0148: in this run the lowercase values are the even ones
	cmp	eax, 0x013a
	jl	.offset_neg1_3
	cmp	eax, 0x0148
	jg	.offset_neg1_3
	test	eax, 1
	jnz	.offset_neg1_3
	sub	eax, 1
	epilog
calign
.offset_neg1_3:
	; 0x014b..0x0177, odd values only
	cmp	eax, 0x014b
	jl	.offset_neg1_4
	cmp	eax, 0x0177
	jg	.offset_neg1_4
	test	eax, 1
	jz	.offset_neg1_4
	sub	eax, 1
	epilog
calign
.offset_neg1_4:
	; 0x0201..0x0233, odd values only, 0x0221 excluded
	cmp	eax, 0x0201
	jl	.oddchars
	cmp	eax, 0x0233
	jg	.oddchars
	test	eax, 1
	jz	.oddchars
	cmp	eax, 0x0221
	je	.oddchars
	sub	eax, 1
	epilog
calign
.oddchars:
	; the remaining rules only ever convert odd values (odd -> value - 1)
	test	eax, 1
	jz	.notfound	; extended version calls binarysearch here
	; 0x03d9..0x03ef
	cmp	eax, 0x03d9
	jl	.oddchars_2
	cmp	eax, 0x03ef
	jg	.oddchars_2
	sub	eax, 1
	epilog
calign
.oddchars_2:
	; 0x0461..0x04bf, except 0x0483/0x0485/0x0487/0x0489 which are left alone
	cmp	eax, 0x0461
	jl	.oddchars_3
	cmp	eax, 0x04bf
	jg	.oddchars_3
	cmp	eax, 0x0483
	je	.oddchars_3
	cmp	eax, 0x0485
	je	.oddchars_3
	cmp	eax, 0x0487
	je	.oddchars_3
	cmp	eax, 0x0489
	je	.oddchars_3
	sub	eax, 1
	epilog
calign
.oddchars_3:
	; 0x04d1..0x04f9
	cmp	eax, 0x04d1
	jl	.oddchars_4
	cmp	eax, 0x04f9
	jg	.oddchars_4
	sub	eax, 1
	epilog
calign
.oddchars_4:
	; 0x1e01..0x1e95
	cmp	eax, 0x1e01
	jl	.oddchars_5
	cmp	eax, 0x1e95
	jg	.oddchars_5
	sub	eax, 1
	epilog
calign
.oddchars_5:
	; 0x1ea1..0x1ef9
	cmp	eax, 0x1ea1
	jl	.notfound	; extended version calls binarysearch here
	cmp	eax, 0x1ef9
	jg	.notfound	; extended version calls binarysearch here
	sub	eax, 1
	epilog
calign
.notfound:
	; no rule matched: eax still holds the argument
	epilog
end if


else


if used utf16$upper | defined include_everything
	; utf16$upper (extended build): uppercase one UTF-16 code unit
	; extended uppercase, does lots more work and mashes more regs
	; single arg in edi, return in eax
	; mashes esi and edx (and edi itself, once the binary search is reached)
	; values below 0xff are converted by xoring them with their .toupper_map entry
	; (0x00 or 0x20); everything else goes through the range checks at .notsinglebyte
	; and, failing those, the binary search of .lowerCaseBase
falign
utf16$upper:
	prolog	utf16$upper
	mov	eax, edi		; 32 bit move, so rax == zero-extended argument
	cmp	eax, 0xff
	; unsigned on purpose: with a signed jge an argument that has bit 31 set compared
	; as "less than 0xff" and was then used as a huge index into .toupper_map
	jae	.notsinglebyte
	xor	eax, [.toupper_map+rax*4]
	epilog
dalign
.toupper_map:
	; note, this could certainly be a db, but seems to go faster with 32 bit xors
	dd	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20
calign
.notsinglebyte:
	; Everything from here down is a chain of range checks. Each check covers one
	; run of lowercase values that share a fixed adjustment; a miss jumps forward
	; to a later check, and whatever no check converts ends up in .binarysearch.
	; All compares are signed (jl/jg). Labels are named after the size of the
	; adjustment that is tested underneath them.
	; offset 80
	; 0x0450..0x045f (Cyrillic) -> 0x0400..0x040f
	cmp	eax, 0x0450
	jl	.offset_48
	cmp	eax, 0x045F
	jg	.offset_48
	sub	eax, 0x50
	epilog
calign
.offset_48:
	; 0x0561..0x0586 (Armenian) -> 0x0531..0x0556
	cmp	eax, 0x0561
	jl	.offset_neg32
	cmp	eax, 0x0586
	jg	.offset_neg32
	sub	eax, 0x30
	epilog
calign
.offset_neg32:
	; 0x03b1..0x03cb (Greek) -> 0x0391..0x03ab
	cmp	eax, 0x03b1
	jl	.offset_26
	cmp	eax, 0x03cb
	jg	.offset_neg32_2
	; final sigma (0x03c2) is the one exception inside this run: subtracting 0x20
	; would produce the unassigned 0x03a2, its uppercase is capital sigma (0x03a3)
	cmp	eax, 0x03c2
	je	.finalsigma
	sub	eax, 0x20
	epilog
calign
.finalsigma:
	mov	eax, 0x03a3
	epilog
calign
.offset_neg32_2:
	; eax > 0x03cb here
	; 0x0430..0x044f (Cyrillic) -> 0x0410..0x042f
	cmp	eax, 0x0430
	jl	.offset_26
	cmp	eax, 0x044f
	jg	.offset_neg32_3
	sub	eax, 0x20
	epilog
calign
.offset_neg32_3:
	; eax > 0x044f here
	; 0xff41..0xff5a (fullwidth Latin) -> 0xff21..0xff3a
	cmp	eax, 0xff41
	jl	.offset_26
	cmp	eax, 0xff5a
	jg	.offset_26
	sub	eax, 0x20
	epilog
calign
.offset_26:
	; 0x24d0..0x24e9 (circled Latin letters) -> 0x24b6..0x24cf
	cmp	eax, 0x24d0
	jl	.offset_16
	cmp	eax, 0x24e9
	jg	.offset_16
	sub	eax, 0x1a
	epilog
calign
.offset_16:
	; 0x2170..0x217f (small Roman numerals) -> 0x2160..0x216f
	cmp	eax, 0x2170
	jl	.offset_8
	cmp	eax, 0x217f
	jg	.offset_8
	sub	eax, 0x10
	epilog
calign
.offset_8:
	; Greek Extended: the lowercase value sits 8 below its uppercase partner.
	; 0x1f00..0x1f07, 0x1f10..0x1f15, 0x1f20..0x1f27 and 0x1f30..0x1f37 get + 8,
	; the gaps in between fall through to .offset_neg1
	cmp	eax, 0x1f00
	jl	.offset_neg1
	cmp	eax, 0x1f37
	jg	.offset_neg1
	cmp	eax, 0x1f07
	jg	.offset_8_2
	add	eax, 0x8
	epilog
calign
.offset_8_2:
	; 0x1f10..0x1f15
	cmp	eax, 0x1f10
	jl	.offset_neg1
	cmp	eax, 0x1f15
	jg	.offset_8_3
	add	eax, 0x8
	epilog
calign
.offset_8_3:
	; 0x1f20..0x1f27
	cmp	eax, 0x1f20
	jl	.offset_neg1
	cmp	eax, 0x1f27
	jg	.offset_8_4
	add	eax, 0x8
	epilog
calign
.offset_8_4:
	; 0x1f30..0x1f37
	cmp	eax, 0x1f30
	jl	.offset_neg1
	cmp	eax, 0x1f37
	jg	.offset_neg1
	add	eax, 0x8
	epilog
calign
.offset_neg1:
	; 0x0101..0x0233: upper/lower pairs are adjacent, the lowercase one is upper + 1
	; 0x0101..0x012f, odd values only -> subtract 1
	cmp	eax, 0x0101
	jl	.oddchars
	cmp	eax, 0x0233
	jg	.oddchars
	cmp	eax, 0x012f
	jg	.offset_neg1_2
	test	eax, 1
	jz	.offset_neg1_2
	sub	eax, 1
	epilog
calign
.offset_neg1_2:
	; 0x013a..0x0148: in this run the lowercase values are the even ones
	cmp	eax, 0x013a
	jl	.offset_neg1_3
	cmp	eax, 0x0148
	jg	.offset_neg1_3
	test	eax, 1
	jnz	.offset_neg1_3
	sub	eax, 1
	epilog
calign
.offset_neg1_3:
	; 0x014b..0x0177, odd values only
	cmp	eax, 0x014b
	jl	.offset_neg1_4
	cmp	eax, 0x0177
	jg	.offset_neg1_4
	test	eax, 1
	jz	.offset_neg1_4
	sub	eax, 1
	epilog
calign
.offset_neg1_4:
	; 0x0201..0x0233, odd values only, 0x0221 excluded
	cmp	eax, 0x0201
	jl	.oddchars
	cmp	eax, 0x0233
	jg	.oddchars
	test	eax, 1
	jz	.oddchars
	cmp	eax, 0x0221
	je	.oddchars
	sub	eax, 1
	epilog
calign
.oddchars:
	; the remaining range rules only ever convert odd values (odd -> value - 1);
	; even values go straight to the table lookup
	test	eax, 1
	jz	.binarysearch
	; 0x03d9..0x03ef
	cmp	eax, 0x03d9
	jl	.oddchars_2
	cmp	eax, 0x03ef
	jg	.oddchars_2
	sub	eax, 1
	epilog
calign
.oddchars_2:
	; 0x0461..0x04bf, except 0x0483/0x0485/0x0487/0x0489 which are left to later checks
	cmp	eax, 0x0461
	jl	.oddchars_3
	cmp	eax, 0x04bf
	jg	.oddchars_3
	cmp	eax, 0x0483
	je	.oddchars_3
	cmp	eax, 0x0485
	je	.oddchars_3
	cmp	eax, 0x0487
	je	.oddchars_3
	cmp	eax, 0x0489
	je	.oddchars_3
	sub	eax, 1
	epilog
calign
.oddchars_3:
	; 0x04d1..0x04f9
	cmp	eax, 0x04d1
	jl	.oddchars_4
	cmp	eax, 0x04f9
	jg	.oddchars_4
	sub	eax, 1
	epilog
calign
.oddchars_4:
	; 0x1e01..0x1e95
	cmp	eax, 0x1e01
	jl	.oddchars_5
	cmp	eax, 0x1e95
	jg	.oddchars_5
	sub	eax, 1
	epilog
calign
.oddchars_5:
	; 0x1ea1..0x1ef9, anything else is looked up in the tables
	cmp	eax, 0x1ea1
	jl	.binarysearch
	cmp	eax, 0x1ef9
	jg	.binarysearch
	sub	eax, 1
	epilog
calign
.notfound:
	; no rule and no table entry matched: eax still holds the argument
	epilog
	; Binary search of the sorted .lowerCaseBase table for the value in eax; on a
	; hit the entry at the same index of .upperCaseConversion is returned in eax,
	; on a miss .notfound returns eax unchanged.
	; lo == edi
	; hi == esi
	; pivot == edx
calign
.binarysearch:
	; the tables hold 16 bit keys and only ax is compared below, so reject anything
	; above 0xffff up front (otherwise e.g. 0x100b5 would be treated like 0x00b5)
	cmp	eax, 0xffff
	ja	.notfound
	xor	edi, edi
	mov	esi, 165		; index of the last table entry
calign
.whileloop:
	cmp	edi, esi
	jg	.notfound		; signed: hi becomes -1 when the search runs off the low end
	mov	edx, edi
	add	edx, esi
	shr	edx, 1
	; pivot is an element index and the table entries are words, hence the * 2
	; (rdx is usable as-is: the 32 bit writes to edx above cleared its upper half)
	cmp	ax, word [.lowerCaseBase + rdx*2]
	jne	.not_this_char
	movzx	eax, word [.upperCaseConversion + rdx*2]
	epilog
calign
.not_this_char:
	; equality was ruled out above; code units are unsigned, so compare them that way
	cmp	ax, word [.lowerCaseBase + rdx*2]
	ja	.set_low
	; key is below the pivot entry: hi = pivot - 1
	mov	esi, edx
	sub	esi, 1
	jmp	.whileloop
calign
.set_low:
	; key is above the pivot entry: lo = pivot + 1
	mov	edi, edx
	add	edi, 1
	jmp	.whileloop
dalign
.lowerCaseBase:
	dw 0xb5, 0xff, 0x131, 0x133, 0x135, 0x137, 0x17a, 0x17c, 0x17e, 0x17f, 0x183, 0x185, 0x188, 0x18c, 0x192, 0x195, 0x199, 0x1a1, 0x1a3, 0x1a5, 0x1a8, 0x1ad, 0x1b0, 0x1b4, 0x1b6, 0x1b9, 0x1bd, 0x1bf, 0x1c5, 0x1c6, 0x1c8, 0x1c9, 0x1cb, 0x1cc, 0x1ce, 0x1d0, 0x1d2, 0x1d4, 0x1d6, 0x1d8, 0x1da, 0x1dc, 0x1dd, 0x1df, 0x1e1, 0x1e3, 0x1e5, 0x1e7, 0x1e9, 0x1eb, 0x1ed, 0x1ef, 0x1f2, 0x1f3, 0x1f5, 0x1f9, 0x1fb, 0x1fd, 0x1ff, 0x253, 0x254, 0x256, 0x257, 0x259, 0x25b, 0x260, 0x263, 0x268, 0x269, 0x26f, 0x272, 0x275, 0x280, 0x283, 0x288, 0x28a, 0x28b, 0x292, 0x345, 0x3ac, 0x3ad, 0x3ae, 0x3af, 0x3cc, 0x3cd, 0x3ce, 0x3d0, 0x3d1, 0x3d5, 0x3d6, 0x3f0, 0x3f1, 0x3f2, 0x3f5, 0x4c2, 0x4c4, 0x4c8, 0x4cc, 0x1e9b, 0x1f40, 0x1f41, 0x1f42, 0x1f43, 0x1f44, 0x1f45, 0x1f51, 0x1f53, 0x1f55, 0x1f57, 0x1f60, 0x1f61, 0x1f62, 0x1f63, 0x1f64, 0x1f65, 0x1f66, 0x1f67, 0x1f70, 0x1f71, 0x1f72, 0x1f73, 0x1f74, 0x1f75, 0x1f76, 0x1f77, 0x1f78, 0x1f79, 0x1f7a, 0x1f7b, 0x1f7c, 0x1f7d, 0x1f80, 0x1f81, 0x1f82, 0x1f83, 0x1f84, 0x1f85, 0x1f86, 0x1f87, 0x1f90, 0x1f91, 0x1f92, 0x1f93, 0x1f94, 0x1f95, 0x1f96, 0x1f97, 0x1fa0, 0x1fa1, 0x1fa2, 0x1fa3, 0x1fa4, 0x1fa5, 0x1fa6, 0x1fa7, 0x1fb0, 0x1fb1, 0x1fb3, 0x1fbe, 0x1fc3, 0x1fd0, 0x1fd1, 0x1fe0, 0x1fe1, 0x1fe5, 0x1ff3
dalign
.upperCaseConversion:
	dw 0x39c, 0x178, 0x49, 0x132, 0x134, 0x136, 0x179, 0x17b, 0x17d, 0x53, 0x182, 0x184, 0x187, 0x18b, 0x191, 0x1f6, 0x198, 0x1a0, 0x1a2, 0x1a4, 0x1a7, 0x1ac, 0x1af, 0x1b3, 0x1b5, 0x1b8, 0x1bc, 0x1f7, 0x1c4, 0x1c4, 0x1c7, 0x1c7, 0x1ca, 0x1ca, 0x1cd, 0x1cf, 0x1d1, 0x1d3, 0x1d5, 0x1d7, 0x1d9, 0x1db, 0x18e, 0x1de, 0x1e0, 0x1e2, 0x1e4, 0x1e6, 0x1e8, 0x1ea, 0x1ec, 0x1ee, 0x1f1, 0x1f1, 0x1f4, 0x1f8, 0x1fa, 0x1fc, 0x1fe, 0x181, 0x186, 0x189, 0x18a, 0x18f, 0x190, 0x193, 0x194, 0x197, 0x196, 0x19c, 0x19d, 0x19f, 0x1a6, 0x1a9, 0x1ae, 0x1b1, 0x1b2, 0x1b7, 0x399, 0x386, 0x388, 0x389, 0x38a, 0x38c, 0x38e, 0x38f, 0x392, 0x398, 0x3a6, 0x3a0, 0x39a, 0x3a1, 0x3a3, 0x395, 0x4c1, 0x4c3, 0x4c7, 0x4cb, 0x1e60, 0x1f48, 0x1f49, 0x1f4a, 0x1f4b, 0x1f4c, 0x1f4d, 0x1f59, 0x1f5b, 0x1f5d, 0x1f5f, 0x1f68, 0x1f69, 0x1f6a, 0x1f6b, 0x1f6c, 0x1f6d, 0x1f6e, 0x1f6f, 0x1fba, 0x1fbb, 0x1fc8, 0x1fc9, 0x1fca, 0x1fcb, 0x1fda, 0x1fdb, 0x1ff8, 0x1ff9, 0x1fea, 0x1feb, 0x1ffa, 0x1ffb, 0x1f88, 0x1f89, 0x1f8a, 0x1f8b, 0x1f8c, 0x1f8d, 0x1f8e, 0x1f8f, 0x1f98, 0x1f99, 0x1f9a, 0x1f9b, 0x1f9c, 0x1f9d, 0x1f9e, 0x1f9f, 0x1fa8, 0x1fa9, 0x1faa, 0x1fab, 0x1fac, 0x1fad, 0x1fae, 0x1faf, 0x1fb8, 0x1fb9, 0x1fbc, 0x399, 0x1fcc, 0x1fd8, 0x1fd9, 0x1fe8, 0x1fe9, 0x1fec, 0x1ffc

end if

end if


if extendedcase = 0

if used utf16$lower | defined include_everything
	; utf16$lower (non-extended build): lowercase one UTF-16 code unit
	; single arg in edi, returns in eax
	; values below 0xff are converted by xoring them with their .tolower_map entry
	; (0x00 or 0x20); everything else goes through the range checks at .notsinglebyte
falign
utf16$lower:
	prolog	utf16$lower
	mov	eax, edi		; 32 bit move, so rax == zero-extended argument
	cmp	eax, 0xff
	; unsigned on purpose: with a signed jge an argument that has bit 31 set compared
	; as "less than 0xff" and was then used as a huge index into .tolower_map
	jae	.notsinglebyte
	xor	eax, [.tolower_map + rax * 4]
	epilog
dalign
.tolower_map:
	dd 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 
calign
.notsinglebyte:
	; Everything from here down is a chain of range checks. Each check covers one
	; run of uppercase values that share a fixed adjustment; a miss jumps forward
	; to a later check and the chain ends at .notfound with eax unchanged.
	; All compares are signed (jl/jg).
	; 0x10a0..0x10c5 (Georgian) -> 0x10d0..0x10f5
	; NOTE(review): the Unicode simple lowercase mapping for 0x10a0..0x10c5 is
	; 0x2d00..0x2d25 (+ 0x1c60), which is what the original remark below asks
	; about; confirm whether + 48 is intentional before changing it
	cmp	eax, 0x10a0
	jl	.offset_80
	cmp	eax, 0x10c5
	jg	.offset_80
	add	eax, 48		; should be + 1c60 ? hmmm, why are we doing it different?
	epilog
calign
.offset_80:
	; 0x0400..0x040f (Cyrillic) -> 0x0450..0x045f
	cmp	eax, 0x0400
	jl	.offset_48
	cmp	eax, 0x040f
	jg	.offset_48
	add	eax, 0x50
	epilog
calign
.offset_48:
	; 0x0531..0x0556 (Armenian) -> 0x0561..0x0586
	cmp	eax, 0x0531
	jl	.offset_32
	cmp	eax, 0x0556
	jg	.offset_32
	add	eax, 0x30
	epilog
calign
.offset_32:
	; 0x0391..0x03ab (Greek) -> 0x03b1..0x03cb
	cmp	eax, 0x0391
	jl	.offset_26
	cmp	eax, 0x03ab
	jg	.offset_32_2
	add	eax, 0x20
	epilog
calign
.offset_32_2:
	; eax > 0x03ab here
	; 0x0410..0x042f (Cyrillic) -> 0x0430..0x044f
	cmp	eax, 0x0410
	jl	.offset_26
	cmp	eax, 0x042f
	jg	.offset_32_3
	add	eax, 0x20
	epilog
calign
.offset_32_3:
	; eax > 0x042f here
	; 0xff21..0xff3a (fullwidth Latin) -> 0xff41..0xff5a
	cmp	eax, 0xff21
	jl	.offset_26
	cmp	eax, 0xff3a
	jg	.offset_26
	add	eax, 0x20
	epilog
calign
.offset_26:
	; 0x24b6..0x24cf (circled Latin letters) -> 0x24d0..0x24e9
	cmp	eax, 0x24b6
	jl	.offset_16
	cmp	eax, 0x24cf
	jg	.offset_16
	add	eax, 0x1a
	epilog
calign
.offset_16:
	; 0x2160..0x216f (Roman numerals) -> 0x2170..0x217f
	cmp	eax, 0x2160
	jl	.offset_neg8
	cmp	eax, 0x216f
	jg	.offset_neg8
	add	eax, 0x10
	epilog
calign
.offset_neg8:
	; Greek Extended: the uppercase value sits 8 above its lowercase partner.
	; 0x1f08..0x1f0f, 0x1f18..0x1f1d, 0x1f28..0x1f2f and 0x1f38..0x1f3f get - 8,
	; the gaps in between fall through to .offset_1
	cmp	eax, 0x1f08
	jl	.offset_1
	cmp	eax, 0x1f0f
	jg	.offset_neg8_2
	sub	eax, 0x8
	epilog
calign
.offset_neg8_2:
	; 0x1f18..0x1f1d
	cmp	eax, 0x1f18
	jl	.offset_1
	cmp	eax, 0x1f1d
	jg	.offset_neg8_3
	sub	eax, 0x8
	epilog
calign
.offset_neg8_3:
	; 0x1f28..0x1f2f
	cmp	eax, 0x1f28
	jl	.offset_1
	cmp	eax, 0x1f2f
	jg	.offset_neg8_4
	sub	eax, 0x8
	epilog
calign
.offset_neg8_4:
	; 0x1f38..0x1f3f
	cmp	eax, 0x1f38
	jl	.offset_1
	cmp	eax, 0x1f3f
	jg	.offset_1
	sub	eax, 0x8
	epilog
calign
.offset_1:
	; 0x0100..0x0232: upper/lower pairs are adjacent, the lowercase one is upper + 1
	; 0x0100..0x012e, even values only -> add 1
	cmp	eax, 0x0100
	jl	.evenchars
	cmp	eax, 0x0232
	jg	.evenchars
	cmp	eax, 0x012e
	jg	.offset_1_2
	test	eax, 1
	jnz	.offset_1_2
	add	eax, 1
	epilog
calign
.offset_1_2:
	; 0x0139..0x0147: in this run the uppercase values are the odd ones
	cmp	eax, 0x0139
	jl	.evenchars
	cmp	eax, 0x0147
	jg	.offset_1_3
	test	eax, 1
	jz	.evenchars
	add	eax, 1
	epilog
calign
.offset_1_3:
	; eax > 0x0147 here
	; 0x014a..0x0176, even values only
	cmp	eax, 0x014a
	jl	.evenchars
	cmp	eax, 0x0176
	jg	.offset_1_4
	test	eax, 1
	jnz	.evenchars
	add	eax, 1
	epilog
calign
.offset_1_4:
	; eax > 0x0176 here
	; 0x0200..0x0232, even values only, 0x0220 excluded
	cmp	eax, 0x0200
	jl	.evenchars
	cmp	eax, 0x0232
	jg	.evenchars
	test	eax, 1
	jnz	.evenchars
	cmp	eax, 0x0220
	je	.evenchars
	add	eax, 1
	epilog
calign
.evenchars:
	; the remaining rules only ever convert even values (even -> value + 1)
	test	eax, 1
	jnz	.notfound
	; 0x03d8..0x03ee
	cmp	eax, 0x03d8
	jl	.notfound
	cmp	eax, 0x03ee
	jg	.evenchars_2
	add	eax, 1
	epilog
calign
.evenchars_2:
	; 0x0460..0x04be, except 0x0482/0x0484/0x0486/0x0488 which are left alone
	cmp	eax, 0x0460
	jl	.notfound
	cmp	eax, 0x04be
	jg	.evenchars_3
	cmp	eax, 0x0482
	je	.notfound
	cmp	eax, 0x0484
	je	.notfound
	cmp	eax, 0x0486
	je	.notfound
	cmp	eax, 0x0488
	je	.notfound
	add	eax, 1
	epilog
calign
.evenchars_3:
	; 0x04d0..0x04f8
	cmp	eax, 0x04d0
	jl	.notfound
	cmp	eax, 0x04f8
	jg	.evenchars_4
	add	eax, 1
	epilog
calign
.evenchars_4:
	; 0x1e00..0x1e94
	cmp	eax, 0x1e00
	jl	.notfound
	cmp	eax, 0x1e94
	jg	.evenchars_5
	add	eax, 1
	epilog
calign
.evenchars_5:
	; 0x1ea0..0x1ef8
	cmp	eax, 0x1ea0
	jl	.notfound
	cmp	eax, 0x1ef8
	jg	.notfound
	add	eax, 1
	epilog
calign
.notfound:
	; no rule matched: eax still holds the argument
	epilog

end if

else

if used utf16$lower | defined include_everything
	; utf16$lower (extended build): lowercase one UTF-16 code unit
	; single arg in edi, returns in eax
	; extended version, mashes more regs (esi, edx) and edi itself once the
	; binary search is reached
	; values below 0xff are converted by xoring them with their .tolower_map entry
	; (0x00 or 0x20); everything else goes through the range checks at .notsinglebyte
	; and, failing those, the binary search of .upperCaseBase
falign
utf16$lower:
	prolog	utf16$lower
	mov	eax, edi		; 32 bit move, so rax == zero-extended argument
	cmp	eax, 0xff
	; unsigned on purpose: with a signed jge an argument that has bit 31 set compared
	; as "less than 0xff" and was then used as a huge index into .tolower_map
	jae	.notsinglebyte
	xor	eax, [.tolower_map + rax * 4]
	epilog
dalign
.tolower_map:
	dd 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 
calign
.notsinglebyte:
	; Everything from here down is a chain of range checks. Each check covers one
	; run of uppercase values that share a fixed adjustment; a miss jumps forward
	; to a later check, and whatever no check converts ends up in .binarysearch.
	; All compares are signed (jl/jg).
	; 0x10a0..0x10c5 (Georgian) -> 0x10d0..0x10f5
	; NOTE(review): the Unicode simple lowercase mapping for 0x10a0..0x10c5 is
	; 0x2d00..0x2d25 (+ 0x1c60), which is what the original remark below asks
	; about; confirm whether + 48 is intentional before changing it
	cmp	eax, 0x10a0
	jl	.offset_80
	cmp	eax, 0x10c5
	jg	.offset_80
	add	eax, 48		; should be + 1c60 ? hmmm, why are we doing it different?
	epilog
calign
.offset_80:
	; 0x0400..0x040f (Cyrillic) -> 0x0450..0x045f
	cmp	eax, 0x0400
	jl	.offset_48
	cmp	eax, 0x040f
	jg	.offset_48
	add	eax, 0x50
	epilog
calign
.offset_48:
	; 0x0531..0x0556 (Armenian) -> 0x0561..0x0586
	cmp	eax, 0x0531
	jl	.offset_32
	cmp	eax, 0x0556
	jg	.offset_32
	add	eax, 0x30
	epilog
calign
.offset_32:
	; 0x0391..0x03ab (Greek) -> 0x03b1..0x03cb
	cmp	eax, 0x0391
	jl	.offset_26
	cmp	eax, 0x03ab
	jg	.offset_32_2
	add	eax, 0x20
	epilog
calign
.offset_32_2:
	; eax > 0x03ab here
	; 0x0410..0x042f (Cyrillic) -> 0x0430..0x044f
	cmp	eax, 0x0410
	jl	.offset_26
	cmp	eax, 0x042f
	jg	.offset_32_3
	add	eax, 0x20
	epilog
calign
.offset_32_3:
	; eax > 0x042f here
	; 0xff21..0xff3a (fullwidth Latin) -> 0xff41..0xff5a
	cmp	eax, 0xff21
	jl	.offset_26
	cmp	eax, 0xff3a
	jg	.offset_26
	add	eax, 0x20
	epilog
calign
.offset_26:
	; 0x24b6..0x24cf (circled Latin letters) -> 0x24d0..0x24e9
	cmp	eax, 0x24b6
	jl	.offset_16
	cmp	eax, 0x24cf
	jg	.offset_16
	add	eax, 0x1a
	epilog
calign
.offset_16:
	; 0x2160..0x216f (Roman numerals) -> 0x2170..0x217f
	cmp	eax, 0x2160
	jl	.offset_neg8
	cmp	eax, 0x216f
	jg	.offset_neg8
	add	eax, 0x10
	epilog
calign
.offset_neg8:
	; Greek Extended: the uppercase value sits 8 above its lowercase partner.
	; 0x1f08..0x1f0f, 0x1f18..0x1f1d, 0x1f28..0x1f2f and 0x1f38..0x1f3f get - 8,
	; the gaps in between fall through to .offset_1
	cmp	eax, 0x1f08
	jl	.offset_1
	cmp	eax, 0x1f0f
	jg	.offset_neg8_2
	sub	eax, 0x8
	epilog
calign
.offset_neg8_2:
	; 0x1f18..0x1f1d
	cmp	eax, 0x1f18
	jl	.offset_1
	cmp	eax, 0x1f1d
	jg	.offset_neg8_3
	sub	eax, 0x8
	epilog
calign
.offset_neg8_3:
	; 0x1f28..0x1f2f
	cmp	eax, 0x1f28
	jl	.offset_1
	cmp	eax, 0x1f2f
	jg	.offset_neg8_4
	sub	eax, 0x8
	epilog
calign
.offset_neg8_4:
	; 0x1f38..0x1f3f
	cmp	eax, 0x1f38
	jl	.offset_1
	cmp	eax, 0x1f3f
	jg	.offset_1
	sub	eax, 0x8
	epilog
calign
.offset_1:
	; 0x0100..0x0232: upper/lower pairs are adjacent, the lowercase one is upper + 1
	; 0x0100..0x012e, even values only -> add 1
	cmp	eax, 0x0100
	jl	.evenchars
	cmp	eax, 0x0232
	jg	.evenchars
	cmp	eax, 0x012e
	jg	.offset_1_2
	test	eax, 1
	jnz	.offset_1_2
	add	eax, 1
	epilog
calign
.offset_1_2:
	; 0x0139..0x0147: in this run the uppercase values are the odd ones
	cmp	eax, 0x0139
	jl	.evenchars
	cmp	eax, 0x0147
	jg	.offset_1_3
	test	eax, 1
	jz	.evenchars
	add	eax, 1
	epilog
calign
.offset_1_3:
	; eax > 0x0147 here
	; 0x014a..0x0176, even values only
	cmp	eax, 0x014a
	jl	.evenchars
	cmp	eax, 0x0176
	jg	.offset_1_4
	test	eax, 1
	jnz	.evenchars
	add	eax, 1
	epilog
calign
.offset_1_4:
	; eax > 0x0176 here
	; 0x0200..0x0232, even values only, 0x0220 excluded
	cmp	eax, 0x0200
	jl	.evenchars
	cmp	eax, 0x0232
	jg	.evenchars
	test	eax, 1
	jnz	.evenchars
	cmp	eax, 0x0220
	je	.evenchars
	add	eax, 1
	epilog
calign
.evenchars:
	; the remaining range rules only ever convert even values (even -> value + 1);
	; odd values go straight to the table lookup
	test	eax, 1
	jnz	.binarysearch
	; 0x03d8..0x03ee
	cmp	eax, 0x03d8
	jl	.binarysearch
	cmp	eax, 0x03ee
	jg	.evenchars_2
	add	eax, 1
	epilog
calign
.evenchars_2:
	; 0x0460..0x04be, except 0x0482/0x0484/0x0486/0x0488 which go to the table lookup
	cmp	eax, 0x0460
	jl	.binarysearch
	cmp	eax, 0x04be
	jg	.evenchars_3
	cmp	eax, 0x0482
	je	.binarysearch
	cmp	eax, 0x0484
	je	.binarysearch
	cmp	eax, 0x0486
	je	.binarysearch
	cmp	eax, 0x0488
	je	.binarysearch
	add	eax, 1
	epilog
calign
.evenchars_3:
	; 0x04d0..0x04f8
	cmp	eax, 0x04d0
	jl	.binarysearch
	cmp	eax, 0x04f8
	jg	.evenchars_4
	add	eax, 1
	epilog
calign
.evenchars_4:
	; 0x1e00..0x1e94
	cmp	eax, 0x1e00
	jl	.binarysearch
	cmp	eax, 0x1e94
	jg	.evenchars_5
	add	eax, 1
	epilog
calign
.evenchars_5:
	; 0x1ea0..0x1ef8, anything else is looked up in the tables
	cmp	eax, 0x1ea0
	jl	.binarysearch
	cmp	eax, 0x1ef8
	jg	.binarysearch
	add	eax, 1
	epilog
calign
.notfound:
	; no rule and no table entry matched: eax still holds the argument
	epilog
calign
.binarysearch:
	; Binary search of the sorted .upperCaseBase table for the value in eax; on a
	; hit the entry at the same index of .lowerCaseConversion is returned in eax,
	; on a miss .notfound returns eax unchanged.
	; lo == edi
	; hi == esi
	; pivot == edx
	; the tables hold 16 bit keys and only ax is compared below, so reject anything
	; above 0xffff up front (otherwise e.g. 0x10130 would be treated like 0x0130)
	cmp	eax, 0xffff
	ja	.notfound
	xor	edi, edi
	mov	esi, 156		; index of the last table entry
calign
.whileloop:
	cmp	edi, esi
	jg	.notfound		; signed: hi becomes -1 when the search runs off the low end
	mov	edx, edi
	add	edx, esi
	shr	edx, 1
	; pivot is an element index and the table entries are words, hence the * 2
	; (rdx is usable as-is: the 32 bit writes to edx above cleared its upper half)
	cmp	ax, word [.upperCaseBase + rdx*2]
	jne	.not_this_char
	movzx	eax, word [.lowerCaseConversion + rdx*2]
	epilog
calign
.not_this_char:
	; equality was ruled out above; code units are unsigned, so compare them that way
	cmp	ax, word [.upperCaseBase + rdx*2]
	ja	.set_low
	; key is below the pivot entry: hi = pivot - 1
	mov	esi, edx
	sub	esi, 1
	jmp	.whileloop
calign
.set_low:
	; key is above the pivot entry: lo = pivot + 1
	mov	edi, edx
	add	edi, 1
	jmp	.whileloop
dalign
.upperCaseBase:
	dw 0x130, 0x132, 0x134, 0x136, 0x178, 0x179, 0x17b, 0x17d, 0x181, 0x182, 0x184, 0x186, 0x187, 0x189, 0x18a, 0x18b, 0x18e, 0x18f, 0x190, 0x191, 0x193, 0x194, 0x196, 0x197, 0x198, 0x19c, 0x19d, 0x19f, 0x1a0, 0x1a2, 0x1a4, 0x1a6, 0x1a7, 0x1a9, 0x1ac, 0x1ae, 0x1af, 0x1b1, 0x1b2, 0x1b3, 0x1b5, 0x1b7, 0x1b8, 0x1bc, 0x1c4, 0x1c5, 0x1c7, 0x1c8, 0x1ca, 0x1cb, 0x1cd, 0x1cf, 0x1d1, 0x1d3, 0x1d5, 0x1d7, 0x1d9, 0x1db, 0x1de, 0x1e0, 0x1e2, 0x1e4, 0x1e6, 0x1e8, 0x1ea, 0x1ec, 0x1ee, 0x1f1, 0x1f2, 0x1f4, 0x1f6, 0x1f7, 0x1f8, 0x1fa, 0x1fc, 0x1fe, 0x386, 0x388, 0x389, 0x38a, 0x38c, 0x38e, 0x38f, 0x3f4, 0x4c1, 0x4c3, 0x4c7, 0x4cb, 0x1f48, 0x1f49, 0x1f4a, 0x1f4b, 0x1f4c, 0x1f4d, 0x1f59, 0x1f5b, 0x1f5d, 0x1f5f, 0x1f68, 0x1f69, 0x1f6a, 0x1f6b, 0x1f6c, 0x1f6d, 0x1f6e, 0x1f6f, 0x1f88, 0x1f89, 0x1f8a, 0x1f8b, 0x1f8c, 0x1f8d, 0x1f8e, 0x1f8f, 0x1f98, 0x1f99, 0x1f9a, 0x1f9b, 0x1f9c, 0x1f9d, 0x1f9e, 0x1f9f, 0x1fa8, 0x1fa9, 0x1faa, 0x1fab, 0x1fac, 0x1fad, 0x1fae, 0x1faf, 0x1fb8, 0x1fb9, 0x1fba, 0x1fbb, 0x1fbc, 0x1fc8, 0x1fc9, 0x1fca, 0x1fcb, 0x1fcc, 0x1fd8, 0x1fd9, 0x1fda, 0x1fdb, 0x1fe8, 0x1fe9, 0x1fea, 0x1feb, 0x1fec, 0x1ff8, 0x1ff9, 0x1ffa, 0x1ffb, 0x1ffc, 0x2126, 0x212a, 0x212b
dalign
.lowerCaseConversion:
	dw 0x69, 0x133, 0x135, 0x137, 0xff, 0x17a, 0x17c, 0x17e, 0x253, 0x183, 0x185, 0x254, 0x188, 0x256, 0x257, 0x18c, 0x1dd, 0x259, 0x25b, 0x192, 0x260, 0x263, 0x269, 0x268, 0x199, 0x26f, 0x272, 0x275, 0x1a1, 0x1a3, 0x1a5, 0x280, 0x1a8, 0x283, 0x1ad, 0x288, 0x1b0, 0x28a, 0x28b, 0x1b4, 0x1b6, 0x292, 0x1b9, 0x1bd, 0x1c6, 0x1c6, 0x1c9, 0x1c9, 0x1cc, 0x1cc, 0x1ce, 0x1d0, 0x1d2, 0x1d4, 0x1d6, 0x1d8, 0x1da, 0x1dc, 0x1df, 0x1e1, 0x1e3, 0x1e5, 0x1e7, 0x1e9, 0x1eb, 0x1ed, 0x1ef, 0x1f3, 0x1f3, 0x1f5, 0x195, 0x1bf, 0x1f9, 0x1fb, 0x1fd, 0x1ff, 0x3ac, 0x3ad, 0x3ae, 0x3af, 0x3cc, 0x3cd, 0x3ce, 0x3b8, 0x4c2, 0x4c4, 0x4c8, 0x4cc, 0x1f40, 0x1f41, 0x1f42, 0x1f43, 0x1f44, 0x1f45, 0x1f51, 0x1f53, 0x1f55, 0x1f57, 0x1f60, 0x1f61, 0x1f62, 0x1f63, 0x1f64, 0x1f65, 0x1f66, 0x1f67, 0x1f80, 0x1f81, 0x1f82, 0x1f83, 0x1f84, 0x1f85, 0x1f86, 0x1f87, 0x1f90, 0x1f91, 0x1f92, 0x1f93, 0x1f94, 0x1f95, 0x1f96, 0x1f97, 0x1fa0, 0x1fa1, 0x1fa2, 0x1fa3, 0x1fa4, 0x1fa5, 0x1fa6, 0x1fa7, 0x1fb0, 0x1fb1, 0x1f70, 0x1f71, 0x1fb3, 0x1f72, 0x1f73, 0x1f74, 0x1f75, 0x1fc3, 0x1fd0, 0x1fd1, 0x1f76, 0x1f77, 0x1fe0, 0x1fe1, 0x1f7a, 0x1f7b, 0x1fe5, 0x1f78, 0x1f79, 0x1f7c, 0x1f7d, 0x1ff3, 0x3c9, 0x6b, 0xe5

end if

end if