; ------------------------------------------------------------------------
; HeavyThing x86_64 assembly language library and showcase programs
; Copyright © 2015-2018 2 Ton Digital
; Homepage: https://2ton.com.au/
; Author: Jeff Marrison <jeff@2ton.com.au>
;
; This file is part of the HeavyThing library.
;
; HeavyThing is free software: you can redistribute it and/or modify
; it under the terms of the GNU General Public License, or
; (at your option) any later version.
;
; HeavyThing is distributed in the hope that it will be useful,
; but WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
; GNU General Public License for more details.
;
; You should have received a copy of the GNU General Public License along
; with the HeavyThing library. If not, see <http://www.gnu.org/licenses/>.
; ------------------------------------------------------------------------
;
; unicodecase.inc: unicode case conversion functions
;
;
; NOTE Re: charToUpper/charToLower and extendedcase setting...
;
; If extendedcase is disabled, we don't bother dealing with brute force lookups. This has
; worked well for me in every multinational project I have had to deal with, but I am sure
; there are lots of jurisdictions where this is not acceptable.
;
; If extendedcase is enabled, then we go through the full whack to hopefully cover the
; rest of the Unicode standard sheet.
if extendedcase = 0
if used utf16$upper | defined include_everything
; utf16$upper, non-extended build (extendedcase = 0)
; single argument in edi == char, return in eax (separate function for arg in eax return in eax)
; non-extended versions do not mash other regs
;
; chars 0x00..0xfe are converted with one table lookup: the dword at .toupper_map[char] is
; XORed into the char (every entry of the table is either 0x00 or 0x20). anything from 0xff
; upward is handed to the range checks that start at .notsinglebyte.
falign
utf16$upper:
	prolog	utf16$upper
	mov	eax, edi			; 32 bit mov zero-extends, so rax can index the table
	cmp	eax, 0xff
	; unsigned on purpose: with a signed jge any edi with bit 31 set compared as "below 0xff"
	; and went on to index far outside .toupper_map
	jae	.notsinglebyte
	xor	eax, [.toupper_map+rax*4]	; entries are dwords, hence the *4
	epilog
dalign
.toupper_map:
; note, this could certainly be a db, but seems to go faster with 32 bit xors
dd 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20
; chars from 0xff upward arrive here from the table lookup at the top of the function. each
; stanza below tests one run of lowercase chars whose capitals sit a fixed distance away, and
; passes everything outside its run along to the next stanza.
calign
.notsinglebyte:
	; offset 80: 0x0450..0x045f -> 0x0400..0x040f
	cmp	eax, 0x0450
	jl	.offset_48
	cmp	eax, 0x045F
	jg	.offset_48
	sub	eax, 0x50
	epilog
calign
.offset_48:
	; offset 48: 0x0561..0x0586 -> 0x0531..0x0556
	cmp	eax, 0x0561
	jl	.offset_neg32
	cmp	eax, 0x0586
	jg	.offset_neg32
	sub	eax, 0x30
	epilog
calign
.offset_neg32:
	; offset 32, run 1 of 3: 0x03b1..0x03cb -> 0x0391..0x03ab
	cmp	eax, 0x03b1
	jl	.offset_26		; the three runs ascend, so this is below all of them
	cmp	eax, 0x03cb
	jg	.offset_neg32_2
	cmp	eax, 0x03c2
	je	.finalsigma
	sub	eax, 0x20
	epilog
calign
.finalsigma:
	; 0x03c2 (greek small final sigma) shares its capital with 0x03c3. the plain -0x20 used
	; for the rest of the run would produce 0x03a2, which is not an assigned character.
	mov	eax, 0x03a3
	epilog
calign
.offset_neg32_2:
	; offset 32, run 2 of 3: 0x0430..0x044f -> 0x0410..0x042f
	cmp	eax, 0x0430
	jl	.offset_26
	cmp	eax, 0x044f
	jg	.offset_neg32_3
	sub	eax, 0x20
	epilog
calign
.offset_neg32_3:
	; offset 32, run 3 of 3: 0xff41..0xff5a -> 0xff21..0xff3a
	cmp	eax, 0xff41
	jl	.offset_26
	cmp	eax, 0xff5a
	jg	.offset_26
	sub	eax, 0x20
	epilog
; three more fixed-distance runs: 0x24d0..0x24e9 (capital is 26 below), 0x2170..0x217f
; (16 below) and the 0x1f00..0x1f37 rows (capital is 8 above). misses leave eax untouched
; and continue at .offset_neg1.
calign
.offset_26:
	cmp	eax, 0x24cf
	jle	.offset_16		; below the run
	cmp	eax, 0x24ea
	jge	.offset_16		; above the run
	sub	eax, 26
	epilog
calign
.offset_16:
	cmp	eax, 0x216f
	jle	.offset_8
	cmp	eax, 0x2180
	jge	.offset_8
	sub	eax, 16
	epilog
calign
.offset_8:
	; rows 0x1f0x, 0x1f1x, 0x1f2x and 0x1f3x: only the low half of each row (bit 3 clear)
	; is converted, and in the 0x1f1x row only 0x1f10..0x1f15
	cmp	eax, 0x1f00
	jl	.offset_neg1
	cmp	eax, 0x1f37
	jg	.offset_neg1
	test	eax, 0x8
	jnz	.offset_neg1		; 0x1f08..0x1f0f, 0x1f18..0x1f1f, 0x1f28..0x1f2f
	cmp	eax, 0x1f16
	je	.offset_neg1
	cmp	eax, 0x1f17
	je	.offset_neg1
	add	eax, 0x8
	epilog
; 0x0101..0x0233: upper/lower pairs that sit next to each other, capital first. which
; parity is the lowercase one depends on the sub-range:
;	0x0101..0x012f	odd chars
;	0x013a..0x0148	even chars
;	0x014b..0x0177	odd chars
;	0x0201..0x0233	odd chars, 0x0221 excepted
; a hit returns eax - 1, everything else moves on to .oddchars with eax untouched
calign
.offset_neg1:
	cmp	eax, 0x0101
	jl	.oddchars
	cmp	eax, 0x0233
	jg	.oddchars
	cmp	eax, 0x012f
	jle	.neg1_odd		; 0x0101..0x012f
	cmp	eax, 0x013a
	jl	.oddchars		; 0x0130..0x0139: not handled here
	cmp	eax, 0x0148
	jle	.neg1_even		; 0x013a..0x0148
	cmp	eax, 0x014b
	jl	.oddchars		; 0x0149, 0x014a
	cmp	eax, 0x0177
	jle	.neg1_odd		; 0x014b..0x0177
	cmp	eax, 0x0201
	jl	.oddchars		; 0x0178..0x0200
	cmp	eax, 0x0221
	je	.oddchars
	; 0x0201..0x0233 drops into the odd test
.neg1_odd:
	test	eax, 1
	jz	.oddchars
	sub	eax, 1
	epilog
calign
.neg1_even:
	test	eax, 1
	jnz	.oddchars
	sub	eax, 1
	epilog
; last stop of the non-extended version: odd chars inside one of the runs below return
; eax - 1, everything else is returned as it came in
;	0x03d9..0x03ef
;	0x0461..0x04bf	(0x0483, 0x0485, 0x0487, 0x0489 excepted)
;	0x04d1..0x04f9
;	0x1e01..0x1e95
;	0x1ea1..0x1ef9
calign
.oddchars:
	test	eax, 1
	jz	.notfound ; extended version calls binarysearch here
	; the runs ascend, so dropping below the start of the next one is a miss
	cmp	eax, 0x03d9
	jl	.notfound
	cmp	eax, 0x03ef
	jle	.oddchars_hit
	cmp	eax, 0x0461
	jl	.notfound
	cmp	eax, 0x04bf
	jle	.oddchars_excepted
	cmp	eax, 0x04d1
	jl	.notfound
	cmp	eax, 0x04f9
	jle	.oddchars_hit
	cmp	eax, 0x1e01
	jl	.notfound
	cmp	eax, 0x1e95
	jle	.oddchars_hit
	cmp	eax, 0x1ea1
	jl	.notfound ; extended version calls binarysearch here
	cmp	eax, 0x1ef9
	jg	.notfound ; extended version calls binarysearch here
.oddchars_hit:
	sub	eax, 1
	epilog
calign
.oddchars_excepted:
	; 0x0461..0x04bf, minus the four chars that are left alone
	cmp	eax, 0x0483
	je	.notfound
	cmp	eax, 0x0485
	je	.notfound
	cmp	eax, 0x0487
	je	.notfound
	cmp	eax, 0x0489
	je	.notfound
	sub	eax, 1
	epilog
calign
.notfound:
	; no conversion applies: eax still holds the char that was passed in
	epilog
end if
else
if used utf16$upper | defined include_everything
; extended uppercase, does lots more work and mashes more regs
; single arg in edi, return in eax
; mashes esi and edx (and edi itself: .binarysearch reuses it as its low bound)
;
; chars 0x00..0xfe are converted with one table lookup: the dword at .toupper_map[char] is
; XORed into the char (every entry of the table is either 0x00 or 0x20). anything from 0xff
; upward is handed to the range checks that start at .notsinglebyte.
falign
utf16$upper:
	prolog	utf16$upper
	mov	eax, edi			; 32 bit mov zero-extends, so rax can index the table
	cmp	eax, 0xff
	; unsigned on purpose: with a signed jge any edi with bit 31 set compared as "below 0xff"
	; and went on to index far outside .toupper_map
	jae	.notsinglebyte
	xor	eax, [.toupper_map+rax*4]	; entries are dwords, hence the *4
	epilog
dalign
.toupper_map:
; note, this could certainly be a db, but seems to go faster with 32 bit xors
dd 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20
; chars from 0xff upward arrive here from the table lookup at the top of the function. each
; stanza below tests one run of lowercase chars whose capitals sit a fixed distance away, and
; passes everything outside its run along to the next stanza.
calign
.notsinglebyte:
	; offset 80: 0x0450..0x045f -> 0x0400..0x040f
	cmp	eax, 0x0450
	jl	.offset_48
	cmp	eax, 0x045F
	jg	.offset_48
	sub	eax, 0x50
	epilog
calign
.offset_48:
	; offset 48: 0x0561..0x0586 -> 0x0531..0x0556
	cmp	eax, 0x0561
	jl	.offset_neg32
	cmp	eax, 0x0586
	jg	.offset_neg32
	sub	eax, 0x30
	epilog
calign
.offset_neg32:
	; offset 32, run 1 of 3: 0x03b1..0x03cb -> 0x0391..0x03ab
	cmp	eax, 0x03b1
	jl	.offset_26		; the three runs ascend, so this is below all of them
	cmp	eax, 0x03cb
	jg	.offset_neg32_2
	cmp	eax, 0x03c2
	je	.finalsigma
	sub	eax, 0x20
	epilog
calign
.finalsigma:
	; 0x03c2 (greek small final sigma) shares its capital with 0x03c3. the plain -0x20 used
	; for the rest of the run would produce 0x03a2, which is not an assigned character.
	mov	eax, 0x03a3
	epilog
calign
.offset_neg32_2:
	; offset 32, run 2 of 3: 0x0430..0x044f -> 0x0410..0x042f
	cmp	eax, 0x0430
	jl	.offset_26
	cmp	eax, 0x044f
	jg	.offset_neg32_3
	sub	eax, 0x20
	epilog
calign
.offset_neg32_3:
	; offset 32, run 3 of 3: 0xff41..0xff5a -> 0xff21..0xff3a
	cmp	eax, 0xff41
	jl	.offset_26
	cmp	eax, 0xff5a
	jg	.offset_26
	sub	eax, 0x20
	epilog
; three more fixed-distance runs: 0x24d0..0x24e9 (capital is 26 below), 0x2170..0x217f
; (16 below) and the 0x1f00..0x1f37 rows (capital is 8 above). misses leave eax untouched
; and continue at .offset_neg1.
calign
.offset_26:
	cmp	eax, 0x24cf
	jle	.offset_16		; below the run
	cmp	eax, 0x24ea
	jge	.offset_16		; above the run
	sub	eax, 26
	epilog
calign
.offset_16:
	cmp	eax, 0x216f
	jle	.offset_8
	cmp	eax, 0x2180
	jge	.offset_8
	sub	eax, 16
	epilog
calign
.offset_8:
	; rows 0x1f0x, 0x1f1x, 0x1f2x and 0x1f3x: only the low half of each row (bit 3 clear)
	; is converted, and in the 0x1f1x row only 0x1f10..0x1f15
	cmp	eax, 0x1f00
	jl	.offset_neg1
	cmp	eax, 0x1f37
	jg	.offset_neg1
	test	eax, 0x8
	jnz	.offset_neg1		; 0x1f08..0x1f0f, 0x1f18..0x1f1f, 0x1f28..0x1f2f
	cmp	eax, 0x1f16
	je	.offset_neg1
	cmp	eax, 0x1f17
	je	.offset_neg1
	add	eax, 0x8
	epilog
; 0x0101..0x0233: upper/lower pairs that sit next to each other, capital first. which
; parity is the lowercase one depends on the sub-range:
;	0x0101..0x012f	odd chars
;	0x013a..0x0148	even chars
;	0x014b..0x0177	odd chars
;	0x0201..0x0233	odd chars, 0x0221 excepted
; a hit returns eax - 1, everything else moves on to .oddchars with eax untouched
calign
.offset_neg1:
	cmp	eax, 0x0101
	jl	.oddchars
	cmp	eax, 0x0233
	jg	.oddchars
	cmp	eax, 0x012f
	jle	.neg1_odd		; 0x0101..0x012f
	cmp	eax, 0x013a
	jl	.oddchars		; 0x0130..0x0139: not handled here
	cmp	eax, 0x0148
	jle	.neg1_even		; 0x013a..0x0148
	cmp	eax, 0x014b
	jl	.oddchars		; 0x0149, 0x014a
	cmp	eax, 0x0177
	jle	.neg1_odd		; 0x014b..0x0177
	cmp	eax, 0x0201
	jl	.oddchars		; 0x0178..0x0200
	cmp	eax, 0x0221
	je	.oddchars
	; 0x0201..0x0233 drops into the odd test
.neg1_odd:
	test	eax, 1
	jz	.oddchars
	sub	eax, 1
	epilog
calign
.neg1_even:
	test	eax, 1
	jnz	.oddchars
	sub	eax, 1
	epilog
; last of the range checks: odd chars inside one of the runs below return eax - 1,
; everything else (all even chars included) is handed to .binarysearch
;	0x03d9..0x03ef
;	0x0461..0x04bf	(0x0483, 0x0485, 0x0487, 0x0489 excepted)
;	0x04d1..0x04f9
;	0x1e01..0x1e95
;	0x1ea1..0x1ef9
calign
.oddchars:
	test	eax, 1
	jz	.binarysearch
	; the runs ascend, so dropping below the start of the next one is a miss
	cmp	eax, 0x03d9
	jl	.binarysearch
	cmp	eax, 0x03ef
	jle	.oddchars_hit
	cmp	eax, 0x0461
	jl	.binarysearch
	cmp	eax, 0x04bf
	jle	.oddchars_excepted
	cmp	eax, 0x04d1
	jl	.binarysearch
	cmp	eax, 0x04f9
	jle	.oddchars_hit
	cmp	eax, 0x1e01
	jl	.binarysearch
	cmp	eax, 0x1e95
	jle	.oddchars_hit
	cmp	eax, 0x1ea1
	jl	.binarysearch
	cmp	eax, 0x1ef9
	jg	.binarysearch
.oddchars_hit:
	sub	eax, 1
	epilog
calign
.oddchars_excepted:
	; 0x0461..0x04bf, minus the four chars that are sent to the table search instead
	cmp	eax, 0x0483
	je	.binarysearch
	cmp	eax, 0x0485
	je	.binarysearch
	cmp	eax, 0x0487
	je	.binarysearch
	cmp	eax, 0x0489
	je	.binarysearch
	sub	eax, 1
	epilog
calign
.notfound:
	; exit used by .binarysearch when the char is in neither the ranges nor the table:
	; eax still holds the char that was passed in
	epilog
; table search for the lowercase chars that none of the range checks cover. .lowerCaseBase
; is probed with a binary search over entries 0..165; on a match the entry with the same
; index in .upperCaseConversion is returned, otherwise we leave through .notfound with
; eax untouched.
; lo == edi
; hi == esi
; pivot == edx
calign
.binarysearch:
	; the tables hold 16 bit chars only, so anything wider cannot be in them (and must not
	; be matched on its truncated low word by the cmp ax below)
	cmp	eax, 0xffff
	ja	.notfound
	xor	edi, edi
	mov	esi, 165
calign
.whileloop:
	cmp	edi, esi
	jg	.notfound		; signed: hi becomes -1 when the search runs off the low end
	mov	edx, edi
	add	edx, esi
	shr	edx, 1			; pivot = (lo + hi) / 2
	; lo/hi/pivot are entry numbers and the entries are words, hence the *2. same addressing
	; form as the .toupper_map lookup at the top of the function.
	cmp	ax, word [.lowerCaseBase+rdx*2]
	je	.found
	ja	.set_low		; unsigned: table entry is below our char, search the upper half
	mov	esi, edx
	sub	esi, 1			; hi = pivot - 1
	jmp	.whileloop
calign
.set_low:
	mov	edi, edx
	add	edi, 1			; lo = pivot + 1
	jmp	.whileloop
calign
.found:
	movzx	eax, word [.upperCaseConversion+rdx*2]
	epilog
dalign
; search keys for .binarysearch: must stay in ascending order, and the entry count must
; stay in step with the initial hi bound loaded into esi
.lowerCaseBase:
dw 0xb5, 0xff, 0x131, 0x133, 0x135, 0x137, 0x17a, 0x17c, 0x17e, 0x17f, 0x183, 0x185, 0x188, 0x18c, 0x192, 0x195, 0x199, 0x1a1, 0x1a3, 0x1a5, 0x1a8, 0x1ad, 0x1b0, 0x1b4, 0x1b6, 0x1b9, 0x1bd, 0x1bf, 0x1c5, 0x1c6, 0x1c8, 0x1c9, 0x1cb, 0x1cc, 0x1ce, 0x1d0, 0x1d2, 0x1d4, 0x1d6, 0x1d8, 0x1da, 0x1dc, 0x1dd, 0x1df, 0x1e1, 0x1e3, 0x1e5, 0x1e7, 0x1e9, 0x1eb, 0x1ed, 0x1ef, 0x1f2, 0x1f3, 0x1f5, 0x1f9, 0x1fb, 0x1fd, 0x1ff, 0x253, 0x254, 0x256, 0x257, 0x259, 0x25b, 0x260, 0x263, 0x268, 0x269, 0x26f, 0x272, 0x275, 0x280, 0x283, 0x288, 0x28a, 0x28b, 0x292, 0x345, 0x3ac, 0x3ad, 0x3ae, 0x3af, 0x3cc, 0x3cd, 0x3ce, 0x3d0, 0x3d1, 0x3d5, 0x3d6, 0x3f0, 0x3f1, 0x3f2, 0x3f5, 0x4c2, 0x4c4, 0x4c8, 0x4cc, 0x1e9b, 0x1f40, 0x1f41, 0x1f42, 0x1f43, 0x1f44, 0x1f45, 0x1f51, 0x1f53, 0x1f55, 0x1f57, 0x1f60, 0x1f61, 0x1f62, 0x1f63, 0x1f64, 0x1f65, 0x1f66, 0x1f67, 0x1f70, 0x1f71, 0x1f72, 0x1f73, 0x1f74, 0x1f75, 0x1f76, 0x1f77, 0x1f78, 0x1f79, 0x1f7a, 0x1f7b, 0x1f7c, 0x1f7d, 0x1f80, 0x1f81, 0x1f82, 0x1f83, 0x1f84, 0x1f85, 0x1f86, 0x1f87, 0x1f90, 0x1f91, 0x1f92, 0x1f93, 0x1f94, 0x1f95, 0x1f96, 0x1f97, 0x1fa0, 0x1fa1, 0x1fa2, 0x1fa3, 0x1fa4, 0x1fa5, 0x1fa6, 0x1fa7, 0x1fb0, 0x1fb1, 0x1fb3, 0x1fbe, 0x1fc3, 0x1fd0, 0x1fd1, 0x1fe0, 0x1fe1, 0x1fe5, 0x1ff3
dalign
; results for .binarysearch: entry N here is what a match on entry N of .lowerCaseBase
; returns, so the two tables have to stay the same length and in the same order
.upperCaseConversion:
dw 0x39c, 0x178, 0x49, 0x132, 0x134, 0x136, 0x179, 0x17b, 0x17d, 0x53, 0x182, 0x184, 0x187, 0x18b, 0x191, 0x1f6, 0x198, 0x1a0, 0x1a2, 0x1a4, 0x1a7, 0x1ac, 0x1af, 0x1b3, 0x1b5, 0x1b8, 0x1bc, 0x1f7, 0x1c4, 0x1c4, 0x1c7, 0x1c7, 0x1ca, 0x1ca, 0x1cd, 0x1cf, 0x1d1, 0x1d3, 0x1d5, 0x1d7, 0x1d9, 0x1db, 0x18e, 0x1de, 0x1e0, 0x1e2, 0x1e4, 0x1e6, 0x1e8, 0x1ea, 0x1ec, 0x1ee, 0x1f1, 0x1f1, 0x1f4, 0x1f8, 0x1fa, 0x1fc, 0x1fe, 0x181, 0x186, 0x189, 0x18a, 0x18f, 0x190, 0x193, 0x194, 0x197, 0x196, 0x19c, 0x19d, 0x19f, 0x1a6, 0x1a9, 0x1ae, 0x1b1, 0x1b2, 0x1b7, 0x399, 0x386, 0x388, 0x389, 0x38a, 0x38c, 0x38e, 0x38f, 0x392, 0x398, 0x3a6, 0x3a0, 0x39a, 0x3a1, 0x3a3, 0x395, 0x4c1, 0x4c3, 0x4c7, 0x4cb, 0x1e60, 0x1f48, 0x1f49, 0x1f4a, 0x1f4b, 0x1f4c, 0x1f4d, 0x1f59, 0x1f5b, 0x1f5d, 0x1f5f, 0x1f68, 0x1f69, 0x1f6a, 0x1f6b, 0x1f6c, 0x1f6d, 0x1f6e, 0x1f6f, 0x1fba, 0x1fbb, 0x1fc8, 0x1fc9, 0x1fca, 0x1fcb, 0x1fda, 0x1fdb, 0x1ff8, 0x1ff9, 0x1fea, 0x1feb, 0x1ffa, 0x1ffb, 0x1f88, 0x1f89, 0x1f8a, 0x1f8b, 0x1f8c, 0x1f8d, 0x1f8e, 0x1f8f, 0x1f98, 0x1f99, 0x1f9a, 0x1f9b, 0x1f9c, 0x1f9d, 0x1f9e, 0x1f9f, 0x1fa8, 0x1fa9, 0x1faa, 0x1fab, 0x1fac, 0x1fad, 0x1fae, 0x1faf, 0x1fb8, 0x1fb9, 0x1fbc, 0x399, 0x1fcc, 0x1fd8, 0x1fd9, 0x1fe8, 0x1fe9, 0x1fec, 0x1ffc
end if
end if
if extendedcase = 0
if used utf16$lower | defined include_everything
; utf16$lower, non-extended build (extendedcase = 0)
; single arg in edi, returns in eax
;
; chars 0x00..0xfe are converted with one table lookup: the dword at .tolower_map[char] is
; XORed into the char (every entry of the table is either 0x00 or 0x20). anything from 0xff
; upward is handed to the range checks that start at .notsinglebyte.
falign
utf16$lower:
	prolog	utf16$lower
	mov	eax, edi			; 32 bit mov zero-extends, so rax can index the table
	cmp	eax, 0xff
	; unsigned on purpose: with a signed jge any edi with bit 31 set compared as "below 0xff"
	; and went on to index far outside .tolower_map
	jae	.notsinglebyte
	xor	eax, [.tolower_map + rax * 4]	; entries are dwords, hence the * 4
	epilog
dalign
.tolower_map:
dd 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
; chars from 0xff upward arrive here from the table lookup at the top of the function. each
; stanza tests one run of uppercase chars whose lowercase forms sit a fixed distance above,
; and passes everything outside its run along to the next stanza.
calign
.notsinglebyte:
	; 0x10a0..0x10c5
	cmp	eax, 0x109f
	jle	.offset_80
	cmp	eax, 0x10c6
	jge	.offset_80
	add	eax, 48 ; should be + 1c60 ? hmmm, why are we doing it different?
	epilog
calign
.offset_80:
	; 0x0400..0x040f -> 0x0450..0x045f
	cmp	eax, 0x03ff
	jle	.offset_48
	cmp	eax, 0x0410
	jge	.offset_48
	add	eax, 80
	epilog
calign
.offset_48:
	; 0x0531..0x0556 -> 0x0561..0x0586
	cmp	eax, 0x0530
	jle	.offset_32
	cmp	eax, 0x0557
	jge	.offset_32
	add	eax, 48
	epilog
calign
.offset_32:
	; three ascending runs share the +32: 0x0391..0x03ab, 0x0410..0x042f, 0xff21..0xff3a.
	; falling below the start of the next run means none of them can match.
	cmp	eax, 0x0391
	jl	.offset_26
	cmp	eax, 0x03ab
	jle	.add_32
	cmp	eax, 0x0410
	jl	.offset_26
	cmp	eax, 0x042f
	jle	.add_32
	cmp	eax, 0xff21
	jl	.offset_26
	cmp	eax, 0xff3a
	jg	.offset_26
.add_32:
	add	eax, 0x20
	epilog
; three more fixed-distance runs: 0x24b6..0x24cf (lowercase is 26 above), 0x2160..0x216f
; (16 above) and the 0x1f08..0x1f3f rows (lowercase is 8 below). misses leave eax untouched
; and continue at .offset_1.
calign
.offset_26:
	cmp	eax, 0x24b5
	jle	.offset_16		; below the run
	cmp	eax, 0x24d0
	jge	.offset_16		; above the run
	add	eax, 26
	epilog
calign
.offset_16:
	cmp	eax, 0x215f
	jle	.offset_neg8
	cmp	eax, 0x2170
	jge	.offset_neg8
	add	eax, 16
	epilog
calign
.offset_neg8:
	; rows 0x1f0x, 0x1f1x, 0x1f2x and 0x1f3x: only the high half of each row (bit 3 set)
	; is converted, and in the 0x1f1x row only 0x1f18..0x1f1d
	cmp	eax, 0x1f08
	jl	.offset_1
	cmp	eax, 0x1f3f
	jg	.offset_1
	test	eax, 0x8
	jz	.offset_1		; 0x1f10..0x1f17, 0x1f20..0x1f27, 0x1f30..0x1f37
	cmp	eax, 0x1f1e
	je	.offset_1
	cmp	eax, 0x1f1f
	je	.offset_1
	sub	eax, 0x8
	epilog
; 0x0100..0x0232: upper/lower pairs that sit next to each other, capital first. which
; parity is the uppercase one depends on the sub-range:
;	0x0100..0x012e	even chars
;	0x0139..0x0147	odd chars
;	0x014a..0x0176	even chars
;	0x0200..0x0232	even chars, 0x0220 excepted
; a hit returns eax + 1, everything else moves on to .evenchars with eax untouched
calign
.offset_1:
	cmp	eax, 0x0100
	jl	.evenchars
	cmp	eax, 0x0232
	jg	.evenchars
	cmp	eax, 0x012e
	jle	.plus1_even		; 0x0100..0x012e
	cmp	eax, 0x0139
	jl	.evenchars		; 0x012f..0x0138: not handled here
	cmp	eax, 0x0147
	jle	.plus1_odd		; 0x0139..0x0147
	cmp	eax, 0x014a
	jl	.evenchars		; 0x0148, 0x0149
	cmp	eax, 0x0176
	jle	.plus1_even		; 0x014a..0x0176
	cmp	eax, 0x0200
	jl	.evenchars		; 0x0177..0x01ff
	cmp	eax, 0x0220
	je	.evenchars
	; 0x0200..0x0232 drops into the even test
.plus1_even:
	test	eax, 1
	jnz	.evenchars
	add	eax, 1
	epilog
calign
.plus1_odd:
	test	eax, 1
	jz	.evenchars
	add	eax, 1
	epilog
; last stop of the non-extended version: even chars inside one of the runs below return
; eax + 1, everything else is returned as it came in
;	0x03d8..0x03ee
;	0x0460..0x04be	(0x0482, 0x0484, 0x0486, 0x0488 excepted)
;	0x04d0..0x04f8
;	0x1e00..0x1e94
;	0x1ea0..0x1ef8
calign
.evenchars:
	test	eax, 1
	jnz	.notfound
	; the runs ascend, so dropping below the start of the next one is a miss
	cmp	eax, 0x03d8
	jl	.notfound
	cmp	eax, 0x03ee
	jle	.evenchars_hit
	cmp	eax, 0x0460
	jl	.notfound
	cmp	eax, 0x04be
	jle	.evenchars_excepted
	cmp	eax, 0x04d0
	jl	.notfound
	cmp	eax, 0x04f8
	jle	.evenchars_hit
	cmp	eax, 0x1e00
	jl	.notfound
	cmp	eax, 0x1e94
	jle	.evenchars_hit
	cmp	eax, 0x1ea0
	jl	.notfound
	cmp	eax, 0x1ef8
	jg	.notfound
.evenchars_hit:
	add	eax, 1
	epilog
calign
.evenchars_excepted:
	; 0x0460..0x04be, minus the four chars that are left alone
	cmp	eax, 0x0482
	je	.notfound
	cmp	eax, 0x0484
	je	.notfound
	cmp	eax, 0x0486
	je	.notfound
	cmp	eax, 0x0488
	je	.notfound
	add	eax, 1
	epilog
calign
.notfound:
	; no conversion applies: eax still holds the char that was passed in
	epilog
end if
else
if used utf16$lower | defined include_everything
; extended version, mashes more regs (esi, edx, and edi itself: .binarysearch reuses it
; as its low bound)
; single arg in edi, returns in eax
;
; chars 0x00..0xfe are converted with one table lookup: the dword at .tolower_map[char] is
; XORed into the char (every entry of the table is either 0x00 or 0x20). anything from 0xff
; upward is handed to the range checks that start at .notsinglebyte.
falign
utf16$lower:
	prolog	utf16$lower
	mov	eax, edi			; 32 bit mov zero-extends, so rax can index the table
	cmp	eax, 0xff
	; unsigned on purpose: with a signed jge any edi with bit 31 set compared as "below 0xff"
	; and went on to index far outside .tolower_map
	jae	.notsinglebyte
	xor	eax, [.tolower_map + rax * 4]	; entries are dwords, hence the * 4
	epilog
dalign
.tolower_map:
dd 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
; chars from 0xff upward arrive here from the table lookup at the top of the function. each
; stanza tests one run of uppercase chars whose lowercase forms sit a fixed distance above,
; and passes everything outside its run along to the next stanza.
calign
.notsinglebyte:
	; 0x10a0..0x10c5
	cmp	eax, 0x109f
	jle	.offset_80
	cmp	eax, 0x10c6
	jge	.offset_80
	add	eax, 48 ; should be + 1c60 ? hmmm, why are we doing it different?
	epilog
calign
.offset_80:
	; 0x0400..0x040f -> 0x0450..0x045f
	cmp	eax, 0x03ff
	jle	.offset_48
	cmp	eax, 0x0410
	jge	.offset_48
	add	eax, 80
	epilog
calign
.offset_48:
	; 0x0531..0x0556 -> 0x0561..0x0586
	cmp	eax, 0x0530
	jle	.offset_32
	cmp	eax, 0x0557
	jge	.offset_32
	add	eax, 48
	epilog
calign
.offset_32:
	; three ascending runs share the +32: 0x0391..0x03ab, 0x0410..0x042f, 0xff21..0xff3a.
	; falling below the start of the next run means none of them can match.
	cmp	eax, 0x0391
	jl	.offset_26
	cmp	eax, 0x03ab
	jle	.add_32
	cmp	eax, 0x0410
	jl	.offset_26
	cmp	eax, 0x042f
	jle	.add_32
	cmp	eax, 0xff21
	jl	.offset_26
	cmp	eax, 0xff3a
	jg	.offset_26
.add_32:
	add	eax, 0x20
	epilog
; three more fixed-distance runs: 0x24b6..0x24cf (lowercase is 26 above), 0x2160..0x216f
; (16 above) and the 0x1f08..0x1f3f rows (lowercase is 8 below). misses leave eax untouched
; and continue at .offset_1.
calign
.offset_26:
	cmp	eax, 0x24b5
	jle	.offset_16		; below the run
	cmp	eax, 0x24d0
	jge	.offset_16		; above the run
	add	eax, 26
	epilog
calign
.offset_16:
	cmp	eax, 0x215f
	jle	.offset_neg8
	cmp	eax, 0x2170
	jge	.offset_neg8
	add	eax, 16
	epilog
calign
.offset_neg8:
	; rows 0x1f0x, 0x1f1x, 0x1f2x and 0x1f3x: only the high half of each row (bit 3 set)
	; is converted, and in the 0x1f1x row only 0x1f18..0x1f1d
	cmp	eax, 0x1f08
	jl	.offset_1
	cmp	eax, 0x1f3f
	jg	.offset_1
	test	eax, 0x8
	jz	.offset_1		; 0x1f10..0x1f17, 0x1f20..0x1f27, 0x1f30..0x1f37
	cmp	eax, 0x1f1e
	je	.offset_1
	cmp	eax, 0x1f1f
	je	.offset_1
	sub	eax, 0x8
	epilog
; 0x0100..0x0232: upper/lower pairs that sit next to each other, capital first. which
; parity is the uppercase one depends on the sub-range:
;	0x0100..0x012e	even chars
;	0x0139..0x0147	odd chars
;	0x014a..0x0176	even chars
;	0x0200..0x0232	even chars, 0x0220 excepted
; a hit returns eax + 1, everything else moves on to .evenchars with eax untouched
calign
.offset_1:
	cmp	eax, 0x0100
	jl	.evenchars
	cmp	eax, 0x0232
	jg	.evenchars
	cmp	eax, 0x012e
	jle	.plus1_even		; 0x0100..0x012e
	cmp	eax, 0x0139
	jl	.evenchars		; 0x012f..0x0138: not handled here
	cmp	eax, 0x0147
	jle	.plus1_odd		; 0x0139..0x0147
	cmp	eax, 0x014a
	jl	.evenchars		; 0x0148, 0x0149
	cmp	eax, 0x0176
	jle	.plus1_even		; 0x014a..0x0176
	cmp	eax, 0x0200
	jl	.evenchars		; 0x0177..0x01ff
	cmp	eax, 0x0220
	je	.evenchars
	; 0x0200..0x0232 drops into the even test
.plus1_even:
	test	eax, 1
	jnz	.evenchars
	add	eax, 1
	epilog
calign
.plus1_odd:
	test	eax, 1
	jz	.evenchars
	add	eax, 1
	epilog
; last of the range checks: even chars inside one of the runs below return eax + 1,
; everything else (all odd chars included) is handed to .binarysearch
;	0x03d8..0x03ee
;	0x0460..0x04be	(0x0482, 0x0484, 0x0486, 0x0488 excepted)
;	0x04d0..0x04f8
;	0x1e00..0x1e94
;	0x1ea0..0x1ef8
calign
.evenchars:
	test	eax, 1
	jnz	.binarysearch
	; the runs ascend, so dropping below the start of the next one is a miss
	cmp	eax, 0x03d8
	jl	.binarysearch
	cmp	eax, 0x03ee
	jle	.evenchars_hit
	cmp	eax, 0x0460
	jl	.binarysearch
	cmp	eax, 0x04be
	jle	.evenchars_excepted
	cmp	eax, 0x04d0
	jl	.binarysearch
	cmp	eax, 0x04f8
	jle	.evenchars_hit
	cmp	eax, 0x1e00
	jl	.binarysearch
	cmp	eax, 0x1e94
	jle	.evenchars_hit
	cmp	eax, 0x1ea0
	jl	.binarysearch
	cmp	eax, 0x1ef8
	jg	.binarysearch
.evenchars_hit:
	add	eax, 1
	epilog
calign
.evenchars_excepted:
	; 0x0460..0x04be, minus the four chars that are sent to the table search instead
	cmp	eax, 0x0482
	je	.binarysearch
	cmp	eax, 0x0484
	je	.binarysearch
	cmp	eax, 0x0486
	je	.binarysearch
	cmp	eax, 0x0488
	je	.binarysearch
	add	eax, 1
	epilog
calign
.notfound:
	; exit used by .binarysearch when the char is in neither the ranges nor the table:
	; eax still holds the char that was passed in
	epilog
; table search for the uppercase chars that none of the range checks cover. .upperCaseBase
; is probed with a binary search over entries 0..156; on a match the entry with the same
; index in .lowerCaseConversion is returned, otherwise we leave through .notfound with
; eax untouched.
; lo == edi
; hi == esi
; pivot == edx
calign
.binarysearch:
	; the tables hold 16 bit chars only, so anything wider cannot be in them (and must not
	; be matched on its truncated low word by the cmp ax below)
	cmp	eax, 0xffff
	ja	.notfound
	xor	edi, edi
	mov	esi, 156
calign
.whileloop:
	cmp	edi, esi
	jg	.notfound		; signed: hi becomes -1 when the search runs off the low end
	mov	edx, edi
	add	edx, esi
	shr	edx, 1			; pivot = (lo + hi) / 2
	; lo/hi/pivot are entry numbers and the entries are words, hence the *2. same addressing
	; form as the .tolower_map lookup at the top of the function.
	cmp	ax, word [.upperCaseBase+rdx*2]
	je	.found
	ja	.set_low		; unsigned: table entry is below our char, search the upper half
	mov	esi, edx
	sub	esi, 1			; hi = pivot - 1
	jmp	.whileloop
calign
.set_low:
	mov	edi, edx
	add	edi, 1			; lo = pivot + 1
	jmp	.whileloop
calign
.found:
	movzx	eax, word [.lowerCaseConversion+rdx*2]
	epilog
dalign
; search keys for .binarysearch: must stay in ascending order, and the entry count must
; stay in step with the initial hi bound loaded into esi
.upperCaseBase:
dw 0x130, 0x132, 0x134, 0x136, 0x178, 0x179, 0x17b, 0x17d, 0x181, 0x182, 0x184, 0x186, 0x187, 0x189, 0x18a, 0x18b, 0x18e, 0x18f, 0x190, 0x191, 0x193, 0x194, 0x196, 0x197, 0x198, 0x19c, 0x19d, 0x19f, 0x1a0, 0x1a2, 0x1a4, 0x1a6, 0x1a7, 0x1a9, 0x1ac, 0x1ae, 0x1af, 0x1b1, 0x1b2, 0x1b3, 0x1b5, 0x1b7, 0x1b8, 0x1bc, 0x1c4, 0x1c5, 0x1c7, 0x1c8, 0x1ca, 0x1cb, 0x1cd, 0x1cf, 0x1d1, 0x1d3, 0x1d5, 0x1d7, 0x1d9, 0x1db, 0x1de, 0x1e0, 0x1e2, 0x1e4, 0x1e6, 0x1e8, 0x1ea, 0x1ec, 0x1ee, 0x1f1, 0x1f2, 0x1f4, 0x1f6, 0x1f7, 0x1f8, 0x1fa, 0x1fc, 0x1fe, 0x386, 0x388, 0x389, 0x38a, 0x38c, 0x38e, 0x38f, 0x3f4, 0x4c1, 0x4c3, 0x4c7, 0x4cb, 0x1f48, 0x1f49, 0x1f4a, 0x1f4b, 0x1f4c, 0x1f4d, 0x1f59, 0x1f5b, 0x1f5d, 0x1f5f, 0x1f68, 0x1f69, 0x1f6a, 0x1f6b, 0x1f6c, 0x1f6d, 0x1f6e, 0x1f6f, 0x1f88, 0x1f89, 0x1f8a, 0x1f8b, 0x1f8c, 0x1f8d, 0x1f8e, 0x1f8f, 0x1f98, 0x1f99, 0x1f9a, 0x1f9b, 0x1f9c, 0x1f9d, 0x1f9e, 0x1f9f, 0x1fa8, 0x1fa9, 0x1faa, 0x1fab, 0x1fac, 0x1fad, 0x1fae, 0x1faf, 0x1fb8, 0x1fb9, 0x1fba, 0x1fbb, 0x1fbc, 0x1fc8, 0x1fc9, 0x1fca, 0x1fcb, 0x1fcc, 0x1fd8, 0x1fd9, 0x1fda, 0x1fdb, 0x1fe8, 0x1fe9, 0x1fea, 0x1feb, 0x1fec, 0x1ff8, 0x1ff9, 0x1ffa, 0x1ffb, 0x1ffc, 0x2126, 0x212a, 0x212b
dalign
; results for .binarysearch: entry N here is what a match on entry N of .upperCaseBase
; returns, so the two tables have to stay the same length and in the same order
.lowerCaseConversion:
dw 0x69, 0x133, 0x135, 0x137, 0xff, 0x17a, 0x17c, 0x17e, 0x253, 0x183, 0x185, 0x254, 0x188, 0x256, 0x257, 0x18c, 0x1dd, 0x259, 0x25b, 0x192, 0x260, 0x263, 0x269, 0x268, 0x199, 0x26f, 0x272, 0x275, 0x1a1, 0x1a3, 0x1a5, 0x280, 0x1a8, 0x283, 0x1ad, 0x288, 0x1b0, 0x28a, 0x28b, 0x1b4, 0x1b6, 0x292, 0x1b9, 0x1bd, 0x1c6, 0x1c6, 0x1c9, 0x1c9, 0x1cc, 0x1cc, 0x1ce, 0x1d0, 0x1d2, 0x1d4, 0x1d6, 0x1d8, 0x1da, 0x1dc, 0x1df, 0x1e1, 0x1e3, 0x1e5, 0x1e7, 0x1e9, 0x1eb, 0x1ed, 0x1ef, 0x1f3, 0x1f3, 0x1f5, 0x195, 0x1bf, 0x1f9, 0x1fb, 0x1fd, 0x1ff, 0x3ac, 0x3ad, 0x3ae, 0x3af, 0x3cc, 0x3cd, 0x3ce, 0x3b8, 0x4c2, 0x4c4, 0x4c8, 0x4cc, 0x1f40, 0x1f41, 0x1f42, 0x1f43, 0x1f44, 0x1f45, 0x1f51, 0x1f53, 0x1f55, 0x1f57, 0x1f60, 0x1f61, 0x1f62, 0x1f63, 0x1f64, 0x1f65, 0x1f66, 0x1f67, 0x1f80, 0x1f81, 0x1f82, 0x1f83, 0x1f84, 0x1f85, 0x1f86, 0x1f87, 0x1f90, 0x1f91, 0x1f92, 0x1f93, 0x1f94, 0x1f95, 0x1f96, 0x1f97, 0x1fa0, 0x1fa1, 0x1fa2, 0x1fa3, 0x1fa4, 0x1fa5, 0x1fa6, 0x1fa7, 0x1fb0, 0x1fb1, 0x1f70, 0x1f71, 0x1fb3, 0x1f72, 0x1f73, 0x1f74, 0x1f75, 0x1fc3, 0x1fd0, 0x1fd1, 0x1f76, 0x1f77, 0x1fe0, 0x1fe1, 0x1f7a, 0x1f7b, 0x1fe5, 0x1f78, 0x1f79, 0x1f7c, 0x1f7d, 0x1ff3, 0x3c9, 0x6b, 0xe5
end if
end if