; HeavyThing - ht_defaults.inc
;
; Jeff Marrison

	; ------------------------------------------------------------------------
	; HeavyThing x86_64 assembly language library and showcase programs
	; Copyright © 2015-2018 2 Ton Digital 
	; Homepage: https://2ton.com.au/
	; Author: Jeff Marrison <jeff@2ton.com.au>
	; This file is part of the HeavyThing library.
	; HeavyThing is free software: you can redistribute it and/or modify
	; it under the terms of the GNU General Public License, or
	; (at your option) any later version.
	; HeavyThing is distributed in the hope that it will be useful, 
	; but WITHOUT ANY WARRANTY; without even the implied warranty of
	; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	; GNU General Public License for more details.
	; You should have received a copy of the GNU General Public License along
	; with the HeavyThing library. If not, see <http://www.gnu.org/licenses/>.
	; ------------------------------------------------------------------------
	; ht_defaults.inc: global settings that determine how the library gets built.
	; we are the first include, so this is where the fasm output format gets set
	format ELF64
	; SETTINGS, which directly affect which bits of our basket actually get assembled
	; some are dependent on others of course...

	; some things, such as mapped, need to know what our system page size is
	page_size = 4096

	; alignment settings:

	; align functions?
	align_functions = 1

	; align returns? (translates call into push aligned after call and jmp)
	align_returns = 0

	; align call returns? (does multibyte NOP fill _before_ the call to pad the return address out to 16 byte alignment)
	align_callreturns = 0

	; align inner jump targets?
	align_inner = 1

	; align tables/static text/jump tables?
	align_data = 1

	; the actual alignment values, one for each of: functions, inner jump targets,
	; and data (presumably only relevant when the matching align_* switch above is on)
	function_alignment = 16
	inner_alignment = 16
	data_alignment = 16

	; generate framepointers? (we like gdb)
	framepointers = 1

	; should we make public symbols for our functions?
	public_funcs = 1

	; enable automatic profiling in clocks/calls/callgraph?
	; NOTE: cpc_integers and profiler_recordcount are only defined when this is nonzero
	profiling = 0

	; preload (barbarically) the code? (does a once-over read of all of it)
	code_preload = 1

	; profiler-only settings: the two symbols below only exist when profiling is nonzero
	if profiling
		; the CPC field for the profiler output can be floating point
		; or integer based... floating point requires string_math goodies
		; which in turn create a very large number of nested function calls
		; for most purposes, integer based is fine and dandy
		cpc_integers = 1

		; the number of profiler records; how large this needs to be depends on your call graph
		profiler_recordcount = 16384
	end if

	; debugging aids:

	; if profiling = 0, and this is enabled, every function that begins with a prolog statement will dump its function name to stdout when called
	calltracing = 0

	; enable bin checking during free? (adds overhead, but useful for picking up errant memory corruption)
	heap_bincheck = 0

	; enable heap barrier/validity checking? setting this to 1 adds an extra 8 byte preface that is
	; checked for validity during alloc/free of non-mmap blocks (and we breakpoint if it is not valid)
	heap_barriers = 0

	; if this is defined (whether set to 1 or not), then instead of only
	; functions being included that are used, EVERYTHING ends up in the resultant
	; binary... (left commented out, and thus undefined, by default)
	; include_everything = 1

	; rng settings:

	; if 0, rng uses a simple 32bit rdtsc seed; otherwise it uses HMAC_DRBG to do up its initial state
	rng_heavy_init = 1

	; by default, the initial seed is in part pulled from /dev/urandom. If this setting is enabled,
	; we pull 32 bytes from /dev/random instead (which may stall). Default is 0
	rng_paranoid = 0

	; string/encoding settings:

	; native string in UTF16 or UTF32? (presumably 16 or 32 respectively)
	; NOTE: some tui components require UTF32
	string_bits = 32

	; extended utf16 case modifies? (see commentary in unicodecase.inc)
	extendedcase = 0

	; utf strict adherence? (relaxed, aka 0, is probably what you want)
	; that being said, it is nice to also be able to validate utf.. your choice
	strict_utf = 0

	; if you use string$from_bintobase64, these settings affect its encoding (as well as base64$encode_latin1)
	base64_linebreaks = 1
	base64_maxline = 76

	; for library functions, if this is enabled, uses movbe rather than bswap + separate mov
	; on a Haswell or better CPU, this is a good thing.
	use_movbe = 0

	; epoll specific settings:

	; NOTE: it is _highly_ recommended that you lower the linux default of 15 to 8 for this kernel setting:
	; echo 8 >/proc/sys/net/ipv4/tcp_retries2
	; the default of 15 means that if a connection "vanishes" (think: lappy going to sleep), then
	; we hold/maintain/accumulate output for that connection for a full 13 minutes, and that is insane!
	; 8 == ~3 minutes or so, which seems to be a good value "in the middle"... spose in the end it depends
	; on your application/what you are trying to achieve

	; on init, we do a getrlimit/setrlimit, the minfds here is the minimum # of fds required to proceed, or we error and die
	epoll_minfds = 4096

	; do we call accept4 repeatedly, or do we let the level-triggered action deal with that
	epoll_multiple_accepts = 1

	; default socket options:
	epoll_keepalive = 1
	epoll_linger = 0
	epoll_nodelay = 1

	; docs say "new programs should use fcntl to set nonblocking", but that requires two separate syscalls
	; so, for busy systems, by default we use the old-school method of ioctl(fd, FIONBIO, 1)
	epoll_fionbio = 1

	; if epoll_linger is on, this is the linger timeout:
	epoll_linger_time = 30

	; the "maxevents" count that we stack-allocate on our calls to epoll_wait (* 12 for bytes)
	epoll_stacksize = 4096

	; the default read block size (each connection reserves this much, then calls read,
	; so if you have many many connections, this size factors into overall memory usage)
	epoll_readsize = 32768

	; issue an EPOLL_CTL_DEL before closing the fd on cleanup?
	; epoll does interesting things across/before/after forks to say the least...
	; specifically, if a parent process fires up the epoll layer, does some outbound connections
	; that end up pending, then forks, even though we set CLOEXEC for the outbound sockets
	; the fds don't get properly terminated from the epoll fd in the parent process, weird imo.
	; enabling this setting forces the epoll_ctl deletion prior to doing the close(fd) call.
	epoll_del_before_close = 0

	; set CLOEXEC flag for epoll$outbound sockets?
	epoll_outbound_cloexec = 0

	; unix connect "evil trickery":
	; (in short: if enabled, epoll$outbound unix stream connects that get EAGAIN are queued and
	; retried in arrival order, instead of the error being handed back to the caller)
	; Ok so, some commentary here re: the basic idea/issue/premise... For outbound socket connects to a
	; unix stream socket, it is quite possible to receive an EAGAIN from the connect syscall, indicating
	; that the other end's listen backlog is exhausted and that we should "try again later". Linux defaults
	; the kernel upper limit of this setting to 128, which is fine in most cases, but there are lots of
	; scenarios where this is less than ideal, and it seems silly to force the condition back on the user
	; of this library, rather than allow the library to deal with it.
	; So, _if_ this setting is enabled, and epoll$outbound is used, and connect returns EAGAIN, then
	; we insert the epoll object and a memcpy of the destination address into an insert-order map.
	; We do _not_ do this timer-based, so it is assumed that when this condition arises, that the epoll
	; layer is quite busy anyway, so we don't need to postpone/delay, and calls to unix stream socket
	; connects are quite fast. Of course, there are ways to break this method entirely, but for my use
	; cases, this works well. Specifically, we service the "try unix connect again" queue _before_ we
	; process the next round of epoll_wait/epoll$iteration, and what this means is that the connects
	; will ultimately be serviced in the order they arrived to begin with, and subsequent connects don't
	; get to run _before_ we do the ones already in the queue (e.g. new ones that arrive get put at the
	; back of the queue, where they should be). Some common sense obviously applies as to whether this
	; strategy really does make sense or not, but it means that application layer uses for this epoll
	; layer don't have to worry about it, it _will_ let it through when the far endpoint's queue comes
	; good. Our default here is to apply this queueing strategy. CAUTION: It is _very_ important that
	; you don't end up deleting an epoll object (unrelated epoll object that is) during a connected
	; handler, because then it is possible that a queued unix outbound connection may still be in the
	; "list of ones to process" after it was destroyed (this isn't a problem for normal use).
	epoll_unixconnect_forgiving = 1

	; default timeout in milliseconds for DNS queries
	dns_timeout_msecs = 10000

	; mimelike specific settings:

	; during transmit body composition, if Content-Encoding is set to gzip, and the actual body
	; length is less than this setting, composition removes the Content-Encoding header and does
	; NOT gzip it. The default here works well in the wild.
	mimelike_mingzip = 1024
	; during transmit body composition, if Transfer-Encoding is set to chunked, and this setting is
	; nonzero, and the body length is less than this setting, composition removes the Transfer-Encoding
	; header and adds a Content-Length header instead. (a known content length defeats the purpose of
	; chunked transfer encoding anyway, but this is a useful feature to have around, different browsers
	; treat chunked transfer encodings quite interestingly).
	mimelike_minchunked = 0
	; and if we do end up doing chunked encoding, what should our chunksize be?
	mimelike_chunksize = 65535
	; during mimelike$compose, do we "split" Set-Cookie headers? Basically, new browsers have no
	; issue with my mimelike overall design choice of single unique headers, concatenated with
	; ', ' ... but, older browsers, specifically when used from the webserver object, don't like
	; dealing with multiple ', ' separated cookies in a single header... so if this setting is
	; enabled, then during composition, it checks each header for string$equals to 'Set-Cookie',
	; and if it _is_, then Compose itself splits them out into individual lines
	mimelike_setcookie_split = 1

	; tui_* specific settings:

	; should we send an initial 1049h and close with 1049l ? (think: vim restore screen when done)
	terminal_alternatescreen = 1

	; same for tui_ssh sessions
	tui_ssh_alternatescreen = 1

	; translate unicode line characters to the alternate character set line drawing chars?
	acs_linechars = 1
	; some notes here re: that one... with my Terminal.app, the unicode line characters draw just fine
	; but leave horrific gaps in the vertical linespacing... this is presumably due to my font
	; having increased linespacing as well... interestingly however, they made special allowances
	; for the VT100 ACS linedrawing characters, which draw perfectly.
	; SO, instead of writing invalid unicode characters into the tui_object buffers,
	; if the above setting is enabled, the renderer translates them on the fly and modeswitches
	; between normal and ACS mode automatically... descendants of tui_object needn't be aware
	; and can just write standard unicode line drawing...

	; for tui_simpleauth, should a new user failure cause an exit?
	tui_simpleauth_newuserfail_exit = 0

	; syslog options:
	syslog_facility = 1 * 8		; user-level messages (*8 for priority value calculation)
					; 0 == kernel, 1 == user, 2 == mail, 3 == system daemons, 4 == security/auth, 5 == syslogd internal, 6 == lpd, 7 == news, 8 == uucp, 9 == clockd, 10 == security/auth, 11 == ftp, etc.
					; see RFC5424 for further details
	syslog_stderr = 0		; send to stderr in addition to the syslog?

	; formatter settings
	; ISO8601 date format, from formatter$add_datetime. If this setting is true, we include fractional
	; seconds, otherwise we stop at whole seconds
	formatter_datetime_fractional = 0

	; bigint settings:
	; to reduce the number of times we re-allocate/check for permissible sizes, etc
	; we have a static hard limit defined on the number of words a bigint can hold
	; CAUTION: we do _not_ check for overruns, so expect a crashy-crashy if this
	; setting isn't right for the intended application (or the app layer doesn't
	; boundscheck the math beforehand). SSH/TLS included herein do basic checks
	; to ensure overruns don't occur in the wild wild internet.
	bigint_maxwords = 512
	; maximum wordcount for unrolled squaring/mults
	; (This setting adds a TON of code if unrollsize > 16)
	; must be one of 16, 32, 64, 96, 128
	; NOTE: 16 is probably what you want unless you are doing a very specialized bit of mayhem
	bigint_unrollsize = 16

	; for Miller-Rabin primality checking, this setting affects how many iterations end up being
	; done. Error rate is 2^-VALUE, where VALUE is the setting below:
	millerrabinerrorrate = 64	; can be one of 64, 80, 128, 160, 256

	; sizes for DSA generated goods (one of 1024, 2048, 3072)
	dsa_size = 3072
	; dsa subgroup size (one of 160, 224, 256 per above size)
	dsa_subgroup_size = 256

	; Diffie-Hellman parameter settings
	; dh_bits = 4096		; this was our library default, many people said this was too much
	dh_bits = 2048
	; how many bits to use for our side's private "key" (aka DH private exponent)
	; While any value in the range 2..q-2 is perfectly fine, the modexp required to do a full
	; q's worth is considerably heavier than is required. NIST says 224 bits for a 2048 key, etc. etc.
	; so our default of 256 bits (with dh_bits == 2048) covers that and a bit more for good measure.
	; NOTE: OpenSSL (and probably others) always sets this to a random value in 2..q-2, and they
	; do so because they have no assurances about the DH p in use. Since we painstakingly
	; created our dh_pool to consist entirely of safe primes and their sophie germain counterparts,
	; we can safely use smaller private exponents to achieve the desired level of symmetric key
	; strength equivalence. It is somewhat surprising that none of the popular TLS server software
	; seems to have an option for specifying this.
	; dh_privatekey_size = 512	; this setting was used with dh_bits == 4096
	dh_privatekey_size = 256

	; TLS settings:

	; when we walk the cipher suite list, do we walk our own first, or the client's?
	; (server cipher order preferred or not)
	tls_server_cipher_order = 1

	; for server side PEM/X509 handling, this setting is the # of seconds
	; that we wait to check to see if the underlying PEM file got modified
	tls_pem_refresh_interval = 3600

	; Unfortunately, a good chunk of the wild interwebs doesn't support DHE (still, ughgh).
	; so, by default, we toss in the mandatory non-PFS ciphersuites.
	tls_perfect_forward_secrecy_only = 0

	; Much to my surprise, RSA blinding operations are not performed by default in OpenSSL
	; or any other library that I can find. Enabling this option prevents leaking any
	; information about the private exponent, at the expense of a tremendous amount of
	; extra calculations.
	tls_server_rsa_blinding = 0

	; As many have pointed out, a lot of large-scale production environments do not care
	; so much about security as I do, this setting disables all but RSA/AES-128/CBC for
	; the ciphersuite list (to provide maximum throughput, despite it being with no DHE)
	tls_minimalist = 0

	; for DHE, in _client_ mode, should we verify that the supplied modulus is prime?
	tls_clientside_dh_p_verify = 0

	; in _server_ mode, if we encounter a bad_record_mac condition, should we blacklist
	; the IP? if this is nonzero, it specifies the time in seconds that the blacklist
	; should last for
	tls_blacklist = 86400

	; in _server_ mode, do we honour/enable sessionid reuse? This of course involves us
	; keeping copies of TLS states around for a while, so this setting, if nonzero,
	; determines how long we keep them around for. The default here is 1 hour, and this
	; is probably overkill, since most are single-session based anyway (from the browser
	; side that is).
	tls_server_sessioncache = 3600

	; in _server_ mode, do we include OCSP Stapling? (this is generally a good thing)
	; NOTE: I can't find a single browser that supports RFC6961 yet, so multi-stapling
	; is not going to work for you just yet. Simple solution: Get your certificate from
	; a top-tier CA so that you don't have intermediates to contend with and you'll be
	; sweet. If we do have more than one certificate (and thus OCSPResponse for each),
	; at the moment, we'll only present status_request (not v2/multi), with the first
	; one we have (and this is better than none I spose).
	tls_server_ocsp_stapling = 1

	; X509 OCSP settings:

	; OCSP specs say that everyone shall do sha1, so by default we leave the following
	; turned off (surely they will move to SHA256 at some point in the near future)
	X509_ocsp_sha256 = 0

	; on successful OCSP responses (regardless of what the validity period returned by
	; the OCSP responder is), how often should we update our OCSP responses? NOTE: since
	; the response carries its age onto the end clients, even though the response may
	; be valid for a week or more, I have seen several clients reject them if they are
	; too stale (and then they go and fetch their own fresh copies, defeating the
	; entire purpose of OCSP stapling to begin with)
	; so, this is set by default to a 2 hourly timer (time is in milliseconds)
	X509_ocsp_refresh = 7200000

	; and similarly, if an error occurs, or we are unable to get them, etc, how often
	; should we retry? 5 min is the default (time is in milliseconds here too)
	X509_ocsp_retry = 300000

	; so that you can get an idea of what is really going on, and because we don't provide
	; any way to verify what is happening, we default to doing syslog messages for OCSP
	; Stapling activity
	X509_ocsp_syslog = 1

	; a companion to the tls_server_sessioncache setting: if that one is nonzero (enabled),
	; should we encrypt the actual session data itself? While really just an obfuscation technique,
	; what this does, if enabled, is randomly generate an AES256 key, and then use said
	; persistent AES256 enc/dec states such that actual session master key/cipherindex
	; are not kept in the cleartext in memory. It means a coredump of a running tls server
	; doesn't just "give up the goods" without someone having to spend some effort, haha
	tls_server_encryptcache = 1

	; similarly, in client mode, do we store sessionids and their associated master keys?
	; see the commentary for tls_server_sessioncache/tls_server_encryptcache for the
	; server-side similar functionality
	; NOTE/CAUTION: because these share the same tls$sessionid map, the server and client
	; session caches can't both be enabled at the same time _IF_ (and only if) you are trying
	; to establish connections directly to yourself inside the same executing session (haha,
	; not a very common thing but figure it is worth the warning)
	tls_client_sessioncache = 3600
	tls_client_encryptcache = 1

	; SCRYPT settings:
	; of course the "real" scrypt uses hmac-sha256, this option lets us use sha512 with it instead
	scrypt_sha512 = 1
	; NOTE(review): N/r/p are presumably the usual scrypt cost parameters (see RFC 7914) --
	; confirm against the scrypt implementation before changing them
	scrypt_N = 1024
	scrypt_r = 1
	scrypt_p = 1

	; SSH settings:

	; compile-time option that determines whether we use dh_pool.inc with a random index
	; for our DH public p/g, or whether we generate them on-the-fly. The dh_bits setting
	; above determines how large our safe prime is. CAUTION IS ADVISED! haha, generating
	; safe primes is not without its computational penalties. 2kbit DH params generate
	; fast enough to use on the fly, but anything larger will come with some heavy time
	; delays for our server-side goods. This is mainly intended for fun/playing around,
	; haha, if you really wanted to generate on-the-fly DH parameters, a modification
	; to the multiprocess make_dh_static to then send new parameters as they are generated
	; would be much more prudent. haha
	ssh_dh_dynamic = 0

	; compile-time option whether we use/try to use compression for SSH client/server.
	ssh_do_compression = 1

	; compile-time option as to whether we _FORCE_ compression (requires ssh_do_compression
	; set to 1 obviously)
	; NOTE: this prevents us from sending the "none" option in the kex, and really does force it
	; NOTE 2: for tui_ssh related goods, which is mostly what I use this for, our default of forced
	; is a GOOD thing.
	ssh_force_compression = 1

	; forcing compression only makes sense when compression is compiled in, so fail the
	; build on that combination here instead of leaving it to be discovered later
	assert ssh_force_compression = 0 | ssh_do_compression <> 0

	; in _server_ mode, if we encounter a bad hmac, we randomize the expected length of the
	; incoming packet, such that we don't reveal any information to an attacker and sit there
	; waiting for more. Per my sentiments on the same for TLS though, by default we will blacklist
	; the remote IP for the duration given by the below setting. Set to 0 to disable.
	; NOTE(review): presumably in seconds, like tls_blacklist -- confirm
	ssh_blacklist = 86400

	; zlib settings:
	zlib_deflate_level = 6		; 0 == Z_NO_COMPRESSION, 1 == Z_BEST_SPEED, 9 == Z_BEST_COMPRESSION, 6 == normal zlib default

	; how many bytes do we automatically reserve in the outbuf when deflate is called?
	zlib_deflate_reserve = 16384

	; privmapped setting:
	; NOTE: if you are not running as user nobody, this noatime is a nicety, but otherwise you'll likely
	; get EPERM issues if the user/file doesn't belong to you (which seems weird)... at least confirmed on
	; oldish kernels, hmm, TODO: investigate this oddity a bit more, as O_NOATIME would be nice across the
	; board
	privmapped_noatime = 0

	; WEBSERVER settings:

	; for file-based (sandbox/vhost) serving, we automatically add Cache-Control, Last-Modified and ETag
	; headers, this setting defines the max-age value (and we set s-maxage to it * 3)
	; if this is zero, we only send Cache-Control: no-cache. This can also be overridden in the webserver
	; object.
	webserver_filecache_time = 300

	; when we receive data and are expecting a request header, this defines how large the upper limit
	; is before we decide to bailout and call it bad input:
	webserver_maxheader = 32768

	; similarly, and to prevent memory starvation attacks, this defines the maximum upper limit
	; of a POST request (and depending on your environment determines what this value should really be)
	; the default here reflects my normal operating environments, YMMV.
	webserver_maxrequest = 64 * 1048576
	; for file handling, if the file size is <= this setting, we may or may not do autogzip/chunking
	; and we use mimelike$xmitbody to do our full composition beforehand. Otherwise, no gzip/chunking
	; and we send it out in pieces as-is (the body of the file that is).
	; imagine: serving an 8GB file, without this threshold, we'd read it all into buffers and slowly
	; let the epoll layer consume it as it goes out, yikes.
	webserver_bigfile = 32 * 1048576

	; on the fly gzip our output (below the bigfile threshold only)
	webserver_autogzip = 1

	; we do our best to pay close attention to actual socket-level buffers, and because epoll$send
	; actually does buffer$consumes on whatever it has pending, we must be careful not to go too
	; crazy filling epoll's send buffers. (This is because for remote TCP where the send window is
	; small, buffer$consume effectively does a memmove of its entire sendbuffer by the amount that
	; is gradually accepted by the socket itself, obviously a bad thing if said buffer were gigantic).
	; so, this setting and the next one determine how aggressive we are filling the epoll layer's
	; sendbuffer and subsequent refills:
	webserver_initialsend = 262144
	webserver_subsequentsend = 262144

	; for filesystem serving, we do not constantly stat our cached privmapped objects, and only
	; do them periodically... this setting (seconds) determines how frequently we'll check them for
	; modifications (for production systems that don't change a lot, higher is better)
	; NOTE: this value _must_ be less than the webserver_hotlist_time setting below
	webserver_hotlist_statfreq = 120

	; for filesystem serving, we keep cached privmapped objects for the underlying files, such that
	; we don't have to constantly stat/open/mmap (or any of the other various methods).
	; this setting determines how long old ones should hang around for (in seconds)
	webserver_hotlist_time = 900

	; enforce the statfreq < hotlist time requirement noted above at assembly time, so a bad
	; combination fails the build instead of going unnoticed
	assert webserver_hotlist_statfreq < webserver_hotlist_time

	; ssllabs.com Best Practices says that all sites should employ HSTS, and further, that such
	; headers are ignored by HTTP clients (and ones that don't support it to begin with).
	; if the webservercfg object is set with the istls flag, we go ahead and add it (and include
	; subdomains) if this setting is in effect.
	webserver_hsts = 1

	; the BREACH attack is annoying, specifically because we _like_ webserver_autogzip, what a
	; spectacular difference it makes for normal webserver operations, especially nice when the
	; application layers (be they fastcgi, function calls, whatever) needn't concern themselves
	; with gzip awareness.
	; If this setting is nonzero, we add this random number of bytes (and then hex encode it)
	; and add it as a random-length header in our responses (called X-NB). We only do so for TLS
	; _and_ auto-gzipped content. Note that because we add these random number of bytes in the
	; response header, and that the attacker only gets to see the resultant encrypted traffic
	; to measure the response sizes, compression does not apply here and these would make the
	; attacker's job much more difficult.
	; This also directly applies to the TIME attack, and any that rely on measuring compressed
	; response bodies (down to the byte level). Paranoid users may wish to increase this number.
	webserver_breach_mitigation = 48

	; if this is set, fastcgi maps are not hooked _on the spot_ and instead are hooked via
	; epoll's postprocessing hook
	webserver_fastcgi_postprocess = 0

	; webclient settings:

	; this setting defines the upper limit on how many simultaneous connections we'll do for
	; a single hostname (default only, can be modified directly in webclient)
	webclient_maxconns = 4

	; for any webclient request, how long will we sit around without _receiving_ any data?
	; (each time we get more data down the line, we reset this timer, so it really is a
	; read idle timeout). NOTE: if you are using webclient to upload some huge amount of
	; data that will take a while, this timeout needs to be upped considerably, haha
	; default here is 2 minutes (ms resolution as passed to epoll$timer_new)
	webclient_readtimeout = 120000

	; if the webclient receives a 301 or 302, should it follow them or return them?
	webclient_follow_redirects = 1

	; should we use a global object for dns caching? (this is only useful if you are using
	; more than one webclient object at the same time)
	webclient_global_dnscache = 1