diff --git a/toolchain/musl/Config.version b/toolchain/musl/Config.version
index 57b5bd0df45a7fa073185cff374be1e7048b778e..c560c603b7d0ee989e0f6a5d5ba5d0e3f96a2f04 100644
--- a/toolchain/musl/Config.version
+++ b/toolchain/musl/Config.version
@@ -3,6 +3,6 @@ if USE_MUSL
 config MUSL_VERSION
 	string
 	depends on USE_MUSL
-	default "1.1.7"
+	default "1.1.9"
 
 endif
diff --git a/toolchain/musl/common.mk b/toolchain/musl/common.mk
index c67be2a9fb42e6968fccb6a9b114c41797b86fe2..f4a34e469e9eb8aff491fbc4027237b01ae6ec9e 100644
--- a/toolchain/musl/common.mk
+++ b/toolchain/musl/common.mk
@@ -11,7 +11,7 @@ PKG_NAME:=musl
 PKG_VERSION:=$(call qstrip,$(CONFIG_MUSL_VERSION))
 PKG_RELEASE=1
 
-PKG_MD5SUM:=6fe9fc4d99a7d321432b3e179c138d73
+PKG_MD5SUM:=14e8c5ac74f887d53256b3dcaf9b4aaa
 
 PKG_SOURCE_URL:=http://www.musl-libc.org/releases
 PKG_SOURCE:=$(PKG_NAME)-$(PKG_VERSION).tar.gz
diff --git a/toolchain/musl/patches/001-git-2015-03-28.patch b/toolchain/musl/patches/001-git-2015-03-28.patch
deleted file mode 100644
index 85f044183caf38229af7fb3ff139564fa5afcb2e..0000000000000000000000000000000000000000
--- a/toolchain/musl/patches/001-git-2015-03-28.patch
+++ /dev/null
@@ -1,93 +0,0 @@
-diff --git a/arch/aarch64/bits/alltypes.h.in b/arch/aarch64/bits/alltypes.h.in
-index 99f1654..d56abda 100644
---- a/arch/aarch64/bits/alltypes.h.in
-+++ b/arch/aarch64/bits/alltypes.h.in
-@@ -16,6 +16,8 @@ TYPEDEF unsigned int nlink_t;
- TYPEDEF float float_t;
- TYPEDEF double double_t;
- 
-+TYPEDEF struct { long long __ll; long double __ld; } max_align_t;
-+
- TYPEDEF long time_t;
- TYPEDEF long suseconds_t;
- 
-diff --git a/include/float.h b/include/float.h
-index c6429d3..713aadb 100644
---- a/include/float.h
-+++ b/include/float.h
-@@ -1,6 +1,10 @@
- #ifndef _FLOAT_H
- #define _FLOAT_H
- 
-+#ifdef __cplusplus
-+extern "C" {
-+#endif
-+
- int __flt_rounds(void);
- #define FLT_ROUNDS (__flt_rounds())
- 
-@@ -41,4 +45,8 @@ int __flt_rounds(void);
- 
- #include <bits/float.h>
- 
-+#ifdef __cplusplus
-+}
-+#endif
-+
- #endif
-diff --git a/src/network/inet_pton.c b/src/network/inet_pton.c
-index 4496b47..d36c368 100644
---- a/src/network/inet_pton.c
-+++ b/src/network/inet_pton.c
-@@ -39,14 +39,15 @@ int inet_pton(int af, const char *restrict s, void *restrict a0)
- 	for (i=0; ; i++) {
- 		if (s[0]==':' && brk<0) {
- 			brk=i;
--			ip[i]=0;
-+			ip[i&7]=0;
- 			if (!*++s) break;
-+			if (i==7) return 0;
- 			continue;
- 		}
- 		for (v=j=0; j<4 && (d=hexval(s[j]))>=0; j++)
- 			v=16*v+d;
- 		if (j==0) return 0;
--		ip[i] = v;
-+		ip[i&7] = v;
- 		if (!s[j] && (brk>=0 || i==7)) break;
- 		if (i==7) return 0;
- 		if (s[j]!=':') {
-diff --git a/src/regex/regcomp.c b/src/regex/regcomp.c
-index 4cdaa1e..978dd87 100644
---- a/src/regex/regcomp.c
-+++ b/src/regex/regcomp.c
-@@ -839,7 +839,7 @@ static reg_errcode_t parse_atom(tre_parse_ctx_t *ctx, const char *s)
- 			s--;
- 			break;
- 		default:
--			if (isdigit(*s)) {
-+			if (!ere && (unsigned)*s-'1' < 9) {
- 				/* back reference */
- 				int val = *s - '0';
- 				node = tre_ast_new_literal(ctx->mem, BACKREF, val, ctx->position);
-@@ -847,7 +847,7 @@ static reg_errcode_t parse_atom(tre_parse_ctx_t *ctx, const char *s)
- 			} else {
- 				/* extension: accept unknown escaped char
- 				   as a literal */
--				node = tre_ast_new_literal(ctx->mem, *s, *s, ctx->position);
-+				goto parse_literal;
- 			}
- 			ctx->position++;
- 		}
-@@ -1700,6 +1700,11 @@ tre_copy_ast(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *ast,
- 		*result = tre_ast_new_literal(mem, min, max, pos);
- 		if (*result == NULL)
- 		  status = REG_ESPACE;
-+		else {
-+		  tre_literal_t *p = (*result)->obj;
-+		  p->class = lit->class;
-+		  p->neg_classes = lit->neg_classes;
-+		}
- 
- 		if (pos > *max_pos)
- 		  *max_pos = pos;
diff --git a/toolchain/musl/patches/001-git-2015-06-04.patch b/toolchain/musl/patches/001-git-2015-06-04.patch
new file mode 100644
index 0000000000000000000000000000000000000000..0baea676703649612f15c8b671a7143389e1024b
--- /dev/null
+++ b/toolchain/musl/patches/001-git-2015-06-04.patch
@@ -0,0 +1,2015 @@
+commit b6a6cd703ffefa6352249fb01f4da28d85d17306
+Author: Rich Felker <dalias@aerifal.cx>
+Date:   Thu Jun 4 11:45:17 2015 -0400
+
+    fix dynamic linker regression processing R_*_NONE type relocations
+    
+    commit f3ddd173806fd5c60b3f034528ca24542aecc5b9 inadvertently removed
+    the early check for "none" type relocations, causing the address
+    dso->base+0 to be dereferenced to obtain an addend. shared libraries
+    (including libc.so) and PIE executables were unaffected, since their
+    base addresses are the actual address of their mappings and are
+    readable. non-PIE main executables, however, have a base address of 0
+    because their load addresses are absolute and not offset at load time.
+    
+    in practice none-type relocations do not arise with toolchains that
+    are in use except on mips, and on mips it's moderately rare for a
+    non-PIE executable to have a relocation table, since the mips-specific
+    got processing serves in its place for most purposes.
+
+commit 585ba14df4799d50ec9682ce75825d2eafec2a6a
+Author: Rich Felker <dalias@aerifal.cx>
+Date:   Wed Jun 3 02:00:44 2015 -0400
+
+    add additional Makefile dependency rules for rcrt1.o PIE start file
+
+commit 2b4fcfdacf93c3dfd6ac15e31790a9e154374679
+Author: Rich Felker <dalias@aerifal.cx>
+Date:   Thu May 28 23:08:12 2015 -0400
+
+    fix failure of ungetc and ungetwc to work on files in eof status
+    
+    these functions were written to handle clearing eof status, but failed
+    to account for the __toread function's handling of eof. with this
+    patch applied, __toread still returns EOF when the file is in eof
+    status, so that read operations will fail, but it also sets up valid
+    buffer pointers for read mode, which are set to the end of the buffer
+    rather than the beginning in order to make the whole buffer available
+    to ungetc/ungetwc.
+    
+    minor changes to __uflow were needed since it's now possible to have
+    non-zero buffer pointers while in eof status. as made, these changes
+    remove a 'fast path' bypassing the function call to __toread, which
+    could be reintroduced with slightly different logic, but since
+    ordinary files have a syscall in f->read, optimizing the code path
+    does not seem worthwhile.
+    
+    the __stdio_read function is also updated not to zero the read buffer
+    pointers on eof/error. while not necessary for correctness, this
+    change avoids the overhead of calling __toread in ungetc after
+    reaching eof, and it also reduces code size and increases consistency
+    with the fmemopen read operation which does not zero the pointers.
+
+commit b6e7c664677ab7c77f183b8c41105f2be519800c
+Author: Rich Felker <dalias@aerifal.cx>
+Date:   Thu May 28 15:37:23 2015 -0400
+
+    add missing legacy LFS64 macros in sys/resource.h
+    
+    based on patch by Felix Janda, with RLIM64_SAVED_CUR and
+    RLIM64_SAVED_MAX added for completeness.
+
+commit fc431d3f76bb9bde34a89e4a3e4d0c27de959855
+Author: Shiz <hi@shiz.me>
+Date:   Thu May 28 05:52:22 2015 +0200
+
+    configure: work around compilers that merely warn for unknown options
+    
+    some compilers (such as clang) accept unknown options without error,
+    but then print warnings on each invocation, cluttering the build
+    output and burying meaningful warnings. this patch makes configure's
+    tryflag and tryldflag functions use additional options to turn the
+    unknown-option warnings into errors, if available, but only at check
+    time. these options are not output in config.mak to avoid the risk of
+    spurious build breakage; if they work, they will have already done
+    their job at configure time.
+
+commit aeeac9ca5490d7d90fe061ab72da446c01ddf746
+Author: Rich Felker <dalias@aerifal.cx>
+Date:   Wed May 27 15:54:47 2015 -0400
+
+    implement fail-safe static locales for newlocale
+    
+    this frees applications which need to make temporary use of the C
+    locale (via uselocale) from the possibility that newlocale might fail.
+    
+    the C.UTF-8 locale is also provided as a static locale. presently they
+    behave the same, but this may change in the future.
+
+commit 11858d31aa020df3e7e7dedf49f9870ce12f31cc
+Author: Rich Felker <dalias@aerifal.cx>
+Date:   Wed May 27 03:32:46 2015 -0400
+
+    rename internal locale file handling locale maps
+    
+    since the __setlocalecat function was removed, the filename
+    __setlocalecat.c no longer made sense.
+
+commit 61a3364d246e72b903da8b76c2e27a225a51351e
+Author: Rich Felker <dalias@aerifal.cx>
+Date:   Wed May 27 03:22:52 2015 -0400
+
+    overhaul locale internals to treat categories roughly uniformly
+    
+    previously, LC_MESSAGES was treated specially as the only category
+    which could be set to a locale name without a definition file, in
+    order to facilitate gettext message translations when no libc locale
+    was available. LC_NUMERIC was completely un-settable, and LC_CTYPE
+    stored a flag intended to be used for a possible future byte-based C
+    locale, instead of storing a __locale_map pointer like the other
+    categories use.
+    
+    this patch changes all categories to be represented by pointers to
+    __locale_map structures, and allows locale names without definition
+    files to be treated as valid locales with trivial definition when used
+    in any category. outwardly visible functional changes should be minor,
+    limited mainly to the strings read back from setlocale and the way
+    gettext handles translations in categories other than LC_MESSAGES.
+    
+    various internal refactoring has also been performed, and improvements
+    in const correctness have been made.
+
+commit 63c188ec42e76ff768e81f6b65b11c68fc43351e
+Author: Rich Felker <dalias@aerifal.cx>
+Date:   Wed May 27 00:22:43 2015 -0400
+
+    replace atomics with locks in locale-setting code
+    
+    this is part of a general program of removing direct use of atomics
+    where they are not necessary to meet correctness or performance needs,
+    but in this case it's also an optimization. only the global locale
+    needs synchronization; allocated locales referenced with locale_t
+    handles are immutable during their lifetimes, and using atomics to
+    initialize them increases their cost of setup.
+
+commit dc031ee0b1ba11baa00cd7f0769e461a5f396c71
+Author: Rich Felker <dalias@aerifal.cx>
+Date:   Tue May 26 03:37:41 2015 -0400
+
+    add rcrt1 start file for fully static-linked PIE
+    
+    static-linked PIE files need startup code to relocate themselves, much
+    like the dynamic linker does. rcrt1.c reuses the code in dlstart.c,
+    stage 1 of the dynamic linker, which in turn reuses crt_arch.h, to
+    achieve static PIE with no new code. only relative relocations are
+    supported.
+    
+    existing toolchains that don't yet support static PIE directly can be
+    repurposed by passing "-shared -Wl,-Bstatic -Wl,-Bsymbolic" instead of
+    "-static -pie" and substituting rcrt1.o in place of crt1.o.
+    
+    all libraries being linked must be built as PIC/PIE; TEXTRELs are not
+    supported at this time.
+
+commit ed0c8249825161036356a3616e8c5247c15d0927
+Author: Rich Felker <dalias@aerifal.cx>
+Date:   Tue May 26 02:31:04 2015 -0400
+
+    fix incorrect application of visibility to Scrt1.o
+    
+    commit de2b67f8d41e08caa56bf6540277f6561edb647f attempted to avoid
+    having vis.h affect crt files, but the Makefile variable used,
+    CRT_LIBS, refers to the final output copies in the lib directory, not
+    the copies in the crt build directory, and thus the -DCRT was not
+    applied.
+    
+    while unlikely to be noticed, this regression probably broke
+    production of PIE executables whose main functions are not in the
+    executable but rather a shared library.
+
+commit 9bbddf730f7837cf87f4c789fbb41a312e295d6c
+Author: Rich Felker <dalias@aerifal.cx>
+Date:   Mon May 25 23:33:59 2015 -0400
+
+    reprocess all libc/ldso symbolic relocations in dynamic linking stage 3
+    
+    commit f3ddd173806fd5c60b3f034528ca24542aecc5b9 introduced early
+    relocations and subsequent reprocessing as part of the dynamic linker
+    bootstrap overhaul, to allow use of arbitrary libc functions before
+    the main application and libraries are loaded, but only reprocessed
+    GOT/PLT relocation types.
+    
+    commit c093e2e8201524db0d638920e76bcb6b1d925f3a added reprocessing of
+    non-GOT/PLT relocations to fix an actual regression that was observed
+    on powerpc, but only for RELA format tables with out-of-line addends.
+    REL table (inline addends at the relocation address) reprocessing is
+    trickier because the first relocation pass clobbers the addends.
+    
+    this patch extends symbolic relocation reprocessing for libc/ldso to
+    support all relocation types, whether REL or RELA format tables are
+    used. it is believed not to alter behavior on any existing archs for
+    the current dynamic linker and libc code. the motivations for this
+    change are consistency and future-proofing. it ensures that behavior
+    does not differ depending on whether REL or RELA tables are used,
+    which could lead to undetected arch-specific bugs. it also ensures
+    that, if in the future code depending on additional relocation types
+    is added to libc.so, either at the source level or as part of the
+    compiler runtime that gets pulled in (for example, soft-float with TLS
+    for fenv), the new code will work properly.
+    
+    the implementation concept is simple: stage 2 of the dynamic linker
+    counts the number of symbolic relocations in the libc/ldso REL table
+    and allocates a VLA to save their addends into; stage 3 then uses the
+    saved addends in place of the inline ones which were clobbered. for
+    stack safety, a hard limit (currently 4k) is imposed on the number of
+    such addends; this should be a couple orders of magnitude larger than
+    the actual need. this number is not a runtime variable that could
+    break fail-safety; it is constant for a given libc.so build.
+
+commit 768b82c6de24e480267c4c251c440edfc71800e3
+Author: Rich Felker <dalias@aerifal.cx>
+Date:   Mon May 25 19:15:17 2015 -0400
+
+    move call to dynamic linker stage-3 into stage-2 function
+    
+    this move eliminates a duplicate "by-hand" symbol lookup loop from the
+    stage-1 code and replaces it with a call to find_sym, which can be
+    used once we're in stage 2. it reduces the size of the stage 1 code,
+    which is helpful because stage 1 will become the crt start file for
+    static-PIE executables, and it will allow stage 3 to access stage 2's
+    automatic storage, which will be important in an upcoming commit.
+
+commit 967bcbf67c3ffac587de4d79abc1e5e072d83e3e
+Author: Rich Felker <dalias@aerifal.cx>
+Date:   Mon May 25 16:02:49 2015 -0400
+
+    mark mips crt code as code
+    
+    otherwise disassemblers treat it as data.
+
+commit 7b75c4877ddf22f219f944c61d939df1dee4f6d3
+Author: Rich Felker <dalias@aerifal.cx>
+Date:   Mon May 25 15:56:36 2015 -0400
+
+    mark mips cancellable syscall code as code
+    
+    otherwise disassemblers treat it as data.
+
+commit 0e0e49421f08cfd670975ecd3604f7f9015e1833
+Author: Rich Felker <dalias@aerifal.cx>
+Date:   Mon May 25 00:32:37 2015 -0400
+
+    simplify/shrink relocation processing in dynamic linker stage 1
+    
+    the outer-loop approach made sense when we were also processing
+    DT_JMPREL, which might be in REL or RELA form, to avoid major code
+    duplication. commit 09db855b35709aa627d7055c57a98e1e471920ab removed
+    processing of DT_JMPREL, and in the remaining two tables, the format
+    (REL or RELA) is known by the name of the table. simply writing two
+    versions of the loop results in smaller and simpler code.
+
+commit 09db855b35709aa627d7055c57a98e1e471920ab
+Author: Rich Felker <dalias@aerifal.cx>
+Date:   Mon May 25 00:25:56 2015 -0400
+
+    remove processing of DT_JMPREL from dynamic linker stage 1 bootstrap
+    
+    the DT_JMPREL relocation table necessarily consists entirely of
+    JMP_SLOT (REL_PLT in internal nomenclature) relocations, which are
+    symbolic; they cannot be resolved in stage 1, so there is no point in
+    processing them.
+
+commit 9f26ebded188ed78c3571a4ca1477dd6351bc647
+Author: Rich Felker <dalias@aerifal.cx>
+Date:   Sun May 24 23:03:47 2015 -0400
+
+    fix stack alignment code in mips crt_arch.h
+    
+    the instruction used to align the stack, "and $sp, $sp, -8", does not
+    actually exist; it's expanded to 2 instructions using the 'at'
+    (assembler temporary) register, and thus cannot be used in a branch
+    delay slot. since alignment mod 16 commutes with subtracting 8, simply
+    swapping these two operations fixes the problem.
+    
+    crt1.o was not affected because it's still being generated from a
+    dedicated asm source file. dlstart.lo was not affected because the
+    stack pointer it receives is already aligned by the kernel. but
+    Scrt1.o was affected in cases where the dynamic linker gave it a
+    misaligned stack pointer.
+
+commit 63caf1d207d143fe405bbe0cda9aac8deca1171a
+Author: Rich Felker <dalias@aerifal.cx>
+Date:   Fri May 22 01:50:05 2015 -0400
+
+    add .text section directive to all crt_arch.h files missing it
+    
+    i386 and x86_64 versions already had the .text directive; other archs
+    did not. normally, top-level (file scope) __asm__ starts in the .text
+    section anyway, but problems were reported with some versions of
+    clang, and it seems preferable to set it explicitly anyway, at least
+    for the sake of consistency between archs.
+
+commit 3b0e83264d156f9e496ab32badd89e4447b807aa
+Author: Rich Felker <dalias@aerifal.cx>
+Date:   Thu May 21 17:06:28 2015 -0400
+
+    remove outdated and misleading comment in iconv.c
+    
+    the comment claimed that EUC/GBK/Big5 are not implemented, which has
+    been incorrect since commit 19b4a0a20efc6b9df98b6a43536ecdd628ba4643.
+
+commit 39b8ce66f2ed9c17427ec3a48be9bda29b93b9d7
+Author: Rich Felker <dalias@aerifal.cx>
+Date:   Thu May 21 17:01:23 2015 -0400
+
+    in iconv_open, accept "CHAR" and "" as aliases for "UTF-8"
+    
+    while not a requirement, it's common convention in other iconv
+    implementations to accept "CHAR" as an alias for nl_langinfo(CODESET),
+    meaning the encoding used for char[] strings in the current locale,
+    and also "" as an alternate form. supporting this is not costly and
+    improves compatibility.
+
+commit c648cefb27984db60474ec1747cbfde83c2856d0
+Author: Rich Felker <dalias@aerifal.cx>
+Date:   Wed May 20 00:17:35 2015 -0400
+
+    fix inconsistency in a_and and a_or argument types on x86[_64]
+    
+    conceptually, and on other archs, these functions take a pointer to
+    int, but in the i386, x86_64, and x32 versions of atomic.h, they took
+    a pointer to void instead.
+
+commit 390f93ef69153bf2087fcf3baa1776ad9a6765ab
+Author: Bobby Bingham <koorogi@koorogi.info>
+Date:   Sun May 17 13:46:38 2015 -0500
+
+    inline llsc atomics when building for sh4a
+    
+    If we're building for sh4a, the compiler is already free to use
+    instructions only available on sh4a, so we can do the same and inline the
+    llsc atomics. If we're building for an older processor, we still do the
+    same runtime atomics selection as before.
+
+commit c093e2e8201524db0d638920e76bcb6b1d925f3a
+Author: Rich Felker <dalias@aerifal.cx>
+Date:   Mon May 18 16:51:54 2015 -0400
+
+    reprocess libc/ldso RELA relocations in stage 3 of dynamic linking
+    
+    this fixes a regression on powerpc that was introduced in commit
+    f3ddd173806fd5c60b3f034528ca24542aecc5b9. global data accesses on
+    powerpc seem to be using a translation-unit-local GOT filled via
+    R_PPC_ADDR32 relocations rather than R_PPC_GLOB_DAT. being a non-GOT
+    relocation type, these were not reprocessed after adding the main
+    application and its libraries to the chain, causing libc code not to
+    see copy relocations in the main program, and therefore to use the
+    pre-copy-relocation addresses for global data objects (like environ).
+    
+    the motivation for the dynamic linker only reprocessing GOT/PLT
+    relocation types in stage 3 is that these types always have a zero
+    addend, making them safe to process again even if the storage for the
+    addend has been clobbered. other relocation types which can be used
+    for address constants in initialized data objects may have non-zero
+    addends which will be clobbered during the first pass of relocation
+    processing if they're stored inline (REL form) rather than out-of-line
+    (RELA form).
+    
+    powerpc generally uses only RELA, so this patch is sufficient to fix
+    the regression in practice, but is not fully general, and would not
+    suffice if an alternate toolchain generated REL for powerpc.
+
+commit 43e9f652bf4b2195b04fc14c93db591b30a7b790
+Author: Rich Felker <dalias@aerifal.cx>
+Date:   Mon May 18 12:11:25 2015 -0400
+
+    fix null pointer dereference in dcngettext under specific conditions
+    
+    if setlocale has not been called, the current locale's messages_name
+    may be a null pointer. the code path where it's assumed to be non-null
+    was only reachable if bindtextdomain had already been called, which is
+    normally not done in programs which do not call setlocale, so the
+    omitted check went unnoticed.
+    
+    patch from Void Linux, with description rewritten.
+
+commit 68630b55c0c7219fe9df70dc28ffbf9efc8021d8
+Author: Rich Felker <dalias@aerifal.cx>
+Date:   Sat May 16 01:53:54 2015 -0400
+
+    eliminate costly tricks to avoid TLS access for current locale state
+    
+    the code being removed used atomics to track whether any threads might
+    be using a locale other than the current global locale, and whether
+    any threads might have abstract 8-bit (non-UTF-8) LC_CTYPE active, a
+    feature which was never committed (still pending). the motivations
+    were to support early execution prior to setup of the thread pointer,
+    to partially support systems (ancient kernels) where thread pointer
+    setup is not possible, and to avoid high performance cost on archs
+    where accessing the thread pointer may be very slow.
+    
+    since commit 19a1fe670acb3ab9ead0fe31859ca7d4fe40dd54, the thread
+    pointer is always available, so these hacks are no longer needed.
+    removing them greatly simplifies the affected code.
+
+commit 707d7c30f3379441de9b320536ddfd354f4c2143
+Author: Rich Felker <dalias@aerifal.cx>
+Date:   Sat May 16 01:15:40 2015 -0400
+
+    in i386 __set_thread_area, don't assume %gs register is initially zero
+    
+    commit f630df09b1fd954eda16e2f779da0b5ecc9d80d3 added logic to handle
+    the case where __set_thread_area is called more than once by reusing
+    the GDT slot already in the %gs register, and only setting up a new
+    GDT slot when %gs is zero. this created a hidden assumption that %gs
+    is zero when a new process image starts, which is true in practice on
+    Linux, but does not seem to be documented ABI, and fails to hold under
+    qemu app-level emulation.
+    
+    while it would in theory be possible to zero %gs in the entry point
+    code, this code is shared between static and dynamic binaries, and
+    dynamic binaries must not clobber the value of %gs already set up by
+    the dynamic linker.
+    
+    the alternative solution implemented in this commit simply uses global
+    data to store the GDT index that's selected. __set_thread_area should
+    only be called in the initial thread anyway (subsequent threads get
+    their thread pointer set up by __clone), but even if it were called by
+    another thread, it would simply read and write back the same GDT index
+    that was already assigned to the initial thread, and thus (in the x86
+    memory model) there is no data race.
+
+commit c0f10cf06725bd0de37f3ced7954a653bf9f1049
+Author: Rich Felker <dalias@aerifal.cx>
+Date:   Thu May 14 18:51:27 2015 -0400
+
+    make arm reloc.h CRTJMP macro compatible with thumb
+    
+    compilers targeting armv7 may be configured to produce thumb2 code
+    instead of arm code by default, and in the future we may wish to
+    support targets where only the thumb instruction set is available.
+    
+    the instructions this patch omits in thumb mode are needed only for
+    non-thumb versions of armv4 or earlier, which are not supported by any
+    current compilers/toolchains and thus rather pointless to have. at
+    some point these compatibility return sequences may be removed from
+    all asm source files, and in that case it would make sense to remove
+    them here too and remove the ifdef.
+
+commit 83340c7a580e91b22f58321b7cf6d976af61084c
+Author: Rich Felker <dalias@aerifal.cx>
+Date:   Thu May 14 18:26:16 2015 -0400
+
+    make arm crt_arch.h compatible with thumb code generation
+    
+    compilers targeting armv7 may be configured to produce thumb2 code
+    instead of arm code by default, and in the future we may wish to
+    support targets where only the thumb instruction set is available.
+    
+    the changes made here avoid operating directly on the sp register,
+    which is not possible in thumb code, and address an issue with the way
+    the address of _DYNAMIC is computed.
+    
+    previously, the relative address of _DYNAMIC was stored with an
+    additional offset of -8 versus the pc-relative add instruction, since
+    on arm the pc register evaluates to ".+8". in thumb code, it instead
+    evaluates to ".+4". both are two (normal-size) instructions beyond "."
+    in the current execution mode, so the numbered label 2 used in the
+    relative address expression is simply moved two instructions ahead to
+    be compatible with both instruction sets.
+
+--- a/Makefile
++++ b/Makefile
+@@ -44,7 +44,7 @@ ALL_INCLUDES = $(sort $(wildcard include
+ 
+ EMPTY_LIB_NAMES = m rt pthread crypt util xnet resolv dl
+ EMPTY_LIBS = $(EMPTY_LIB_NAMES:%=lib/lib%.a)
+-CRT_LIBS = lib/crt1.o lib/Scrt1.o lib/crti.o lib/crtn.o
++CRT_LIBS = lib/crt1.o lib/Scrt1.o lib/rcrt1.o lib/crti.o lib/crtn.o
+ STATIC_LIBS = lib/libc.a
+ SHARED_LIBS = lib/libc.so
+ TOOL_LIBS = lib/musl-gcc.specs
+@@ -85,11 +85,13 @@ src/internal/version.h: $(wildcard VERSI
+ 
+ src/internal/version.lo: src/internal/version.h
+ 
+-src/ldso/dlstart.lo src/ldso/dynlink.lo: src/internal/dynlink.h arch/$(ARCH)/reloc.h
++crt/rcrt1.o src/ldso/dlstart.lo src/ldso/dynlink.lo: src/internal/dynlink.h arch/$(ARCH)/reloc.h
+ 
+-crt/crt1.o crt/Scrt1.o src/ldso/dlstart.lo: $(wildcard arch/$(ARCH)/crt_arch.h)
++crt/crt1.o crt/Scrt1.o crt/rcrt1.o src/ldso/dlstart.lo: $(wildcard arch/$(ARCH)/crt_arch.h)
+ 
+-crt/Scrt1.o: CFLAGS += -fPIC
++crt/rcrt1.o: src/ldso/dlstart.c
++
++crt/Scrt1.o crt/rcrt1.o: CFLAGS += -fPIC
+ 
+ OPTIMIZE_SRCS = $(wildcard $(OPTIMIZE_GLOBS:%=src/%))
+ $(OPTIMIZE_SRCS:%.c=%.o) $(OPTIMIZE_SRCS:%.c=%.lo): CFLAGS += -O3
+@@ -104,7 +106,7 @@ NOSSP_SRCS = $(wildcard crt/*.c) \
+ 	src/ldso/dlstart.c src/ldso/dynlink.c
+ $(NOSSP_SRCS:%.c=%.o) $(NOSSP_SRCS:%.c=%.lo): CFLAGS += $(CFLAGS_NOSSP)
+ 
+-$(CRT_LIBS): CFLAGS += -DCRT
++$(CRT_LIBS:lib/%=crt/%): CFLAGS += -DCRT
+ 
+ # This incantation ensures that changes to any subarch asm files will
+ # force the corresponding object file to be rebuilt, even if the implicit
+--- a/arch/aarch64/crt_arch.h
++++ b/arch/aarch64/crt_arch.h
+@@ -1,4 +1,5 @@
+ __asm__(
++".text \n"
+ ".global " START "\n"
+ ".type " START ",%function\n"
+ START ":\n"
+--- a/arch/arm/crt_arch.h
++++ b/arch/arm/crt_arch.h
+@@ -1,15 +1,18 @@
+ __asm__(
++".text \n"
+ ".global " START " \n"
+ ".type " START ",%function \n"
+ START ": \n"
+ "	mov fp, #0 \n"
+ "	mov lr, #0 \n"
+-"	mov a1, sp \n"
+ "	ldr a2, 1f \n"
+-"2:	add a2, pc, a2 \n"
+-"	and sp, sp, #-16 \n"
++"	add a2, pc, a2 \n"
++"	mov a1, sp \n"
++"2:	and ip, a1, #-16 \n"
++"	mov sp, ip \n"
+ "	bl " START "_c \n"
+ ".weak _DYNAMIC \n"
+ ".hidden _DYNAMIC \n"
+-"1:	.word _DYNAMIC-2b-8 \n"
++".align 2 \n"
++"1:	.word _DYNAMIC-2b \n"
+ );
+--- a/arch/arm/reloc.h
++++ b/arch/arm/reloc.h
+@@ -28,5 +28,10 @@
+ #define REL_TPOFF       R_ARM_TLS_TPOFF32
+ //#define REL_TLSDESC     R_ARM_TLS_DESC
+ 
++#ifdef __thumb__
++#define CRTJMP(pc,sp) __asm__ __volatile__( \
++	"mov sp,%1 ; bx %0" : : "r"(pc), "r"(sp) : "memory" )
++#else
+ #define CRTJMP(pc,sp) __asm__ __volatile__( \
+ 	"mov sp,%1 ; tst %0,#1 ; moveq pc,%0 ; bx %0" : : "r"(pc), "r"(sp) : "memory" )
++#endif
+--- a/arch/i386/atomic.h
++++ b/arch/i386/atomic.h
+@@ -50,16 +50,16 @@ static inline int a_cas(volatile int *p,
+ 	return t;
+ }
+ 
+-static inline void a_or(volatile void *p, int v)
++static inline void a_or(volatile int *p, int v)
+ {
+ 	__asm__( "lock ; orl %1, %0"
+-		: "=m"(*(int *)p) : "r"(v) : "memory" );
++		: "=m"(*p) : "r"(v) : "memory" );
+ }
+ 
+-static inline void a_and(volatile void *p, int v)
++static inline void a_and(volatile int *p, int v)
+ {
+ 	__asm__( "lock ; andl %1, %0"
+-		: "=m"(*(int *)p) : "r"(v) : "memory" );
++		: "=m"(*p) : "r"(v) : "memory" );
+ }
+ 
+ static inline int a_swap(volatile int *x, int v)
+--- a/arch/microblaze/crt_arch.h
++++ b/arch/microblaze/crt_arch.h
+@@ -1,4 +1,5 @@
+ __asm__(
++".text \n"
+ ".global " START " \n"
+ ".align  2 \n"
+ START ": \n"
+--- a/arch/mips/crt_arch.h
++++ b/arch/mips/crt_arch.h
+@@ -1,6 +1,7 @@
+ __asm__(
+ ".set push\n"
+ ".set noreorder\n"
++".text \n"
+ ".global _" START "\n"
+ ".global " START "\n"
+ ".type   _" START ", @function\n"
+@@ -21,8 +22,8 @@ __asm__(
+ "	addu $5, $5, $gp \n"
+ "	lw $25, 4($ra) \n"
+ "	addu $25, $25, $gp \n"
+-"	subu $sp, $sp, 16 \n"
++"	and $sp, $sp, -8 \n"
+ "	jalr $25 \n"
+-"	 and $sp, $sp, -8 \n"
++"	 subu $sp, $sp, 16 \n"
+ ".set pop \n"
+ );
+--- a/arch/or1k/crt_arch.h
++++ b/arch/or1k/crt_arch.h
+@@ -1,4 +1,5 @@
+ __asm__(
++".text \n"
+ ".global " START " \n"
+ ".align  4 \n"
+ START ": \n"
+--- a/arch/powerpc/crt_arch.h
++++ b/arch/powerpc/crt_arch.h
+@@ -1,4 +1,5 @@
+ __asm__(
++".text \n"
+ ".global " START " \n"
+ ".type   " START ", %function \n"
+ START ": \n"
+--- a/arch/sh/atomic.h
++++ b/arch/sh/atomic.h
+@@ -22,6 +22,88 @@ static inline int a_ctz_64(uint64_t x)
+ 	return a_ctz_l(y);
+ }
+ 
++#define LLSC_CLOBBERS "r0", "t", "memory"
++#define LLSC_START(mem) "synco\n"  \
++	"0:	movli.l @" mem ", r0\n"
++#define LLSC_END(mem)              \
++	"1:	movco.l r0, @" mem "\n"    \
++	"	bf 0b\n"                   \
++	"	synco\n"
++
++static inline int __sh_cas_llsc(volatile int *p, int t, int s)
++{
++	int old;
++	__asm__ __volatile__(
++		LLSC_START("%1")
++		"	mov r0, %0\n"
++		"	cmp/eq %0, %2\n"
++		"	bf 1f\n"
++		"	mov %3, r0\n"
++		LLSC_END("%1")
++		: "=&r"(old) : "r"(p), "r"(t), "r"(s) : LLSC_CLOBBERS);
++	return old;
++}
++
++static inline int __sh_swap_llsc(volatile int *x, int v)
++{
++	int old;
++	__asm__ __volatile__(
++		LLSC_START("%1")
++		"	mov r0, %0\n"
++		"	mov %2, r0\n"
++		LLSC_END("%1")
++		: "=&r"(old) : "r"(x), "r"(v) : LLSC_CLOBBERS);
++	return old;
++}
++
++static inline int __sh_fetch_add_llsc(volatile int *x, int v)
++{
++	int old;
++	__asm__ __volatile__(
++		LLSC_START("%1")
++		"	mov r0, %0\n"
++		"	add %2, r0\n"
++		LLSC_END("%1")
++		: "=&r"(old) : "r"(x), "r"(v) : LLSC_CLOBBERS);
++	return old;
++}
++
++static inline void __sh_store_llsc(volatile int *p, int x)
++{
++	__asm__ __volatile__(
++		"	synco\n"
++		"	mov.l %1, @%0\n"
++		"	synco\n"
++		: : "r"(p), "r"(x) : "memory");
++}
++
++static inline void __sh_and_llsc(volatile int *x, int v)
++{
++	__asm__ __volatile__(
++		LLSC_START("%0")
++		"	and %1, r0\n"
++		LLSC_END("%0")
++		: : "r"(x), "r"(v) : LLSC_CLOBBERS);
++}
++
++static inline void __sh_or_llsc(volatile int *x, int v)
++{
++	__asm__ __volatile__(
++		LLSC_START("%0")
++		"	or %1, r0\n"
++		LLSC_END("%0")
++		: : "r"(x), "r"(v) : LLSC_CLOBBERS);
++}
++
++#ifdef __SH4A__
++#define a_cas(p,t,s)     __sh_cas_llsc(p,t,s)
++#define a_swap(x,v)      __sh_swap_llsc(x,v)
++#define a_fetch_add(x,v) __sh_fetch_add_llsc(x, v)
++#define a_store(x,v)     __sh_store_llsc(x, v)
++#define a_and(x,v)       __sh_and_llsc(x, v)
++#define a_or(x,v)        __sh_or_llsc(x, v)
++#else
++
+ int  __sh_cas(volatile int *, int, int);
+ int  __sh_swap(volatile int *, int);
+ int  __sh_fetch_add(volatile int *, int);
+@@ -35,6 +117,7 @@ void __sh_or(volatile int *, int);
+ #define a_store(x,v)     __sh_store(x, v)
+ #define a_and(x,v)       __sh_and(x, v)
+ #define a_or(x,v)        __sh_or(x, v)
++#endif
+ 
+ static inline void *a_cas_p(volatile void *p, void *t, void *s)
+ {
+--- a/arch/sh/crt_arch.h
++++ b/arch/sh/crt_arch.h
+@@ -1,4 +1,5 @@
+ __asm__(
++".text \n"
+ ".global " START " \n"
+ START ": \n"
+ "	mova 1f, r0 \n"
+--- a/arch/sh/src/atomic.c
++++ b/arch/sh/src/atomic.c
+@@ -1,12 +1,7 @@
+-#include "libc.h"
++#ifndef __SH4A__
+ 
+-#define LLSC_CLOBBERS   "r0", "t", "memory"
+-#define LLSC_START(mem) "synco\n"  \
+-	"0:	movli.l @" mem ", r0\n"
+-#define LLSC_END(mem)              \
+-	"1:	movco.l r0, @" mem "\n"    \
+-	"	bf 0b\n"                   \
+-	"	synco\n"
++#include "atomic.h"
++#include "libc.h"
+ 
+ /* gusa is a hack in the kernel which lets you create a sequence of instructions
+  * which will be restarted if the process is preempted in the middle of the
+@@ -34,114 +29,74 @@
+ 
+ int __sh_cas(volatile int *p, int t, int s)
+ {
++	if (__hwcap & CPU_HAS_LLSC) return __sh_cas_llsc(p, t, s);
++
+ 	int old;
+-	if (__hwcap & CPU_HAS_LLSC) {
+-		__asm__ __volatile__(
+-			LLSC_START("%1")
+-			"	mov r0, %0\n"
+-			"	cmp/eq %0, %2\n"
+-			"	bf 1f\n"
+-			"	mov %3, r0\n"
+-			LLSC_END("%1")
+-			: "=&r"(old) : "r"(p), "r"(t), "r"(s) : LLSC_CLOBBERS);
+-	} else {
+-		__asm__ __volatile__(
+-			GUSA_START_EVEN("%1", "%0")
+-			"	cmp/eq %0, %2\n"
+-			"	bf 1f\n"
+-			GUSA_END("%1", "%3")
+-			: "=&r"(old) : "r"(p), "r"(t), "r"(s) : GUSA_CLOBBERS, "t");
+-	}
++	__asm__ __volatile__(
++		GUSA_START_EVEN("%1", "%0")
++		"	cmp/eq %0, %2\n"
++		"	bf 1f\n"
++		GUSA_END("%1", "%3")
++		: "=&r"(old) : "r"(p), "r"(t), "r"(s) : GUSA_CLOBBERS, "t");
+ 	return old;
+ }
+ 
+ int __sh_swap(volatile int *x, int v)
+ {
++	if (__hwcap & CPU_HAS_LLSC) return __sh_swap_llsc(x, v);
++
+ 	int old;
+-	if (__hwcap & CPU_HAS_LLSC) {
+-		__asm__ __volatile__(
+-			LLSC_START("%1")
+-			"	mov r0, %0\n"
+-			"	mov %2, r0\n"
+-			LLSC_END("%1")
+-			: "=&r"(old) : "r"(x), "r"(v) : LLSC_CLOBBERS);
+-	} else {
+-		__asm__ __volatile__(
+-			GUSA_START_EVEN("%1", "%0")
+-			GUSA_END("%1", "%2")
+-			: "=&r"(old) : "r"(x), "r"(v) : GUSA_CLOBBERS);
+-	}
++	__asm__ __volatile__(
++		GUSA_START_EVEN("%1", "%0")
++		GUSA_END("%1", "%2")
++		: "=&r"(old) : "r"(x), "r"(v) : GUSA_CLOBBERS);
+ 	return old;
+ }
+ 
+ int __sh_fetch_add(volatile int *x, int v)
+ {
++	if (__hwcap & CPU_HAS_LLSC) return __sh_fetch_add_llsc(x, v);
++
+ 	int old, dummy;
+-	if (__hwcap & CPU_HAS_LLSC) {
+-		__asm__ __volatile__(
+-			LLSC_START("%1")
+-			"	mov r0, %0\n"
+-			"	add %2, r0\n"
+-			LLSC_END("%1")
+-			: "=&r"(old) : "r"(x), "r"(v) : LLSC_CLOBBERS);
+-	} else {
+-		__asm__ __volatile__(
+-			GUSA_START_EVEN("%2", "%0")
+-			"	mov %0, %1\n"
+-			"	add %3, %1\n"
+-			GUSA_END("%2", "%1")
+-			: "=&r"(old), "=&r"(dummy) : "r"(x), "r"(v) : GUSA_CLOBBERS);
+-	}
++	__asm__ __volatile__(
++		GUSA_START_EVEN("%2", "%0")
++		"	mov %0, %1\n"
++		"	add %3, %1\n"
++		GUSA_END("%2", "%1")
++		: "=&r"(old), "=&r"(dummy) : "r"(x), "r"(v) : GUSA_CLOBBERS);
+ 	return old;
+ }
+ 
+ void __sh_store(volatile int *p, int x)
+ {
+-	if (__hwcap & CPU_HAS_LLSC) {
+-		__asm__ __volatile__(
+-			"	synco\n"
+-			"	mov.l %1, @%0\n"
+-			"	synco\n"
+-			: : "r"(p), "r"(x) : "memory");
+-	} else {
+-		__asm__ __volatile__(
+-			"	mov.l %1, @%0\n"
+-			: : "r"(p), "r"(x) : "memory");
+-	}
++	if (__hwcap & CPU_HAS_LLSC) return __sh_store_llsc(p, x);
++	__asm__ __volatile__(
++		"	mov.l %1, @%0\n"
++		: : "r"(p), "r"(x) : "memory");
+ }
+ 
+ void __sh_and(volatile int *x, int v)
+ {
++	if (__hwcap & CPU_HAS_LLSC) return __sh_and_llsc(x, v);
++
+ 	int dummy;
+-	if (__hwcap & CPU_HAS_LLSC) {
+-		__asm__ __volatile__(
+-			LLSC_START("%0")
+-			"	and %1, r0\n"
+-			LLSC_END("%0")
+-			: : "r"(x), "r"(v) : LLSC_CLOBBERS);
+-	} else {
+-		__asm__ __volatile__(
+-			GUSA_START_ODD("%1", "%0")
+-			"	and %2, %0\n"
+-			GUSA_END("%1", "%0")
+-			: "=&r"(dummy) : "r"(x), "r"(v) : GUSA_CLOBBERS);
+-	}
++	__asm__ __volatile__(
++		GUSA_START_ODD("%1", "%0")
++		"	and %2, %0\n"
++		GUSA_END("%1", "%0")
++		: "=&r"(dummy) : "r"(x), "r"(v) : GUSA_CLOBBERS);
+ }
+ 
+ void __sh_or(volatile int *x, int v)
+ {
++	if (__hwcap & CPU_HAS_LLSC) return __sh_or_llsc(x, v);
++
+ 	int dummy;
+-	if (__hwcap & CPU_HAS_LLSC) {
+-		__asm__ __volatile__(
+-			LLSC_START("%0")
+-			"	or %1, r0\n"
+-			LLSC_END("%0")
+-			: : "r"(x), "r"(v) : LLSC_CLOBBERS);
+-	} else {
+-		__asm__ __volatile__(
+-			GUSA_START_ODD("%1", "%0")
+-			"	or %2, %0\n"
+-			GUSA_END("%1", "%0")
+-			: "=&r"(dummy) : "r"(x), "r"(v) : GUSA_CLOBBERS);
+-	}
++	__asm__ __volatile__(
++		GUSA_START_ODD("%1", "%0")
++		"	or %2, %0\n"
++		GUSA_END("%1", "%0")
++		: "=&r"(dummy) : "r"(x), "r"(v) : GUSA_CLOBBERS);
+ }
++
++#endif
+--- a/arch/x32/atomic.h
++++ b/arch/x32/atomic.h
+@@ -47,16 +47,16 @@ static inline int a_cas(volatile int *p,
+ 	return t;
+ }
+ 
+-static inline void a_or(volatile void *p, int v)
++static inline void a_or(volatile int *p, int v)
+ {
+ 	__asm__( "lock ; or %1, %0"
+-		: "=m"(*(int *)p) : "r"(v) : "memory" );
++		: "=m"(*p) : "r"(v) : "memory" );
+ }
+ 
+-static inline void a_and(volatile void *p, int v)
++static inline void a_and(volatile int *p, int v)
+ {
+ 	__asm__( "lock ; and %1, %0"
+-		: "=m"(*(int *)p) : "r"(v) : "memory" );
++		: "=m"(*p) : "r"(v) : "memory" );
+ }
+ 
+ static inline int a_swap(volatile int *x, int v)
+--- a/arch/x86_64/atomic.h
++++ b/arch/x86_64/atomic.h
+@@ -47,16 +47,16 @@ static inline int a_cas(volatile int *p,
+ 	return t;
+ }
+ 
+-static inline void a_or(volatile void *p, int v)
++static inline void a_or(volatile int *p, int v)
+ {
+ 	__asm__( "lock ; or %1, %0"
+-		: "=m"(*(int *)p) : "r"(v) : "memory" );
++		: "=m"(*p) : "r"(v) : "memory" );
+ }
+ 
+-static inline void a_and(volatile void *p, int v)
++static inline void a_and(volatile int *p, int v)
+ {
+ 	__asm__( "lock ; and %1, %0"
+-		: "=m"(*(int *)p) : "r"(v) : "memory" );
++		: "=m"(*p) : "r"(v) : "memory" );
+ }
+ 
+ static inline int a_swap(volatile int *x, int v)
+--- a/configure
++++ b/configure
+@@ -80,7 +80,7 @@ fi
+ tryflag () {
+ printf "checking whether compiler accepts %s... " "$2"
+ echo "typedef int x;" > "$tmpc"
+-if $CC $2 -c -o /dev/null "$tmpc" >/dev/null 2>&1 ; then
++if $CC $CFLAGS_TRY $2 -c -o /dev/null "$tmpc" >/dev/null 2>&1 ; then
+ printf "yes\n"
+ eval "$1=\"\${$1} \$2\""
+ eval "$1=\${$1# }"
+@@ -94,7 +94,7 @@ fi
+ tryldflag () {
+ printf "checking whether linker accepts %s... " "$2"
+ echo "typedef int x;" > "$tmpc"
+-if $CC -nostdlib -shared "$2" -o /dev/null "$tmpc" >/dev/null 2>&1 ; then
++if $CC $LDFLAGS_TRY -nostdlib -shared "$2" -o /dev/null "$tmpc" >/dev/null 2>&1 ; then
+ printf "yes\n"
+ eval "$1=\"\${$1} \$2\""
+ eval "$1=\${$1# }"
+@@ -113,7 +113,9 @@ CFLAGS_C99FSE=
+ CFLAGS_AUTO=
+ CFLAGS_MEMOPS=
+ CFLAGS_NOSSP=
++CFLAGS_TRY=
+ LDFLAGS_AUTO=
++LDFLAGS_TRY=
+ OPTIMIZE_GLOBS=
+ prefix=/usr/local/musl
+ exec_prefix='$(prefix)'
+@@ -205,6 +207,14 @@ exit 1
+ fi
+ 
+ #
++# Figure out options to force errors on unknown flags.
++#
++tryflag   CFLAGS_TRY  -Werror=unknown-warning-option
++tryflag   CFLAGS_TRY  -Werror=unused-command-line-argument
++tryldflag LDFLAGS_TRY -Werror=unknown-warning-option
++tryldflag LDFLAGS_TRY -Werror=unused-command-line-argument
++
++#
+ # Need to know if the compiler is gcc to decide whether to build the
+ # musl-gcc wrapper, and for critical bug detection in some gcc versions.
+ #
+--- a/crt/mips/crt1.s
++++ b/crt/mips/crt1.s
+@@ -4,6 +4,8 @@
+ .weak  _fini
+ .global __start
+ .global _start
++.type __start,@function
++.type _start,@function
+ __start:
+ _start:
+ 	subu    $fp, $fp, $fp            # Zero the frame pointer.
+--- a/crt/mips/crti.s
++++ b/crt/mips/crti.s
+@@ -2,6 +2,7 @@
+ 
+ .section .init
+ .global _init
++.type _init,@function
+ .align 2
+ _init:
+ 	subu $sp,$sp,32
+@@ -10,6 +11,7 @@ _init:
+ 
+ .section .fini
+ .global _fini
++.type _fini,@function
+ .align 2
+ _fini:
+ 	subu $sp,$sp,32
+--- /dev/null
++++ b/crt/rcrt1.c
+@@ -0,0 +1,15 @@
++#define SHARED
++#define START "_start"
++#define _dlstart_c _start_c
++#include "../src/ldso/dlstart.c"
++
++int main();
++void _init() __attribute__((weak));
++void _fini() __attribute__((weak));
++_Noreturn int __libc_start_main(int (*)(), int, char **,
++	void (*)(), void(*)(), void(*)());
++
++_Noreturn void __dls2(unsigned char *base, size_t *sp)
++{
++	__libc_start_main(main, *sp, (void *)(sp+1), _init, _fini, 0);
++}
+--- a/include/sys/resource.h
++++ b/include/sys/resource.h
+@@ -96,6 +96,9 @@ int prlimit(pid_t, int, const struct rli
+ #define RLIM_NLIMITS RLIMIT_NLIMITS
+ 
+ #if defined(_LARGEFILE64_SOURCE) || defined(_GNU_SOURCE)
++#define RLIM64_INFINITY RLIM_INFINITY
++#define RLIM64_SAVED_CUR RLIM_SAVED_CUR
++#define RLIM64_SAVED_MAX RLIM_SAVED_MAX
+ #define getrlimit64 getrlimit
+ #define setrlimit64 setrlimit
+ #define rlimit64 rlimit
+--- a/src/internal/dynlink.h
++++ b/src/internal/dynlink.h
+@@ -51,7 +51,7 @@ enum {
+ #define AUX_CNT 32
+ #define DYN_CNT 32
+ 
+-typedef void (*stage2_func)(unsigned char *);
++typedef void (*stage2_func)(unsigned char *, size_t *);
+ typedef _Noreturn void (*stage3_func)(size_t *);
+ 
+ #endif
+--- a/src/internal/libc.h
++++ b/src/internal/libc.h
+@@ -8,9 +8,7 @@
+ struct __locale_map;
+ 
+ struct __locale_struct {
+-	volatile int ctype_utf8;
+-	char *messages_name;
+-	struct __locale_map *volatile cat[4];
++	const struct __locale_map *volatile cat[6];
+ };
+ 
+ struct __libc {
+@@ -23,8 +21,6 @@ struct __libc {
+ 	volatile int ofl_lock[2];
+ 	size_t tls_size;
+ 	size_t page_size;
+-	volatile int uselocale_cnt;
+-	volatile int bytelocale_cnt_minus_1;
+ 	struct __locale_struct global_locale;
+ };
+ 
+--- a/src/internal/locale_impl.h
++++ b/src/internal/locale_impl.h
+@@ -9,22 +9,20 @@ struct __locale_map {
+ 	const void *map;
+ 	size_t map_size;
+ 	char name[LOCALE_NAME_MAX+1];
+-	struct __locale_map *next;
++	const struct __locale_map *next;
+ };
+ 
+-int __setlocalecat(locale_t, int, const char *);
++const struct __locale_map *__get_locale(int, const char *);
+ const char *__mo_lookup(const void *, size_t, const char *);
+ const char *__lctrans(const char *, const struct __locale_map *);
+ const char *__lctrans_cur(const char *);
+ 
+-#define LCTRANS(msg, lc, loc) __lctrans(msg, (loc)->cat[(lc)-2])
++#define LCTRANS(msg, lc, loc) __lctrans(msg, (loc)->cat[(lc)])
+ #define LCTRANS_CUR(msg) __lctrans_cur(msg)
+ 
+-#define CURRENT_LOCALE \
+-	(libc.uselocale_cnt ? __pthread_self()->locale : &libc.global_locale)
++#define CURRENT_LOCALE (__pthread_self()->locale)
+ 
+-#define CURRENT_UTF8 \
+-	(libc.bytelocale_cnt_minus_1<0 || __pthread_self()->locale->ctype_utf8)
++#define CURRENT_UTF8 (!!__pthread_self()->locale->cat[LC_CTYPE])
+ 
+ #undef MB_CUR_MAX
+ #define MB_CUR_MAX (CURRENT_UTF8 ? 4 : 1)
+--- a/src/ldso/dlstart.c
++++ b/src/ldso/dlstart.c
+@@ -56,31 +56,22 @@ void _dlstart_c(size_t *sp, size_t *dynv
+ 		for (i=0; i<local_cnt; i++) got[i] += (size_t)base;
+ 	}
+ 
+-	/* The use of the reloc_info structure and nested loops is a trick
+-	 * to work around the fact that we can't necessarily make function
+-	 * calls yet. Each struct in the array serves like the arguments
+-	 * to a function call. */
+-	struct {
+-		void *rel;
+-		size_t size;
+-		size_t stride;
+-	} reloc_info[] = {
+-		{ base+dyn[DT_JMPREL], dyn[DT_PLTRELSZ], 2+(dyn[DT_PLTREL]==DT_RELA) },
+-		{ base+dyn[DT_REL], dyn[DT_RELSZ], 2 },
+-		{ base+dyn[DT_RELA], dyn[DT_RELASZ], 3 },
+-		{ 0, 0, 0 }
+-	};
+-
+-	for (i=0; reloc_info[i].stride; i++) {
+-		size_t *rel = reloc_info[i].rel;
+-		size_t rel_size = reloc_info[i].size;
+-		size_t stride = reloc_info[i].stride;
+-		for (; rel_size; rel+=stride, rel_size-=stride*sizeof(size_t)) {
+-			if (!IS_RELATIVE(rel[1])) continue;
+-			size_t *rel_addr = (void *)(base + rel[0]);
+-			size_t addend = stride==3 ? rel[2] : *rel_addr;
+-			*rel_addr = (size_t)base + addend;
+-		}
++	size_t *rel, rel_size;
++
++	rel = (void *)(base+dyn[DT_REL]);
++	rel_size = dyn[DT_RELSZ];
++	for (; rel_size; rel+=2, rel_size-=2*sizeof(size_t)) {
++		if (!IS_RELATIVE(rel[1])) continue;
++		size_t *rel_addr = (void *)(base + rel[0]);
++		*rel_addr += (size_t)base;
++	}
++
++	rel = (void *)(base+dyn[DT_RELA]);
++	rel_size = dyn[DT_RELASZ];
++	for (; rel_size; rel+=3, rel_size-=3*sizeof(size_t)) {
++		if (!IS_RELATIVE(rel[1])) continue;
++		size_t *rel_addr = (void *)(base + rel[0]);
++		*rel_addr = (size_t)base + rel[2];
+ 	}
+ 
+ 	const char *strings = (void *)(base + dyn[DT_STRTAB]);
+@@ -93,16 +84,7 @@ void _dlstart_c(size_t *sp, size_t *dynv
+ 		 && s[3]=='l' && s[4]=='s' && s[5]=='2' && !s[6])
+ 			break;
+ 	}
+-	((stage2_func)(base + syms[i].st_value))(base);
+-
+-	/* Call dynamic linker stage-3, __dls3 */
+-	for (i=0; ;i++) {
+-		const char *s = strings + syms[i].st_name;
+-		if (s[0]=='_' && s[1]=='_' && s[2]=='d'
+-		 && s[3]=='l' && s[4]=='s' && s[5]=='3' && !s[6])
+-			break;
+-	}
+-	((stage3_func)(base + syms[i].st_value))(sp);
++	((stage2_func)(base + syms[i].st_value))(base, sp);
+ }
+ 
+ #endif
+--- a/src/ldso/dynlink.c
++++ b/src/ldso/dynlink.c
+@@ -74,7 +74,6 @@ struct dso {
+ 	volatile int new_dtv_idx, new_tls_idx;
+ 	struct td_index *td_index;
+ 	struct dso *fini_next;
+-	int rel_early_relative, rel_update_got;
+ 	char *shortname;
+ 	char buf[];
+ };
+@@ -96,6 +95,9 @@ static struct builtin_tls {
+ } builtin_tls[1];
+ #define MIN_TLS_ALIGN offsetof(struct builtin_tls, pt)
+ 
++#define ADDEND_LIMIT 4096
++static size_t *saved_addends, *apply_addends_to;
++
+ static struct dso ldso;
+ static struct dso *head, *tail, *fini_head;
+ static char *env_path, *sys_path;
+@@ -256,10 +258,19 @@ static void do_relocs(struct dso *dso, s
+ 	size_t sym_val;
+ 	size_t tls_val;
+ 	size_t addend;
++	int skip_relative = 0, reuse_addends = 0, save_slot = 0;
++
++	if (dso == &ldso) {
++		/* Only ldso's REL table needs addend saving/reuse. */
++		if (rel == apply_addends_to)
++			reuse_addends = 1;
++		skip_relative = 1;
++	}
+ 
+ 	for (; rel_size; rel+=stride, rel_size-=stride*sizeof(size_t)) {
+-		if (dso->rel_early_relative && IS_RELATIVE(rel[1])) continue;
++		if (skip_relative && IS_RELATIVE(rel[1])) continue;
+ 		type = R_TYPE(rel[1]);
++		if (type == REL_NONE) continue;
+ 		sym_index = R_SYM(rel[1]);
+ 		reloc_addr = (void *)(base + rel[0]);
+ 		if (sym_index) {
+@@ -280,12 +291,20 @@ static void do_relocs(struct dso *dso, s
+ 			def.dso = dso;
+ 		}
+ 
+-		int gotplt = (type == REL_GOT || type == REL_PLT);
+-		if (dso->rel_update_got && !gotplt) continue;
+-
+-		addend = stride>2 ? rel[2]
+-			: gotplt || type==REL_COPY ? 0
+-			: *reloc_addr;
++		if (stride > 2) {
++			addend = rel[2];
++		} else if (type==REL_GOT || type==REL_PLT|| type==REL_COPY) {
++			addend = 0;
++		} else if (reuse_addends) {
++			/* Save original addend in stage 2 where the dso
++			 * chain consists of just ldso; otherwise read back
++			 * saved addend since the inline one was clobbered. */
++			if (head==&ldso)
++				saved_addends[save_slot] = *reloc_addr;
++			addend = saved_addends[save_slot++];
++		} else {
++			addend = *reloc_addr;
++		}
+ 
+ 		sym_val = def.sym ? (size_t)def.dso->base+def.sym->st_value : 0;
+ 		tls_val = def.sym ? def.sym->st_value : 0;
+@@ -879,7 +898,7 @@ static void do_mips_relocs(struct dso *p
+ 	size_t i, j, rel[2];
+ 	unsigned char *base = p->base;
+ 	i=0; search_vec(p->dynv, &i, DT_MIPS_LOCAL_GOTNO);
+-	if (p->rel_early_relative) {
++	if (p==&ldso) {
+ 		got += i;
+ 	} else {
+ 		while (i--) *got++ += (size_t)base;
+@@ -1116,7 +1135,7 @@ static void update_tls_size()
+  * linker itself, but some of the relocations performed may need to be
+  * replaced later due to copy relocations in the main program. */
+ 
+-void __dls2(unsigned char *base)
++void __dls2(unsigned char *base, size_t *sp)
+ {
+ 	Ehdr *ehdr = (void *)base;
+ 	ldso.base = base;
+@@ -1125,15 +1144,35 @@ void __dls2(unsigned char *base)
+ 	ldso.phnum = ehdr->e_phnum;
+ 	ldso.phdr = (void *)(base + ehdr->e_phoff);
+ 	ldso.phentsize = ehdr->e_phentsize;
+-	ldso.rel_early_relative = 1;
+ 	kernel_mapped_dso(&ldso);
+ 	decode_dyn(&ldso);
+ 
++	/* Prepare storage to save clobbered REL addends so they
++	 * can be reused in stage 3. There should be very few. If
++	 * something goes wrong and there are a huge number, abort
++	 * instead of risking stack overflow. */
++	size_t dyn[DYN_CNT];
++	decode_vec(ldso.dynv, dyn, DYN_CNT);
++	size_t *rel = (void *)(base+dyn[DT_REL]);
++	size_t rel_size = dyn[DT_RELSZ];
++	size_t symbolic_rel_cnt = 0;
++	apply_addends_to = rel;
++	for (; rel_size; rel+=2, rel_size-=2*sizeof(size_t))
++		if (!IS_RELATIVE(rel[1])) symbolic_rel_cnt++;
++	if (symbolic_rel_cnt >= ADDEND_LIMIT) a_crash();
++	size_t addends[symbolic_rel_cnt+1];
++	saved_addends = addends;
++
+ 	head = &ldso;
+ 	reloc_all(&ldso);
+ 
+ 	ldso.relocated = 0;
+-	ldso.rel_update_got = 1;
++
++	/* Call dynamic linker stage-3, __dls3, looking it up
++	 * symbolically as a barrier against moving the address
++	 * load across the above relocation processing. */
++	struct symdef dls3_def = find_sym(&ldso, "__dls3", 0);
++	((stage3_func)(ldso.base+dls3_def.sym->st_value))(sp);
+ }
+ 
+ /* Stage 3 of the dynamic linker is called with the dynamic linker/libc
+--- a/src/locale/__lctrans.c
++++ b/src/locale/__lctrans.c
+@@ -16,5 +16,5 @@ const char *__lctrans(const char *msg, c
+ 
+ const char *__lctrans_cur(const char *msg)
+ {
+-	return __lctrans_impl(msg, CURRENT_LOCALE->cat[LC_MESSAGES-2]);
++	return __lctrans_impl(msg, CURRENT_LOCALE->cat[LC_MESSAGES]);
+ }
+--- a/src/locale/__setlocalecat.c
++++ /dev/null
+@@ -1,111 +0,0 @@
+-#include <locale.h>
+-#include <string.h>
+-#include "locale_impl.h"
+-#include "libc.h"
+-#include "atomic.h"
+-
+-const char *__lctrans_impl(const char *msg, const struct __locale_map *lm)
+-{
+-	const char *trans = 0;
+-	if (lm) trans = __mo_lookup(lm->map, lm->map_size, msg);
+-	return trans ? trans : msg;
+-}
+-
+-const unsigned char *__map_file(const char *, size_t *);
+-int __munmap(void *, size_t);
+-char *__strchrnul(const char *, int);
+-
+-static struct __locale_map *findlocale(const char *name, size_t n)
+-{
+-	static void *volatile loc_head;
+-	struct __locale_map *p, *new, *old_head;
+-	const char *path = 0, *z;
+-	char buf[256];
+-	size_t l;
+-	const void *map;
+-	size_t map_size;
+-
+-	for (p=loc_head; p; p=p->next)
+-		if (!strcmp(name, p->name)) return p;
+-
+-	if (!libc.secure) path = getenv("MUSL_LOCPATH");
+-	/* FIXME: add a default path? */
+-	if (!path) return 0;
+-
+-	for (; *path; path=z+!!*z) {
+-		z = __strchrnul(path, ':');
+-		l = z - path - !!*z;
+-		if (l >= sizeof buf - n - 2) continue;
+-		memcpy(buf, path, l);
+-		buf[l] = '/';
+-		memcpy(buf+l+1, name, n);
+-		buf[l+1+n] = 0;
+-		map = __map_file(buf, &map_size);
+-		if (map) {
+-			new = malloc(sizeof *new);
+-			if (!new) {
+-				__munmap((void *)map, map_size);
+-				return 0;
+-			}
+-			new->map = map;
+-			new->map_size = map_size;
+-			memcpy(new->name, name, n);
+-			new->name[n] = 0;
+-			do {
+-				old_head = loc_head;
+-				new->next = old_head;
+-			} while (a_cas_p(&loc_head, old_head, new) != old_head);
+-			return new;
+-		}
+-	}
+-	return 0;
+-}
+-
+-static const char envvars[][12] = {
+-	"LC_CTYPE",
+-	"LC_NUMERIC",
+-	"LC_TIME",
+-	"LC_COLLATE",
+-	"LC_MONETARY",
+-	"LC_MESSAGES",
+-};
+-
+-int __setlocalecat(locale_t loc, int cat, const char *val)
+-{
+-	if (!*val) {
+-		(val = getenv("LC_ALL")) && *val ||
+-		(val = getenv(envvars[cat])) && *val ||
+-		(val = getenv("LANG")) && *val ||
+-		(val = "C.UTF-8");
+-	}
+-
+-	size_t n;
+-	for (n=0; n<LOCALE_NAME_MAX && val[n] && val[n]!='/'; n++);
+-	if (val[0]=='.' || val[n]) val = "C.UTF-8";
+-	int builtin = (val[0]=='C' && !val[1])
+-		|| !strcmp(val, "C.UTF-8")
+-		|| !strcmp(val, "POSIX");
+-	struct __locale_map *data, *old;
+-
+-	switch (cat) {
+-	case LC_CTYPE:
+-		a_store(&loc->ctype_utf8, !builtin || val[1]=='.');
+-		break;
+-	case LC_MESSAGES:
+-		if (builtin) {
+-			loc->messages_name[0] = 0;
+-		} else {
+-			memcpy(loc->messages_name, val, n);
+-			loc->messages_name[n] = 0;
+-		}
+-		/* fall through */
+-	default:
+-		data = builtin ? 0 : findlocale(val, n);
+-		if (data == loc->cat[cat-2]) break;
+-		do old = loc->cat[cat-2];
+-		while (a_cas_p(&loc->cat[cat-2], old, data) != old);
+-	case LC_NUMERIC:
+-		break;
+-	}
+-	return 0;
+-}
+--- a/src/locale/dcngettext.c
++++ b/src/locale/dcngettext.c
+@@ -84,13 +84,15 @@ char *bindtextdomain(const char *domainn
+ }
+ 
+ static const char catnames[][12] = {
++	"LC_CTYPE",
++	"LC_NUMERIC",
+ 	"LC_TIME",
+ 	"LC_COLLATE",
+ 	"LC_MONETARY",
+ 	"LC_MESSAGES",
+ };
+ 
+-static const char catlens[] = { 7, 10, 11, 11 };
++static const char catlens[] = { 8, 10, 7, 10, 11, 11 };
+ 
+ struct msgcat {
+ 	struct msgcat *next;
+@@ -117,10 +119,12 @@ char *dcngettext(const char *domainname,
+ 	static struct msgcat *volatile cats;
+ 	struct msgcat *p;
+ 	struct __locale_struct *loc = CURRENT_LOCALE;
+-	struct __locale_map *lm;
++	const struct __locale_map *lm;
+ 	const char *dirname, *locname, *catname;
+ 	size_t dirlen, loclen, catlen, domlen;
+ 
++	if ((unsigned)category >= LC_ALL) goto notrans;
++
+ 	if (!domainname) domainname = __gettextdomain();
+ 
+ 	domlen = strlen(domainname);
+@@ -129,25 +133,15 @@ char *dcngettext(const char *domainname,
+ 	dirname = gettextdir(domainname, &dirlen);
+ 	if (!dirname) goto notrans;
+ 
+-	switch (category) {
+-	case LC_MESSAGES:
+-		locname = loc->messages_name;
+-		if (!*locname) goto notrans;
+-		break;
+-	case LC_TIME:
+-	case LC_MONETARY:
+-	case LC_COLLATE:
+-		lm = loc->cat[category-2];
+-		if (!lm) goto notrans;
+-		locname = lm->name;
+-		break;
+-	default:
++	lm = loc->cat[category];
++	if (!lm) {
+ notrans:
+ 		return (char *) ((n == 1) ? msgid1 : msgid2);
+ 	}
++	locname = lm->name;
+ 
+-	catname = catnames[category-2];
+-	catlen = catlens[category-2];
++	catname = catnames[category];
++	catlen = catlens[category];
+ 	loclen = strlen(locname);
+ 
+ 	size_t namelen = dirlen+1 + loclen+1 + catlen+1 + domlen+3;
+--- a/src/locale/duplocale.c
++++ b/src/locale/duplocale.c
+@@ -5,17 +5,10 @@
+ 
+ locale_t __duplocale(locale_t old)
+ {
+-	locale_t new = calloc(1, sizeof *new + LOCALE_NAME_MAX + 1);
++	locale_t new = malloc(sizeof *new);
+ 	if (!new) return 0;
+-	new->messages_name = (void *)(new+1);
+-
+ 	if (old == LC_GLOBAL_LOCALE) old = &libc.global_locale;
+-	new->ctype_utf8 = old->ctype_utf8;
+-	if (old->messages_name)
+-		strcpy(new->messages_name, old->messages_name);
+-
+-	for (size_t i=0; i<sizeof new->cat/sizeof new->cat[0]; i++)
+-		new->cat[i] = old->cat[i];
++	*new = *old;
+ 	return new;
+ }
+ 
+--- a/src/locale/freelocale.c
++++ b/src/locale/freelocale.c
+@@ -2,9 +2,11 @@
+ #include "locale_impl.h"
+ #include "libc.h"
+ 
++int __loc_is_allocated(locale_t);
++
+ void freelocale(locale_t l)
+ {
+-	free(l);
++	if (__loc_is_allocated(l)) free(l);
+ }
+ 
+ weak_alias(freelocale, __freelocale);
+--- a/src/locale/iconv.c
++++ b/src/locale/iconv.c
+@@ -23,19 +23,13 @@
+ #define BIG5        0340
+ #define EUC_KR      0350
+ 
+-/* FIXME: these are not implemented yet
+- * EUC:   A1-FE A1-FE
+- * GBK:   81-FE 40-7E,80-FE
+- * Big5:  A1-FE 40-7E,A1-FE
+- */
+-
+ /* Definitions of charmaps. Each charmap consists of:
+  * 1. Empty-string-terminated list of null-terminated aliases.
+  * 2. Special type code or number of elided entries.
+  * 3. Character table (size determined by field 2). */
+ 
+ static const unsigned char charmaps[] =
+-"utf8\0\0\310"
++"utf8\0char\0\0\310"
+ "wchart\0\0\306"
+ "ucs2\0ucs2be\0\0\304"
+ "ucs2le\0\0\305"
+@@ -90,6 +84,7 @@ static int fuzzycmp(const unsigned char
+ static size_t find_charmap(const void *name)
+ {
+ 	const unsigned char *s;
++	if (!*(char *)name) name=charmaps; /* "utf8" */
+ 	for (s=charmaps; *s; ) {
+ 		if (!fuzzycmp(name, s)) {
+ 			for (; *s; s+=strlen((void *)s)+1);
+--- /dev/null
++++ b/src/locale/locale_map.c
+@@ -0,0 +1,124 @@
++#include <locale.h>
++#include <string.h>
++#include "locale_impl.h"
++#include "libc.h"
++#include "atomic.h"
++
++const char *__lctrans_impl(const char *msg, const struct __locale_map *lm)
++{
++	const char *trans = 0;
++	if (lm) trans = __mo_lookup(lm->map, lm->map_size, msg);
++	return trans ? trans : msg;
++}
++
++const unsigned char *__map_file(const char *, size_t *);
++int __munmap(void *, size_t);
++char *__strchrnul(const char *, int);
++
++static const char envvars[][12] = {
++	"LC_CTYPE",
++	"LC_NUMERIC",
++	"LC_TIME",
++	"LC_COLLATE",
++	"LC_MONETARY",
++	"LC_MESSAGES",
++};
++
++static const uint32_t empty_mo[] = { 0x950412de, 0, -1, -1, -1 };
++
++const struct __locale_map __c_dot_utf8 = {
++	.map = empty_mo,
++	.map_size = sizeof empty_mo,
++	.name = "C.UTF-8"
++};
++
++const struct __locale_map *__get_locale(int cat, const char *val)
++{
++	static int lock[2];
++	static void *volatile loc_head;
++	const struct __locale_map *p;
++	struct __locale_map *new = 0;
++	const char *path = 0, *z;
++	char buf[256];
++	size_t l, n;
++
++	if (!*val) {
++		(val = getenv("LC_ALL")) && *val ||
++		(val = getenv(envvars[cat])) && *val ||
++		(val = getenv("LANG")) && *val ||
++		(val = "C.UTF-8");
++	}
++
++	/* Limit name length and forbid leading dot or any slashes. */
++	for (n=0; n<LOCALE_NAME_MAX && val[n] && val[n]!='/'; n++);
++	if (val[0]=='.' || val[n]) val = "C.UTF-8";
++	int builtin = (val[0]=='C' && !val[1])
++		|| !strcmp(val, "C.UTF-8")
++		|| !strcmp(val, "POSIX");
++
++	if (builtin) {
++		if (cat == LC_CTYPE && val[1]=='.')
++			return (void *)&__c_dot_utf8;
++		return 0;
++	}
++
++	for (p=loc_head; p; p=p->next)
++		if (!strcmp(val, p->name)) return p;
++
++	LOCK(lock);
++
++	for (p=loc_head; p; p=p->next)
++		if (!strcmp(val, p->name)) {
++			UNLOCK(lock);
++			return p;
++		}
++
++	if (!libc.secure) path = getenv("MUSL_LOCPATH");
++	/* FIXME: add a default path? */
++
++	if (path) for (; *path; path=z+!!*z) {
++		z = __strchrnul(path, ':');
++		l = z - path - !!*z;
++		if (l >= sizeof buf - n - 2) continue;
++		memcpy(buf, path, l);
++		buf[l] = '/';
++		memcpy(buf+l+1, val, n);
++		buf[l+1+n] = 0;
++		size_t map_size;
++		const void *map = __map_file(buf, &map_size);
++		if (map) {
++			new = malloc(sizeof *new);
++			if (!new) {
++				__munmap((void *)map, map_size);
++				break;
++			}
++			new->map = map;
++			new->map_size = map_size;
++			memcpy(new->name, val, n);
++			new->name[n] = 0;
++			new->next = loc_head;
++			loc_head = new;
++			break;
++		}
++	}
++
++	/* If no locale definition was found, make a locale map
++	 * object anyway to store the name, which is kept for the
++	 * sake of being able to do message translations at the
++	 * application level. */
++	if (!new && (new = malloc(sizeof *new))) {
++		new->map = empty_mo;
++		new->map_size = sizeof empty_mo;
++		memcpy(new->name, val, n);
++		new->name[n] = 0;
++		new->next = loc_head;
++		loc_head = new;
++	}
++
++	/* For LC_CTYPE, never return a null pointer unless the
++	 * requested name was "C" or "POSIX". */
++	if (!new && cat == LC_CTYPE) new = (void *)&__c_dot_utf8;
++
++	UNLOCK(lock);
++	return new;
++}
+--- a/src/locale/newlocale.c
++++ b/src/locale/newlocale.c
+@@ -3,22 +3,52 @@
+ #include "locale_impl.h"
+ #include "libc.h"
+ 
++extern const struct __locale_map __c_dot_utf8;
++
++static const struct __locale_struct c_locale = { 0 };
++static const struct __locale_struct c_dot_utf8_locale = {
++	.cat[LC_CTYPE] = &__c_dot_utf8
++};
++
++int __loc_is_allocated(locale_t loc)
++{
++	return loc && loc != &c_locale && loc != &c_dot_utf8_locale;
++}
++
+ locale_t __newlocale(int mask, const char *name, locale_t loc)
+ {
+-	int i;
++	int i, j;
++	struct __locale_struct tmp;
++	const struct __locale_map *lm;
+ 
+-	if (!loc) {
+-		loc = calloc(1, sizeof *loc + LOCALE_NAME_MAX + 1);
+-		if (!loc) return 0;
+-		loc->messages_name = (void *)(loc+1);
++	/* For locales with allocated storage, modify in-place. */
++	if (__loc_is_allocated(loc)) {
+ 		for (i=0; i<LC_ALL; i++)
+-			if (!(mask & (1<<i)))
+-				__setlocalecat(loc, i, "");
++			if (mask & (1<<i))
++				loc->cat[i] = __get_locale(i, name);
++		return loc;
++	}
++
++	/* Otherwise, build a temporary locale object, which will only
++	 * be instantiated in allocated storage if it does not match
++	 * one of the built-in static locales. This makes the common
++	 * usage case for newlocale, getting a C locale with predictable
++	 * behavior, very fast, and more importantly, fail-safe. */
++	for (j=i=0; i<LC_ALL; i++) {
++		if (loc && !(mask & (1<<i)))
++			lm = loc->cat[i];
++		else
++			lm = __get_locale(i, mask & (1<<i) ? name : "");
++		if (lm) j++;
++		tmp.cat[i] = lm;
+ 	}
+ 
+-	for (i=0; i<LC_ALL; i++)
+-		if (mask & (1<<i))
+-			__setlocalecat(loc, i, name);
++	if (!j)
++		return (locale_t)&c_locale;
++	if (j==1 && tmp.cat[LC_CTYPE]==c_dot_utf8_locale.cat[LC_CTYPE])
++		return (locale_t)&c_dot_utf8_locale;
++
++	if ((loc = malloc(sizeof *loc))) *loc = tmp;
+ 
+ 	return loc;
+ }
+--- a/src/locale/setlocale.c
++++ b/src/locale/setlocale.c
+@@ -5,73 +5,66 @@
+ #include "libc.h"
+ #include "atomic.h"
+ 
+-static char buf[2+4*(LOCALE_NAME_MAX+1)];
++static char buf[LC_ALL*(LOCALE_NAME_MAX+1)];
+ 
+-char *setlocale(int cat, const char *name)
++static char *setlocale_one_unlocked(int cat, const char *name)
+ {
+-	struct __locale_map *lm;
+-	int i, j;
++	const struct __locale_map *lm;
+ 
+-	if (!libc.global_locale.messages_name) {
+-		libc.global_locale.messages_name =
+-			buf + 2 + 3*(LOCALE_NAME_MAX+1);
+-	}
++	if (name) libc.global_locale.cat[cat] = lm = __get_locale(cat, name);
++	else lm = libc.global_locale.cat[cat];
++
++	return lm ? (char *)lm->name : "C";
++}
++
++char *__strchrnul(const char *, int);
++
++char *setlocale(int cat, const char *name)
++{
++	static volatile int lock[2];
+ 
+ 	if ((unsigned)cat > LC_ALL) return 0;
+ 
++	LOCK(lock);
++
+ 	/* For LC_ALL, setlocale is required to return a string which
+ 	 * encodes the current setting for all categories. The format of
+ 	 * this string is unspecified, and only the following code, which
+ 	 * performs both the serialization and deserialization, depends
+ 	 * on the format, so it can easily be changed if needed. */
+ 	if (cat == LC_ALL) {
++		int i;
+ 		if (name) {
+-			char part[LOCALE_NAME_MAX+1];
+-			if (name[0] && name[1]==';'
+-			    && strlen(name) > 2 + 3*(LOCALE_NAME_MAX+1)) {
+-				part[0] = name[0];
+-				part[1] = 0;
+-				setlocale(LC_CTYPE, part);
+-				part[LOCALE_NAME_MAX] = 0;
+-				for (i=LC_TIME; i<LC_MESSAGES; i++) {
+-					memcpy(part, name + 2 + (i-2)*(LOCALE_NAME_MAX+1), LOCALE_NAME_MAX);
+-					for (j=LOCALE_NAME_MAX-1; j && part[j]==';'; j--)
+-						part[j] = 0;
+-					setlocale(i, part);
++			char part[LOCALE_NAME_MAX+1] = "C.UTF-8";
++			const char *p = name;
++			for (i=0; i<LC_ALL; i++) {
++				const char *z = __strchrnul(p, ';');
++				if (z-p <= LOCALE_NAME_MAX) {
++					memcpy(part, p, z-p);
++					part[z-p] = 0;
++					if (*z) p = z+1;
+ 				}
+-				setlocale(LC_MESSAGES, name + 2 + 3*(LOCALE_NAME_MAX+1));
+-			} else {
+-				for (i=0; i<LC_ALL; i++)
+-					setlocale(i, name);
++				setlocale_one_unlocked(i, part);
+ 			}
+ 		}
+-		memset(buf, ';', 2 + 3*(LOCALE_NAME_MAX+1));
+-		buf[0] = libc.global_locale.ctype_utf8 ? 'U' : 'C';
+-		for (i=LC_TIME; i<LC_MESSAGES; i++) {
+-			lm = libc.global_locale.cat[i-2];
+-			if (lm) memcpy(buf + 2 + (i-2)*(LOCALE_NAME_MAX+1),
+-				lm->name, strlen(lm->name));
++		char *s = buf;
++		for (i=0; i<LC_ALL; i++) {
++			const struct __locale_map *lm =
++				libc.global_locale.cat[i];
++			const char *part = lm ? lm->name : "C";
++			size_t l = strlen(part);
++			memcpy(s, part, l);
++			s[l] = ';';
++			s += l+1;
+ 		}
++		*--s = 0;
++		UNLOCK(lock);
+ 		return buf;
+ 	}
+ 
+-	if (name) {
+-		int adj = libc.global_locale.ctype_utf8;
+-		__setlocalecat(&libc.global_locale, cat, name);
+-		adj -= libc.global_locale.ctype_utf8;
+-		if (adj) a_fetch_add(&libc.bytelocale_cnt_minus_1, adj);
+-	}
++	char *ret = setlocale_one_unlocked(cat, name);
+ 
+-	switch (cat) {
+-	case LC_CTYPE:
+-		return libc.global_locale.ctype_utf8 ? "C.UTF-8" : "C";
+-	case LC_NUMERIC:
+-		return "C";
+-	case LC_MESSAGES:
+-		return libc.global_locale.messages_name[0]
+-			? libc.global_locale.messages_name : "C";
+-	default:
+-		lm = libc.global_locale.cat[cat-2];
+-		return lm ? lm->name : "C";
+-	}
++	UNLOCK(lock);
++
++	return ret;
+ }
+--- a/src/locale/uselocale.c
++++ b/src/locale/uselocale.c
+@@ -10,15 +10,7 @@ locale_t __uselocale(locale_t new)
+ 
+ 	if (new == LC_GLOBAL_LOCALE) new = global;
+ 
+-	if (new && new != old) {
+-		int adj = 0;
+-		if (new == global) a_dec(&libc.uselocale_cnt);
+-		else if (!new->ctype_utf8) adj++;
+-		if (old == global) a_inc(&libc.uselocale_cnt);
+-		else if (!old->ctype_utf8) adj--;
+-		a_fetch_add(&libc.bytelocale_cnt_minus_1, adj);
+-		self->locale = new;
+-	}
++	self->locale = new;
+ 
+ 	return old == global ? LC_GLOBAL_LOCALE : old;
+ }
+--- a/src/stdio/__stdio_read.c
++++ b/src/stdio/__stdio_read.c
+@@ -21,7 +21,6 @@ size_t __stdio_read(FILE *f, unsigned ch
+ 	pthread_cleanup_pop(0);
+ 	if (cnt <= 0) {
+ 		f->flags |= F_EOF ^ ((F_ERR^F_EOF) & cnt);
+-		f->rpos = f->rend = 0;
+ 		return cnt;
+ 	}
+ 	if (cnt <= iov[0].iov_len) return cnt;
+--- a/src/stdio/__toread.c
++++ b/src/stdio/__toread.c
+@@ -5,12 +5,12 @@ int __toread(FILE *f)
+ 	f->mode |= f->mode-1;
+ 	if (f->wpos > f->buf) f->write(f, 0, 0);
+ 	f->wpos = f->wbase = f->wend = 0;
+-	if (f->flags & (F_EOF|F_NORD)) {
+-		if (f->flags & F_NORD) f->flags |= F_ERR;
++	if (f->flags & F_NORD) {
++		f->flags |= F_ERR;
+ 		return EOF;
+ 	}
+-	f->rpos = f->rend = f->buf;
+-	return 0;
++	f->rpos = f->rend = f->buf + f->buf_size;
++	return (f->flags & F_EOF) ? EOF : 0;
+ }
+ 
+ void __stdio_exit_needed(void);
+--- a/src/stdio/__uflow.c
++++ b/src/stdio/__uflow.c
+@@ -1,11 +1,11 @@
+ #include "stdio_impl.h"
+ 
+-/* This function will never be called if there is already data
+- * buffered for reading. Thus we can get by with very few branches. */
++/* This function assumes it will never be called if there is already
++ * data buffered for reading. */
+ 
+ int __uflow(FILE *f)
+ {
+ 	unsigned char c;
+-	if ((f->rend || !__toread(f)) && f->read(f, &c, 1)==1) return c;
++	if (!__toread(f) && f->read(f, &c, 1)==1) return c;
+ 	return EOF;
+ }
+--- a/src/stdio/ungetc.c
++++ b/src/stdio/ungetc.c
+@@ -6,7 +6,8 @@ int ungetc(int c, FILE *f)
+ 
+ 	FLOCK(f);
+ 
+-	if ((!f->rend && __toread(f)) || f->rpos <= f->buf - UNGET) {
++	if (!f->rpos) __toread(f);
++	if (!f->rpos || f->rpos <= f->buf - UNGET) {
+ 		FUNLOCK(f);
+ 		return EOF;
+ 	}
+--- a/src/stdio/ungetwc.c
++++ b/src/stdio/ungetwc.c
+@@ -19,7 +19,8 @@ wint_t ungetwc(wint_t c, FILE *f)
+ 
+ 	f->mode |= f->mode+1;
+ 
+-	if ((!f->rend && __toread(f)) || f->rpos < f->buf - UNGET + l) {
++	if (!f->rpos) __toread(f);
++	if (!f->rpos || f->rpos < f->buf - UNGET + l) {
+ 		FUNLOCK(f);
+ 		return EOF;
+ 	}
+--- a/src/thread/i386/__set_thread_area.s
++++ b/src/thread/i386/__set_thread_area.s
+@@ -6,10 +6,10 @@ __set_thread_area:
+ 	push $0x51
+ 	push $0xfffff
+ 	push 16(%esp)
+-	xor %edx,%edx
+-	mov %gs,%dx
+-	sub $3,%edx
+-	sar $3,%edx
++	call 1f
++1:	addl $4f-1b,(%esp)
++	pop %ecx
++	mov (%ecx),%edx
+ 	push %edx
+ 	mov %esp,%ebx
+ 	xor %eax,%eax
+@@ -18,6 +18,7 @@ __set_thread_area:
+ 	testl %eax,%eax
+ 	jnz 2f
+ 	movl (%esp),%edx
++	movl %edx,(%ecx)
+ 	leal 3(,%edx,8),%edx
+ 3:	movw %dx,%gs
+ 1:
+@@ -38,3 +39,7 @@ __set_thread_area:
+ 	mov $7,%dl
+ 	inc %al
+ 	jmp 3b
++
++.data
++	.align 4
++4:	.long -1
+--- a/src/thread/mips/syscall_cp.s
++++ b/src/thread/mips/syscall_cp.s
+@@ -2,10 +2,13 @@
+ 
+ .global __cp_begin
+ .hidden __cp_begin
++.type   __cp_begin,@function
+ .global __cp_end
+ .hidden __cp_end
++.type   __cp_end,@function
+ .global __cp_cancel
+ .hidden __cp_cancel
++.type   __cp_cancel,@function
+ .hidden __cancel
+ .global __syscall_cp_asm
+ .hidden __syscall_cp_asm
+--- a/src/thread/pthread_create.c
++++ b/src/thread/pthread_create.c
+@@ -67,12 +67,6 @@ _Noreturn void __pthread_exit(void *resu
+ 		exit(0);
+ 	}
+ 
+-	if (self->locale != &libc.global_locale) {
+-		a_dec(&libc.uselocale_cnt);
+-		if (self->locale->ctype_utf8)
+-			a_dec(&libc.bytelocale_cnt_minus_1);
+-	}
+-
+ 	/* Process robust list in userspace to handle non-pshared mutexes
+ 	 * and the detached thread case where the robust list head will
+ 	 * be invalid when the kernel would process it. */
diff --git a/toolchain/musl/patches/900-iconv_size_hack.patch b/toolchain/musl/patches/900-iconv_size_hack.patch
index f5418db8f3de22b989ba4c537e8df265420dd2eb..f4e2be4d0325c7360ec9e542d8da6438c7a5f93e 100644
--- a/toolchain/musl/patches/900-iconv_size_hack.patch
+++ b/toolchain/musl/patches/900-iconv_size_hack.patch
@@ -1,6 +1,6 @@
 --- a/src/locale/iconv.c
 +++ b/src/locale/iconv.c
-@@ -44,6 +44,7 @@ static const unsigned char charmaps[] =
+@@ -38,6 +38,7 @@ static const unsigned char charmaps[] =
  "ucs4\0ucs4be\0utf32\0utf32be\0\0\300"
  "ucs4le\0utf32le\0\0\303"
  "ascii\0usascii\0iso646\0iso646us\0\0\307"
@@ -8,7 +8,7 @@
  "eucjp\0\0\320"
  "shiftjis\0sjis\0\0\321"
  "gb18030\0\0\330"
-@@ -51,6 +52,7 @@ static const unsigned char charmaps[] =
+@@ -45,6 +46,7 @@ static const unsigned char charmaps[] =
  "gb2312\0\0\332"
  "big5\0bigfive\0cp950\0big5hkscs\0\0\340"
  "euckr\0ksc5601\0ksx1001\0cp949\0\0\350"
@@ -16,7 +16,7 @@
  #include "codepages.h"
  ;
  
-@@ -58,6 +60,7 @@ static const unsigned short legacy_chars
+@@ -52,6 +54,7 @@ static const unsigned short legacy_chars
  #include "legacychars.h"
  };
  
@@ -24,7 +24,7 @@
  static const unsigned short jis0208[84][94] = {
  #include "jis0208.h"
  };
-@@ -77,6 +80,7 @@ static const unsigned short hkscs[] = {
+@@ -71,6 +74,7 @@ static const unsigned short hkscs[] = {
  static const unsigned short ksc[93][94] = {
  #include "ksc.h"
  };
@@ -32,7 +32,7 @@
  
  static int fuzzycmp(const unsigned char *a, const unsigned char *b)
  {
-@@ -217,6 +221,7 @@ size_t iconv(iconv_t cd0, char **restric
+@@ -212,6 +216,7 @@ size_t iconv(iconv_t cd0, char **restric
  				c = ((c-0xd7c0)<<10) + (d-0xdc00);
  			}
  			break;
@@ -40,7 +40,7 @@
  		case SHIFT_JIS:
  			if (c-0xa1 <= 0xdf-0xa1) {
  				c += 0xff61-0xa1;
-@@ -363,6 +368,7 @@ size_t iconv(iconv_t cd0, char **restric
+@@ -358,6 +363,7 @@ size_t iconv(iconv_t cd0, char **restric
  			c = ksc[c][d];
  			if (!c) goto ilseq;
  			break;