FS-2746 --resolve large xmlrpc update thanks garmt

2025-08-13 09:36:46 +00:00 · 2012-10-13 11:37:25 -05:00
parent 37ecad9903
commit 6b6c83a718
397 changed files with 41822 additions and 33841 deletions
--- a/libs/xmlrpc-c/lib/libutil/Makefile
+++ b/libs/xmlrpc-c/lib/libutil/Makefile
@@ -1,7 +1,9 @@
 ###############################################################################
 # This directory builds libxmlrpc_util, which contains utility
-# functions that are used by the Xmlprc-c # libraries, and also
-# directly by Xmlrpc-c programs.
+# functions that are used by the Xmlrpc-c libraries, and also
+# directly by Xmlrpc-c programs.  Some of them are documented for use
+# by Xmlrpc-c users, as facilities of the libxmlrpc library (which
+# prerequires libxmlrpc_util).
 #
 # The functions in this library are characterized by being general purpose
 # programming functions, such as one might wish were in the standard C
@@ -29,11 +31,13 @@ SHARED_LIBS_TO_INSTALL := libxmlrpc_util

 TARGET_MODS = \
  asprintf \
+  base64 \
  error \
  make_printable \
  memblock \
  select \
  sleep \
+  string_number \
  time \
  utf8 \

@@ -43,8 +47,6 @@ MAJ=3

 include $(SRCDIR)/common.mk

-CFLAGS = $(CFLAGS_COMMON) $(CFLAGS_PERSONAL) $(CADD)
-
 INCLUDES = -I$(BLDDIR) -Isrcdir \
           -I$(BLDDIR)/include -Isrcdir/include -Isrcdir/lib/util/include

@@ -53,17 +55,13 @@ UTIL_SHLIB = $(call shlibfn,libxmlrpc_util)
 UTIL_SHLIBLE = $(call shliblefn,libxmlrpc_util)
 #UTIL_SHLIBLE is e.g. libxmlrpc_util.so

-ifneq ($(SHARED_LIB_TYPE),NONE)
-  TARGET_SHARED_LIBS := $(UTIL_SHLIB) $(UTIL_SHLIBLE)
-  endif
-
 # This 'common.mk' dependency makes sure the symlinks get built before
 # this make file is used for anything.

 $(SRCDIR)/common.mk: srcdir blddir

 .PHONY: all
-all: libxmlrpc_util.a $(TARGET_SHARED_LIBS) $(TARGET_SHARED_LE_LIBS)
+all: libxmlrpc_util.a $(TARGET_SHARED_LIBRARIES) $(TARGET_SHARED_LE_LIBS)

 # Rule for this is in common.mk, courtesy of TARGET_LIBRARY_NAMES:
 $(UTIL_SHLIB): $(TARGET_MODS:%=%.osh)
@@ -92,4 +90,4 @@ distclean: clean distclean-common
 .PHONY: dep
 dep: dep-common

-include Makefile.depend
+include depend.mk
--- a/libs/xmlrpc-c/lib/libutil/asprintf.c
+++ b/libs/xmlrpc-c/lib/libutil/asprintf.c
@@ -1,10 +1,61 @@
-//#define _GNU_SOURCE
+#define _XOPEN_SOURCE 600  /* Make sure strdup() is in <string.h> */
+#ifndef _GNU_SOURCE
+	#define _GNU_SOURCE  /* But only when HAVE_ASPRINTF */
+#endif
+#include <stdarg.h>
+
 #include <stdarg.h>
 #include <stdio.h>
 #include <stdlib.h>
+#include <limits.h>

 #include "xmlrpc_config.h"  /* For HAVE_ASPRINTF, __inline__ */
 #include "xmlrpc-c/string_int.h"
+#include "bool.h"
+
+
+
+static __inline__ void
+newVsnprintf(char *       const buffer,
+             size_t       const bufferSize,
+             const char * const fmt,
+             va_list            varargs,
+             size_t *     const formattedSizeP) {
+/*----------------------------------------------------------------------------
+   This is vsnprintf() with the new behavior, where not fitting in the buffer
+   is not a failure.
+
+   Return as *formattedSizeP the buffer size needed to hold the formatted
+   string plus its terminating NUL.  If the C library has old vsnprintf() and
+   the string doesn't fit, we can't know that, so we just return something
+   larger than the buffer.
+-----------------------------------------------------------------------------*/
+    if (bufferSize > INT_MAX/2) {
+        /* There's a danger we won't be able to coerce the return value
+           of XMLRPC_VSNPRINTF to an integer (which we have to do because,
+           while for POSIX its return value is ssize_t, on Windows it is int),
+           or return double the buffer size.
+        */
+        *formattedSizeP = 0;
+    } else {
+        int rc;
+
+        rc = XMLRPC_VSNPRINTF(buffer, bufferSize, fmt, varargs);
+
+        if (rc < 0) {
+            /* We have old vsnprintf() (or Windows) and the formatted value
+               doesn't fit in the buffer, but we don't know how big a buffer it
+               needs.
+            */
+            *formattedSizeP = bufferSize * 2;
+        } else {
+            /* 'rc' is the string length, not counting the terminating NUL.
+               rc == bufferSize means the string did NOT fit, so count the NUL.
+            */
+            *formattedSizeP = (size_t)rc + 1;
+        }
+    }
+}



@@ -15,29 +66,24 @@ simpleVasprintf(char **      const retvalP,
 /*----------------------------------------------------------------------------
   This is a poor man's implementation of vasprintf(), of GNU fame.
 -----------------------------------------------------------------------------*/
-    size_t const initialSize = 4096;
    char * result;
+    size_t bufferSize;
+    bool outOfMemory;

-    result = malloc(initialSize);
-    if (result != NULL) {
-        size_t bytesNeeded;
-        bytesNeeded = XMLRPC_VSNPRINTF(result, initialSize, fmt, varargs);
-        if (bytesNeeded > initialSize) {
-            free(result);
-            result = malloc(bytesNeeded);
-            if (result != NULL)
-                XMLRPC_VSNPRINTF(result, bytesNeeded, fmt, varargs);
-        } else if (bytesNeeded == initialSize) {
-            if (result[initialSize-1] != '\0') {
-                /* This is one of those old systems where vsnprintf()
-                   returns the number of bytes it used, instead of the
-                   number that it needed, and it in fact needed more than
-                   we gave it.  Rather than mess with this highly unlikely
-                   case (old system and string > 4095 characters), we just
-                   treat this like an out of memory failure.
-                */
+    for (result = NULL, bufferSize = 4096, outOfMemory = false;
+         !result && !outOfMemory;
+        ) {
+
+        result = malloc(bufferSize);
+        if (!result)
+            outOfMemory = true;
+        else {
+            size_t bytesNeeded;
+            newVsnprintf(result, bufferSize, fmt, varargs, &bytesNeeded);
+            if (bytesNeeded > bufferSize) {
                free(result);
                result = NULL;
+                bufferSize = bytesNeeded;
            }
        }
    }
@@ -46,7 +92,28 @@ simpleVasprintf(char **      const retvalP,



-const char * const xmlrpc_strsol = "[insufficient memory to build string]";
+static const char * const xmlrpc_strsol =
+    "[insufficient memory to build string]";
+
+
+
+bool
+xmlrpc_strnomem(const char * const string) {
+/*----------------------------------------------------------------------------
+   Return true iff 'string' is the special out-of-memory value: the pointer
+   a function in this file returns in place of a real string when it cannot
+   get memory to build one.  The test is by address, not by content.
+-----------------------------------------------------------------------------*/
+    return string == xmlrpc_strsol;
+}
+
+
+
+const char *
+xmlrpc_strnomemval() {
+    /* Return the special pointer that xmlrpc_strnomem() tests for. */
+    return xmlrpc_strsol;
+}



@@ -71,7 +138,7 @@ xmlrpc_vasprintf(const char ** const retvalP,



-void GNU_PRINTF_ATTR(2,3)
+void XMLRPC_PRINTF_ATTR(2,3)
 xmlrpc_asprintf(const char ** const retvalP, const char * const fmt, ...) {

    va_list varargs;  /* mysterious structure used by variable arg facility */
@@ -85,6 +152,27 @@ xmlrpc_asprintf(const char ** const retvalP, const char * const fmt, ...) {



+const char *
+xmlrpc_strdupsol(const char * const string) {
+    /* strdup() 'string', but on failure return xmlrpc_strsol, never NULL. */
+    const char * retvalOrNull;
+
+    retvalOrNull = strdup(string);
+
+    return retvalOrNull ? retvalOrNull : xmlrpc_strsol;
+}
+
+
+
+void
+xmlrpc_strfree(const char * const string) {
+    /* xmlrpc_strsol is a static constant, not malloc'ed, so never free it. */
+    if (string != xmlrpc_strsol)
+        free((void *)string);
+}
+
+
+
 const char *
 xmlrpc_strdupnull(const char * const string) {

@@ -96,15 +184,6 @@ xmlrpc_strdupnull(const char * const string) {



-void
-xmlrpc_strfree(const char * const string) {
-
-    if (string != xmlrpc_strsol)
-        free((void *)string);
-}
-
-
-
 void
 xmlrpc_strfreenull(const char * const string) {

--- a/libs/xmlrpc-c/lib/libutil/base64.c
+++ b/libs/xmlrpc-c/lib/libutil/base64.c
@@ -0,0 +1,49 @@
+#include <string.h>
+
+#include "int.h"
+#include "xmlrpc-c/base64_int.h"
+
+
+
+void
+xmlrpc_base64Encode(const char * const chars,
+                    char *       const base64) {
+/*----------------------------------------------------------------------------
+   Encode the NUL-terminated string 'chars' as base64 and return the result
+   as a NUL-terminated string in the buffer at 'base64', which the caller
+   must make at least 4 * ((strlen(chars) + 2) / 3) + 1 bytes long.
+-----------------------------------------------------------------------------*/
+    static const char tbl[] =
+        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
+
+    /* We access the input as unsigned char so that bytes >= 0x80 do not
+       become negative and index outside of tbl[].
+    */
+    const unsigned char * const s = (const unsigned char *)chars;
+    size_t const length = strlen(chars);
+    size_t i;
+    char * p;
+
+    p = &base64[0];  /* initial value */
+    /* Transform the 3x8 bits to 4x6 bits, as required by base64. */
+    for (i = 0; i < length; i += 3) {
+        /* Bytes past the end of the input count as zero; we don't read them */
+        unsigned int const b0 = s[i];
+        unsigned int const b1 = i + 1 < length ? s[i + 1] : 0;
+        unsigned int const b2 = i + 2 < length ? s[i + 2] : 0;
+
+        *p++ = tbl[b0 >> 2];
+        *p++ = tbl[((b0 & 0x03) << 4) | (b1 >> 4)];
+        *p++ = tbl[((b1 & 0x0f) << 2) | (b2 >> 6)];
+        *p++ = tbl[b2 & 0x3f];
+    }
+
+    /* Pad the result if necessary... */
+    if (i == length + 1)
+        *(p - 1) = '=';
+    else if (i == length + 2)
+        *(p - 1) = *(p - 2) = '=';
+
+    /* ...and zero-terminate it. */
+    *p = '\0';
+}
--- a/libs/xmlrpc-c/lib/libutil/error.c
+++ b/libs/xmlrpc-c/lib/libutil/error.c
@@ -1,5 +1,7 @@
 /* Copyright information is at end of file */

+#define _XOPEN_SOURCE 600  /* Make sure strdup() is in <string.h> */
+
 #include "xmlrpc_config.h"

 #include <stdlib.h>
@@ -86,7 +88,7 @@ void
 xmlrpc_set_fault_formatted_v(xmlrpc_env * const envP,
                             int          const code,
                             const char * const format,
-                             va_list      const args) {
+                             va_list            args) {

    const char * faultDescription;

--- a/libs/xmlrpc-c/lib/libutil/make_printable.c
+++ b/libs/xmlrpc-c/lib/libutil/make_printable.c
@@ -1,4 +1,4 @@
-//#define _GNU_SOURCE
+#define _XOPEN_SOURCE 600  /* Make sure strdup() is in <string.h> */

 #include <stdarg.h>
 #include <string.h>
--- a/libs/xmlrpc-c/lib/libutil/memblock.c
+++ b/libs/xmlrpc-c/lib/libutil/memblock.c
@@ -6,6 +6,7 @@
 #include <string.h>
 #include <ctype.h>

+#include "mallocvar.h"
 #include "xmlrpc-c/util_int.h"
 #include "xmlrpc-c/util.h"

@@ -19,30 +20,30 @@


 xmlrpc_mem_block * 
-xmlrpc_mem_block_new(xmlrpc_env * const env, 
+xmlrpc_mem_block_new(xmlrpc_env * const envP, 
                     size_t       const size) {

-    xmlrpc_mem_block* block;
+    xmlrpc_mem_block * block;

-    XMLRPC_ASSERT_ENV_OK(env);
+    XMLRPC_ASSERT_ENV_OK(envP);

-    block = (xmlrpc_mem_block*) malloc(sizeof(xmlrpc_mem_block));
-    XMLRPC_FAIL_IF_NULL(block, env, XMLRPC_INTERNAL_ERROR,
-                        "Can't allocate memory block");
+    MALLOCVAR(block);
+    
+    if (block == NULL)
+        xmlrpc_faultf(envP, "Can't allocate memory block");
+    else {
+        xmlrpc_mem_block_init(envP, block, size);

-    xmlrpc_mem_block_init(env, block, size);
-    XMLRPC_FAIL_IF_FAULT(env);
-
-                     cleanup:
-    if (env->fault_occurred) {
-        if (block)
+        if (envP->fault_occurred) {
            free(block);
-        return NULL;
-    } else {
-        return block;
+            block = NULL;
+        }
    }
+    return block;
 }

+
+
 /* Destroy an existing xmlrpc_mem_block, and everything it contains. */
 void
 xmlrpc_mem_block_free(xmlrpc_mem_block * const blockP) {
@@ -74,7 +75,7 @@ xmlrpc_mem_block_init(xmlrpc_env *       const envP,
    blockP->_block = (void*) malloc(blockP->_allocated);
    if (!blockP->_block)
        xmlrpc_faultf(envP, "Can't allocate %u-byte memory block",
-                      blockP->_allocated);
+                      (unsigned)blockP->_allocated);
 }


@@ -170,19 +171,15 @@ xmlrpc_mem_block_append(xmlrpc_env *       const envP,
                        const void *       const data, 
                        size_t             const len) {

-    int size;
+    size_t const originalSize = blockP->_size;

    XMLRPC_ASSERT_ENV_OK(envP);
    XMLRPC_ASSERT(blockP != NULL);

-    size = blockP->_size;
-    xmlrpc_mem_block_resize(envP, blockP, size + len);
-    XMLRPC_FAIL_IF_FAULT(envP);
-
-    memcpy(((unsigned char*) blockP->_block) + size, data, len);
-
- cleanup:
-    return;
+    xmlrpc_mem_block_resize(envP, blockP, originalSize + len);
+    if (!envP->fault_occurred) {
+        memcpy(((unsigned char*) blockP->_block) + originalSize, data, len);
+    }
 }


--- a/libs/xmlrpc-c/lib/libutil/select.c
+++ b/libs/xmlrpc-c/lib/libutil/select.c
@@ -1,5 +1,7 @@
 #define _XOPEN_SOURCE 600  /* Get pselect() in <sys/select.h> */

+#include "xmlrpc_config.h"
+
 #ifdef WIN32
 #include <winsock.h>
 #else
@@ -8,12 +10,12 @@
   in this order appears to work on all.
 */
 #include <sys/time.h>
+#if HAVE_SYS_SELECT_H
 #include <sys/select.h>
+#endif
 #endif 
 #include <signal.h>

-#include "xmlrpc_config.h"
-
 #include "xmlrpc-c/select_int.h"


--- a/libs/xmlrpc-c/lib/libutil/string_number.c
+++ b/libs/xmlrpc-c/lib/libutil/string_number.c
@@ -0,0 +1,46 @@
+/*============================================================================
+                                string_number
+==============================================================================
+  This file contains utilities for dealing with text string representation
+  of numbers.
+============================================================================*/
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include <xmlrpc-c/base.h>
+#include <xmlrpc-c/util.h>
+#include <xmlrpc-c/string_int.h>
+#include "xmlrpc_config.h"
+#include "int.h"
+
+#include <xmlrpc-c/string_number.h>
+
+
+
+void
+xmlrpc_parse_int64(xmlrpc_env *   const envP,
+                   const char *   const str,
+                   xmlrpc_int64 * const i64P) {
+/* Parse decimal text 'str' into *i64P.  On failure, fault *envP instead. */
+    xmlrpc_int64 i64val;
+    char * tail;
+
+    errno = 0;  /* so that we can tell whether strtoll() sets it */
+    i64val = XMLRPC_STRTOLL(str, &tail, 10);
+    if (errno == ERANGE)
+        xmlrpc_faultf(envP, "Number cannot be represented in 64 bits.  "
+                      "Must be in the range "
+                      "[%" XMLRPC_PRId64 " - %" XMLRPC_PRId64 "]",
+                      XMLRPC_INT64_MIN, XMLRPC_INT64_MAX);
+    else if (errno != 0)
+        xmlrpc_faultf(envP, "unexpected error: "
+                      "strtoll() failed with errno %d (%s)",
+                      errno, strerror(errno));
+    else if (tail[0] != '\0')
+        xmlrpc_faultf(envP, "contains non-numerical junk: '%s'", tail);
+    else if (tail == str)  /* Nothing converted, nothing left: 'str' is empty */
+        xmlrpc_faultf(envP, "contains no digits");
+    else
+        *i64P = i64val;
+}
--- a/libs/xmlrpc-c/lib/libutil/utf8.c
+++ b/libs/xmlrpc-c/lib/libutil/utf8.c
@@ -38,6 +38,7 @@
 **    http://www.cl.cam.ac.uk/~mgk25/unicode.html
 */

+#include <assert.h>
 #include "int.h"

 #include "xmlrpc_config.h"
@@ -51,31 +52,33 @@
 **  UTF-8 data.
 */

-/* The number of bytes in a UTF-8 sequence starting with the character used
-** as the array index.  A zero entry indicates an illegal initial byte.
-** This table was generated using a Perl script and information from the
-** UTF-8 standard.
-**
-** Fredrik Lundh's UTF-8 decoder Python 2.0 uses a similar table.  But
-** since Python 2.0 has the icky CNRI license, I regenerated this
-** table from scratch and wrote my own decoder. */
-static unsigned char utf8_seq_length[256] = {
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
-    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
-    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
-    4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 0, 0
+static unsigned char utf8SeqLength[256] = {
+
+  /* utf8SeqLength[B] is the number of bytes in a UTF-8 sequence that starts
+     with byte B.  Except zero indicates an illegal initial byte.
+
+     Fredrik Lundh's UTF-8 decoder Python 2.0 uses a similar table.  But since
+     Python 2.0 has the icky CNRI license, I generated this table from scratch
+     and wrote my own decoder.
+  */
+
+          /* 0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F  */
+  /* 0 */    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  /* 1 */    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  /* 2 */    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  /* 3 */    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  /* 4 */    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  /* 5 */    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  /* 6 */    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  /* 7 */    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  /* 8 */    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  /* 9 */    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  /* A */    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  /* B */    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  /* C */    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+  /* D */    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+  /* E */    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+  /* F */    4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 0, 0
 };

 /* The minimum legal character value for a UTF-8 sequence of the given
@@ -118,14 +121,129 @@ static uint32_t const utf8_min_char_for_length[] = {
 #if HAVE_UNICODE_WCHAR


-static void 
-decode_utf8(xmlrpc_env * const envP,
-            const char * const utf8_data,
-            size_t       const utf8_len,
-            wchar_t *    const ioBuff,
-            size_t *     const outBuffLenP) {
+static void
+validateContinuation(xmlrpc_env * const envP,
+                     char         const c) {
+/* Fault *envP unless 'c' passes IS_CONTINUATION.  We format 'c' as unsigned
+   char because a negative 'char' would otherwise print as 0xffffffXX. */
+    if (!IS_CONTINUATION(c))
+        xmlrpc_env_set_fault_formatted(envP, XMLRPC_INVALID_UTF8_ERROR,
+            "UTF-8 multibyte sequence contains character 0x%02x, "
+            "which does not indicate continuation.", (unsigned char)c);
+}
+
+
+
+static void
+validateUtf16(xmlrpc_env * const envP,
+              wchar_t      const wc) {
+/* Fault *envP if 'wc' is above the UCS-2 maximum or is a UTF-16 surrogate. */
+    if (wc > UCS2_MAX_LEGAL_CHARACTER)
+        xmlrpc_env_set_fault_formatted(
+            envP, XMLRPC_INVALID_UTF8_ERROR,
+            "UCS-2 characters > U+FFFD are illegal.  String contains 0x%04x",
+            (unsigned)wc);
+    else if (UTF16_FIRST_SURROGATE <= wc && wc <= UTF16_LAST_SURROGATE)
+        xmlrpc_env_set_fault_formatted(
+            envP, XMLRPC_INVALID_UTF8_ERROR,
+            "UTF-16 surrogates may not appear in UTF-8 data.  "
+            "String contains %04x", (unsigned)wc);
+}
+
+
+
+/* Microsoft Visual C in debug mode produces code that complains about
+   returning an undefined value from decodeMultibyte().  It's a bogus
+   complaint, because this function is defined to return nothing meaningful
+   in the failure cases.  So we disable the check.
+*/
+#pragma runtime_checks("u", off)
+
+static void
+decodeMultibyte(xmlrpc_env * const envP,
+                const char * const utf8_seq,
+                size_t       const length,
+                wchar_t *    const wcP) {
 /*----------------------------------------------------------------------------
-  Decode to UCS-2 (or validates as UTF-8 that can be decoded to UCS-2)
+   Decode the multibyte UTF-8 sequence which is 'length' bytes at
+   'utf8_seq'.  On failure, fault *envP; *wcP is then meaningless.
+
+   Return the character in UTF-16 format as *wcP.
+-----------------------------------------------------------------------------*/
+    wchar_t wc;
+
+    assert(utf8_seq[0] & 0x80); /* High bit set: this is multibyte seq */
+
+    switch (length) {
+    case 2:
+        /* 110xxxxx 10xxxxxx */
+        validateContinuation(envP, utf8_seq[1]);
+
+        if (!envP->fault_occurred)
+            wc = ((((wchar_t) (utf8_seq[0] & 0x1F)) <<  6) |
+                  (((wchar_t) (utf8_seq[1] & 0x3F))));
+        break;
+
+    case 3:
+        /* 1110xxxx 10xxxxxx 10xxxxxx */
+        validateContinuation(envP, utf8_seq[1]);
+        if (!envP->fault_occurred) {
+            validateContinuation(envP, utf8_seq[2]);
+            if (!envP->fault_occurred)
+                wc = ((((wchar_t) (utf8_seq[0] & 0x0F)) << 12) |
+                      (((wchar_t) (utf8_seq[1] & 0x3F)) <<  6) |
+                      (((wchar_t) (utf8_seq[2] & 0x3F))));
+        }
+        break;
+
+    case 4:
+        /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
+    case 5:
+        /* 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx */
+    case 6:
+        /* 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx */
+        /* This would require more than 16 bits in UTF-16, so
+           it can't be represented in UCS-2, so it's beyond
+           our capability.  Characters in the BMP fit in 16
+           bits.
+        */
+        xmlrpc_env_set_fault_formatted(
+            envP, XMLRPC_INVALID_UTF8_ERROR,
+            "UTF-8 string contains a character not in the "
+            "Basic Multilingual Plane (first byte 0x%02x)",
+            (unsigned char)utf8_seq[0]);  /* cast avoids sign extension */
+        break;
+
+    default:
+        xmlrpc_faultf(envP,
+                      "Internal error: Impossible UTF-8 sequence length %u",
+                      (unsigned)length);
+    }
+
+    if (!envP->fault_occurred)
+        validateUtf16(envP, wc);
+
+    if (!envP->fault_occurred)
+        if ((uint32_t)wc < utf8_min_char_for_length[length])
+            xmlrpc_env_set_fault_formatted(
+                envP, XMLRPC_INVALID_UTF8_ERROR,
+                "Overlong UTF-8 sequence not allowed");
+
+    *wcP = wc;
+}
+
+#pragma runtime_checks("u", restore)
+
+
+
+static void 
+decodeUtf8(xmlrpc_env * const envP,
+           const char * const utf8_data,
+           size_t       const utf8_len,
+           wchar_t *    const ioBuff,
+           size_t *     const outBuffLenP) {
+/*----------------------------------------------------------------------------
+  Decode to UCS-2 (or validate as UTF-8 that can be decoded to UCS-2)
  a UTF-8 string.  To validate, set ioBuff and outBuffLenP to NULL.
  To decode, allocate a sufficiently large buffer, pass it as ioBuff,
  and pass a pointer as as outBuffLenP.  The data will be written to
@@ -134,132 +252,60 @@ decode_utf8(xmlrpc_env * const envP,
  We assume that wchar_t holds a single UCS-2 character in native-endian
  byte ordering.
 -----------------------------------------------------------------------------*/
-    size_t i, length, out_pos;
-    char init, con1, con2;
-    wchar_t wc;
+    size_t utf8Cursor;
+    size_t outPos;

    XMLRPC_ASSERT_ENV_OK(envP);
    XMLRPC_ASSERT_PTR_OK(utf8_data);
-    XMLRPC_ASSERT((!ioBuff && !outBuffLenP) ||
-                  (ioBuff && outBuffLenP));
+    XMLRPC_ASSERT((!ioBuff && !outBuffLenP) || (ioBuff && outBuffLenP));

-    /* Suppress GCC warning about possibly undefined variable. */
-    wc = 0;
+    for (utf8Cursor = 0, outPos = 0;
+         utf8Cursor < utf8_len && !envP->fault_occurred;
+        ) {
+
+        char const init = utf8_data[utf8Cursor];
+            /* Initial byte of the UTF-8 sequence */
+
+        wchar_t wc;

-    i = 0;
-    out_pos = 0;
-    while (i < utf8_len) {
-        init = utf8_data[i];
        if ((init & 0x80) == 0x00) {
            /* Convert ASCII character to wide character. */
            wc = init;
-            i++;
+            ++utf8Cursor;
        } else {
            /* Look up the length of this UTF-8 sequence. */
-            length = utf8_seq_length[(unsigned char) init];
-            
-            /* Check to make sure we have enough bytes to convert. */
-            if (i + length > utf8_len)
-                XMLRPC_FAIL(envP, XMLRPC_INVALID_UTF8_ERROR,
-                            "Truncated UTF-8 sequence");
-            
-            /* Decode a multibyte UTF-8 sequence. */
-            switch (length) {
-            case 0:
-                XMLRPC_FAIL(envP, XMLRPC_INVALID_UTF8_ERROR,
-                            "Invalid UTF-8 initial byte");
-                
-            case 2:
-                /* 110xxxxx 10xxxxxx */
-                con1 = utf8_data[i+1];
-                if (!IS_CONTINUATION(con1))
-                    XMLRPC_FAIL(envP, XMLRPC_INVALID_UTF8_ERROR,
-                                "UTF-8 sequence too short");
-                wc = ((((wchar_t) (init & 0x1F)) <<  6) |
-                      (((wchar_t) (con1 & 0x3F))));
-                break;
-                
-            case 3:
-                /* 1110xxxx 10xxxxxx 10xxxxxx */
-                con1 = utf8_data[i+1];
-                con2 = utf8_data[i+2];
-                if (!IS_CONTINUATION(con1) || !IS_CONTINUATION(con2))
-                    XMLRPC_FAIL(envP, XMLRPC_INVALID_UTF8_ERROR,
-                                "UTF-8 sequence too short");
-                wc = ((((wchar_t) (init & 0x0F)) << 12) |
-                      (((wchar_t) (con1 & 0x3F)) <<  6) |
-                      (((wchar_t) (con2 & 0x3F))));
-                break;
-                
-            case 4:
-                /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
-            case 5:
-                /* 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx */
-            case 6:
-                /* 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx */
-                /* This would require more than 16 bits in UTF-16, so
-                   it can't be represented in UCS-2, so it's beyond
-                   our capability.  Characters in the BMP fit in 16
-                   bits.
-                */
+            size_t const length = utf8SeqLength[(unsigned char) init];
+
+            if (length == 0)
                xmlrpc_env_set_fault_formatted(
                    envP, XMLRPC_INVALID_UTF8_ERROR,
-                    "UTF-8 string contains a character not in the "
-                    "Basic Multilingual Plane (first byte %08x)",
-                    init);
-                goto cleanup;
-                
-            default:
-                XMLRPC_ASSERT("Error in UTF-8 decoder tables");
+                    "Unrecognized UTF-8 initial byte value 0x%02x", init);
+            else {
+                /* Make sure we have enough bytes to convert. */
+                if (utf8Cursor + length > utf8_len) {
+                    xmlrpc_env_set_fault_formatted(
+                        envP, XMLRPC_INVALID_UTF8_ERROR,
+                        "Invalid UTF-8 sequence indicates a %u-byte sequence "
+                        "when only %u bytes are left in the string",
+                        (unsigned)length, (unsigned)(utf8_len - utf8Cursor));
+                } else {
+                    decodeMultibyte(envP, &utf8_data[utf8Cursor], length, &wc);
+                    
+                    /* Advance to the end of the sequence. */
+                    utf8Cursor += length;
+                }
            }
-            
-            /* Advance to the end of the sequence. */
-            i += length;
-            
-            /* Check for illegal UCS-2 characters. */
-            if (wc > UCS2_MAX_LEGAL_CHARACTER)
-                XMLRPC_FAIL(envP, XMLRPC_INVALID_UTF8_ERROR,
-                            "UCS-2 characters > U+FFFD are illegal");
-            
-            /* Check for UTF-16 surrogates. */
-            if (UTF16_FIRST_SURROGATE <= wc && wc <= UTF16_LAST_SURROGATE)
-                XMLRPC_FAIL(envP, XMLRPC_INVALID_UTF8_ERROR,
-                            "UTF-16 surrogates may not appear in UTF-8 data");
-            
-            /* Check for overlong sequences. */
-            if ((uint32_t)wc < utf8_min_char_for_length[length])
-                XMLRPC_FAIL(envP, XMLRPC_INVALID_UTF8_ERROR,
-                            "Overlong UTF-8 sequence not allowed");
        }
-        
-        /* If we have a buffer, write our character to it. */
-        if (ioBuff) {
-            ioBuff[out_pos++] = wc;
+
+        if (!envP->fault_occurred) {
+            /* If we have a buffer, write our character to it. */
+            if (ioBuff)
+                ioBuff[outPos++] = wc;
        }
    }
-    
-    /* Record the number of characters we found. */
+
    if (outBuffLenP)
-        *outBuffLenP = out_pos;
-    
-            cleanup:
-    if (envP->fault_occurred) {
-        if (outBuffLenP)
-            *outBuffLenP = 0;
-    }
-}
-
-
-
-void 
-xmlrpc_validate_utf8(xmlrpc_env * const env,
-                     const char * const utf8_data,
-                     size_t       const utf8_len) {
-/*----------------------------------------------------------------------------
-   Validate that a string is valid UTF-8.
-----------------------------------------------------------------------------*/
-
-    decode_utf8(env, utf8_data, utf8_len, NULL, NULL);
+        *outBuffLenP = envP->fault_occurred ? 0 : outPos;
 }


@@ -286,9 +332,9 @@ xmlrpc_utf8_to_wcs(xmlrpc_env * const envP,
    wcsP = XMLRPC_MEMBLOCK_NEW(wchar_t, envP, utf8_len);
    if (!envP->fault_occurred) {
        /* Decode the UTF-8 data. */
-        decode_utf8(envP, utf8_data, utf8_len,
-                    XMLRPC_MEMBLOCK_CONTENTS(wchar_t, wcsP),
-                    &wcs_length);
+        decodeUtf8(envP, utf8_data, utf8_len,
+                   XMLRPC_MEMBLOCK_CONTENTS(wchar_t, wcsP),
+                   &wcs_length);
        if (!envP->fault_occurred) {
            /* We can't have overrun our buffer. */
            XMLRPC_ASSERT(wcs_length <= utf8_len);
@@ -329,7 +375,8 @@ xmlrpc_wcs_to_utf8(xmlrpc_env *    const envP,

    utf8P = XMLRPC_MEMBLOCK_NEW(char, envP, estimate);
    if (!envP->fault_occurred) {
-        unsigned char * const buffer = XMLRPC_MEMBLOCK_CONTENTS(char, utf8P);
+        unsigned char * const buffer =
+            XMLRPC_MEMBLOCK_CONTENTS(unsigned char, utf8P);
        size_t bytesUsed;
        size_t i;

@@ -401,13 +448,12 @@ xmlrpc_force_to_utf8(char * const buffer) {
    char * p;

    for (p = &buffer[0]; *p;) {
-        uint const length = utf8_seq_length[(unsigned char) *p];
+        unsigned int const length = utf8SeqLength[(unsigned char) *p];

        bool forceDel;
        uint32_t decoded;

-        forceDel = false;
-        decoded  = 0;  /* suppress compiler warning; valid when !forceDel */
+        forceDel = false;  /* initial value */

        switch (length) {
        case 1:
@@ -482,7 +528,7 @@ xmlrpc_force_to_xml_chars(char * const buffer) {
    char * p;

    for (p = &buffer[0]; *p;) {
-        uint const length = utf8_seq_length[(unsigned char) *p];
+        unsigned int const length = utf8SeqLength[(unsigned char) *p];

        if (length == 1) {
            if (*p < 0x20 && *p != '\r' && *p != '\n' && *p != '\t')
@@ -505,7 +551,31 @@ xmlrpc_force_to_xml_chars(char * const buffer) {



+void 
+xmlrpc_validate_utf8(xmlrpc_env * const envP,
+                     const char * const utf8_data,
+                     size_t       const utf8_len) {
+/*----------------------------------------------------------------------------
+   Validate that a string is valid UTF-8.
+-----------------------------------------------------------------------------*/
+    xmlrpc_env env;

+    xmlrpc_env_init(&env);

+#if HAVE_UNICODE_WCHAR
+    decodeUtf8(&env, utf8_data, utf8_len, NULL, NULL);
+#else
+    /* We don't have a convenient way to validate, so we just fake it and
+       call it valid.
+    */
+#endif

-
+    if (env.fault_occurred) {
+        xmlrpc_env_set_fault_formatted(
+            envP, XMLRPC_INVALID_UTF8_ERROR,
+            "%" XMLRPC_PRId64 "-byte "
+            "supposed UTF-8 string is not valid UTF-8.  %s",
+            (XMLRPC_INT64)utf8_len, env.fault_string);
+    }
+    xmlrpc_env_clean(&env);
+}