FS-2746 --resolve large xmlrpc update thanks garmt

This commit is contained in:
Jeff Lenk
2012-10-13 11:37:25 -05:00
parent 37ecad9903
commit 6b6c83a718
397 changed files with 41822 additions and 33841 deletions

View File

@@ -1,7 +1,9 @@
###############################################################################
# This directory builds libxmlrpc_util, which contains utility
# functions that are used by the Xmlprc-c # libraries, and also
# directly by Xmlrpc-c programs.
# functions that are used by the Xmlrpc-c libraries, and also
# directly by Xmlrpc-c programs. Some of them are documented for use
# by Xmlrpc-c users, as facilities of the libxmlrpc library (which
# prerequires libxmlrpc_util).
#
# The functions in this library are characterized by being general purpose
# programming functions, such as one might wish were in the standard C
@@ -29,11 +31,13 @@ SHARED_LIBS_TO_INSTALL := libxmlrpc_util
TARGET_MODS = \
asprintf \
base64 \
error \
make_printable \
memblock \
select \
sleep \
string_number \
time \
utf8 \
@@ -43,8 +47,6 @@ MAJ=3
include $(SRCDIR)/common.mk
CFLAGS = $(CFLAGS_COMMON) $(CFLAGS_PERSONAL) $(CADD)
INCLUDES = -I$(BLDDIR) -Isrcdir \
-I$(BLDDIR)/include -Isrcdir/include -Isrcdir/lib/util/include
@@ -53,17 +55,13 @@ UTIL_SHLIB = $(call shlibfn,libxmlrpc_util)
UTIL_SHLIBLE = $(call shliblefn,libxmlrpc_util)
#UTIL_SHLIBLE is e.g. libxmlrpc_util.so
ifneq ($(SHARED_LIB_TYPE),NONE)
TARGET_SHARED_LIBS := $(UTIL_SHLIB) $(UTIL_SHLIBLE)
endif
# This 'common.mk' dependency makes sure the symlinks get built before
# this make file is used for anything.
$(SRCDIR)/common.mk: srcdir blddir
.PHONY: all
all: libxmlrpc_util.a $(TARGET_SHARED_LIBS) $(TARGET_SHARED_LE_LIBS)
all: libxmlrpc_util.a $(TARGET_SHARED_LIBRARIES) $(TARGET_SHARED_LE_LIBS)
# Rule for this is in common.mk, courtesy of TARGET_LIBRARY_NAMES:
$(UTIL_SHLIB): $(TARGET_MODS:%=%.osh)
@@ -92,4 +90,4 @@ distclean: clean distclean-common
.PHONY: dep
dep: dep-common
include Makefile.depend
include depend.mk

View File

@@ -1,10 +1,61 @@
//#define _GNU_SOURCE
#define _XOPEN_SOURCE 600 /* Make sure strdup() is in <string.h> */
#ifndef _GNU_SOURCE
#define _GNU_SOURCE /* But only when HAVE_ASPRINTF */
#endif
#include <stdarg.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <limits.h>
#include "xmlrpc_config.h" /* For HAVE_ASPRINTF, __inline__ */
#include "xmlrpc-c/string_int.h"
#include "bool.h"
static __inline__ void
newVsnprintf(char *       const buffer,
             size_t       const bufferSize,
             const char * const fmt,
             va_list            varargs,
             size_t *     const formattedSizeP) {
/*----------------------------------------------------------------------------
   This is vsnprintf() with the new behavior, where not fitting in the buffer
   is not a failure.

   Format 'fmt' with arguments 'varargs' into the 'bufferSize'-byte buffer
   'buffer' and return as *formattedSizeP the size XMLRPC_VSNPRINTF reports
   for the formatted string.

   Unfortunately, we can't practically return the size of the formatted string
   if the C library has old vsnprintf() and the formatted string doesn't fit
   in the buffer, so in that case we just return something larger than the
   buffer (twice the buffer size).

   If 'bufferSize' is greater than INT_MAX/2, we format nothing: we leave an
   empty string in 'buffer' and return zero as *formattedSizeP.

   NOTE(review): standard vsnprintf() returns the string length excluding
   the terminating NUL, so a return value equal to 'bufferSize' means the
   output was truncated by one character.  Confirm callers account for that.
-----------------------------------------------------------------------------*/
    if (bufferSize > INT_MAX/2) {
        /* There's a danger we won't be able to hold the return value of
           XMLRPC_VSNPRINTF in an int, or return double the buffer size.

           We report a size of zero, which any caller will take to mean the
           result fits, so make sure the buffer really holds a (zero-length)
           string instead of whatever happened to be in that memory.
        */
        buffer[0] = '\0';
        *formattedSizeP = 0;
    } else {
        int const rc = XMLRPC_VSNPRINTF(buffer, bufferSize, fmt, varargs);

        if (rc < 0) {
            /* We have old vsnprintf() (or Windows) and the formatted value
               doesn't fit in the buffer, but we don't know how big a buffer it
               needs.
            */
            *formattedSizeP = bufferSize * 2;
        } else {
            /* Either the string fits in the buffer or we have new vsnprintf()
               which tells us how big the string is regardless.
            */
            *formattedSizeP = (size_t)rc;
        }
    }
}
@@ -15,29 +66,24 @@ simpleVasprintf(char ** const retvalP,
/*----------------------------------------------------------------------------
This is a poor man's implementation of vasprintf(), of GNU fame.
-----------------------------------------------------------------------------*/
size_t const initialSize = 4096;
char * result;
size_t bufferSize;
bool outOfMemory;
result = malloc(initialSize);
if (result != NULL) {
size_t bytesNeeded;
bytesNeeded = XMLRPC_VSNPRINTF(result, initialSize, fmt, varargs);
if (bytesNeeded > initialSize) {
free(result);
result = malloc(bytesNeeded);
if (result != NULL)
XMLRPC_VSNPRINTF(result, bytesNeeded, fmt, varargs);
} else if (bytesNeeded == initialSize) {
if (result[initialSize-1] != '\0') {
/* This is one of those old systems where vsnprintf()
returns the number of bytes it used, instead of the
number that it needed, and it in fact needed more than
we gave it. Rather than mess with this highly unlikely
case (old system and string > 4095 characters), we just
treat this like an out of memory failure.
*/
for (result = NULL, bufferSize = 4096, outOfMemory = false;
!result && !outOfMemory;
) {
result = malloc(bufferSize);
if (!result)
outOfMemory = true;
else {
size_t bytesNeeded;
newVsnprintf(result, bufferSize, fmt, varargs, &bytesNeeded);
if (bytesNeeded > bufferSize) {
free(result);
result = NULL;
bufferSize = bytesNeeded;
}
}
}
@@ -46,7 +92,28 @@ simpleVasprintf(char ** const retvalP,
const char * const xmlrpc_strsol = "[insufficient memory to build string]";
static const char * const xmlrpc_strsol =
"[insufficient memory to build string]";
bool
xmlrpc_strnomem(const char * const string) {
/*----------------------------------------------------------------------------
   Return true iff 'string' is the very pointer xmlrpc_strsol, i.e. the
   placeholder a string function in this file hands back when it could not
   get enough memory to build the string it was supposed to build.

   This is a pointer identity test, not a comparison of string contents.
-----------------------------------------------------------------------------*/
    bool const isPlaceholder = (string == xmlrpc_strsol);

    return isPlaceholder;
}
const char *
xmlrpc_strnomemval(void) {
/*----------------------------------------------------------------------------
   Return the placeholder string (xmlrpc_strsol) that stands for "could not
   get memory to build the string".

   The parameter list is spelled '(void)' because in C an empty '()' does
   not declare a prototype, so calls with stray arguments would go unchecked.
-----------------------------------------------------------------------------*/
    return xmlrpc_strsol;
}
@@ -71,7 +138,7 @@ xmlrpc_vasprintf(const char ** const retvalP,
void GNU_PRINTF_ATTR(2,3)
void XMLRPC_PRINTF_ATTR(2,3)
xmlrpc_asprintf(const char ** const retvalP, const char * const fmt, ...) {
va_list varargs; /* mysterious structure used by variable arg facility */
@@ -85,6 +152,27 @@ xmlrpc_asprintf(const char ** const retvalP, const char * const fmt, ...) {
const char *
xmlrpc_strdupsol(const char * const string) {
/*----------------------------------------------------------------------------
   Return a strdup() copy of 'string', or the placeholder xmlrpc_strsol if
   strdup() returns NULL.  Either way the return value is non-null.
-----------------------------------------------------------------------------*/
    const char * const copy = strdup(string);

    if (copy == NULL)
        return xmlrpc_strsol;

    return copy;
}
void
xmlrpc_strfree(const char * const string) {
/*----------------------------------------------------------------------------
   Free 'string' with free(), unless it is the placeholder xmlrpc_strsol,
   which must not be freed; in that case do nothing.
-----------------------------------------------------------------------------*/
    if (string == xmlrpc_strsol)
        return;

    free((void *)string);
}
const char *
xmlrpc_strdupnull(const char * const string) {
@@ -96,15 +184,6 @@ xmlrpc_strdupnull(const char * const string) {
void
xmlrpc_strfree(const char * const string) {
/* Free 'string' with free(), unless it is the placeholder xmlrpc_strsol,
   in which case do nothing.
*/
if (string != xmlrpc_strsol)
free((void *)string);
}
void
xmlrpc_strfreenull(const char * const string) {

View File

@@ -0,0 +1,49 @@
#include <string.h>
#include "int.h"
#include "xmlrpc-c/base64_int.h"
void
xmlrpc_base64Encode(const char * const chars,
                    char *       const base64) {
/*----------------------------------------------------------------------------
   Encode the NUL-terminated string 'chars' in base64 and store the result,
   as a NUL-terminated string, at 'base64'.

   Every 3 input bytes (the final group may be 1 or 2 bytes) produce 4
   output characters, with '=' padding the final group, so the caller must
   supply room for 4 * ((strlen(chars) + 2) / 3) + 1 bytes.

   Input bytes are handled as unsigned values, so bytes >= 0x80 encode
   correctly regardless of the signedness of 'char', and we never read
   beyond the terminating NUL of 'chars'.
-----------------------------------------------------------------------------*/
    /* Conversion table. */
    static const char tbl[64] = {
        'A','B','C','D','E','F','G','H',
        'I','J','K','L','M','N','O','P',
        'Q','R','S','T','U','V','W','X',
        'Y','Z','a','b','c','d','e','f',
        'g','h','i','j','k','l','m','n',
        'o','p','q','r','s','t','u','v',
        'w','x','y','z','0','1','2','3',
        '4','5','6','7','8','9','+','/'
    };

    const unsigned char * const s = (const unsigned char *)chars;
    size_t const length = strlen(chars);

    size_t i;
    char * p;

    p = &base64[0];  /* initial value */

    /* Transform the 3x8 bits to 4x6 bits, as required by base64. */
    for (i = 0; i < length; i += 3) {
        size_t const remaining = length - i;
            /* Number of input bytes not yet encoded; at least 1 */

        /* Bytes missing from a short final group count as zero bits */
        unsigned int const b0 = s[i];
        unsigned int const b1 = remaining > 1 ? s[i + 1] : 0;
        unsigned int const b2 = remaining > 2 ? s[i + 2] : 0;

        *p++ = tbl[b0 >> 2];
        *p++ = tbl[((b0 & 0x03) << 4) | (b1 >> 4)];
        /* Positions that carry no input bits are padding */
        *p++ = remaining > 1 ? tbl[((b1 & 0x0f) << 2) | (b2 >> 6)] : '=';
        *p++ = remaining > 2 ? tbl[b2 & 0x3f] : '=';
    }

    /* Zero-terminate the result. */
    *p = '\0';
}

View File

@@ -1,5 +1,7 @@
/* Copyright information is at end of file */
#define _XOPEN_SOURCE 600 /* Make sure strdup() is in <string.h> */
#include "xmlrpc_config.h"
#include <stdlib.h>
@@ -86,7 +88,7 @@ void
xmlrpc_set_fault_formatted_v(xmlrpc_env * const envP,
int const code,
const char * const format,
va_list const args) {
va_list args) {
const char * faultDescription;

View File

@@ -1,4 +1,4 @@
//#define _GNU_SOURCE
#define _XOPEN_SOURCE 600 /* Make sure strdup() is in <string.h> */
#include <stdarg.h>
#include <string.h>

View File

@@ -6,6 +6,7 @@
#include <string.h>
#include <ctype.h>
#include "mallocvar.h"
#include "xmlrpc-c/util_int.h"
#include "xmlrpc-c/util.h"
@@ -19,30 +20,30 @@
xmlrpc_mem_block *
xmlrpc_mem_block_new(xmlrpc_env * const env,
xmlrpc_mem_block_new(xmlrpc_env * const envP,
size_t const size) {
xmlrpc_mem_block* block;
xmlrpc_mem_block * block;
XMLRPC_ASSERT_ENV_OK(env);
XMLRPC_ASSERT_ENV_OK(envP);
block = (xmlrpc_mem_block*) malloc(sizeof(xmlrpc_mem_block));
XMLRPC_FAIL_IF_NULL(block, env, XMLRPC_INTERNAL_ERROR,
"Can't allocate memory block");
MALLOCVAR(block);
if (block == NULL)
xmlrpc_faultf(envP, "Can't allocate memory block");
else {
xmlrpc_mem_block_init(envP, block, size);
xmlrpc_mem_block_init(env, block, size);
XMLRPC_FAIL_IF_FAULT(env);
cleanup:
if (env->fault_occurred) {
if (block)
if (envP->fault_occurred) {
free(block);
return NULL;
} else {
return block;
block = NULL;
}
}
return block;
}
/* Destroy an existing xmlrpc_mem_block, and everything it contains. */
void
xmlrpc_mem_block_free(xmlrpc_mem_block * const blockP) {
@@ -74,7 +75,7 @@ xmlrpc_mem_block_init(xmlrpc_env * const envP,
blockP->_block = (void*) malloc(blockP->_allocated);
if (!blockP->_block)
xmlrpc_faultf(envP, "Can't allocate %u-byte memory block",
blockP->_allocated);
(unsigned)blockP->_allocated);
}
@@ -170,19 +171,15 @@ xmlrpc_mem_block_append(xmlrpc_env * const envP,
const void * const data,
size_t const len) {
int size;
size_t const originalSize = blockP->_size;
XMLRPC_ASSERT_ENV_OK(envP);
XMLRPC_ASSERT(blockP != NULL);
size = blockP->_size;
xmlrpc_mem_block_resize(envP, blockP, size + len);
XMLRPC_FAIL_IF_FAULT(envP);
memcpy(((unsigned char*) blockP->_block) + size, data, len);
cleanup:
return;
xmlrpc_mem_block_resize(envP, blockP, originalSize + len);
if (!envP->fault_occurred) {
memcpy(((unsigned char*) blockP->_block) + originalSize, data, len);
}
}

View File

@@ -1,5 +1,7 @@
#define _XOPEN_SOURCE 600 /* Get pselect() in <sys/select.h> */
#include "xmlrpc_config.h"
#ifdef WIN32
#include <winsock.h>
#else
@@ -8,12 +10,12 @@
in this order appears to work on all.
*/
#include <sys/time.h>
#if HAVE_SYS_SELECT_H
#include <sys/select.h>
#endif
#endif
#include <signal.h>
#include "xmlrpc_config.h"
#include "xmlrpc-c/select_int.h"

View File

@@ -0,0 +1,46 @@
/*============================================================================
string_number
==============================================================================
This file contains utilities for dealing with text string representation
of numbers.
============================================================================*/
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <xmlrpc-c/base.h>
#include <xmlrpc-c/util.h>
#include <xmlrpc-c/string_int.h>
#include "xmlrpc_config.h"
#include "int.h"
#include <xmlrpc-c/string_number.h>
void
xmlrpc_parse_int64(xmlrpc_env *   const envP,
                   const char *   const str,
                   xmlrpc_int64 * const i64P) {
/*----------------------------------------------------------------------------
   Parse the NUL-terminated decimal text 'str' as a 64 bit signed integer
   and return it as *i64P.

   Fail (fault *envP and leave *i64P untouched) if the number is out of
   range, if the conversion function reports any other error, if anything
   follows the number, or if 'str' contains no number at all (e.g. it is the
   empty string).

   This assumes XMLRPC_STRTOLL has the semantics of strtoll() -- TODO
   confirm.
-----------------------------------------------------------------------------*/
    xmlrpc_int64 i64val;

    char * tail;
        /* Where the conversion stopped consuming 'str' */

    errno = 0;  /* so that we can tell whether the conversion sets it */
    i64val = XMLRPC_STRTOLL(str, &tail, 10);

    if (errno == ERANGE)
        xmlrpc_faultf(envP, "Number cannot be represented in 64 bits. "
                      "Must be in the range "
                      "[%" XMLRPC_PRId64 " - %" XMLRPC_PRId64 "]",
                      XMLRPC_INT64_MIN, XMLRPC_INT64_MAX);
    else if (errno != 0)
        xmlrpc_faultf(envP, "unexpected error: "
                      "strtoll() failed with errno %d (%s)",
                      errno, strerror(errno));
    else if (tail[0] != '\0')
        xmlrpc_faultf(envP, "contains non-numerical junk: '%s'", tail);
    else if (tail == str)
        /* Nothing was converted and nothing is left over: 'str' is empty.
           Without this check, we would report the empty string as zero.
        */
        xmlrpc_faultf(envP, "contains no digits");
    else
        *i64P = i64val;
}

View File

@@ -38,6 +38,7 @@
** http://www.cl.cam.ac.uk/~mgk25/unicode.html
*/
#include <assert.h>
#include "int.h"
#include "xmlrpc_config.h"
@@ -51,31 +52,33 @@
** UTF-8 data.
*/
/* The number of bytes in a UTF-8 sequence starting with the character used
** as the array index. A zero entry indicates an illegal initial byte.
** This table was generated using a Perl script and information from the
** UTF-8 standard.
**
** Fredrik Lundh's UTF-8 decoder Python 2.0 uses a similar table. But
** since Python 2.0 has the icky CNRI license, I regenerated this
** table from scratch and wrote my own decoder. */
static unsigned char utf8_seq_length[256] = {
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 0, 0
static const unsigned char utf8SeqLength[256] = {
/* utf8SeqLength[B] is the number of bytes in a UTF-8 sequence that starts
   with byte B, except that zero means B is not a legal initial byte.

   The table is only ever read, hence 'const'.

   Fredrik Lundh's UTF-8 decoder Python 2.0 uses a similar table.  But since
   Python 2.0 has the icky CNRI license, I generated this table from scratch
   and wrote my own decoder.
*/
        /* 0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F */
/* 0 */    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
/* 1 */    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
/* 2 */    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
/* 3 */    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
/* 4 */    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
/* 5 */    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
/* 6 */    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
/* 7 */    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
/* 8 */    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/* 9 */    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/* A */    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/* B */    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/* C */    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
/* D */    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
/* E */    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
/* F */    4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 0, 0
};
/* The minimum legal character value for a UTF-8 sequence of the given
@@ -118,14 +121,129 @@ static uint32_t const utf8_min_char_for_length[] = {
#if HAVE_UNICODE_WCHAR
static void
decode_utf8(xmlrpc_env * const envP,
const char * const utf8_data,
size_t const utf8_len,
wchar_t * const ioBuff,
size_t * const outBuffLenP) {
static void
validateContinuation(xmlrpc_env * const envP,
                     char         const c) {
/*----------------------------------------------------------------------------
   Fail (fault *envP with XMLRPC_INVALID_UTF8_ERROR) if 'c' does not pass
   the IS_CONTINUATION test.  Otherwise, leave *envP alone.
-----------------------------------------------------------------------------*/
    if (!IS_CONTINUATION(c))
        xmlrpc_env_set_fault_formatted(
            envP, XMLRPC_INVALID_UTF8_ERROR,
            "UTF-8 multibyte sequence contains character 0x%02x, "
            "which does not indicate continuation.",
            /* Go through unsigned char so that where plain 'char' is signed,
               a byte >= 0x80 shows as two hex digits rather than
               sign-extended to e.g. 0xffffffc3.
            */
            (unsigned)(unsigned char)c);
}
static void
validateUtf16(xmlrpc_env * const envP,
              wchar_t      const wc) {
/*----------------------------------------------------------------------------
   Fail (fault *envP with XMLRPC_INVALID_UTF8_ERROR) if 'wc' is greater than
   UCS2_MAX_LEGAL_CHARACTER or lies in the range UTF16_FIRST_SURROGATE
   through UTF16_LAST_SURROGATE.  Otherwise, leave *envP alone.
-----------------------------------------------------------------------------*/
    if (wc > UCS2_MAX_LEGAL_CHARACTER) {
        xmlrpc_env_set_fault_formatted(
            envP, XMLRPC_INVALID_UTF8_ERROR,
            "UCS-2 characters > U+FFFD are illegal. String contains 0x%04x",
            (unsigned)wc);
        return;
    }

    if (wc >= UTF16_FIRST_SURROGATE && wc <= UTF16_LAST_SURROGATE) {
        xmlrpc_env_set_fault_formatted(
            envP, XMLRPC_INVALID_UTF8_ERROR,
            "UTF-16 surrogates may not appear in UTF-8 data. "
            "String contains %04x", (unsigned)wc);
    }
}
/* Microsoft Visual C in debug mode produces code that complains about
returning an undefined value from decodeMultibyte(). It's a bogus
complaint, because this function is defined to return nothing meaningful
in those cases. So we disable the check.
*/
#pragma runtime_checks("u", off)
static void
decodeMultibyte(xmlrpc_env * const envP,
                const char * const utf8_seq,
                size_t       const length,
                wchar_t *    const wcP) {
/*----------------------------------------------------------------------------
   Decode to UCS-2 (or validate as UTF-8 that can be decoded to UCS-2)
   the multibyte UTF-8 sequence which is 'length' bytes at 'utf8_seq'.

   Return the character in UTF-16 format as *wcP.

   Fail (fault *envP) if a continuation byte is invalid, if the sequence is
   4 to 6 bytes long (a character we can't represent in UCS-2), if the
   decoded character does not pass validateUtf16(), or if the sequence is
   overlong.  When we fail, *wcP is not meaningful, but it is always a
   defined value: zero if we never got as far as decoding the character.
-----------------------------------------------------------------------------*/
    wchar_t wc;

    wc = 0;
        /* So that the unconditional store to *wcP at the end never reads an
           indeterminate value on the paths that fail before decoding.
        */

    assert(utf8_seq[0] & 0x80); /* High bit set: this is multibyte seq */

    switch (length) {
    case 2:
        /* 110xxxxx 10xxxxxx */
        validateContinuation(envP, utf8_seq[1]);

        if (!envP->fault_occurred)
            wc = ((((wchar_t) (utf8_seq[0] & 0x1F)) <<  6) |
                  (((wchar_t) (utf8_seq[1] & 0x3F))));
        break;

    case 3:
        /* 1110xxxx 10xxxxxx 10xxxxxx */
        validateContinuation(envP, utf8_seq[1]);
        if (!envP->fault_occurred) {
            validateContinuation(envP, utf8_seq[2]);
            if (!envP->fault_occurred)
                wc = ((((wchar_t) (utf8_seq[0] & 0x0F)) << 12) |
                      (((wchar_t) (utf8_seq[1] & 0x3F)) <<  6) |
                      (((wchar_t) (utf8_seq[2] & 0x3F))));
        }
        break;

    case 4:
        /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
    case 5:
        /* 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx */
    case 6:
        /* 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx */
        /* This would require more than 16 bits in UTF-16, so
           it can't be represented in UCS-2, so it's beyond
           our capability.  Characters in the BMP fit in 16
           bits.
        */
        xmlrpc_env_set_fault_formatted(
            envP, XMLRPC_INVALID_UTF8_ERROR,
            "UTF-8 string contains a character not in the "
            "Basic Multilingual Plane (first byte 0x%02x)",
            /* Via unsigned char so the byte isn't printed sign-extended
               where plain 'char' is signed
            */
            (unsigned)(unsigned char)utf8_seq[0]);
        break;

    default:
        xmlrpc_faultf(envP,
                      "Internal error: Impossible UTF-8 sequence length %u",
                      (unsigned)length);
    }

    if (!envP->fault_occurred)
        validateUtf16(envP, wc);

    if (!envP->fault_occurred)
        if ((uint32_t)wc < utf8_min_char_for_length[length])
            xmlrpc_env_set_fault_formatted(
                envP, XMLRPC_INVALID_UTF8_ERROR,
                "Overlong UTF-8 sequence not allowed");

    *wcP = wc;
}
#pragma runtime_checks("u", restore)
static void
decodeUtf8(xmlrpc_env * const envP,
const char * const utf8_data,
size_t const utf8_len,
wchar_t * const ioBuff,
size_t * const outBuffLenP) {
/*----------------------------------------------------------------------------
Decode to UCS-2 (or validate as UTF-8 that can be decoded to UCS-2)
a UTF-8 string. To validate, set ioBuff and outBuffLenP to NULL.
To decode, allocate a sufficiently large buffer, pass it as ioBuff,
and pass a pointer as outBuffLenP. The data will be written to
@@ -134,132 +252,60 @@ decode_utf8(xmlrpc_env * const envP,
We assume that wchar_t holds a single UCS-2 character in native-endian
byte ordering.
-----------------------------------------------------------------------------*/
size_t i, length, out_pos;
char init, con1, con2;
wchar_t wc;
size_t utf8Cursor;
size_t outPos;
XMLRPC_ASSERT_ENV_OK(envP);
XMLRPC_ASSERT_PTR_OK(utf8_data);
XMLRPC_ASSERT((!ioBuff && !outBuffLenP) ||
(ioBuff && outBuffLenP));
XMLRPC_ASSERT((!ioBuff && !outBuffLenP) || (ioBuff && outBuffLenP));
/* Suppress GCC warning about possibly undefined variable. */
wc = 0;
for (utf8Cursor = 0, outPos = 0;
utf8Cursor < utf8_len && !envP->fault_occurred;
) {
char const init = utf8_data[utf8Cursor];
/* Initial byte of the UTF-8 sequence */
wchar_t wc;
i = 0;
out_pos = 0;
while (i < utf8_len) {
init = utf8_data[i];
if ((init & 0x80) == 0x00) {
/* Convert ASCII character to wide character. */
wc = init;
i++;
++utf8Cursor;
} else {
/* Look up the length of this UTF-8 sequence. */
length = utf8_seq_length[(unsigned char) init];
/* Check to make sure we have enough bytes to convert. */
if (i + length > utf8_len)
XMLRPC_FAIL(envP, XMLRPC_INVALID_UTF8_ERROR,
"Truncated UTF-8 sequence");
/* Decode a multibyte UTF-8 sequence. */
switch (length) {
case 0:
XMLRPC_FAIL(envP, XMLRPC_INVALID_UTF8_ERROR,
"Invalid UTF-8 initial byte");
case 2:
/* 110xxxxx 10xxxxxx */
con1 = utf8_data[i+1];
if (!IS_CONTINUATION(con1))
XMLRPC_FAIL(envP, XMLRPC_INVALID_UTF8_ERROR,
"UTF-8 sequence too short");
wc = ((((wchar_t) (init & 0x1F)) << 6) |
(((wchar_t) (con1 & 0x3F))));
break;
case 3:
/* 1110xxxx 10xxxxxx 10xxxxxx */
con1 = utf8_data[i+1];
con2 = utf8_data[i+2];
if (!IS_CONTINUATION(con1) || !IS_CONTINUATION(con2))
XMLRPC_FAIL(envP, XMLRPC_INVALID_UTF8_ERROR,
"UTF-8 sequence too short");
wc = ((((wchar_t) (init & 0x0F)) << 12) |
(((wchar_t) (con1 & 0x3F)) << 6) |
(((wchar_t) (con2 & 0x3F))));
break;
case 4:
/* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
case 5:
/* 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx */
case 6:
/* 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx */
/* This would require more than 16 bits in UTF-16, so
it can't be represented in UCS-2, so it's beyond
our capability. Characters in the BMP fit in 16
bits.
*/
size_t const length = utf8SeqLength[(unsigned char) init];
if (length == 0)
xmlrpc_env_set_fault_formatted(
envP, XMLRPC_INVALID_UTF8_ERROR,
"UTF-8 string contains a character not in the "
"Basic Multilingual Plane (first byte %08x)",
init);
goto cleanup;
default:
XMLRPC_ASSERT("Error in UTF-8 decoder tables");
"Unrecognized UTF-8 initial byte value 0x%02x", init);
else {
/* Make sure we have enough bytes to convert. */
if (utf8Cursor + length > utf8_len) {
xmlrpc_env_set_fault_formatted(
envP, XMLRPC_INVALID_UTF8_ERROR,
"Invalid UTF-8 sequence indicates a %u-byte sequence "
"when only %u bytes are left in the string",
(unsigned)length, (unsigned)(utf8_len - utf8Cursor));
} else {
decodeMultibyte(envP, &utf8_data[utf8Cursor], length, &wc);
/* Advance to the end of the sequence. */
utf8Cursor += length;
}
}
/* Advance to the end of the sequence. */
i += length;
/* Check for illegal UCS-2 characters. */
if (wc > UCS2_MAX_LEGAL_CHARACTER)
XMLRPC_FAIL(envP, XMLRPC_INVALID_UTF8_ERROR,
"UCS-2 characters > U+FFFD are illegal");
/* Check for UTF-16 surrogates. */
if (UTF16_FIRST_SURROGATE <= wc && wc <= UTF16_LAST_SURROGATE)
XMLRPC_FAIL(envP, XMLRPC_INVALID_UTF8_ERROR,
"UTF-16 surrogates may not appear in UTF-8 data");
/* Check for overlong sequences. */
if ((uint32_t)wc < utf8_min_char_for_length[length])
XMLRPC_FAIL(envP, XMLRPC_INVALID_UTF8_ERROR,
"Overlong UTF-8 sequence not allowed");
}
/* If we have a buffer, write our character to it. */
if (ioBuff) {
ioBuff[out_pos++] = wc;
if (!envP->fault_occurred) {
/* If we have a buffer, write our character to it. */
if (ioBuff)
ioBuff[outPos++] = wc;
}
}
/* Record the number of characters we found. */
if (outBuffLenP)
*outBuffLenP = out_pos;
cleanup:
if (envP->fault_occurred) {
if (outBuffLenP)
*outBuffLenP = 0;
}
}
void
xmlrpc_validate_utf8(xmlrpc_env * const env,
const char * const utf8_data,
size_t const utf8_len) {
/*----------------------------------------------------------------------------
Validate that a string is valid UTF-8.
-----------------------------------------------------------------------------*/
decode_utf8(env, utf8_data, utf8_len, NULL, NULL);
*outBuffLenP = envP->fault_occurred ? 0 : outPos;
}
@@ -286,9 +332,9 @@ xmlrpc_utf8_to_wcs(xmlrpc_env * const envP,
wcsP = XMLRPC_MEMBLOCK_NEW(wchar_t, envP, utf8_len);
if (!envP->fault_occurred) {
/* Decode the UTF-8 data. */
decode_utf8(envP, utf8_data, utf8_len,
XMLRPC_MEMBLOCK_CONTENTS(wchar_t, wcsP),
&wcs_length);
decodeUtf8(envP, utf8_data, utf8_len,
XMLRPC_MEMBLOCK_CONTENTS(wchar_t, wcsP),
&wcs_length);
if (!envP->fault_occurred) {
/* We can't have overrun our buffer. */
XMLRPC_ASSERT(wcs_length <= utf8_len);
@@ -329,7 +375,8 @@ xmlrpc_wcs_to_utf8(xmlrpc_env * const envP,
utf8P = XMLRPC_MEMBLOCK_NEW(char, envP, estimate);
if (!envP->fault_occurred) {
unsigned char * const buffer = XMLRPC_MEMBLOCK_CONTENTS(char, utf8P);
unsigned char * const buffer =
XMLRPC_MEMBLOCK_CONTENTS(unsigned char, utf8P);
size_t bytesUsed;
size_t i;
@@ -401,13 +448,12 @@ xmlrpc_force_to_utf8(char * const buffer) {
char * p;
for (p = &buffer[0]; *p;) {
uint const length = utf8_seq_length[(unsigned char) *p];
unsigned int const length = utf8SeqLength[(unsigned char) *p];
bool forceDel;
uint32_t decoded;
forceDel = false;
decoded = 0; /* suppress compiler warning; valid when !forceDel */
forceDel = false; /* initial value */
switch (length) {
case 1:
@@ -482,7 +528,7 @@ xmlrpc_force_to_xml_chars(char * const buffer) {
char * p;
for (p = &buffer[0]; *p;) {
uint const length = utf8_seq_length[(unsigned char) *p];
unsigned int const length = utf8SeqLength[(unsigned char) *p];
if (length == 1) {
if (*p < 0x20 && *p != '\r' && *p != '\n' && *p != '\t')
@@ -505,7 +551,31 @@ xmlrpc_force_to_xml_chars(char * const buffer) {
void
xmlrpc_validate_utf8(xmlrpc_env * const envP,
                     const char * const utf8_data,
                     size_t       const utf8_len) {
/*----------------------------------------------------------------------------
   Validate that the 'utf8_len' bytes at 'utf8_data' are valid UTF-8.

   If they aren't, fault *envP with XMLRPC_INVALID_UTF8_ERROR and a message
   that gives the length of the string and the decoder's own explanation.

   In a build without HAVE_UNICODE_WCHAR, we check nothing, so we never
   fault *envP.
-----------------------------------------------------------------------------*/
    xmlrpc_env decodeEnv;
        /* Private error environment for the decoder, so that we can wrap its
           message in our own before touching *envP.
        */

    xmlrpc_env_init(&decodeEnv);

#if HAVE_UNICODE_WCHAR
    /* Null output arguments: validate only, don't store decoded characters */
    decodeUtf8(&decodeEnv, utf8_data, utf8_len, NULL, NULL);
#else
    /* We don't have a convenient way to validate, so we just fake it and
       call it valid.
    */
#endif

    if (decodeEnv.fault_occurred) {
        xmlrpc_env_set_fault_formatted(
            envP, XMLRPC_INVALID_UTF8_ERROR,
            "%" XMLRPC_PRId64 "-byte "
            "supposed UTF-8 string is not valid UTF-8. %s",
            (XMLRPC_INT64)utf8_len, decodeEnv.fault_string);
    }

    xmlrpc_env_clean(&decodeEnv);
}