mirror of
https://github.com/signalwire/freeswitch.git
synced 2025-08-13 01:26:58 +00:00
update to pcre 7.9
git-svn-id: http://svn.freeswitch.org/svn/freeswitch/trunk@13706 d0543943-73ff-0310-b7d9-9358b9ac24b2
This commit is contained in:
@@ -6,7 +6,7 @@
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2006 University of Cambridge
|
||||
Copyright (c) 1997-2008 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@@ -42,9 +42,18 @@ POSSIBILITY OF SUCH DAMAGE.
|
||||
supporting functions. */
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "pcre_internal.h"
|
||||
|
||||
|
||||
/* Returns from set_start_bits() */
|
||||
|
||||
enum { SSB_FAIL, SSB_DONE, SSB_CONTINUE };
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Set a bit and maybe its alternate case *
|
||||
*************************************************/
|
||||
@@ -72,12 +81,16 @@ if (caseless && (cd->ctypes[c] & ctype_letter) != 0)
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Create bitmap of starting chars *
|
||||
* Create bitmap of starting bytes *
|
||||
*************************************************/
|
||||
|
||||
/* This function scans a compiled unanchored expression and attempts to build a
|
||||
bitmap of the set of initial characters. If it can't, it returns FALSE. As time
|
||||
goes by, we may be able to get more clever at doing this.
|
||||
/* This function scans a compiled unanchored expression recursively and
|
||||
attempts to build a bitmap of the set of possible starting bytes. As time goes
|
||||
by, we may be able to get more clever at doing this. The SSB_CONTINUE return is
|
||||
useful for parenthesized groups in patterns such as (a*)b where the group
|
||||
provides some optional starting bytes but scanning must continue at the outer
|
||||
level to find at least one mandatory byte. At the outermost level, this
|
||||
function fails unless the result is SSB_DONE.
|
||||
|
||||
Arguments:
|
||||
code points to an expression
|
||||
@@ -86,14 +99,17 @@ Arguments:
|
||||
utf8 TRUE if in UTF-8 mode
|
||||
cd the block with char table pointers
|
||||
|
||||
Returns: TRUE if table built, FALSE otherwise
|
||||
Returns: SSB_FAIL => Failed to find any starting bytes
|
||||
SSB_DONE => Found mandatory starting bytes
|
||||
SSB_CONTINUE => Found optional starting bytes
|
||||
*/
|
||||
|
||||
static BOOL
|
||||
static int
|
||||
set_start_bits(const uschar *code, uschar *start_bits, BOOL caseless,
|
||||
BOOL utf8, compile_data *cd)
|
||||
{
|
||||
register int c;
|
||||
int yield = SSB_DONE;
|
||||
|
||||
#if 0
|
||||
/* ========================================================================= */
|
||||
@@ -114,25 +130,55 @@ volatile int dummy;
|
||||
|
||||
do
|
||||
{
|
||||
const uschar *tcode = code + 1 + LINK_SIZE;
|
||||
const uschar *tcode = code + (((int)*code == OP_CBRA)? 3:1) + LINK_SIZE;
|
||||
BOOL try_next = TRUE;
|
||||
|
||||
while (try_next)
|
||||
while (try_next) /* Loop for items in this branch */
|
||||
{
|
||||
/* If a branch starts with a bracket or a positive lookahead assertion,
|
||||
recurse to set bits from within them. That's all for this branch. */
|
||||
|
||||
if ((int)*tcode >= OP_BRA || *tcode == OP_ASSERT)
|
||||
int rc;
|
||||
switch(*tcode)
|
||||
{
|
||||
if (!set_start_bits(tcode, start_bits, caseless, utf8, cd))
|
||||
return FALSE;
|
||||
try_next = FALSE;
|
||||
}
|
||||
/* Fail if we reach something we don't understand */
|
||||
|
||||
else switch(*tcode)
|
||||
{
|
||||
default:
|
||||
return FALSE;
|
||||
return SSB_FAIL;
|
||||
|
||||
/* If we hit a bracket or a positive lookahead assertion, recurse to set
|
||||
bits from within the subpattern. If it can't find anything, we have to
|
||||
give up. If it finds some mandatory character(s), we are done for this
|
||||
branch. Otherwise, carry on scanning after the subpattern. */
|
||||
|
||||
case OP_BRA:
|
||||
case OP_SBRA:
|
||||
case OP_CBRA:
|
||||
case OP_SCBRA:
|
||||
case OP_ONCE:
|
||||
case OP_ASSERT:
|
||||
rc = set_start_bits(tcode, start_bits, caseless, utf8, cd);
|
||||
if (rc == SSB_FAIL) return SSB_FAIL;
|
||||
if (rc == SSB_DONE) try_next = FALSE; else
|
||||
{
|
||||
do tcode += GET(tcode, 1); while (*tcode == OP_ALT);
|
||||
tcode += 1 + LINK_SIZE;
|
||||
}
|
||||
break;
|
||||
|
||||
/* If we hit ALT or KET, it means we haven't found anything mandatory in
|
||||
this branch, though we might have found something optional. For ALT, we
|
||||
continue with the next alternative, but we have to arrange that the final
|
||||
result from subpattern is SSB_CONTINUE rather than SSB_DONE. For KET,
|
||||
return SSB_CONTINUE: if this is the top level, that indicates failure,
|
||||
but after a nested subpattern, it causes scanning to continue. */
|
||||
|
||||
case OP_ALT:
|
||||
yield = SSB_CONTINUE;
|
||||
try_next = FALSE;
|
||||
break;
|
||||
|
||||
case OP_KET:
|
||||
case OP_KETRMAX:
|
||||
case OP_KETRMIN:
|
||||
return SSB_CONTINUE;
|
||||
|
||||
/* Skip over callout */
|
||||
|
||||
@@ -140,19 +186,13 @@ do
|
||||
tcode += 2 + 2*LINK_SIZE;
|
||||
break;
|
||||
|
||||
/* Skip over extended extraction bracket number */
|
||||
|
||||
case OP_BRANUMBER:
|
||||
tcode += 3;
|
||||
break;
|
||||
|
||||
/* Skip over lookbehind and negative lookahead assertions */
|
||||
|
||||
case OP_ASSERT_NOT:
|
||||
case OP_ASSERTBACK:
|
||||
case OP_ASSERTBACK_NOT:
|
||||
do tcode += GET(tcode, 1); while (*tcode == OP_ALT);
|
||||
tcode += 1+LINK_SIZE;
|
||||
tcode += 1 + LINK_SIZE;
|
||||
break;
|
||||
|
||||
/* Skip over an option setting, changing the caseless flag */
|
||||
@@ -166,27 +206,38 @@ do
|
||||
|
||||
case OP_BRAZERO:
|
||||
case OP_BRAMINZERO:
|
||||
if (!set_start_bits(++tcode, start_bits, caseless, utf8, cd))
|
||||
return FALSE;
|
||||
if (set_start_bits(++tcode, start_bits, caseless, utf8, cd) == SSB_FAIL)
|
||||
return SSB_FAIL;
|
||||
/* =========================================================================
|
||||
See the comment at the head of this function concerning the next line,
|
||||
which was an old fudge for the benefit of OS/2.
|
||||
dummy = 1;
|
||||
========================================================================= */
|
||||
do tcode += GET(tcode,1); while (*tcode == OP_ALT);
|
||||
tcode += 1+LINK_SIZE;
|
||||
tcode += 1 + LINK_SIZE;
|
||||
break;
|
||||
|
||||
/* SKIPZERO skips the bracket. */
|
||||
|
||||
case OP_SKIPZERO:
|
||||
tcode++;
|
||||
do tcode += GET(tcode,1); while (*tcode == OP_ALT);
|
||||
tcode += 1 + LINK_SIZE;
|
||||
break;
|
||||
|
||||
/* Single-char * or ? sets the bit and tries the next item */
|
||||
|
||||
case OP_STAR:
|
||||
case OP_MINSTAR:
|
||||
case OP_POSSTAR:
|
||||
case OP_QUERY:
|
||||
case OP_MINQUERY:
|
||||
case OP_POSQUERY:
|
||||
set_bit(start_bits, tcode[1], caseless, cd);
|
||||
tcode += 2;
|
||||
#ifdef SUPPORT_UTF8
|
||||
if (utf8) while ((*tcode & 0xc0) == 0x80) tcode++;
|
||||
if (utf8 && tcode[-1] >= 0xc0)
|
||||
tcode += _pcre_utf8_table4[tcode[-1] & 0x3f];
|
||||
#endif
|
||||
break;
|
||||
|
||||
@@ -194,10 +245,12 @@ do
|
||||
|
||||
case OP_UPTO:
|
||||
case OP_MINUPTO:
|
||||
case OP_POSUPTO:
|
||||
set_bit(start_bits, tcode[3], caseless, cd);
|
||||
tcode += 4;
|
||||
#ifdef SUPPORT_UTF8
|
||||
if (utf8) while ((*tcode & 0xc0) == 0x80) tcode++;
|
||||
if (utf8 && tcode[-1] >= 0xc0)
|
||||
tcode += _pcre_utf8_table4[tcode[-1] & 0x3f];
|
||||
#endif
|
||||
break;
|
||||
|
||||
@@ -210,6 +263,7 @@ do
|
||||
case OP_CHARNC:
|
||||
case OP_PLUS:
|
||||
case OP_MINPLUS:
|
||||
case OP_POSPLUS:
|
||||
set_bit(start_bits, tcode[1], caseless, cd);
|
||||
try_next = FALSE;
|
||||
break;
|
||||
@@ -283,16 +337,20 @@ do
|
||||
|
||||
case OP_TYPEUPTO:
|
||||
case OP_TYPEMINUPTO:
|
||||
case OP_TYPEPOSUPTO:
|
||||
tcode += 2; /* Fall through */
|
||||
|
||||
case OP_TYPESTAR:
|
||||
case OP_TYPEMINSTAR:
|
||||
case OP_TYPEPOSSTAR:
|
||||
case OP_TYPEQUERY:
|
||||
case OP_TYPEMINQUERY:
|
||||
case OP_TYPEPOSQUERY:
|
||||
switch(tcode[1])
|
||||
{
|
||||
case OP_ANY:
|
||||
return FALSE;
|
||||
case OP_ALLANY:
|
||||
return SSB_FAIL;
|
||||
|
||||
case OP_NOT_DIGIT:
|
||||
for (c = 0; c < 32; c++)
|
||||
@@ -349,11 +407,13 @@ do
|
||||
character with a value > 255. */
|
||||
|
||||
case OP_NCLASS:
|
||||
#ifdef SUPPORT_UTF8
|
||||
if (utf8)
|
||||
{
|
||||
start_bits[24] |= 0xf0; /* Bits for 0xc4 - 0xc8 */
|
||||
memset(start_bits+25, 0xff, 7); /* Bits for 0xc9 - 0xff */
|
||||
}
|
||||
#endif
|
||||
/* Fall through */
|
||||
|
||||
case OP_CLASS:
|
||||
@@ -366,6 +426,7 @@ do
|
||||
value is > 127. In fact, there are only two possible starting bytes for
|
||||
characters in the range 128 - 255. */
|
||||
|
||||
#ifdef SUPPORT_UTF8
|
||||
if (utf8)
|
||||
{
|
||||
for (c = 0; c < 16; c++) start_bits[c] |= tcode[c];
|
||||
@@ -383,6 +444,7 @@ do
|
||||
/* In non-UTF-8 mode, the two bit maps are completely compatible. */
|
||||
|
||||
else
|
||||
#endif
|
||||
{
|
||||
for (c = 0; c < 32; c++) start_bits[c] |= tcode[c];
|
||||
}
|
||||
@@ -418,7 +480,7 @@ do
|
||||
code += GET(code, 1); /* Advance to next branch */
|
||||
}
|
||||
while (*code == OP_ALT);
|
||||
return TRUE;
|
||||
return yield;
|
||||
}
|
||||
|
||||
|
||||
@@ -442,7 +504,7 @@ Returns: pointer to a pcre_extra block, with study_data filled in and the
|
||||
NULL on error or if no optimization possible
|
||||
*/
|
||||
|
||||
PCRE_DATA_SCOPE pcre_extra *
|
||||
PCRE_EXP_DEFN pcre_extra * PCRE_CALL_CONVENTION
|
||||
pcre_study(const pcre *external_re, int options, const char **errorptr)
|
||||
{
|
||||
uschar start_bits[32];
|
||||
@@ -474,7 +536,8 @@ code = (uschar *)re + re->name_table_offset +
|
||||
a multiline pattern that matches only at "line starts", no further processing
|
||||
at present. */
|
||||
|
||||
if ((re->options & (PCRE_ANCHORED|PCRE_FIRSTSET|PCRE_STARTLINE)) != 0)
|
||||
if ((re->options & PCRE_ANCHORED) != 0 ||
|
||||
(re->flags & (PCRE_FIRSTSET|PCRE_STARTLINE)) != 0)
|
||||
return NULL;
|
||||
|
||||
/* Set the character tables in the block that is passed around */
|
||||
@@ -492,8 +555,8 @@ compile_block.ctypes = tables + ctypes_offset;
|
||||
/* See if we can find a fixed set of initial characters for the pattern. */
|
||||
|
||||
memset(start_bits, 0, 32 * sizeof(uschar));
|
||||
if (!set_start_bits(code, start_bits, (re->options & PCRE_CASELESS) != 0,
|
||||
(re->options & PCRE_UTF8) != 0, &compile_block)) return NULL;
|
||||
if (set_start_bits(code, start_bits, (re->options & PCRE_CASELESS) != 0,
|
||||
(re->options & PCRE_UTF8) != 0, &compile_block) != SSB_DONE) return NULL;
|
||||
|
||||
/* Get a pcre_extra block and a pcre_study_data block. The study data is put in
|
||||
the latter, which is pointed to by the former, which may also get additional
|
||||
|
Reference in New Issue
Block a user