mirror of
https://gitlab.gnome.org/GNOME/libxml2
synced 2025-03-28 21:33:13 +00:00
applied patch from Youri Golovanov fixing bug #316338 and adding a couple of optimizations in the regexp compilation engine
* xmlregexp.c: applied patch from Youri Golovanov fixing bug #316338 and adding a couple of optimizations in the regexp compilation engine. * test/regexp/bug316338 result/regexp/bug316338: added regression tests based on the examples provided in the bug report. Daniel
This commit is contained in:
parent
aac7c68e87
commit
54eb0243c4
@ -1,3 +1,11 @@
|
|||||||
|
Wed Mar 22 00:14:34 CET 2006 Daniel Veillard <daniel@veillard.com>
|
||||||
|
|
||||||
|
* xmlregexp.c: applied patch from Youri Golovanov fixing bug
|
||||||
|
#316338 and adding a couple of optimizations in the regexp
|
||||||
|
compilation engine.
|
||||||
|
* test/regexp/bug316338 result/regexp/bug316338: added regression
|
||||||
|
tests based on the examples provided in the bug report.
|
||||||
|
|
||||||
Fri Mar 10 08:40:55 EST 2006 Daniel Veillard <daniel@veillard.com>
|
Fri Mar 10 08:40:55 EST 2006 Daniel Veillard <daniel@veillard.com>
|
||||||
|
|
||||||
* c14n.c encoding.c xmlschemas.c xpath.c xpointer.c: fix a few
|
* c14n.c encoding.c xmlschemas.c xpath.c xpointer.c: fix a few
|
||||||
|
20
result/regexp/bug316338
Normal file
20
result/regexp/bug316338
Normal file
@ -0,0 +1,20 @@
|
|||||||
|
Regexp: (((C|c)(([\s]*\-?[0-9]+(\.[0-9]+)?((e|E)\-?[0-9]+)?){3})+[\s]*))+
|
||||||
|
C 433: Ok
|
||||||
|
C 433 12: Fail
|
||||||
|
C 433 123: Ok
|
||||||
|
C 433 123 456: Ok
|
||||||
|
C 724.64,433.12 718.08,433.12 711.52,433.12 724.64,433.12 718.08,433.12 711.52,433.12 724.64,433.12 718.08,433.12 711.52,433.12 724.64,433.12 718.08,433.12 711.52,433.12 724.64,433.12 718.08,433.12 711.52,433.12 724.64,433.12 718.08,433.12 711.52,433.12 724.64,433.12 718.08,433.12 711.52,433.12 724.64,433.12 718.08,433.12 711.52,433.12 724.64,433.12 718.08,433.12 711.52,433.12 724.64,433.12 718.08,433.12 711.52,433.12: Fail
|
||||||
|
Regexp: (((C|c)(([\s]+\-?[0-9]+(\.[0-9]+)?((e|E)\-?[0-9]+)?){3})+[\s]*))+
|
||||||
|
C 433: Fail
|
||||||
|
C 433 12: Fail
|
||||||
|
C 433 123: Fail
|
||||||
|
C 433 123 456: Ok
|
||||||
|
C 724.64,433.12 718.08,433.12 711.52,433.12 724.64,433.12 718.08,433.12 711.52,433.12 724.64,433.12 718.08,433.12 711.52,433.12 724.64,433.12 718.08,433.12 711.52,433.12 724.64,433.12 718.08,433.12 711.52,433.12 724.64,433.12 718.08,433.12 711.52,433.12 724.64,433.12 718.08,433.12 711.52,433.12 724.64,433.12 718.08,433.12 711.52,433.12 724.64,433.12 718.08,433.12 711.52,433.12 724.64,433.12 718.08,433.12 711.52,433.12: Fail
|
||||||
|
Regexp: (((C|c)(([\s]+\-?[0-9]+(\.[0-9]+)?[\s]*,[\s]*\-?[0-9]+(\.[0-9]+)?){3})+[\s]*))+
|
||||||
|
C 433: Fail
|
||||||
|
C 433 12: Fail
|
||||||
|
C 433 123: Fail
|
||||||
|
C 433 123 456: Fail
|
||||||
|
C 724.64,433.12 718.08,433.12 711.52,433.12 724.64,433.12 718.08,433.12 711.52,433.12 724.64,433.12 718.08,433.12 711.52,433.12 724.64,433.12718.08,433.12 711.52,433.12 724.64,433.12 718.08,433.12 711.52,433.12 724.64,433.12 718.08,433.12 711.52,433.12 724.64,433.12 718.08,433.12 711.52,433.12 724.64,433.12 718.08,433.12 711.52,433.12 724.64,433.12 718.08,433.12 711.52,433.12 724.64,433.12 718.08,433.12 711.52,433.12: Fail
|
||||||
|
Regexp: (((M|m)([\s]+\-?[0-9]+(\.[0-9]+)?((e|E)\-?[0-9]+)?[\s]*,[\s]*\-?[0-9]+(\.[0-9]+)?((e|E)\-?[0-9]+)?)[\s]*)|((L|l)([\s]+\-?[0-9]+(\.[0-9]+)?((e|E)\-?[0-9]+)?[\s]*,[\s]*\-?[0-9]+(\.[0-9]+)?((e|E)\-?[0-9]+)?)+[\s]*)|((H|h)([\s]+\-?[0-9]+(\.[0-9]+)?((e|E)\-?[0-9]+)?)+[\s]*)|((V|v)([\s]+\-?[0-9]+(\.[0-9]+)?((e|E)\-?[0-9]+)?)+[\s]*)|((C|c)(([\s]+\-?[0-9]+(\.[0-9]+)?((e|E)\-?[0-9]+)?[\s]*,[\s]*\-?[0-9]+(\.[0-9]+)?((e|E)\-?[0-9]+)?){3})+[\s]*)|((Q|q)(([\s]+\-?[0-9]+(\.[0-9]+)?((e|E)\-?[0-9]+)?[\s]*,[\s]*\-?[0-9]+(\.[0-9]+)?((e|E)\-?[0-9]+)?){2})+[\s]*)|((S|s)(([\s]+\-?[0-9]+(\.[0-9]+)?((e|E)\-?[0-9]+)?[\s]*,[\s]*\-?[0-9]+(\.[0-9]+)?((e|E)\-?[0-9]+)?){2})+[\s]*)|((A|a)([\s]+\-?[0-9]+(\.[0-9]+)?((e|E)\-?[0-9]+)?[\s]*,[\s]*\-?[0-9]+(\.[0-9]+)?((e|E)\-?[0-9]+)?[\s]*\-?[0-9]+(\.[0-9]+)?((e|E)\-?[0-9]+)?[\s]+[0-1][\s]+[0-1][\s]+\-?[0-9]+(\.[0-9]+)?((e|E)\-?[0-9]+)?[\s]*,[\s]*\-?[0-9]+(\.[0-9]+)?((e|E)\-?[0-9]+)?)+[\s]*)|((Z|z)[\s]*))*
|
||||||
|
M 724.64,433.12 718.08,433.12 711.52,433.12 724.64,433.12 718.08,433.12 711.52,433.12 724.64,433.12 718.08,433.12 711.52,433.12 724.64,433.12 718.08,433.12 711.52,433.12 724.64,433.12 718.08,433.12 711.52,433.12 724.64,433.12 718.08,433.12 711.52,433.12 724.64,433.12 718.08,433.12 711.52,433.12 724.64,433.12 718.08,433.12 711.52,433.12 724.64,433.12 718.08,433.12 711.52,433.12 724.64,433.12 718.08,433.12 711.52,433.12: Fail
|
20
test/regexp/bug316338
Normal file
20
test/regexp/bug316338
Normal file
@ -0,0 +1,20 @@
|
|||||||
|
=>(((C|c)(([\s]*\-?[0-9]+(\.[0-9]+)?((e|E)\-?[0-9]+)?){3})+[\s]*))+
|
||||||
|
C 433
|
||||||
|
C 433 12
|
||||||
|
C 433 123
|
||||||
|
C 433 123 456
|
||||||
|
C 724.64,433.12 718.08,433.12 711.52,433.12 724.64,433.12 718.08,433.12 711.52,433.12 724.64,433.12 718.08,433.12 711.52,433.12 724.64,433.12 718.08,433.12 711.52,433.12 724.64,433.12 718.08,433.12 711.52,433.12 724.64,433.12 718.08,433.12 711.52,433.12 724.64,433.12 718.08,433.12 711.52,433.12 724.64,433.12 718.08,433.12 711.52,433.12 724.64,433.12 718.08,433.12 711.52,433.12 724.64,433.12 718.08,433.12 711.52,433.12
|
||||||
|
=>(((C|c)(([\s]+\-?[0-9]+(\.[0-9]+)?((e|E)\-?[0-9]+)?){3})+[\s]*))+
|
||||||
|
C 433
|
||||||
|
C 433 12
|
||||||
|
C 433 123
|
||||||
|
C 433 123 456
|
||||||
|
C 724.64,433.12 718.08,433.12 711.52,433.12 724.64,433.12 718.08,433.12 711.52,433.12 724.64,433.12 718.08,433.12 711.52,433.12 724.64,433.12 718.08,433.12 711.52,433.12 724.64,433.12 718.08,433.12 711.52,433.12 724.64,433.12 718.08,433.12 711.52,433.12 724.64,433.12 718.08,433.12 711.52,433.12 724.64,433.12 718.08,433.12 711.52,433.12 724.64,433.12 718.08,433.12 711.52,433.12 724.64,433.12 718.08,433.12 711.52,433.12
|
||||||
|
=>(((C|c)(([\s]+\-?[0-9]+(\.[0-9]+)?[\s]*,[\s]*\-?[0-9]+(\.[0-9]+)?){3})+[\s]*))+
|
||||||
|
C 433
|
||||||
|
C 433 12
|
||||||
|
C 433 123
|
||||||
|
C 433 123 456
|
||||||
|
C 724.64,433.12 718.08,433.12 711.52,433.12 724.64,433.12 718.08,433.12 711.52,433.12 724.64,433.12 718.08,433.12 711.52,433.12 724.64,433.12718.08,433.12 711.52,433.12 724.64,433.12 718.08,433.12 711.52,433.12 724.64,433.12 718.08,433.12 711.52,433.12 724.64,433.12 718.08,433.12 711.52,433.12 724.64,433.12 718.08,433.12 711.52,433.12 724.64,433.12 718.08,433.12 711.52,433.12 724.64,433.12 718.08,433.12 711.52,433.12
|
||||||
|
=>(((M|m)([\s]+\-?[0-9]+(\.[0-9]+)?((e|E)\-?[0-9]+)?[\s]*,[\s]*\-?[0-9]+(\.[0-9]+)?((e|E)\-?[0-9]+)?)[\s]*)|((L|l)([\s]+\-?[0-9]+(\.[0-9]+)?((e|E)\-?[0-9]+)?[\s]*,[\s]*\-?[0-9]+(\.[0-9]+)?((e|E)\-?[0-9]+)?)+[\s]*)|((H|h)([\s]+\-?[0-9]+(\.[0-9]+)?((e|E)\-?[0-9]+)?)+[\s]*)|((V|v)([\s]+\-?[0-9]+(\.[0-9]+)?((e|E)\-?[0-9]+)?)+[\s]*)|((C|c)(([\s]+\-?[0-9]+(\.[0-9]+)?((e|E)\-?[0-9]+)?[\s]*,[\s]*\-?[0-9]+(\.[0-9]+)?((e|E)\-?[0-9]+)?){3})+[\s]*)|((Q|q)(([\s]+\-?[0-9]+(\.[0-9]+)?((e|E)\-?[0-9]+)?[\s]*,[\s]*\-?[0-9]+(\.[0-9]+)?((e|E)\-?[0-9]+)?){2})+[\s]*)|((S|s)(([\s]+\-?[0-9]+(\.[0-9]+)?((e|E)\-?[0-9]+)?[\s]*,[\s]*\-?[0-9]+(\.[0-9]+)?((e|E)\-?[0-9]+)?){2})+[\s]*)|((A|a)([\s]+\-?[0-9]+(\.[0-9]+)?((e|E)\-?[0-9]+)?[\s]*,[\s]*\-?[0-9]+(\.[0-9]+)?((e|E)\-?[0-9]+)?[\s]*\-?[0-9]+(\.[0-9]+)?((e|E)\-?[0-9]+)?[\s]+[0-1][\s]+[0-1][\s]+\-?[0-9]+(\.[0-9]+)?((e|E)\-?[0-9]+)?[\s]*,[\s]*\-?[0-9]+(\.[0-9]+)?((e|E)\-?[0-9]+)?)+[\s]*)|((Z|z)[\s]*))*
|
||||||
|
M 724.64,433.12 718.08,433.12 711.52,433.12 724.64,433.12 718.08,433.12 711.52,433.12 724.64,433.12 718.08,433.12 711.52,433.12 724.64,433.12 718.08,433.12 711.52,433.12 724.64,433.12 718.08,433.12 711.52,433.12 724.64,433.12 718.08,433.12 711.52,433.12 724.64,433.12 718.08,433.12 711.52,433.12 724.64,433.12 718.08,433.12 711.52,433.12 724.64,433.12 718.08,433.12 711.52,433.12 724.64,433.12 718.08,433.12 711.52,433.12
|
52
xmlregexp.c
52
xmlregexp.c
@ -1479,7 +1479,13 @@ xmlFAGenerateTransitions(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr from,
|
|||||||
switch (atom->quant) {
|
switch (atom->quant) {
|
||||||
case XML_REGEXP_QUANT_OPT:
|
case XML_REGEXP_QUANT_OPT:
|
||||||
atom->quant = XML_REGEXP_QUANT_ONCE;
|
atom->quant = XML_REGEXP_QUANT_ONCE;
|
||||||
xmlFAGenerateEpsilonTransition(ctxt, atom->start, atom->stop);
|
/*
|
||||||
|
* transition done to the state after end of atom.
|
||||||
|
* 1. set transition from atom start to new state
|
||||||
|
* 2. set transition from atom end to this state.
|
||||||
|
*/
|
||||||
|
xmlFAGenerateEpsilonTransition(ctxt, atom->start, 0);
|
||||||
|
xmlFAGenerateEpsilonTransition(ctxt, atom->stop, ctxt->state);
|
||||||
break;
|
break;
|
||||||
case XML_REGEXP_QUANT_MULT:
|
case XML_REGEXP_QUANT_MULT:
|
||||||
atom->quant = XML_REGEXP_QUANT_ONCE;
|
atom->quant = XML_REGEXP_QUANT_ONCE;
|
||||||
@ -1522,8 +1528,6 @@ xmlFAGenerateTransitions(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr from,
|
|||||||
atom->min = 0;
|
atom->min = 0;
|
||||||
atom->max = 0;
|
atom->max = 0;
|
||||||
atom->quant = XML_REGEXP_QUANT_ONCE;
|
atom->quant = XML_REGEXP_QUANT_ONCE;
|
||||||
xmlFAGenerateCountedEpsilonTransition(ctxt, atom->stop,
|
|
||||||
atom->start, counter);
|
|
||||||
if (to != NULL) {
|
if (to != NULL) {
|
||||||
newstate = to;
|
newstate = to;
|
||||||
} else {
|
} else {
|
||||||
@ -1533,6 +1537,13 @@ xmlFAGenerateTransitions(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr from,
|
|||||||
ctxt->state = newstate;
|
ctxt->state = newstate;
|
||||||
xmlFAGenerateCountedTransition(ctxt, atom->stop,
|
xmlFAGenerateCountedTransition(ctxt, atom->stop,
|
||||||
newstate, counter);
|
newstate, counter);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* first check count and if OK jump forward;
|
||||||
|
* if the check fails, increment count and jump back
|
||||||
|
*/
|
||||||
|
xmlFAGenerateCountedEpsilonTransition(ctxt, atom->stop,
|
||||||
|
atom->start, counter);
|
||||||
}
|
}
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
@ -4299,6 +4310,15 @@ xmlRegExecPushChar(xmlRegExecCtxtPtr exec, int UCS) {
|
|||||||
if (exec->state->nbTrans > exec->transno + 1) {
|
if (exec->state->nbTrans > exec->transno + 1) {
|
||||||
xmlFARegExecSave(exec);
|
xmlFARegExecSave(exec);
|
||||||
}
|
}
|
||||||
|
/*
|
||||||
|
* restart count for expressions like this ((abc){2})*
|
||||||
|
*/
|
||||||
|
if (trans->count >= 0) {
|
||||||
|
#ifdef DEBUG_REGEXP_EXEC
|
||||||
|
printf("Reset count %d\n", trans->count);
|
||||||
|
#endif
|
||||||
|
exec->counts[trans->count] = 0;
|
||||||
|
}
|
||||||
if (trans->counter >= 0) {
|
if (trans->counter >= 0) {
|
||||||
#ifdef DEBUG_REGEXP_EXEC
|
#ifdef DEBUG_REGEXP_EXEC
|
||||||
printf("Increasing count %d\n", trans->counter);
|
printf("Increasing count %d\n", trans->counter);
|
||||||
@ -5112,19 +5132,23 @@ xmlFAParsePiece(xmlRegParserCtxtPtr ctxt) {
|
|||||||
/**
|
/**
|
||||||
* xmlFAParseBranch:
|
* xmlFAParseBranch:
|
||||||
* @ctxt: a regexp parser context
|
* @ctxt: a regexp parser context
|
||||||
|
* @to: optional target to the end of the branch
|
||||||
|
*
|
||||||
|
* @to is used to optimize by removing duplicate paths in the automaton
|
||||||
|
* in expressions like (a|b)(c|d)
|
||||||
*
|
*
|
||||||
* [2] branch ::= piece*
|
* [2] branch ::= piece*
|
||||||
8
|
|
||||||
*/
|
*/
|
||||||
static int
|
static int
|
||||||
xmlFAParseBranch(xmlRegParserCtxtPtr ctxt) {
|
xmlFAParseBranch(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr to) {
|
||||||
xmlRegStatePtr previous;
|
xmlRegStatePtr previous;
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
previous = ctxt->state;
|
previous = ctxt->state;
|
||||||
ret = xmlFAParsePiece(ctxt);
|
ret = xmlFAParsePiece(ctxt);
|
||||||
if (ret != 0) {
|
if (ret != 0) {
|
||||||
if (xmlFAGenerateTransitions(ctxt, previous, NULL, ctxt->atom) < 0)
|
if (xmlFAGenerateTransitions(ctxt, previous,
|
||||||
|
(CUR=='|' || CUR==')') ? to : NULL, ctxt->atom) < 0)
|
||||||
return(-1);
|
return(-1);
|
||||||
previous = ctxt->state;
|
previous = ctxt->state;
|
||||||
ctxt->atom = NULL;
|
ctxt->atom = NULL;
|
||||||
@ -5132,8 +5156,8 @@ xmlFAParseBranch(xmlRegParserCtxtPtr ctxt) {
|
|||||||
while ((ret != 0) && (ctxt->error == 0)) {
|
while ((ret != 0) && (ctxt->error == 0)) {
|
||||||
ret = xmlFAParsePiece(ctxt);
|
ret = xmlFAParsePiece(ctxt);
|
||||||
if (ret != 0) {
|
if (ret != 0) {
|
||||||
if (xmlFAGenerateTransitions(ctxt, previous, NULL,
|
if (xmlFAGenerateTransitions(ctxt, previous,
|
||||||
ctxt->atom) < 0)
|
(CUR=='|' || CUR==')') ? to : NULL, ctxt->atom) < 0)
|
||||||
return(-1);
|
return(-1);
|
||||||
previous = ctxt->state;
|
previous = ctxt->state;
|
||||||
ctxt->atom = NULL;
|
ctxt->atom = NULL;
|
||||||
@ -5156,7 +5180,7 @@ xmlFAParseRegExp(xmlRegParserCtxtPtr ctxt, int top) {
|
|||||||
/* if not top start should have been generated by an epsilon trans */
|
/* if not top start should have been generated by an epsilon trans */
|
||||||
start = ctxt->state;
|
start = ctxt->state;
|
||||||
ctxt->end = NULL;
|
ctxt->end = NULL;
|
||||||
xmlFAParseBranch(ctxt);
|
xmlFAParseBranch(ctxt, NULL);
|
||||||
if (top) {
|
if (top) {
|
||||||
#ifdef DEBUG_REGEXP_GRAPH
|
#ifdef DEBUG_REGEXP_GRAPH
|
||||||
printf("State %d is final\n", ctxt->state->no);
|
printf("State %d is final\n", ctxt->state->no);
|
||||||
@ -5172,15 +5196,7 @@ xmlFAParseRegExp(xmlRegParserCtxtPtr ctxt, int top) {
|
|||||||
NEXT;
|
NEXT;
|
||||||
ctxt->state = start;
|
ctxt->state = start;
|
||||||
ctxt->end = NULL;
|
ctxt->end = NULL;
|
||||||
xmlFAParseBranch(ctxt);
|
xmlFAParseBranch(ctxt, end);
|
||||||
if (top) {
|
|
||||||
ctxt->state->type = XML_REGEXP_FINAL_STATE;
|
|
||||||
#ifdef DEBUG_REGEXP_GRAPH
|
|
||||||
printf("State %d is final\n", ctxt->state->no);
|
|
||||||
#endif
|
|
||||||
} else {
|
|
||||||
xmlFAGenerateEpsilonTransition(ctxt, ctxt->state, end);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
if (!top) {
|
if (!top) {
|
||||||
ctxt->state = end;
|
ctxt->state = end;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user