From a2b5c90a442295d2b75ae60af854b3c4a43aa0ff Mon Sep 17 00:00:00 2001 From: Nick Wellnhofer Date: Tue, 21 Nov 2023 14:35:54 +0100 Subject: [PATCH] hash: Fix deletion of entries during scan Functions like xmlCleanSpecialAttr scan a hash table and possibly delete entries in the callback. xmlHashScanFull must detect such deletions and rescan the entry. This regressed when rewriting the hash table code in 4a513d56. Fixes #626. --- hash.c | 76 ++++++++++++++++++++++++++++------ result/issue626.xml | 14 +++++++ result/issue626.xml.rde | 12 ++++++ result/issue626.xml.rdr | 12 ++++++ result/issue626.xml.sax | 23 ++++++++++ result/issue626.xml.sax2 | 23 ++++++++++ result/noent/issue626.xml | 14 +++++++ result/noent/issue626.xml.sax2 | 23 ++++++++++ test/issue626.xml | 13 ++++++ 9 files changed, 198 insertions(+), 12 deletions(-) create mode 100644 result/issue626.xml create mode 100644 result/issue626.xml.rde create mode 100644 result/issue626.xml.rdr create mode 100644 result/issue626.xml.sax create mode 100644 result/issue626.xml.sax2 create mode 100644 result/noent/issue626.xml create mode 100644 result/noent/issue626.xml.sax2 create mode 100644 test/issue626.xml diff --git a/hash.c b/hash.c index f3ec6396..38341d78 100644 --- a/hash.c +++ b/hash.c @@ -913,15 +913,42 @@ xmlHashScan(xmlHashTablePtr hash, xmlHashScanner scan, void *data) { void xmlHashScanFull(xmlHashTablePtr hash, xmlHashScannerFull scan, void *data) { const xmlHashEntry *entry, *end; + xmlHashEntry old; + unsigned i; if ((hash == NULL) || (hash->size == 0) || (scan == NULL)) return; + /* + * We must handle the case that a scanned entry is removed when executing + * the callback (xmlCleanSpecialAttr and possibly other places). + * + * Find the start of a probe sequence to avoid scanning entries twice if + * a deletion happens. + */ + entry = hash->table; end = &hash->table[hash->size]; + while (entry->hashValue != 0) { + if (++entry >= end) + entry = hash->table; + } - for (entry = hash->table; entry < end; entry++) { - if ((entry->hashValue != 0) && (entry->payload != NULL)) - scan(entry->payload, data, entry->key, entry->key2, entry->key3); + for (i = 0; i < hash->size; i++) { + if ((entry->hashValue != 0) && (entry->payload != NULL)) { + /* + * Make sure to rescan after a possible deletion. + */ + do { + old = *entry; + scan(entry->payload, data, entry->key, entry->key2, entry->key3); + } while ((entry->hashValue != 0) && + (entry->payload != NULL) && + ((entry->key != old.key) || + (entry->key2 != old.key2) || + (entry->key3 != old.key3))); + } + if (++entry >= end) + entry = hash->table; } } @@ -966,22 +993,47 @@ xmlHashScanFull3(xmlHashTablePtr hash, const xmlChar *key, const xmlChar *key2, const xmlChar *key3, xmlHashScannerFull scan, void *data) { const xmlHashEntry *entry, *end; + xmlHashEntry old; + unsigned i; if ((hash == NULL) || (hash->size == 0) || (scan == NULL)) return; + /* + * We must handle the case that a scanned entry is removed when executing + * the callback (xmlCleanSpecialAttr and possibly other places). + * + * Find the start of a probe sequence to avoid scanning entries twice if + * a deletion happens. + */ + entry = hash->table; end = &hash->table[hash->size]; + while (entry->hashValue != 0) { + if (++entry >= end) + entry = hash->table; + } - for (entry = hash->table; entry < end; entry++) { - if (entry->hashValue == 0) - continue; - if (((key == NULL) || - (strcmp((const char *) key, (const char *) entry->key) == 0)) && - ((key2 == NULL) || (xmlFastStrEqual(key2, entry->key2))) && - ((key3 == NULL) || (xmlFastStrEqual(key3, entry->key3))) && - (entry->payload != NULL)) { - scan(entry->payload, data, entry->key, entry->key2, entry->key3); + for (i = 0; i < hash->size; i++) { + if ((entry->hashValue != 0) && (entry->payload != NULL)) { + /* + * Make sure to rescan after a possible deletion. + */ + do { + if (((key != NULL) && (strcmp((const char *) key, + (const char *) entry->key) != 0)) || + ((key2 != NULL) && (!xmlFastStrEqual(key2, entry->key2))) || + ((key3 != NULL) && (!xmlFastStrEqual(key3, entry->key3)))) + break; + old = *entry; + scan(entry->payload, data, entry->key, entry->key2, entry->key3); + } while ((entry->hashValue != 0) && + (entry->payload != NULL) && + ((entry->key != old.key) || + (entry->key2 != old.key2) || + (entry->key3 != old.key3))); } + if (++entry >= end) + entry = hash->table; } } diff --git a/result/issue626.xml b/result/issue626.xml new file mode 100644 index 00000000..001b3d9c --- /dev/null +++ b/result/issue626.xml @@ -0,0 +1,14 @@ + + + + + + + +]> + + + + diff --git a/result/issue626.xml.rde b/result/issue626.xml.rde new file mode 100644 index 00000000..a488238a --- /dev/null +++ b/result/issue626.xml.rde @@ -0,0 +1,12 @@ +0 10 doc 0 0 +0 1 doc 0 0 +1 14 #text 0 1 + +1 8 #comment 0 1 This tests whether xmlCleanSpecialAttr works. The attribute values + must not be normalized. +1 14 #text 0 1 + +1 1 e 1 0 +1 14 #text 0 1 + +0 15 doc 0 0 diff --git a/result/issue626.xml.rdr b/result/issue626.xml.rdr new file mode 100644 index 00000000..a488238a --- /dev/null +++ b/result/issue626.xml.rdr @@ -0,0 +1,12 @@ +0 10 doc 0 0 +0 1 doc 0 0 +1 14 #text 0 1 + +1 8 #comment 0 1 This tests whether xmlCleanSpecialAttr works. The attribute values + must not be normalized. +1 14 #text 0 1 + +1 1 e 1 0 +1 14 #text 0 1 + +0 15 doc 0 0 diff --git a/result/issue626.xml.sax b/result/issue626.xml.sax new file mode 100644 index 00000000..8e6b59b5 --- /dev/null +++ b/result/issue626.xml.sax @@ -0,0 +1,23 @@ +SAX.setDocumentLocator() +SAX.startDocument() +SAX.internalSubset(doc, , ) +SAX.attributeDecl(e, a1, 1, 3, NULL, ...) +SAX.attributeDecl(e, a2, 1, 3, NULL, ...) +SAX.attributeDecl(e, a3, 1, 3, NULL, ...) +SAX.attributeDecl(e, a4, 1, 3, NULL, ...) +SAX.attributeDecl(e, a5, 1, 3, NULL, ...) +SAX.attributeDecl(e, a6, 1, 3, NULL, ...) +SAX.externalSubset(doc, , ) +SAX.startElement(doc) +SAX.characters( + , 5) +SAX.comment( This tests whether xmlCleanSpecialAttr works. The attribute values + must not be normalized. ) +SAX.characters( + , 5) +SAX.startElement(e, a1=' x x ', a2=' x x ', a3=' x x ', a4=' x x ', a5=' x x ', a6=' x x ') +SAX.endElement(e) +SAX.characters( +, 1) +SAX.endElement(doc) +SAX.endDocument() diff --git a/result/issue626.xml.sax2 b/result/issue626.xml.sax2 new file mode 100644 index 00000000..edc2c1f9 --- /dev/null +++ b/result/issue626.xml.sax2 @@ -0,0 +1,23 @@ +SAX.setDocumentLocator() +SAX.startDocument() +SAX.internalSubset(doc, , ) +SAX.attributeDecl(e, a1, 1, 3, NULL, ...) +SAX.attributeDecl(e, a2, 1, 3, NULL, ...) +SAX.attributeDecl(e, a3, 1, 3, NULL, ...) +SAX.attributeDecl(e, a4, 1, 3, NULL, ...) +SAX.attributeDecl(e, a5, 1, 3, NULL, ...) +SAX.attributeDecl(e, a6, 1, 3, NULL, ...) +SAX.externalSubset(doc, , ) +SAX.startElementNs(doc, NULL, NULL, 0, 0, 0) +SAX.characters( + , 5) +SAX.comment( This tests whether xmlCleanSpecialAttr works. The attribute values + must not be normalized. ) +SAX.characters( + , 5) +SAX.startElementNs(e, NULL, NULL, 0, 6, 0, a1=' x ...', 6, a2=' x ...', 6, a3=' x ...', 6, a4=' x ...', 6, a5=' x ...', 6, a6=' x ...', 6) +SAX.endElementNs(e, NULL, NULL) +SAX.characters( +, 1) +SAX.endElementNs(doc, NULL, NULL) +SAX.endDocument() diff --git a/result/noent/issue626.xml b/result/noent/issue626.xml new file mode 100644 index 00000000..001b3d9c --- /dev/null +++ b/result/noent/issue626.xml @@ -0,0 +1,14 @@ + + + + + + + +]> + + + + diff --git a/result/noent/issue626.xml.sax2 b/result/noent/issue626.xml.sax2 new file mode 100644 index 00000000..edc2c1f9 --- /dev/null +++ b/result/noent/issue626.xml.sax2 @@ -0,0 +1,23 @@ +SAX.setDocumentLocator() +SAX.startDocument() +SAX.internalSubset(doc, , ) +SAX.attributeDecl(e, a1, 1, 3, NULL, ...) +SAX.attributeDecl(e, a2, 1, 3, NULL, ...) +SAX.attributeDecl(e, a3, 1, 3, NULL, ...) +SAX.attributeDecl(e, a4, 1, 3, NULL, ...) +SAX.attributeDecl(e, a5, 1, 3, NULL, ...) +SAX.attributeDecl(e, a6, 1, 3, NULL, ...) +SAX.externalSubset(doc, , ) +SAX.startElementNs(doc, NULL, NULL, 0, 0, 0) +SAX.characters( + , 5) +SAX.comment( This tests whether xmlCleanSpecialAttr works. The attribute values + must not be normalized. ) +SAX.characters( + , 5) +SAX.startElementNs(e, NULL, NULL, 0, 6, 0, a1=' x ...', 6, a2=' x ...', 6, a3=' x ...', 6, a4=' x ...', 6, a5=' x ...', 6, a6=' x ...', 6) +SAX.endElementNs(e, NULL, NULL) +SAX.characters( +, 1) +SAX.endElementNs(doc, NULL, NULL) +SAX.endDocument() diff --git a/test/issue626.xml b/test/issue626.xml new file mode 100644 index 00000000..5b0f6832 --- /dev/null +++ b/test/issue626.xml @@ -0,0 +1,13 @@ + + + + + + +]> + + + +