mirror of
https://gitlab.gnome.org/GNOME/libxml2
synced 2025-03-28 21:33:13 +00:00
87 lines
2.6 KiB
Python
87 lines
2.6 KiB
Python
#!/usr/bin/env python3
r"""Convert html5lib tokenizer tests into flat test/result files.

Every ``../html5lib-tests/tokenizer/*.test`` file (JSON) except
``xmlViolation`` is converted into two text files named after it:

* ``test/html-tokenizer/<name>.test`` -- for each generated case a header
  line ``<counter> <lastStartTag or -> <state number> <UTF-8 byte length>``
  followed by the input text and a newline;
* ``result/html-tokenizer/<name>.test`` -- for each generated case the
  counter on its own line followed by the expected tokens, as rendered by
  ``format_tokens``.

Literal ``\uXXXX`` sequences in inputs and expected output are replaced by
the characters they name; tests whose input contains a surrogate escape
are skipped.
"""

import glob
import json
import re

# Numeric state codes written as the third field of each test header, keyed
# by the state names that appear in a test's 'initialStates' list.
state_map = {
    'Data state': 0,
    'RCDATA state': 1,
    'RAWTEXT state': 2,
    'PLAINTEXT state': 3,
    'Script data state': 4,
    'CDATA section state': 5,
}

# A literal backslash-u escape followed by four hex digits.
_UNICODE_ESCAPE = re.compile(r'\\u([A-Fa-f0-9]{4})')
# A literal backslash-u escape in the surrogate range (case-insensitive).
_SURROGATE_ESCAPE = re.compile(r'\\uD[89A-F]', re.I)


def decode_escapes(text):
    r"""Return *text* with each literal ``\uXXXX`` replaced by its character."""
    return _UNICODE_ESCAPE.sub(lambda m: chr(int(m[1], 16)), text)


def format_tokens(tokens):
    r"""Render a test's expected tokens as line-oriented text.

    Each token contributes its type (``token[0]``) on one line.  A
    ``DOCTYPE`` token is followed by three lines holding ``token[1]`` to
    ``token[3]``, with ``<none>`` standing in for ``None``.  Any other
    token is followed by a single line holding ``token[1]``; for a
    ``StartTag`` that line also carries `` name=value`` for every entry of
    the attribute dict ``token[2]``.

    Literal ``\uXXXX`` escapes are decoded afterwards and every NUL
    character is replaced by U+FFFD.
    """
    parts = []
    for token in tokens:
        kind = token[0]
        parts.append(kind + '\n')

        if kind == 'DOCTYPE':
            for i in range(1, 4):
                field = token[i]
                parts.append('<none>\n' if field is None else field + '\n')
        else:
            parts.append(token[1])
            if kind == 'StartTag':
                parts.extend(f' {name}={value}'
                             for name, value in token[2].items())
            parts.append('\n')

    return decode_escapes(''.join(parts)).replace('\x00', '\uFFFD')


def render_suite(root, filename):
    """Build the test-file and result-file text for one parsed JSON suite.

    Args:
        root: the decoded JSON object; every value is iterated as a list
            of test dicts with 'input' and 'output' keys and optional
            'initialStates' and 'lastStartTag' keys.
        filename: source file name, used only in error messages.

    Returns:
        A ``(test_text, result_text)`` tuple of strings.

    Raises:
        Exception: if a test names an initial state missing from
            ``state_map``.
    """
    test_parts = []
    result_parts = []
    counter = 0

    for tests in root.values():
        for test in tests:
            text = test['input']

            # Skip surrogate tests
            if _SURROGATE_ESCAPE.search(text):
                continue

            text = decode_escapes(text)
            expected = format_tokens(test['output'])
            start_tag = test.get('lastStartTag', '-')

            # One case is emitted per initial state; the counter numbers
            # emitted cases, not source tests.
            for state in test.get('initialStates', ['Data state']):
                state_no = state_map.get(state)
                if state_no is None:
                    raise Exception(f'{filename}: unknown state: {state}')
                # Cases for state 5 ('CDATA section state') are not emitted.
                if state_no == 5:
                    continue

                # The header records the input length in UTF-8 bytes.
                test_parts.append(f'{counter} {start_tag} {state_no} '
                                  f'{len(text.encode())}\n')
                test_parts.append(text)
                test_parts.append('\n')

                result_parts.append(f'{counter}\n')
                result_parts.append(expected)

                counter += 1

    return ''.join(test_parts), ''.join(result_parts)


def main():
    """Convert every tokenizer suite found relative to the working directory."""
    for filename in sorted(glob.glob('../html5lib-tests/tokenizer/*.test')):
        match = re.search(r'/([^/]*)\.test$', filename)
        if match is None:
            continue
        testname = match[1]
        if testname == 'xmlViolation':
            continue

        with open(filename, encoding='utf-8') as json_data:
            root = json.load(json_data)

        # Render fully before opening the outputs so that a failure does
        # not leave truncated files behind.
        test_text, result_text = render_suite(root, filename)

        # Write UTF-8 without newline translation: the header length is a
        # UTF-8 byte count, so the bytes on disk must match it exactly.
        with open(f'test/html-tokenizer/{testname}.test', 'w',
                  encoding='utf-8', newline='') as test_out:
            test_out.write(test_text)
        with open(f'result/html-tokenizer/{testname}.test', 'w',
                  encoding='utf-8', newline='') as result_out:
            result_out.write(result_text)


if __name__ == '__main__':
    main()