Restore binary compat, more HTML stuff, allow stdin input, Daniel.

This commit is contained in:
Daniel Veillard 1999-07-06 22:25:25 +00:00
parent be70ff7162
commit 5233ffc8d3
10 changed files with 898 additions and 354 deletions

View File

@ -1,3 +1,9 @@
Wed Jul 7 00:25:42 CEST 1999 Daniel Veillard <Daniel.Veillard@w3.org>
* parser.h : Oops removed the binary compatibility problem
* HTMLparser.[ch], HTMLtree.h : More work on the HTML parse/dump
* parser.c, HTMLparser.c: applied patches for reading from stdin
Mon Jul 5 18:45:31 CEST 1999 Daniel Veillard <Daniel.Veillard@w3.org>
* parser.c, entities.c, valid.c: cleanup bug #1591

File diff suppressed because it is too large Load Diff

View File

@ -10,6 +10,9 @@
#define __HTML_PARSER_H__
#include "parser.h"
/*
* Most of the back-end structures from XML and HTML are shared
*/
typedef xmlParserCtxt htmlParserCtxt;
typedef xmlParserCtxtPtr htmlParserCtxtPtr;
typedef xmlParserNodeInfo htmlParserNodeInfo;
@ -20,7 +23,33 @@ typedef xmlParserInputPtr htmlParserInputPtr;
typedef xmlDocPtr htmlDocPtr;
typedef xmlNodePtr htmlNodePtr;
xmlEntityPtr htmlParseEntityRef(htmlParserCtxtPtr ctxt);
/*
* Internal description of an HTML element: its tag name, a set of
* integer flags describing how the element may appear, and a
* human-readable description.
*/
typedef struct htmlElemDesc {
const CHAR *name; /* The tag name */
int startTag; /* Whether the start tag can be implied */
int endTag; /* Whether the end tag can be implied */
int empty; /* Is this an empty element ? */
int depr; /* Is this a deprecated element ? */
int dtd; /* 1: only in Loose DTD, 2: only Frameset one (presumably 0 means any DTD - TODO confirm) */
const char *desc; /* Human-readable description of the element */
} htmlElemDesc, *htmlElemDescPtr;
/*
* Internal description of an HTML entity: associates an entity name
* with the UNICODE value of the character and a human-readable
* description.
*/
typedef struct htmlEntityDesc {
int value; /* The UNICODE value for the character */
const CHAR *name; /* The entity name */
const char *desc; /* Human-readable description of the entity */
} htmlEntityDesc, *htmlEntityDescPtr;
/*
* There are only a few public functions.
*/
htmlEntityDescPtr
htmlParseEntityRef(htmlParserCtxtPtr ctxt, CHAR **str);
int htmlParseCharRef(htmlParserCtxtPtr ctxt);
void htmlParseElement(htmlParserCtxtPtr ctxt);

View File

@ -23,6 +23,7 @@ libxml_la_SOURCES = \
HTMLparser.c \
debugXML.c \
tree.c \
HTMLtree.c \
valid.c
xmlincdir = $(includedir)/gnome-xml
@ -30,6 +31,7 @@ xmlinc_HEADERS = \
entities.h \
encoding.h \
parser.h \
HTMLparser.h \
parserInternals.h \
debugXML.h \
xml-error.h \

8
SAX.c
View File

@ -180,7 +180,13 @@ resolveEntity(void *ctx, const CHAR *publicId, const CHAR *systemId)
* TODO : not 100% sure that this is the appropriate handling in that case.
*/
if (systemId != NULL) {
return(xmlNewInputFromFile(ctxt, systemId));
if (!xmlStrncmp(systemId, "http://", 7)) {
/* !!!!!!!!! TODO */
} else if (!xmlStrncmp(systemId, "ftp://", 6)) {
/* !!!!!!!!! TODO */
} else {
return(xmlNewInputFromFile(ctxt, systemId));
}
}
return(NULL);
}

View File

@ -10,6 +10,9 @@
#define __HTML_PARSER_H__
#include "parser.h"
/*
* Most of the back-end structures from XML and HTML are shared
*/
typedef xmlParserCtxt htmlParserCtxt;
typedef xmlParserCtxtPtr htmlParserCtxtPtr;
typedef xmlParserNodeInfo htmlParserNodeInfo;
@ -20,7 +23,33 @@ typedef xmlParserInputPtr htmlParserInputPtr;
typedef xmlDocPtr htmlDocPtr;
typedef xmlNodePtr htmlNodePtr;
xmlEntityPtr htmlParseEntityRef(htmlParserCtxtPtr ctxt);
/*
* Internal description of an HTML element: its tag name, a set of
* integer flags describing how the element may appear, and a
* human-readable description.
*/
typedef struct htmlElemDesc {
const CHAR *name; /* The tag name */
int startTag; /* Whether the start tag can be implied */
int endTag; /* Whether the end tag can be implied */
int empty; /* Is this an empty element ? */
int depr; /* Is this a deprecated element ? */
int dtd; /* 1: only in Loose DTD, 2: only Frameset one (presumably 0 means any DTD - TODO confirm) */
const char *desc; /* Human-readable description of the element */
} htmlElemDesc, *htmlElemDescPtr;
/*
* Internal description of an HTML entity: associates an entity name
* with the UNICODE value of the character and a human-readable
* description.
*/
typedef struct htmlEntityDesc {
int value; /* The UNICODE value for the character */
const CHAR *name; /* The entity name */
const char *desc; /* Human-readable description of the entity */
} htmlEntityDesc, *htmlEntityDescPtr;
/*
* There are only a few public functions.
*/
htmlEntityDescPtr
htmlParseEntityRef(htmlParserCtxtPtr ctxt, CHAR **str);
int htmlParseCharRef(htmlParserCtxtPtr ctxt);
void htmlParseElement(htmlParserCtxtPtr ctxt);

View File

@ -230,8 +230,6 @@ typedef struct xmlDoc {
char *name; /* name/filename/URI of the document */
const CHAR *version; /* the XML version string */
const CHAR *encoding; /* encoding, if any */
const CHAR *ID; /* the HTML version */
const CHAR *DTD; /* the HTML dtd URI */
int compression;/* level of zlib compression */
int standalone; /* standalone document (no external refs) */
struct xmlDtd *intSubset; /* the document internal subset */

202
parser.c
View File

@ -264,74 +264,111 @@ xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
#endif
int res;
int len;
int cnt;
struct stat buf;
char *buffer;
char *buffer, *nbuf;
xmlParserInputPtr inputStream;
/* xmlCharEncoding enc; */
res = stat(filename, &buf);
if (res < 0) return(NULL);
#define MINLEN 40000
if (strcmp(filename,"-") == 0) {
#ifdef HAVE_ZLIB_H
len = (buf.st_size * 8) + 1000;
retry_bigger:
buffer = malloc(len);
#else
len = buf.st_size + 100;
buffer = malloc(len);
#endif
if (buffer == NULL) {
perror("malloc");
return(NULL);
}
memset(buffer, 0, len);
#ifdef HAVE_ZLIB_H
input = gzopen (filename, "r");
if (input == NULL) {
fprintf (stderr, "Cannot read file %s :\n", filename);
perror ("gzopen failed");
return(NULL);
}
input = gzdopen (fileno(stdin), "r");
if (input == NULL) {
fprintf (stderr, "Cannot read from stdin\n");
perror ("gzdopen failed");
return(NULL);
}
#else
#ifdef WIN32
input = _open (filename, O_RDONLY | _O_BINARY);
input = -1;
#else
input = open (filename, O_RDONLY);
input = fileno(stdin);
#endif
if (input < 0) {
fprintf (stderr, "Cannot read file %s :\n", filename);
perror ("open failed");
return(NULL);
if (input < 0) {
fprintf (stderr, "Cannot read from stdin\n");
perror ("open failed");
return(NULL);
}
#endif
len = MINLEN;
} else {
#ifdef HAVE_ZLIB_H
res = gzread(input, buffer, len);
input = gzopen (filename, "r");
if (input == NULL) {
fprintf (stderr, "Cannot read file %s :\n", filename);
perror ("gzopen failed");
return(NULL);
}
#else
res = read(input, buffer, buf.st_size);
#ifdef WIN32
input = _open (filename, O_RDONLY | _O_BINARY);
#else
input = open (filename, O_RDONLY);
#endif
if (res < 0) {
fprintf (stderr, "Cannot read file %s :\n", filename);
if (input < 0) {
fprintf (stderr, "Cannot read file %s :\n", filename);
perror ("open failed");
return(NULL);
}
#endif
res = stat(filename, &buf);
if (res < 0)
return(NULL);
len = buf.st_size+1;
if (len < MINLEN)
len = MINLEN;
}
buffer = (char *)malloc(len*sizeof(char));
if (buffer == NULL) {
fprintf (stderr, "Cannot malloc\n");
perror ("malloc failed");
return(NULL);
}
cnt = 0;
#ifdef HAVE_ZLIB_H
perror ("gzread failed");
while(!gzeof(input)) {
#else
perror ("read failed");
while(1) {
#endif
return(NULL);
if (cnt >= len) {
len *= 2;
nbuf = (char *)realloc(buffer,len*sizeof(char));
if (nbuf == NULL) {
fprintf(stderr,"Cannot realloc\n");
free(buffer);
perror ("realloc failed");
return(NULL);
}
buffer = nbuf;
}
#ifdef HAVE_ZLIB_H
res = gzread(input, &buffer[cnt], len-cnt);
#else
res = read(input, &buffer[cnt], len-cnt);
#endif
if (res < 0) {
fprintf (stderr, "Cannot read file %s :\n", filename);
#ifdef HAVE_ZLIB_H
perror ("gzread failed");
#else
perror ("read failed");
#endif
return(NULL);
}
if (res == 0)
break;
cnt += res;
}
#ifdef HAVE_ZLIB_H
gzclose(input);
if (res >= len) {
free(buffer);
len *= 2;
goto retry_bigger;
}
buf.st_size = res;
#else
close(input);
#endif
buffer[buf.st_size] = '\0';
buffer[cnt] = '\0';
inputStream = (xmlParserInputPtr) malloc(sizeof(xmlParserInput));
if (inputStream == NULL) {
@ -4594,28 +4631,36 @@ xmlCreateFileParserCtxt(const char *filename)
#endif
int res;
int len;
int cnt;
struct stat buf;
char *buffer;
char *buffer, *nbuf;
xmlParserInputPtr inputStream;
xmlCharEncoding enc;
res = stat(filename, &buf);
if (res < 0) return(NULL);
#define MINLEN 40000
if (strcmp(filename,"-") == 0) {
#ifdef HAVE_ZLIB_H
len = (buf.st_size * 8) + 1000;
retry_bigger:
buffer = malloc(len);
input = gzdopen (fileno(stdin), "r");
if (input == NULL) {
fprintf (stderr, "Cannot read from stdin\n");
perror ("gzdopen failed");
return(NULL);
}
#else
len = buf.st_size + 100;
buffer = malloc(len);
#ifdef WIN32
input = -1;
#else
input = fileno(stdin);
#endif
if (buffer == NULL) {
perror("malloc");
if (input < 0) {
fprintf (stderr, "Cannot read from stdin\n");
perror ("open failed");
return(NULL);
}
memset(buffer, 0, len);
#endif
len = MINLEN;
} else {
#ifdef HAVE_ZLIB_H
input = gzopen (filename, "r");
if (input == NULL) {
@ -4635,10 +4680,41 @@ retry_bigger:
return(NULL);
}
#endif
res = stat(filename, &buf);
if (res < 0)
return(NULL);
len = buf.st_size+1;
if (len < MINLEN)
len = MINLEN;
}
buffer = (char *)malloc(len*sizeof(char));
if (buffer == NULL) {
fprintf (stderr, "Cannot malloc\n");
perror ("malloc failed");
return(NULL);
}
cnt = 0;
#ifdef HAVE_ZLIB_H
res = gzread(input, buffer, len);
while(!gzeof(input)) {
#else
res = read(input, buffer, buf.st_size);
while(1) {
#endif
if (cnt == len) {
len *= 2;
nbuf = (char *)realloc(buffer,len*sizeof(char));
if (nbuf == NULL) {
fprintf(stderr,"Cannot realloc\n");
free(buffer);
perror ("realloc failed");
return(NULL);
}
buffer = nbuf;
}
#ifdef HAVE_ZLIB_H
res = gzread(input, &buffer[cnt], len-cnt);
#else
res = read(input, &buffer[cnt], len-cnt);
#endif
if (res < 0) {
fprintf (stderr, "Cannot read file %s :\n", filename);
@ -4649,19 +4725,17 @@ retry_bigger:
#endif
return(NULL);
}
if (res == 0)
break;
cnt += res;
}
#ifdef HAVE_ZLIB_H
gzclose(input);
if (res >= len) {
free(buffer);
len *= 2;
goto retry_bigger;
}
buf.st_size = res;
#else
close(input);
#endif
buffer[res] = '\0';
buffer[cnt] = '\0';
ctxt = (xmlParserCtxtPtr) malloc(sizeof(xmlParserCtxt));
if (ctxt == NULL) {

2
tree.c
View File

@ -387,8 +387,6 @@ xmlNewDoc(const CHAR *version) {
cur->type = XML_DOCUMENT_NODE;
cur->version = xmlStrdup(version);
cur->ID = NULL;
cur->DTD = NULL;
cur->name = NULL;
cur->root = NULL;
cur->intSubset = NULL;

2
tree.h
View File

@ -230,8 +230,6 @@ typedef struct xmlDoc {
char *name; /* name/filename/URI of the document */
const CHAR *version; /* the XML version string */
const CHAR *encoding; /* encoding, if any */
const CHAR *ID; /* the HTML version */
const CHAR *DTD; /* the HTML dtd URI */
int compression;/* level of zlib compression */
int standalone; /* standalone document (no external refs) */
struct xmlDtd *intSubset; /* the document internal subset */