/*
This product contains certain software code or other information
("AT&T Software") proprietary to AT&T Corp. ("AT&T").  The AT&T
Software is provided to you "AS IS".  YOU ASSUME TOTAL RESPONSIBILITY
AND RISK FOR USE OF THE AT&T SOFTWARE.  AT&T DOES NOT MAKE, AND
EXPRESSLY DISCLAIMS, ANY EXPRESS OR IMPLIED WARRANTIES OF ANY KIND
WHATSOEVER, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, WARRANTIES OF
TITLE OR NON-INFRINGEMENT OF ANY INTELLECTUAL PROPERTY RIGHTS, ANY
WARRANTIES ARISING BY USAGE OF TRADE, COURSE OF DEALING OR COURSE OF
PERFORMANCE, OR ANY WARRANTY THAT THE AT&T SOFTWARE IS "ERROR FREE" OR
WILL MEET YOUR REQUIREMENTS.

Unless you accept a license to use the AT&T Software, you shall not
reverse compile, disassemble or otherwise reverse engineer this
product to ascertain the source code for any AT&T Software.

(c) AT&T Corp. All rights reserved.  AT&T is a registered trademark of AT&T Corp.

***********************************************************************

History:

      24/11/99  - initial release by Hartmut Liefke, liefke@seas.upenn.edu
                                     Dan Suciu,      suciu@research.att.com
*/

//**************************************************************************
//**************************************************************************

// This module contains the XML-Parser. Based on class 'FileParser',
// the XML parser implements functions for finding and parsing elements and attributes
// the events are handled through an SAX-like interface called SAXClient

#include "stdafx.h"

// Steps 'ptr' forward over blanks, tabs, carriage returns and line feeds,
// never moving past 'endptr'. Returns the position of the first character
// that is not white space, or 'endptr' if the whole range is white space.
inline char *TraverseWhiteSpaces(char *ptr,char *endptr)
{
   for(;ptr<endptr;++ptr)
   {
      const char ch=*ptr;
      const bool blank=(ch==' ')||(ch=='\t')||(ch=='\r')||(ch=='\n');

      if(!blank)
         break;   // first non-white-space character
   }
   return ptr;
}

// Creates a parser for session 's' (forwarded to the FileParser base).
// The last-block flag starts out cleared and no SAX client is attached yet.
XMLParse::XMLParse(Session *s): FileParser(s) 
{
	ResetLastBlock();
	saxclient = NULL;
}

// Destructor. The body is empty: nothing is released here.
XMLParse::~XMLParse()
{
}

void XMLParse::XMLParseError(char *errmsg)
   // Reports a parse error at the current input line.
   // Builds an XMillException (code XMILL_ERR_PARSE) whose text starts with
   // "Parse error in line <n>:", appends 'errmsg' through ErrorCont() and
   // throws a pointer to that exception. Never returns normally.
{
   char tmpstr[64];

   // Bounded formatting. The explicit cast keeps the argument in step with
   // the %lu conversion whatever integer type GetCurLineNo() returns.
   snprintf(tmpstr,sizeof(tmpstr),"Parse error in line %lu:\n",
            static_cast<unsigned long>(GetCurLineNo()));

   XMillException *e = new XMillException(XMILL_ERR_PARSE, tmpstr);
   e->ErrorCont(errmsg);
   throw e;
}

void XMLParse::XMLParseError(char *errmsg,int savelineno)
   // Reports a parse error that refers to a previously saved line number.
   // 'errmsg' is used as a printf-style format string; the callers in this
   // file pass a literal containing a single %lu conversion, which receives
   // 'savelineno'. Throws a pointer to a new XMillException
   // (code XMILL_ERR_PARSE). Never returns normally.
{
   // The messages passed by the callers are longer than 50 characters, so
   // the buffer is sized generously and the write is bounded by its size.
   char tmpstr[256];

   // The cast makes the argument type match the %lu conversion used by the
   // callers' format strings.
   snprintf(tmpstr,sizeof(tmpstr),errmsg,static_cast<unsigned long>(savelineno));
   throw new XMillException (XMILL_ERR_PARSE, tmpstr);
}

char XMLParse::SkipWhiteSpaces(char *isokay)
   // Consumes white-space characters (' ', '\t', '\r', '\n') from the input.
   // Returns the first character that is not white space; that character is
   // only peeked at, not consumed.
   // '*isokay' is set to TRUE on success. It is set to FALSE as soon as
   // PeekChar() or SkipChar() reports failure; scanning stops at once in that
   // case and the returned character must not be relied upon.
{
   // Initialised so that a failed PeekChar() never leaves us with garbage.
   char c = '\0';

   *isokay = TRUE;

   for(;;)
   {
      if(!PeekChar(&c))
      {
         *isokay = FALSE;
         return c;
      }

      if((c!=' ')&&(c!='\t')&&(c!='\r')&&(c!='\n'))
         return c;

      if(!SkipChar())
      {
         // Continuing after a failed skip could loop forever on the same
         // character, so we give up here.
         *isokay = FALSE;
         return c;
      }
   }
}

char XMLParse::ParseAttribs(char *isokay)
// Scans the attributes inside a start label and reports the white space,
// attribute names and attribute values to the SAX client.
// Returns the character that ended the label: '>' or '/'.
// '*isokay' is TRUE on a normal return and FALSE when PeekChar(), SkipChar()
// or SkipWhiteSpaces() reported failure; the returned character must not be
// relied upon in that case.
// Throws an XMillException pointer when '=' is missing, when a quoted value
// is not terminated before '>', or when an unexpected character follows the
// closing quote.
{
   char c = '\0';   // initialised so that failure paths never return garbage
   char *strptr;
   int  len;

   *isokay = TRUE;

   do
   {
      while(ReadWhiteSpaces(&strptr,&len)==0)
         // We read all white-spaces
         saxclient->HandleAttribWhiteSpaces(strptr,len,1);

      saxclient->HandleAttribWhiteSpaces(strptr,len,0);

      // Now we don't have any more white-spaces and we search
      // for '=' (if there is an attribute) or '>' (for the end of the element)
      if (!PeekChar(&c)) {
         *isokay = FALSE;
         return c;
      }
      if((c=='>')||(c=='/'))  // End of label?
      {
         if (!SkipChar()) {
            *isokay = FALSE;
         }
         return c;
      }

      // Let's find '=' or some white-space
      while(ReadStringUntil(&strptr,&len,1,'=',0)==0)
         // We scan until we reach '='
         saxclient->HandleAttribName(strptr,len,1);

      // The last character read is the terminator ('=' or a white space);
      // it is not part of the attribute name.
      saxclient->HandleAttribName(strptr,len-1,0);

      if(strptr[len-1]!='=')
         // We found white-spaces instead?
      {
         c=SkipWhiteSpaces(isokay);
         if (!*isokay) {   // test the flag itself, not the pointer to it
            return c;
         }
         if(c!='=')
            XMLParseError("Symbol '=' expected !");
         if (!SkipChar()) {
            *isokay = FALSE;
            return c;
         }
      }

      // We skip all white spaces after '='
      c=SkipWhiteSpaces(isokay);
      if (!*isokay) {      // test the flag itself, not the pointer to it
         return c;
      }

      // The next character should be a '"'
      // If not, then we assume that the value only goes until the
      // next white-space (or '>' or '/')!

      if(c!='"')
      {
         while(ReadStringUntil(&strptr,&len,1,'>','/')==0)
            saxclient->HandleAttribValue(strptr,len,0);

         saxclient->HandleAttribValue(strptr,len-1,1);

         // If the value was ended by '/' or '>', the label is finished too
         c=strptr[len-1];
         if((c=='/')||(c=='>'))
            return c;
      }
      else
      {
         // Skip the opening quote
         if (!SkipChar()) {
            *isokay = FALSE;
            return c;
         }

         while(ReadStringUntil(&strptr,&len,0,'"','>')==0)
            saxclient->HandleAttribValue(strptr,len,0);

         if(strptr[len-1]=='>')
            // The label ended before the closing quote was seen
         {
            char tmpstr[100];
            snprintf(tmpstr,sizeof(tmpstr),
                     "Line %lu: Missing '\"' at the end of attribute value '",
                     static_cast<unsigned long>(GetCurLineNo()));
            XMillException *e = new XMillException(XMILL_ERR_PARSE, tmpstr);
            e->ErrorCont(strptr,len-1);
            e->ErrorCont("'!");
            throw e;
         }

         saxclient->HandleAttribValue(strptr,len-1,1);

         if (!PeekChar(&c)) {
            *isokay = FALSE;
            return c;
         }
         if((c!='>')&&(c!=' ')&&(c!='\t')&&(c!='\n')&&(c!='\r')&&(c!='/'))
         {
            // Large enough for the fixed text plus a full-width line number
            char tmpstr[64];
            snprintf(tmpstr,sizeof(tmpstr),
                     "Skip invalid character '%c' in line %lu",
                     c,static_cast<unsigned long>(GetCurLineNo()));
            throw new XMillException(XMILL_ERR_PARSE, tmpstr);
         }
      }
   }
   while(1);
}

char XMLParse::ParseLabel()
   // Scans a label after the '<' has already been parsed.
   // Handles end labels ("/name>") as well as start labels, including start
   // labels with attributes and empty labels that end in "/>".
   // Returns TRUE when the label was handled. Returns FALSE when the input
   // ran out first; the characters read so far are pushed back with
   // UndoReadChar() so that the label can be parsed again later.
   // Throws an XMillException pointer for malformed labels.
{
   char c,*ptr, attribokay;
   int  len;

   // NOTE(review): the result of PeekChar() is not checked here, so 'c' is
   // only meaningful if the peek succeeded.
   PeekChar(&c);

   if(c=='/') // An ending label ?
   {
      GetChar(&c);

      while(ReadStringUntil(&ptr,&len,FALSE,'>','<')==0)
//         while(ReadStringUntil(&ptr,&len,'>')==0)
         // We didn't find '>'  ?
			if (IsEndOfFile()) {
				/* undo the read chars and let the compressor retry after we fetch a new block */
				/* NOTE(review): the two extra characters are presumably the '/' and the '<' - confirm */
				UndoReadChar(len+1+1);
				return FALSE;
			} else {
	         saxclient->HandleEndLabel(ptr,len,TRUE);
			}

      // A '<' before the closing '>' means the end label was never finished
      if(ptr[len-1]=='<')
      {
         XMillException *e = new XMillException(XMILL_ERR_PARSE, "Unfinished end label!");
         /*e->PrintErrorMsg();
			delete e;
         UndoReadChar();*/
			throw e;
      }

      // Report the label name without the trailing '>'
      saxclient->HandleEndLabel(ptr,len-1,FALSE);
      return TRUE;
   }

	/* this is a starting label */

	/* check if there is a closing bracket */
	if (ReadStringUntil(&ptr,&len,'>')==0) {
		/* nope */
		if (IsEndOfFile()) {
			/* undo the read chars and let the compressor retry after we fetch a new block */
			UndoReadChar(len+1);
			return FALSE;
		}
	}
	/* push everything back and start again */
	UndoReadChar(len);

	while(ReadStringUntil(&ptr,&len,TRUE,'>','/')==0) {
      // We didn't find '>' or '/' or a white-space ?
		if (IsEndOfFile()) {
			/* undo the read chars and let the compressor retry after we fetch a new block */
			UndoReadChar(len+1);
			return FALSE;
		} else {
	      saxclient->HandleStartLabel(ptr,len,TRUE);
		}
	}

   // The last character read tells us how the label name ended
   switch(ptr[len-1]) {
		case '>':
			/* just a start label */
			saxclient->HandleStartLabel(ptr,len-1,FALSE);
			return TRUE;

		case '/':
			/* start + end label without data */
			saxclient->HandleStartLabel(ptr,len-1,FALSE);
			if (!GetChar(&c)) {
				/* should not be reached! */
				UndoReadChar(len+1);
				return FALSE;
			}
			if(c!='>')
				XMLParseError("Symbol '/' in label must be followed by '>' !");

			saxclient->HandleEndLabel(NULL,0,FALSE);
			return TRUE;

		default: // Did we find some white space ??
			/* start label with attributes */
			saxclient->HandleStartLabel(ptr,len,FALSE);
			/* ParseAttribs returns the character that ended the label ('>' or '/') */
			c=ParseAttribs(&attribokay);
			if (!attribokay) {
				/* should not be reached! */
				UndoReadChar(len+1);
				return FALSE;
			}
			if(c=='/')
			{
				// I.e. we received an empty label
				saxclient->HandleEndLabel(NULL,0,FALSE);
				if (!GetChar(&c)) {
					/* should not be reached! */
					UndoReadChar(len+1);
					return FALSE;
				}
			}
			if(c!='>') {
				XMLParseError("Symbol '>' expected after '/' in tag!");
			}
   }
	return TRUE;
}

char XMLParse::ParsePI()
   // Parses a processing instruction, i.e. everything up to the closing "?>".
   // Pieces read before the terminator are passed to HandlePI() with flag 1,
   // the final piece with flag 0.
   // Returns TRUE when the terminator was found. Returns FALSE, after pushing
   // the characters back with UndoReadChar(), when IsEndOfFile() reports that
   // the input ended first. Raises a parse error when an empty read occurs
   // without the terminator.
{
   const int startline=GetCurLineNo();  // line where the instruction began
   char *data;
   int  datalen;

   while(!ReadStringUntil(&data,&datalen,"?>"))
   {
      if(IsEndOfFile())
      {
         UndoReadChar(datalen+2);
         return FALSE;
      }

      if(datalen==0)
         XMLParseError("Could not find closing '?>' for processing instruction in line %lu !",startline);

      saxclient->HandlePI(data,datalen,1);
   }

   saxclient->HandlePI(data,datalen,0);
   return TRUE;
}

char XMLParse::ParseCDATA()
   // Parses a CDATA section, i.e. everything up to the closing "]]>".
   // Pieces read before the terminator are passed to HandleCDATA() with TRUE,
   // the final piece with FALSE.
   // Returns TRUE when the terminator was found. Returns FALSE, after pushing
   // the characters back with UndoReadChar(), when IsEndOfFile() reports that
   // the input ended first. Raises a parse error when an empty read occurs
   // without the terminator.
{
   const int firstline=GetCurLineNo();  // line where the section began
   char *chunk;
   int  chunklen;

   for(;;)
   {
      if(ReadStringUntil(&chunk,&chunklen,"]]>")!=0)
         break;   // terminator found

      if(IsEndOfFile())
      {
         UndoReadChar(chunklen+3);
         return FALSE;
      }

      if(chunklen==0)
         XMLParseError("Could not find closing ']]>' for CDATA section starting in line %lu !",firstline);

      saxclient->HandleCDATA(chunk,chunklen,TRUE);
   }

   saxclient->HandleCDATA(chunk,chunklen,FALSE);
   return TRUE;
}

char XMLParse::ParseComment()
   // Parses a comment, i.e. everything up to the closing "-->".
   // Pieces read before the terminator are passed to HandleComment() with
   // flag 1, the final piece with flag 0.
   // Returns TRUE when the terminator was found. Returns FALSE, after pushing
   // the characters back with UndoReadChar(), when IsEndOfFile() reports that
   // the input ended first. Raises a parse error when an empty read occurs
   // without the terminator.
{
   const int openline=GetCurLineNo();   // line where the comment began
   char *text;
   int  textlen;

   while(!ReadStringUntil(&text,&textlen,"-->"))
   {
      // The closing tag has not been found yet
      if(IsEndOfFile())
      {
         UndoReadChar(textlen+3);
         return FALSE;
      }

      if(textlen==0)
         XMLParseError("Could not find closing '-->' for comment starting in line %lu !",openline);

      saxclient->HandleComment(text,textlen,1);
   }

   saxclient->HandleComment(text,textlen,0);
   return TRUE;
}

// Returns the current value of the last-block flag.
char XMLParse::IsLastBlock()
{
	return islastblock;
}

// Sets the last-block flag to TRUE.
void XMLParse::SetLastBlock()
{
	islastblock = TRUE;
}

// Clears the last-block flag (sets it to FALSE).
void XMLParse::ResetLastBlock()
{
	islastblock = FALSE;
}

// return TRUE if we parsed all the text
// returns FALSE if data ended and no '<' was found
char XMLParse::ParseText()
   // Parses some text data
{
	//static count = 0;
   char found;
   int len;
   char *ptr,*leftwsptr,*rightwsptr,*endptr;

   // We look for the end '<'
   found = ReadStringUntil(&ptr,&len,'<');

   if(!found && (len==0))
      return TRUE;

   endptr=ptr+len;

   // Let's traverse over all white spaces at the beginning
   leftwsptr=ptr;

   while((leftwsptr<endptr)&&
         (*leftwsptr==' ')||(*leftwsptr=='\t')||
         (*leftwsptr=='\r')||(*leftwsptr=='\n'))
      leftwsptr++;

   while(!found)  // We didn't find '<' yet ?
                  // No? => We must handle that text before we can continue
   {
      if(len>0) {
			if (IsEndOfFile(len) && !IsLastBlock()) {
				UndoReadChar(len);
				return FALSE;
			} else if(len==leftwsptr-ptr) {
            // If all remaining characters are white spaces,
            // we send one single sequence
            saxclient->HandleText(ptr,len,0,len,len);
			} else {
            saxclient->HandleText(ptr,len,1,leftwsptr-ptr,0);
			}
      }

      if(leftwsptr==endptr)   // Everything until now was just white spaces ?
                              // ==> We compute again the number of left white-spaces
      {
         found=ReadStringUntil(&ptr,&len,'<');

         if(!found && (len==0))  // No more characters? ==> We are done
            return TRUE;

         leftwsptr=ptr;

         while((leftwsptr<endptr)&&
               (*leftwsptr==' ')||(*leftwsptr=='\t')||
               (*leftwsptr=='\r')||(*leftwsptr=='\n'))
            leftwsptr++;
      } else {
         found=ReadStringUntil(&ptr,&len,'<');
         if(!found && (len==0))
            return TRUE;
      
         leftwsptr=ptr; // i.e. the number of left-white spaces is set to zero
      }
   }

   // We found the character '<'

   // We take the '<' back
   UndoReadChar();
   len--;

   endptr=ptr+len;

   // Let's find the number of white spaces at the end of the string
   rightwsptr=endptr-1;

   while((rightwsptr>=ptr)&&
         (*rightwsptr==' ')||(*rightwsptr=='\t')||
         (*rightwsptr=='\r')||(*rightwsptr=='\n'))
      rightwsptr--;

	if(len>0) {
		//count++;
      saxclient->HandleText(ptr,len,0,leftwsptr-ptr,endptr-rightwsptr-1);
	}

	return TRUE;
}

char XMLParse::ParseDOCTYPE()
   // Parses a DOCTYPE section.
   // A DOCTYPE has format <!DOCTYPE ... >  or  <!DOCTYPE ... [ ... ] >
   // The section is passed to the SAX client through HandleDOCTYPE(); pieces
   // that end at a buffer boundary are passed with flag 1, the final piece
   // with flag 0.
   // Returns TRUE once the closing '>' has been handled. Returns FALSE, after
   // pushing characters back with UndoReadChar(), if no '>' was found and
   // IsEndOfFile() is true. Raises a parse error if a refill delivers no data
   // before the section is closed.
{
   int   len,savelineno=GetCurLineNo(); // We save the line
   char  *ptr;
   char  *myendptr,*curptr;

   // Let's get the current piece of buffer
   len=GetCurBlockPtr(&ptr);
   if(len==0)
      RefillAndGetCurBlockPtr(&ptr,&len);

	/* check for a closing '>' */
	if (!ReadStringUntil(&ptr,&len,'>')) {
		if (IsEndOfFile()) {
			/* put data back and let the user supply more data with a complete doctype element */
			/* NOTE(review): the 9 presumably stands for the length of "<!DOCTYPE" - confirm */
			UndoReadChar(len+9);
			return FALSE;
		}
	}
	/* doctype elements ends nicely, place chars back */
	UndoReadChar(len);
	
   // Scan the piece [ptr, myendptr) character by character
   myendptr=ptr+len;
   curptr=ptr;

   do
   {
      // Inside '[' ... ']' we only look for the closing ']'
      if(*curptr=='[')
      {
         do
         {
            curptr++;
            if(curptr==myendptr)
            {
               // End of the current piece: hand it over and fetch the next one
               saxclient->HandleDOCTYPE(ptr,len,1);
               FastSkipData(len);
               RefillAndGetCurBlockPtr(&ptr,&len);
					if(len==0) {
                  XMLParseError("Could not find closing ']>' for DOCTYPE section starting in line %lu !",savelineno);
					}

               myendptr=ptr+len;
               curptr=ptr;
            }
         } while(*curptr!=']');
      }
		if(*curptr=='>') {
         break;
		}

      curptr++;
      if(curptr==myendptr) {
         // End of the current piece: hand it over and fetch the next one
         saxclient->HandleDOCTYPE(ptr,len,1);
         FastSkipData(len);
         RefillAndGetCurBlockPtr(&ptr,&len);
			if(len==0) {
            XMLParseError("Could not find closing ']>' for DOCTYPE section starting in line %lu !",savelineno);
			}
         myendptr=ptr+len;
         curptr=ptr;
      }
   } while(1);

   // 'curptr' points at the closing '>'; it is included in the final piece
   saxclient->HandleDOCTYPE(ptr,curptr+1-ptr,0);
   FastSkipData(curptr+1-ptr);

	return TRUE;
}

char XMLParse::DoParsing(SAXClient *myclient)
   // This is the main parse function
   // It installs 'myclient' as the SAX client, registers this parser in the
   // session and then alternates between parsing text and parsing whatever
   // follows the next '<' (label, processing instruction, CDATA section,
   // DOCTYPE section or comment).
   // Returns XMILL_END_BLOCK when a peek/skip or one of the Parse...()
   // functions fails, or when the input ends while session->curpath->curdepth
   // is not 0; XMILL_END_DATA when the input ends at depth 0; XMILL_END_NONE
   // when the loop stops because the allocated memory reached the cut-off.
   // Throws an XMillException pointer for invalid tags.
{
   // Receives the peeked characters; 9 is the largest amount peeked below
   char c[9];
	// These flags only record the result of the most recent Parse...() call
	char textokay = false, labelokay = false, piokay = false, commentokay = false, 
			doctypeokay = false, cdataokay = false;

	saxclient=myclient;
   session->xmlparser=this;

   do {
      // Let's start parsing text
		if(!(textokay = ParseText())) {
			return XMILL_END_BLOCK;
		}

      // If have reached the end of the file, we exit
		if(IsEndOfFile()) {
			if (session->curpath->curdepth == 0) {
				/* end of data reached */
				return XMILL_END_DATA;
			} else {
				/* no more data, but we have unclosed items. We expect more input blocks */
				return XMILL_END_BLOCK;
#if 0
            /* XML file not valid! */
				throw new XMillException(XMILL_ERR_PARSE, "There are unclosed XML tags!");
#endif
			}
		}

      // The next character must be an '<' character
      if(!PeekChar(c)) {
			return XMILL_END_BLOCK;
		}
      if(*c!='<') // This should actually be never true
      {
         throw new XMillException(XMILL_ERR_PARSE, "Character '<' expected !");
      }

      // let's look at the next three characters
		if(!PeekData(c,3)) {
			return XMILL_END_BLOCK;
		}

      // c[0] is '<'; the following characters select the kind of tag
      switch(c[1])
      {
         case '?': // Processing Instruction ?
            // "<?>" is handed to ParseLabel() instead of ParsePI()
            if(c[2]=='>')
            {
					if(!SkipChar()) {
						return XMILL_END_BLOCK;
					}
					if (!(labelokay = ParseLabel())) {
						return XMILL_END_BLOCK;
					}
				} else {
					if(!(piokay = ParsePI())) {
						return XMILL_END_BLOCK;
					}
				}
            break;

         case '!':
            switch(c[2])
            {
					case '[': // We have <![CDATA[... ]]>
						if (!PeekData(c,9)) {
							return XMILL_END_BLOCK;
						}
						if(memcmp(c,"<![CDATA[",9)!=0)
						{
							XMillException *e = new XMillException(XMILL_ERR_PARSE, "Invalid tag '");
							e->ErrorCont(c,9);
							e->ErrorCont("...' should probably be '<![CDATA ...' !");
							throw e;
						}
						if(!(cdataokay = ParseCDATA())) {
							return XMILL_END_BLOCK;
						}
						break;
	   
					case 'D': // We must have <!DOCTYPE ... [ ... ] >
					{
						if (!PeekData(c,9)) {
							return XMILL_END_BLOCK;
						}
						if(memcmp(c,"<!DOCTYPE",9)!=0)
						{
							XMillException *e = new XMillException(XMILL_ERR_PARSE, "Invalid tag '");
							e->ErrorCont(c,9);
							e->ErrorCont("...' should probably be '<!DOCTYPE ...' !");
							throw e;
						}
						if(!(doctypeokay = ParseDOCTYPE())) {
							return XMILL_END_BLOCK;
						}
					}
					break;

					case '-': // We (probably) have a comment <!-- ... -->
						if (!PeekData(c,4)) {
							return XMILL_END_BLOCK;
						}

						if(c[3]!='-')
						{
							XMillException *e = new XMillException(XMILL_ERR_PARSE, "Invalid tag '");
							e->ErrorCont(c,4);
							e->ErrorCont("...' should probably be '<!-- ...' !");
							throw e;
						}
						if(!(commentokay = ParseComment())) {
							return XMILL_END_BLOCK;
						}
						break;

					default:
						// Any other "<!x" combination is rejected
						XMillException *e = new XMillException(XMILL_ERR_PARSE, "Invalid tag '");
						e->ErrorCont(c,3);
						e->ErrorCont("...' !");
						throw e;
            }
            break;

      case '=': {
         // "<=" is rejected; the code that treated '<' as text is disabled
         XMillException *e = new XMillException(XMILL_ERR_PARSE, "Invalid label '<=...'!");
			throw e;
#if 0
			/* not reached */
			if (!SkipChar()) {
				return XMILL_END_BLOCK;
			}
         saxclient->HandleText("<",1,0,0,0);
         break;
#endif
					 }

      default: // If we only have a simple '<', we skip the character and
               // parse the following label
			if (!SkipChar()) {
				return XMILL_END_BLOCK;
			}
			if (!(labelokay = ParseLabel())) {
				return XMILL_END_BLOCK;
			}
      }
   } while(session->allocatedmemory<session->memory_cutoff);
      // We perform the parsing as long as the allocated memory is smaller than the
      // memory cut off

	/* internal data cutoff, no end-of-block or end-of-data reached */
   return XMILL_END_NONE;
}

// Creates a memory parser for session 's' without any input data yet;
// 'memfile' starts out as NULL.
XMLMemParse::XMLMemParse(Session *s): XMLParse(s)
{
	memfile = NULL;
}

// Creates a memory parser for session 's' and immediately installs the
// buffer 'b' with length 'len' as its input through setData().
XMLMemParse::XMLMemParse(Session *s, char *b, int len): XMLParse(s)
{
	// Must be NULL before setData() runs, because setData() passes it to trydel
	memfile = NULL;
	setData(b, len);
}

// Installs a new input buffer: calls reInit(), creates a MemFile from 'b'
// and 'len' and passes it to SetCFile().
void XMLMemParse::setData(char *b, int len)
{
	reInit();
	trydel (memfile);
	memfile = new MemFile(b, len);
	SetCFile(memfile);
	// NOTE(review): the pointer is cleared right after SetCFile(); presumably
	// SetCFile() takes over ownership of the MemFile - confirm, otherwise the
	// object is never deleted by this class.
	memfile = NULL;
}

// Destructor: passes 'memfile' to trydel.
// NOTE(review): setData() leaves 'memfile' as NULL, so this presumably has
// nothing to delete in practice - confirm against the trydel definition.
XMLMemParse::~XMLMemParse()
{
	trydel (memfile);
}
