#include "StdAfx.h"
#include "XMLTextParser.h"
#include "TypedBuffer.h"

//	Tag kinds returned by GetTagType() and dispatched on by ParseNextTag().
enum
{
	TAG_TYPE_START		= 0,	//	returned when neither of the '/' rules below matches
	TAG_TYPE_START_END	= 1,	//	tag text ends with '/'
	TAG_TYPE_END		= 2,	//	tag text begins with '/'
};

/*!
	Registers the three escape pairs with m_escape, the object that
	ParseData() later uses to unescape character data.
*/
CXMLTextParser::CXMLTextParser(void)
{
	//	Each call pairs a raw character with its entity form
	m_escape.AddEscape(_T("<"), _T("&lt;"));
	m_escape.AddEscape(_T(">"), _T("&gt;"));
	m_escape.AddEscape(_T("&"), _T("&amp;"));
}

/*!
	Destructor. No explicit cleanup is performed here.
*/
CXMLTextParser::~CXMLTextParser(void)
{
}


/////////////////////////////////////////////////////////////////////////////////////
//	Scan entry points
/////////////////////////////////////////////////////////////////////////////////////
/*!
	Scans an XML document held in memory.

	The text is trimmed and stored in m_xmlData, indexed in place by
	BuildTagIndex(), and then ParseNextTag() is called repeatedly until it
	reports that no tags remain.

	@param	xml		XML text to scan (taken by value, so the caller's string is not modified)
	@throws	CXMLTextParserException		propagated from BuildTagIndex() / ParseNextTag()
*/
void CXMLTextParser::StartScan(CString xml)
{
	xml.Trim();
	m_xmlData = xml;
	m_dataLen = xml.GetLength();

	//	Discard the index left by any previous scan: its entries point into
	//	the previous buffer, which the assignment above may have released.
	m_tags.RemoveAll();
	m_datas.RemoveAll();

	//	BuildTagIndex() writes string terminators directly into this buffer
	m_lpXml = m_xmlData.GetBuffer(0);

	DWORD tick = GetTickCount();
	BuildTagIndex();
	TRACE("BuildTagIndex : %d\n", GetTickCount() - tick);

	tick = GetTickCount();
	m_tagCounter = 0;
	while(ParseNextTag())
		;
	TRACE("ParseNextTag : %d\n", GetTickCount() - tick);
}


/*!
	Reads a UTF-8 encoded file and returns its contents as a CString.

	A leading UTF-8 byte order mark (EF BB BF) is skipped so that the
	returned text starts with the first character of the document.

	@param	path	path of the file to read
	@return	the decoded contents (an empty string for an empty file)
	@throws	CXMLTextParserException		when the file cannot be opened, measured, read or converted
*/
CString CXMLTextParser::LoadXmlFile(CString path)
{
	//	Open in binary mode so the bytes reach the converter unchanged
	FILE	*in = NULL;
	if(_tfopen_s(&in, path, _T("rb")) != 0 || in == NULL)
		throw CXMLTextParserException("t@C̃I[vɎs܂", 0);

	//	Closes the file on every exit path, including the throws below
	struct FileCloser
	{
		FILE	*fp;
		explicit FileCloser(FILE *f) : fp(f) {}
		~FileCloser() { if(fp) fclose(fp); }
	};
	FileCloser	closer(in);

	//	Determine the file size
	if(fseek(in, 0, SEEK_END) != 0)
		throw CXMLTextParserException(_T("Failed to determine the file size"), 0);
	const long	len = ftell(in);
	if(len < 0 || fseek(in, 0, SEEK_SET) != 0)
		throw CXMLTextParserException(_T("Failed to determine the file size"), 0);
	if(len == 0)
		return(CString());

	//	Buffer for the raw bytes plus a terminator
	CTypedBuffer<char> buf(len + 1);

	//	Read the whole file in one piece
	if(fread(buf, len, 1, in) != 1)
		throw CXMLTextParserException(_T("Failed to read the file"), 0);
	buf[len] = 0;

	//	Skip a UTF-8 byte order mark if present
	const char	*src = buf;
	int			srcLen = static_cast<int>(len);
	if(srcLen >= 3
		&& static_cast<unsigned char>(src[0]) == 0xEF
		&& static_cast<unsigned char>(src[1]) == 0xBB
		&& static_cast<unsigned char>(src[2]) == 0xBF)
	{
		src += 3;
		srcLen -= 3;
	}
	if(srcLen == 0)
		return(CString());

	//	Convert UTF-8 -> UTF-16: the first call measures, the second converts
	CStringW	ret;
	const int wbLen = MultiByteToWideChar(CP_UTF8, 0, src, srcLen, NULL, 0);
	if(wbLen <= 0)
		throw CXMLTextParserException(_T("Failed to convert the file from UTF-8"), 0);
	MultiByteToWideChar(CP_UTF8, 0, src, srcLen, ret.GetBuffer(wbLen), wbLen);
	ret.ReleaseBufferSetLength(wbLen);

	return(CString(ret));
}


/////////////////////////////////////////////////////////////////////////////////////
//	Tag parsing
/////////////////////////////////////////////////////////////////////////////////////
/*!
	CfbNX̍쐬
*/
void CXMLTextParser::BuildTagIndex()
{
	LPSTR str = m_lpXml;

	//	^O`FbN
	if(*str != '<')
		throw CXMLTextParserException(_T("ُȕo܂"), 0);

	//	^Oʒu
	int	curTagIsStart = FALSE;
	for(int i=0;i<m_dataLen;i++)
	{
		//	Jn^Oo
		if(*str == '<')
		{
			*str = 0;
			str++;
			if(curTagIsStart || *str == 0)
				throw CXMLTextParserException(_T("ُȃ^Oo܂"), i);

			m_tags.Add(str);
			curTagIsStart = TRUE;
			continue;
		}

		//	I^Oo
		if(*str == '>')
		{
			*str = 0;
			str++;
			if(!curTagIsStart)
				throw CXMLTextParserException(_T("ُȃ^Oo܂"), i);

			m_datas.Add(str);
			curTagIsStart = FALSE;
			continue;
		}

		str++;
	}

	//	ĂȂ
	if(curTagIsStart || m_tags.GetSize() != m_datas.GetSize())
		throw CXMLTextParserException(_T("ُȃ^Oo܂"), m_dataLen);
}

/*!
	Takes the next indexed tag and dispatches it to the matching handler.

	Tags starting with '?' or '!' go to OnSpecialTag(). Otherwise the tag
	type decides the calls: OnEndTag() for an end tag, OnStartTag() for a
	start tag, and OnStartTag() followed by OnEndTag() for a start/end tag.

	@return	1 when a tag was processed, 0 when no tags remain
	@throws	CXMLTextParserException		when the tag starts with a character that
			is neither alphabetic, '/', '?' nor '!'
*/
int CXMLTextParser::ParseNextTag()
{
	//	Fetch the next tag; lpData (the text after the tag) is not used yet
	LPSTR	lpTag, lpData;
	if(!GetNextTag(&lpTag, &lpData))
		return(0);

	//	Terminate the tag name where its options begin
	LPSTR	lpOption;
	SplitTag(lpTag, &lpOption);

	//	Special tags ("<?...>", "<!...>")
	if(lpTag[0] == '?' || lpTag[0] == '!')
	{
		OnSpecialTag(lpTag);
		return(1);
	}

	//	Reject anything that cannot start a tag. The character classification
	//	functions require a value in the unsigned char range, and a plain char
	//	is negative for bytes >= 0x80, hence the cast.
	if(!_istalpha(static_cast<unsigned char>(lpTag[0])) && lpTag[0] != '/')
		throw CXMLTextParserException(_T("ُȃ^Oo܂"), lpTag - m_lpXml);

	//	Classify the tag
	const int tagType = GetTagType(&lpTag);

	//	End tag: nothing to parse
	if(tagType == TAG_TYPE_END)
	{
		OnEndTag(lpTag);
		return(1);
	}

	//	Start tag or start/end tag: parse the options, then raise the events
	CTagOptionArray	keys;
	ParseTagOption(lpTag, keys);
	OnStartTag(lpTag, keys);
	if(tagType == TAG_TYPE_START_END)
		OnEndTag(lpTag);

	//	Character data is not dispatched yet:
//		ParseData(data);
	return(1);
}


/*!
	Unescapes a piece of character data and reports it through OnData().
	Empty data is ignored.

	@param	data	text to process; replaced by the result of m_escape.UnEscape()
*/
void CXMLTextParser::ParseData(CString &data)
{
	if(!data.IsEmpty())
	{
		//	Convert, then raise the event
		data = m_escape.UnEscape(data);
		OnData(data);
	}
}


/*!
	Parses the options of a tag.

	NOTE: the whole implementation below is commented out, so this function
	currently does nothing and leaves both arguments untouched.

	@param	tag		tag text
	@param	keys	option array (the disabled code appends key/data pairs to it)
*/
void CXMLTextParser::ParseTagOption(LPSTR tag, CTagOptionArray &keys)
{
/*	//	Replace tabs with spaces
	LPSTR	str = tag;
	while(*str)
	{
		if(*str == '\t')
			*str = ' ';
		str++;
	}

	//	Extract the tag name
	CString	tagName;
	tag.Replace(_T("\t"), _T(" "));
	if(!GetNextToken(tag, _T(" "), tagName))
	{
		while(1)
		{
			//	Key
			CString	key, data;
			if(GetNextToken(tag, _T("="), key))
				break;
			key.Trim();

			//	Data (the text between the two double quotes)
			if(GetNextToken(tag, _T("\""), data))
				throw CXMLTextParserException(_T("ُȃ^Oo܂"), m_dataLen - m_xmlData.GetLength());
			if(GetNextToken(tag, _T("\""), data))
				throw CXMLTextParserException(_T("ُȃ^Oo܂"), m_dataLen - m_xmlData.GetLength());

			//	Append
			keys.Add(CTagOption(key, data));
		}
	}

	tag = tagName;*/
}


/////////////////////////////////////////////////////////////////////////////////////
//	Utilities
/////////////////////////////////////////////////////////////////////////////////////
/*!
	Extracts the text between tokenStart and the next occurrence of find.
	The delimiter itself is not included in the extracted token.

	On success the front of src is deleted: up to the delimiter when
	removeToken is zero, up to and including it otherwise.

	@param	src			string to search; its front is deleted on success
	@param	find		delimiter to look for
	@param	token		receives the extracted text
	@param	tokenStart	index in src at which the search and the token begin
	@param	removeToken	non-zero to delete the delimiter from src as well
	@return	0 on success, -1 when the delimiter is not found (src and token are then unchanged)
*/
int CXMLTextParser::GetNextToken(CString &src, CString find, CString &token, int tokenStart, int removeToken)
{
	//	Locate the delimiter
	const int hit = src.Find(find, tokenStart);
	if(hit != -1)
	{
		//	Copy out the token
		token = src.Mid(tokenStart, hit - tokenStart);

		//	Drop the consumed part, with or without the delimiter
		const int cut = removeToken ? hit + find.GetLength() : hit;
		src.Delete(0, cut);
		return(0);
	}

	return(-1);
}


/*!
	Fetches the next entry of the tag index and advances m_tagCounter.

	@param	tag		receives the entry of m_tags at the current position
	@param	data	receives the entry of m_datas at the current position
	@return	1 when an entry was returned, 0 when the index is exhausted
*/
_inline int CXMLTextParser::GetNextTag(LPSTR *tag, LPSTR *data)
{
	if(m_tagCounter < m_tags.GetSize())
	{
		*tag = m_tags[m_tagCounter];
		*data = m_datas[m_tagCounter];
		++m_tagCounter;
		return(1);
	}

	return(0);
}


/*!
	Splits a tag in place into its name and its option text.

	The first whitespace character (space, tab, CR or LF) is overwritten with
	a terminator so that tag becomes the bare name. *tagOption is set to the
	character after it. When the tag contains no whitespace, *tagOption
	points at the terminator of tag, i.e. at an empty string.

	@param	tag			terminated tag text; modified in place
	@param	tagOption	receives a pointer to the option text
*/
_inline void CXMLTextParser::SplitTag(LPSTR tag, LPSTR *tagOption)
{
	for(; *tag != 0; tag++)
	{
		if(*tag == ' ' || *tag == '\t' || *tag == '\r' || *tag == '\n')
		{
			//	Cut the name here; the options start after the separator,
			//	not on the terminator just written.
			*tag = 0;
			*tagOption = tag + 1;
			return;
		}
	}

	//	No options: hand back the empty string at the end of the name
	*tagOption = tag;
}


/*!
	Classifies a tag and strips the '/' that marks its type.

	- Text starting with '/' is an end tag; *tag is advanced past the '/'.
	- Text ending with '/' is a start/end tag; the trailing '/' is replaced
	  by a terminator.
	- Anything else, including empty text, is a start tag.

	@param	tag		address of the pointer to the terminated tag text; both the
					pointer and the text may be modified as described above
	@return	TAG_TYPE_END, TAG_TYPE_START_END or TAG_TYPE_START
*/
_inline int CXMLTextParser::GetTagType(LPSTR *tag)
{
	LPSTR			name = *tag;
	const size_t	tagLen = strlen(name);

	//	Nothing to inspect; also keeps name[tagLen - 1] below in bounds
	if(tagLen == 0)
		return TAG_TYPE_START;

	if(name[0] == '/')
	{
		//	Advance the caller's pointer (not the local parameter) past '/'
		*tag = name + 1;
		return TAG_TYPE_END;
	}

	if(name[tagLen - 1] == '/')
	{
		//	Remove the trailing '/' from the tag text itself
		name[tagLen - 1] = 0;
		return TAG_TYPE_START_END;
	}

	return TAG_TYPE_START;
}

/*!
	Extracts the text up to the specified delimiter (unfinished draft, kept disabled below)
*/
/*
_inline int CXMLTextParser::GetNextToken(LPSTR *src, LPSTR splitter, LPSTR *token)
{
	LPSTR	str = *src;
	while(1)
	{
		//	S؂茟o
		for(int i=0;splitter[i];i++)
		{
			if(*str == splitter[i])
			{
				*src = str;

			}
		}
	}
}

*/