/*
 * xmlscope - minimalistic library for handling XML
 * Copyright (C) 2003 Alexey Voinov <voins@voins.program.ru>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 *
 * DESCRIPTION: simple XML parser
 */
#include <xmlscope.hh>

#include <locale>
#include <istream>

namespace
{
	/*
	 * Hand-written XML reader working directly on a std::istream.
	 *
	 * load() appends the nodes it recognises to a node list.  The private
	 * helpers consume the stream one character at a time through
	 * nextchar(), which remembers the last character in c_ so that a
	 * single character of look-ahead can be returned with putback().
	 *
	 * Running out of input is reported by throwing loader::eof; none of
	 * the helpers catch it, so it unwinds the whole parse.
	 */
	class loader
	{
		char c_;                       // last character delivered by nextchar()
		std::istream& is_;             // input being parsed
		const std::ctype<char>& ct_;   // classification facet of the stream's locale
	public:
		// Thrown by nextchar() when no further character can be read.
		struct eof {};

		explicit loader(std::istream& is)
			: c_(),
			is_(is),
			ct_(std::use_facet<std::ctype<char> >(is.getloc()))
		{}

		// Parse from the current stream position and append the resulting
		// nodes to `result`; `nses` is handed to every node created.
		void load(xml::nodes_t& result,
			boost::shared_ptr<xml::node::namespaces_t>& nses);

	private:
		// True if `c` may start a name: a letter, '_' or ':'.
		bool is_first_xml_char(char c) const
		{
			return ct_.is(std::ctype_base::alpha, c) ||
				c == ct_.widen('_') || c == ct_.widen(':');
		}

		// True if `c` may continue a name: letter, digit, '_', ':', '.' or '-'.
		bool is_xml_char(char c) const
		{
			return ct_.is(std::ctype_base::alnum, c) ||
				c == ct_.widen('_') || c == ct_.widen(':') ||
				c == ct_.widen('.') || c == ct_.widen('-');
		}

		// Read one character, store it in c_ and return it.
		// Throws eof when the stream cannot deliver a character.
		char nextchar(void)
		{
			typedef std::istream::traits_type traits;

			// get() yields traits::eof() for every kind of failure, not
			// only end of file.  Testing the returned value (instead of
			// is_.eof()) also stops the parse on a stream that is in a
			// failed state without eofbit, where the scanning loops
			// would otherwise never terminate.
			const traits::int_type ch = is_.get();
			if(traits::eq_int_type(ch, traits::eof())) throw eof();

			c_ = traits::to_char_type(ch);
			return c_;
		}

		// Return the character last read by nextchar() to the stream.
		void putback(void)
		{
			is_.putback(c_);
		}

		void skip_whitespace(void);
		void skip_pi(void);
		void skip_comment(void);
		void skip_doctype(void);
		void add_text_node(
				const std::string& text,
				xml::nodes_t& result,
				boost::shared_ptr<
					xml::node::namespaces_t>& nses);
		std::string read_name(void);
		std::string read_cdata(void);
		void read_xml_attrs(xml::node::attributes_t& attrs);
		bool load_xml_elem(xml::node& n);
	};

	// Discard characters that the stream's locale classifies as space and
	// leave the stream positioned on the first character that is not.
	void loader::skip_whitespace(void)
	{
		for(;;)
		{
			const char ch = nextchar();
			if(!ct_.is(std::ctype_base::space, ch))
				break;
		}
		// the loop read one character beyond the whitespace; hand it back
		putback();
	}

	// Skip the remainder of a processing instruction, up to and including
	// the closing "?>".  The caller has already consumed the opening "<?".
	void loader::skip_pi(void)
	{
		// Track the previous character instead of hunting for '?' and
		// then testing a single follower: with content ending in '?'
		// (e.g. "<?t ??>") the character after the first '?' is another
		// '?', and it must still be able to pair with the following '>'.
		char prev = nextchar();
		for(;;)
		{
			const char cur = nextchar();
			if(prev == '?' && cur == '>') break;
			prev = cur;
		}
	}

	// Skip the remainder of a comment, up to and including the closing
	// "-->".  The caller has already consumed "<!-".
	void loader::skip_comment(void)
	{
		// Consume the second dash of the "<!--" opener so that it is not
		// counted as part of the terminator: otherwise the empty comment
		// "<!---->" is seen as three dashes and never ends, while
		// "<!--->" is wrongly accepted as a complete comment.  If the
		// character is not a dash (malformed opener) it is one the scan
		// below would have skipped anyway.
		nextchar();

		int dashcount;
		do
		{
			// advance to the next run of dashes ...
			while(nextchar() != '-') /**/;

			// ... and measure it; c_ ends up holding the character
			// that follows the run
			dashcount = 1;
			while(nextchar() == '-') dashcount++;

			// two or more dashes directly followed by '>' close the
			// comment; longer runs ("--->") are tolerated
		} while(dashcount < 2 || c_ != '>');
	}

	// Skip a "<!D...>" declaration.  The caller has already consumed "<!D".
	// A DOCTYPE declaration may carry a bracketed internal subset, which is
	// skipped as well; any other declaration is skipped up to the first '>'.
	void loader::skip_doctype(void)
	{
		std::string keyword;
		while(ct_.is(std::ctype_base::alpha, nextchar()))
			keyword += c_;

		// c_ now holds the character that ended the keyword.  It has to
		// be examined before reading on: it may itself be the closing
		// '>' (or the opening '['), and ignoring it would make the scan
		// run on into the markup that follows the declaration.
		if(keyword != "OCTYPE")
		{
			while(c_ != '>') nextchar();
			return;
		}

		while(c_ != '>' && c_ != '[') nextchar();

		if(c_ == '[')
		{
			// Internal subset: the declaration ends at a ']' that is
			// followed (after optional whitespace) by '>'.
			do
			{
				// re-examine the character that was not '>'
				putback();
				while(nextchar() != ']') /**/;
				skip_whitespace();
			} while(nextchar() != '>');
		}
	}

	void loader::add_text_node(
			const std::string& text,
			xml::nodes_t& result,
			boost::shared_ptr<xml::node::namespaces_t>& nses)
	{
		if(text.empty()) return;

		if(!result.empty() && result.back().type == xml::node::text)
			result.back().name += text;
		else
		{
			xml::node n(nses);
			n.type = xml::node::text;
			n.name = text;
			result.push_back(n);
		}
	}

	// Read a name starting at the current position.
	// Returns the name, or an empty string when the next character cannot
	// start one.  In both cases the first character that is not part of
	// the name is returned to the stream.
	std::string loader::read_name(void)
	{
		std::string name;

		char ch = nextchar();
		if(is_first_xml_char(ch))
		{
			do
			{
				name += ch;
				ch = nextchar();
			} while(is_xml_char(ch));
		}

		// give back the character that ended (or prevented) the name
		putback();
		return name;
	}

	// Read a "<![CDATA[ ... ]]>" section and return its content verbatim.
	// The caller has already consumed "<![".  If the keyword is not
	// "CDATA[" the construct is skipped up to the first '>' and an empty
	// string is returned.
	std::string loader::read_cdata(void)
	{
		std::string buffer;
		while(ct_.is(std::ctype_base::alpha, nextchar()))
			buffer += c_;

		if(buffer != "CDATA" || c_ != '[')
		{
			// c_ is the character that ended the keyword and may
			// already be the closing '>', so test it before reading on
			while(c_ != '>') nextchar();
			return std::string();
		}

		buffer.clear();
		for(;;)
		{
			// copy everything up to the next ']' ...
			while(nextchar() != ']')
				buffer += c_;

			// ... then measure the whole run of ']'.  Counting the run
			// (rather than looking at exactly two characters) is what
			// makes content that itself ends in ']' work: in "x]]]>"
			// the last two brackets close the section and the first
			// one is data.
			std::string::size_type brackets = 1;
			while(nextchar() == ']') ++brackets;

			if(brackets >= 2 && c_ == '>')
			{
				buffer.append(brackets - 2, ']');
				break;
			}

			// not a terminator: the brackets and the character after
			// them are ordinary content
			buffer.append(brackets, ']');
			buffer += c_;
		}
		return buffer;
	}

	// Read the attribute list of a start tag into `attrs`.
	// Stops in front of the first character that cannot start a name
	// (normally the '/' or '>' that ends the tag).  Values may be enclosed
	// in double or single quotes and are stored exactly as written.  A name
	// that is not followed by '=' and a quoted value is dropped.
	void loader::read_xml_attrs(xml::node::attributes_t& attrs)
	{
		skip_whitespace();
		while(true)
		{
			const std::string name = read_name();
			if(name.empty()) break;

			skip_whitespace();
			if(nextchar() != '=')
			{
				// Not an assignment.  Return the character so that it
				// is not lost: it may be the '>' or '/' that ends the
				// tag, or the first letter of the next attribute.
				putback();
				continue;
			}

			skip_whitespace();
			const char quote = nextchar();
			if(quote != '"' && quote != '\'')
			{
				// unquoted value: same reasoning as above
				putback();
				continue;
			}

			// the value runs up to the matching quote character
			std::string value;
			std::getline(is_, value, quote);
			skip_whitespace();
			attrs[name] = value;
		}
	}

	// Parse one element into `n`; the caller has consumed the opening '<'.
	// Returns true when the element is complete: either an empty-element
	// tag ("<name ... />") or a start tag, its content, and an end tag
	// carrying the same name.  Returns false otherwise; `n` may then be
	// partially filled.
	bool loader::load_xml_elem(xml::node& n)
	{
		n.type = xml::node::normal;
		n.nodes.clear();
		n.attrs.clear();

		n.name = read_name();
		if(n.name.empty())
			return false;

		// character directly after the name, or after the attribute list
		// when the name was followed by whitespace
		char delim = nextchar();
		if(ct_.is(std::ctype_base::space, delim))
		{
			read_xml_attrs(n.attrs);
			delim = nextchar();
		}

		switch(delim)
		{
		case '/':
			// empty-element tag: must close immediately
			return nextchar() == '>';
		case '>':
			break;
		default:
			return false;
		}

		// content; load() returns once it has consumed the "</" of an
		// end tag
		load(n.nodes, n.nses);

		const std::string closing = read_name();
		skip_whitespace();
		if(nextchar() != '>')
			return false;
		return n.name == closing;
	}

	// Parse a sequence of content items and append them to `result`.
	// Character data and CDATA sections become text nodes, elements become
	// element nodes; processing instructions, comments and "<!...>"
	// declarations are skipped.  Returns after consuming the "</" of an end
	// tag, or when the stream is no longer usable.  End of input is
	// reported by the loader::eof exception thrown from nextchar().
	void loader::load(
			xml::nodes_t& result,
			boost::shared_ptr<xml::node::namespaces_t>& nses)
	{
		std::string buffer;

		// good() rather than !eof(): a stream that has failed without
		// reaching end of file would otherwise keep this loop running
		// forever, because nothing in the body can make progress on it.
		while(is_.good())
		{
			// everything up to the next '<' is character data
			std::getline(is_, buffer, '<');
			add_text_node(buffer, result, nses);

			// "</" starts an end tag: the enclosing element is done
			if(nextchar() == '/') break;

			if(c_ == '?') skip_pi();
			else if(c_ == '!')
			{
				if(nextchar() == '-')
					skip_comment();
				else if(c_ == '[')
					add_text_node(read_cdata(), result, nses);
				else if(c_ == 'D')
					skip_doctype();
				// unknown declaration: skip to '>', testing the
				// character already read so that "<!>" does not run
				// on into the following markup
				else while(c_ != '>') nextchar();
			}
			else
			{
				xml::node n(nses);
				// the character belongs to the element name
				putback();
				if(load_xml_elem(n)) result.push_back(n);
			}
		}
	}
};

namespace xml
{
	void load(std::istream& is, nodes_t& result,
			boost::shared_ptr<node::namespaces_t>& nses)
	{
		try
		{
			loader(is).load(result, nses);
		}
		catch(loader::eof&) {}
	}

	void load(std::istream& is, nodes_t& result)
	{
		boost::shared_ptr<node::namespaces_t>
			nses(new node::namespaces_t);
		nses->push_back("");
		load(is, result, nses);
	}
}

