/*
 * Copyright (C) 2007-2009 Slava Semushin <php-coder@altlinux.ru>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 *
 */

#ifdef HAVE_CONFIG_H
#include <config.h>
#endif

#include "SourceFile.hh"
#include "DirectiveWithCondition.hh"
#include "syntax_error.hh"		// for syntax_error exception
#include "UserHeader.hh"
#include "SystemHeader.hh"

#include <cassert>	// for assert()
#include <cctype>	// for isalnum(), isspace() (when boost not uses)

#if !defined(ENABLE_BOOST) || !defined(NDEBUG)
#include <cstring>	// for strlen(), strspn() and strncmp()
#endif

#include <exception>// for terminate()
#include <iostream>	// for cerr, endl
#include <fstream>	// for ifstream
#include <stdexcept>// for out_of_range and runtime_error
#include <vector>	// for vector

#ifdef ENABLE_BOOST
#include <boost/foreach.hpp>

#include <boost/spirit/core.hpp>			// for most part of boost::spirit
#include <boost/spirit/utility/confix.hpp>	// for comment_p()

using boost::spirit::blank_p;
using boost::spirit::ch_p;
using boost::spirit::comment_p;
using boost::spirit::eps_p;
using boost::spirit::nothing_p;
using boost::spirit::parse;
using boost::spirit::parse_info;
using boost::spirit::space_p;
using boost::spirit::str_p;

#endif // ENABLE_BOOST

using std::cerr;
using std::endl;
using std::ifstream;
using std::out_of_range;
using std::runtime_error;
using std::string;
using std::terminate;
using std::vector;

/**
 * Stores the directory part of @a filename (including the trailing
 * separator) in filePath. When the name has no directory component
 * filePath is not assigned.
 *
 * @param[in] filename name of the source file, also handed to TextFile
 **/
SourceFile::SourceFile(const std::string &filename) : TextFile(filename) {

	// dirname() can't be used here because Windows doesn't have it
#ifndef _WIN32
	const string::size_type pos = filename.find_last_of('/');
#else
	// Windows accepts forward slashes as well as backslashes, so the
	// directory part ends at whichever separator comes last
	const string::size_type pos = filename.find_last_of("\\/");
#endif

	if (pos != string::npos) {
		// keep the separator: pos+1 symbols from the beginning
		filePath = filename.substr(0, pos + 1);
	}

}

/**
 * Deletes every line object whose pointer is kept in sourceFile.
 **/
SourceFile::~SourceFile() {
#ifdef ENABLE_BOOST
	foreach(LineOfSourceCode *stored_line, sourceFile) {
		delete stored_line;
	}
#else
	vector<LineOfSourceCode *>::const_iterator current = sourceFile.begin();

	while (current != sourceFile.end()) {
		delete *current;
		++current;
	}
#endif
}

/**
 * Gives read-only access to one parsed line.
 *
 * The lookup itself is done by getLineOfSourceCodeClass(). When it
 * throws std::out_of_range a message is printed to stderr and the
 * program is terminated, so the operator doesn't return in that case.
 *
 * @param[in] lineno value passed to getLineOfSourceCodeClass()
 * @return reference to the requested line
 **/
const LineOfSourceCode &
SourceFile::operator[](size_t lineno) const {

	// Initialized to keep the pointer well-defined on every path
	const LineOfSourceCode *sf = NULL;

	try {
		sf = getLineOfSourceCodeClass(lineno);

	// Caught by const reference to avoid copying the exception object
	} catch (const out_of_range &) {
		cerr << "SourceFile::operator[](" << lineno
			<< "): index out of range. Abort." << endl;
		terminate();
	}

	return *sf;
}

/**
 * @exception std::runtime_error when can't open file
 **/
void
SourceFile::parseSourceFile() {

	ifstream file(getFileName().c_str());
	if (!file) {
		throw runtime_error("Can't open file " + getFileName());
	}

	string line;
	size_t lineno = 1;
	bool inside_multiline_comment = false;
	
	while (getline(file, line)) {
		
		if (!inside_multiline_comment &&
			isMultilineCommentStart(line) &&
			!isStringInsideDoubleQuotes(line)) {
			inside_multiline_comment = true;
		}
		
		LineOfSourceCode *losc = LineOfSourceCodeFactory(line, lineno++, inside_multiline_comment);
		
		if (inside_multiline_comment && isMultilineCommentEnd(line)) {
			inside_multiline_comment = false;
		}
		
		try {
			sourceFile.push_back(losc);
		} catch (...) {
			delete losc;
			throw;
		}
	}
}

/**
 * Creates the object which describes one line of the source file.
 *
 * The line is classified in this order: blank line, comment,
 * ordinary code, preprocessor directive. For \#include, \#ifdef,
 * \#ifndef and \#if a specialized object is created, every other kind
 * of line is represented by plain LineOfSourceCode with appropriate
 * type (UNKDIR for directives which are not recognized here).
 *
 * @param[in] line line of file
 * @param[in] lineno line number in file
 * @param[in] inside_multiline_comment if this line inside multiline comment or not
 * @return object allocated with new, the caller have to delete it
 * @exception syntax_error when line has syntax error
 **/
LineOfSourceCode *
SourceFile::LineOfSourceCodeFactory(const std::string &line, size_t lineno, bool inside_multiline_comment) {
	
	LineType type = UNKDIR;
	
	// points to the text which follows the '#' (or its di/trigraph)
	const char *p = NULL;
	// points to the text which follows the name of directive
	const char *ret = NULL;
	
	// It's impossible because getline() always returns string wo/
	// newline symbol
	assert(line.find_first_of('\n') == string::npos);
	
	if (!inside_multiline_comment && (line.empty() || line.find_first_not_of(" \t") == string::npos)) {
		type = BLANK;

	} else if (inside_multiline_comment || isComment(line.c_str())) {
		type = COMMENT;

	} else if ((p = isDirective(line.c_str())) == NULL) {
		type = CODE;

	} else if ((ret = detectDirective(p, "include")) != NULL) {
		
		const LineType headerType = determineHeaderType(ret);
		
		if (headerType == SYSHDR) {
			return new SystemHeader(line, lineno);
		
		} else if (headerType == USRHDR) {
			return new UserHeader(line, lineno);
		
		} else if (headerType == UNKHDR) {
			return new Header(line, lineno, UNKHDR, ' ', ' ', ret);
			
		} else {
			throw syntax_error("#include expects \"FILENAME\" or <FILENAME>", lineno);
		}

	} else if ((ret = detectDirective(p, "ifdef")) != NULL) {
		if (*ret == '\0' || isComment(ret)) {
			throw syntax_error("no macro name given in #ifdef directive", lineno);
		}
		return new DirectiveWithCondition(line, lineno, IFDEF, ret);

	} else if ((ret = detectDirective(p, "ifndef")) != NULL) {
		if (*ret == '\0' || isComment(ret)) {
			// the message names the directive which was really used
			throw syntax_error("no macro name given in #ifndef directive", lineno);
		}
		return new DirectiveWithCondition(line, lineno, IFNDEF, ret);

	// "if" should be tested after "ifdef" and "ifndef" because
	// detectDirective() compares only the beginning of the string
	} else if ((ret = detectDirective(p, "if")) != NULL) {
		if (*ret == '\0' || isComment(ret)) {
			throw syntax_error("#if with no expression", lineno);
		}
		return new DirectiveWithCondition(line, lineno, IF, ret);

	} else if ((ret = detectDirective(p, "elif")) != NULL) {
		type = ELIF;
		if (*ret == '\0' || isComment(ret)) {
			// the message names the directive which was really used
			throw syntax_error("#elif with no expression", lineno);
		}

	} else if ((ret = detectDirective(p, "define")) != NULL) {
		type = DEFINE;
		if (*ret == '\0') {
			/// @todo more strict check -- \#define should have two arguments
			throw syntax_error("no macro name given in #define directive", lineno);
		}

	} else if ((ret = detectDirective(p, "undef")) != NULL) {
		type = UNDEF;
		if (*ret == '\0') {
			throw syntax_error("no macro name given in #undef directive", lineno);
		}

	} else if (detectDirective(p, "else") != NULL) {
		type = ELSE;

	} else if (detectDirective(p, "endif") != NULL) {
		type = ENDIF;

	}

	return new LineOfSourceCode(line, lineno, type);
}


// Alternative spellings of the '#' symbol. Question marks in the
// trigraph are escaped so that the literal itself isn't replaced
// by a compiler which handles trigraphs.
#define DIGRAPH "%:"
#define TRIGRAPH "\?\?="

#ifndef ENABLE_BOOST
// Moves the pointer to the first symbol which is not a space or tab.
// Replacement lists are enclosed in parentheses, so macros expand
// correctly inside of any expression.
#define SKIP_SPACES(str) ((str) += strspn((str), " \t"))
// Lengths of the literals above without terminating null symbol
#define DIGRAPH_SIZE  (sizeof(DIGRAPH) - 1)
#define TRIGRAPH_SIZE (sizeof(TRIGRAPH) - 1)
#endif

/**
 * Checks whether the string begins with the symbol which introduces
 * a preprocessor directive: '#', its digraph or its trigraph. Blanks
 * before and after this symbol are skipped.
 *
 * @param[in] str string to examine, must not be NULL
 * @return pointer to the text which follows the introducer and blanks
 *         after it, or NULL when the string isn't a directive
 **/
const char *
SourceFile::isDirective(const char *str) {
	
	// we always call this function with real string
	assert(str != NULL);
	
#ifdef ENABLE_BOOST
	
	// if string like "[[:blank:]]*(#|%:|??=)[[:blank:]]*"
	parse_info<> match = parse(str, (*blank_p >> (ch_p('#') | str_p(DIGRAPH) | str_p(TRIGRAPH)) >> *blank_p), nothing_p);
	if (!match.hit) {
		return NULL;
	}
	
	// skip match fragment
	str += match.length;
	return str;
	
#else
	
	SKIP_SPACES(str);
	
	// how many symbols the introducer occupies, 0 when there is no one
	size_t introducer_size = 0;
	
	if (*str == '#') {
		introducer_size = 1;
	
	// for digraphs
	// See http://en.wikipedia.org/wiki/Digraph_(computing)
	} else if (strncmp(str, DIGRAPH, DIGRAPH_SIZE) == 0) {
		introducer_size = DIGRAPH_SIZE;
	
	// for trigraphs
	// See http://en.wikipedia.org/wiki/C_trigraph
	} else if (strncmp(str, TRIGRAPH, TRIGRAPH_SIZE) == 0) {
		introducer_size = TRIGRAPH_SIZE;
	}
	
	if (introducer_size == 0) {
		return NULL;
	}
	
	str += introducer_size;
	SKIP_SPACES(str);
	return str;
	
#endif // ENABLE_BOOST
}

// Helper macros for SourceFile::isDirective() aren't needed anymore.
// No guards are required: #undef of a name which is not defined as
// a macro is simply ignored.
#undef DIGRAPH
#undef TRIGRAPH
#undef DIGRAPH_SIZE
#undef TRIGRAPH_SIZE

/**
 * Checks whether the string begins with the given directive name.
 * Only the beginning of the string is compared, the symbol which
 * follows the name isn't examined.
 *
 * @param[in] str text after the '#' symbol, must not be NULL
 * @param[in] directiveName name to look for, must not be NULL
 * @return pointer to the text which follows the name and blanks
 *         after it, or NULL when the string begins with something else
 **/
const char *
SourceFile::detectDirective(const char *str, const char *directiveName) {
	
	// we always call this function with real strings
	assert(str != NULL);
	assert(directiveName != NULL);
	
#ifdef ENABLE_BOOST
	
	// if string like "directive[[:blank:]]*"
	parse_info<> match = parse(str, (str_p(directiveName) >> *blank_p), nothing_p);
	if (!match.hit) {
		return NULL;
	}
	
	// skip match fragment
	str += match.length;
	return str;
	
#else
	
	const size_t name_size = strlen(directiveName);
	
	if (strncmp(str, directiveName, name_size) == 0) {
		// step over the name and blanks after it
		const char *rest = str + name_size;
		SKIP_SPACES(rest);
		return rest;
	}
	
	return NULL;
	
#endif // ENABLE_BOOST
}

/**
 * Checks whether the string consists of comments only.
 *
 * Without Boost the check works this way: leading blanks are skipped,
 * a string of one symbol is never a comment, a string which begins
 * with two slashes always is. Otherwise the string is scanned and
 * every symbol outside of C-style comments must be a whitespace.
 * A C-style comment which isn't closed lasts to the end of string.
 *
 * @param[in] str string to examine, must not be NULL
 * @return true if nothing except comments and whitespace was found
 **/
bool
SourceFile::isComment(const char *str) {
	
	// we always call this function with real string
	assert(str != NULL);
	
#ifndef ENABLE_BOOST
	
	SKIP_SPACES(str);
	
	// calculate length of string only once
	size_t len = strlen(str);
	
	// Shouldn't happens because we already test it in
	// SourceFile::LineOfSourceCodeFactory()
	assert(len != 0);
	
	// special case for string with one symbol, like "0" in "#if 0"
	// case (see test #056)
	if (len == 1) {
		return false;
	}
	
	// check for c++-style comment
	if (str[0] == '/' && str[1] == '/') {
		return true;
	}
	
	// we inside comment or not
	bool inside_comment = false;
	
	// str[i+1] is always valid: for the last symbol it is
	// the terminating null
	for (size_t i = 0; i < len; i++) {
		
		if (str[i] == '/' && str[i+1] == '*') {
			inside_comment = true;
			// increase to one, next one will be added by for() loop
			i++;
			
		} else if (str[i] == '*' && str[i+1] == '/') {
			inside_comment = false;
			// increase to one, next one will be added by for() loop
			i++;
			
		// Cast is required: isspace() has undefined behavior for
		// negative values, and plain char may be negative for
		// symbols outside of ASCII
		} else if (isspace(static_cast<unsigned char>(str[i]))) {
			// skip all spaces
			continue;
			
		} else if (inside_comment) {
			// skip any symbols inside comment
			continue;
			
		} else {
			// we found not commented symbols
			return false;
		}
	}
	
	return true;
	
#else
	
	parse_info<> res = parse(str,
			comment_p("//") | +(comment_p("/*", "*/" >> *space_p) >> eps_p) | comment_p("/*"),
			space_p);
	
	return res.full;
	
#endif // !ENABLE_BOOST
}

// SKIP_SPACES isn't used below this point. The directive needs no guard
// because #undef of a name which is not a macro is simply ignored.
#undef SKIP_SPACES

bool SourceFile::isStringLastOrAfterAnotherString(const std::string &line, const char *str, const char *other_str) {
	
	assert(str != NULL);
	assert(strlen(str) > 0);
	
	assert(other_str != NULL);
	assert(strlen(other_str) > 0);
	
	string::size_type open_pos =  line.rfind(str);
	string::size_type close_pos = line.rfind(other_str);
	
	if (open_pos != string::npos &&
		(close_pos == string::npos || close_pos < open_pos)) {
		return true;
	}
	
	return false;

}

// Tells whether the line has "/*" whose last occurrence isn't followed
// by "*/", i.e. the comment stays opened at the end of line
bool SourceFile::isMultilineCommentStart(const std::string &line) {
	const char *const opening = "/*";
	const char *const closing = "*/";
	
	return isStringLastOrAfterAnotherString(line, opening, closing);
}

// Tells whether the line has "*/" whose last occurrence isn't followed
// by "/*", i.e. the comment is closed at the end of line
bool SourceFile::isMultilineCommentEnd(const std::string &line) {
	const char *const closing = "*/";
	const char *const opening = "/*";
	
	return isStringLastOrAfterAnotherString(line, closing, opening);
}

// Tells whether the line has a double quote whose last occurrence is
// located after the last "/*" (or the line has no "/*" at all)
bool SourceFile::isStringInsideDoubleQuotes(const std::string &line) {
	
	// the last "/*" isn't followed by any double quote
	if (isStringLastOrAfterAnotherString(line, "/*", "\"")) {
		return false;
	}
	
	return isStringLastOrAfterAnotherString(line, "\"", "/*");
}

/**
 * Determines the kind of header by the argument of \#include.
 *
 * The name of header may consist of letters, digits and the symbols
 * '.', '/', '_', '-' and '+'. The decision depends on the symbol at
 * which scanning of the name has stopped.
 *
 * @param[in] str text after the "include" word, must not be NULL
 * @return UNKDIR when the string is empty or when the opening '<' or
 *         '"' isn't matched by the closing symbol right after
 *         the name; SYSHDR when the name is followed by '>'; USRHDR
 *         when it is followed by '"'; UNKHDR otherwise
 **/
LineType
SourceFile::determineHeaderType(const char *str) {
	
	// Shouldn't happens because we always got real lines
	assert(str != NULL);
	
	if (*str == '\0') {
		return UNKDIR;
	}
	
	// default values for include with definition
	char begin = ' ';
	char end = ' ';
	
	// default values for system header
	if (*str == '<') {
		begin = '<';
		end = '>';
		str++;
	
	// default values for user header
	} else if (*str == '"') {
		begin = '"';
		end = '"';
		str++;
	}
	
	// Skip the name of header. Cast is required: isalnum() has
	// undefined behavior for negative values, and plain char may be
	// negative for symbols outside of ASCII
	while (isalnum(static_cast<unsigned char>(*str)) || *str == '.'
		|| *str == '/' || *str == '_'
		|| *str == '-' || *str == '+') {
		str++;
	}
	
	// opening symbol was found but the name isn't followed by
	// corresponding closing symbol
	if (begin != ' ' && *str != end) {
		return UNKDIR;
	}
	
	if (*str == '>') {
		return SYSHDR;
	
	} else if (*str == '"') {
		return USRHDR;
	}
	
	return UNKHDR;
}

