#include "StdConc.h"
#include "QueryParser.h"
#include "Bibliography.h"

#ifdef WIN32
	#include "simple_qparse.h"
#else
	#include "unix_simple_qparse.h"
#endif

#ifdef DETECT_MEMORY_LEAK
	#ifdef _DEBUG
	#define new DEBUG_NEW
	#undef THIS_FILE
	static char THIS_FILE[] = __FILE__;
	#endif
#endif

#include <sstream>





// Defined in another translation unit; in this file it is only mentioned in a
// commented-out line of CQueryParser::ParseQuery (presumably the parser's trace
// switch -- confirm against the generated parser).
extern int yydebug;
// Parser entry point, defined elsewhere. CQueryParser::ParseQuery calls it with the
// parser object itself as the argument and treats a non-zero result as a failure.
int yyparse (void *);

int yylex (void* valp, void* _p)
{
	CQueryParser* _prs =  (CQueryParser*)_p;

	return _prs->yylex(valp);
};


// Error callback: the message is received and dropped, nothing is printed or stored.
//   a - error text (unused)
void yyerror(const char *a)
{
	(void)a;
}





// Creates a parser that works on behalf of the given holder.
//   pHolder - stored as is in m_pHolder; the parser does not delete it in this file
// The parser starts without a query tree, with no context sentences requested,
// bibliography output enabled, rank debugging off and the node counter at zero.
CQueryParser::CQueryParser(const CConcHolder* pHolder) 
	:	simFlexLexer(0, 0),
		m_pQueryTree(NULL)
{
	m_CurrentNodeIndex = 0;
	m_ContextSentencesCount = 0;

	m_bDebugRank = false;
	m_bEnableBibliographyForThisQuery = true;

	m_pHolder = pHolder;
}

// Returns the current value of the node counter and advances the counter by one,
// so consecutive calls yield 0, 1, 2, ... until CleanParser() resets it.
int CQueryParser::GetNewNodeIndex()
{
	return m_CurrentNodeIndex++;
}
// Returns the node counter, i.e. how many indices GetNewNodeIndex() has handed out
// since the counter was last reset.
size_t CQueryParser::GetNodeIndicesCount() const
{
	return static_cast<size_t>(m_CurrentNodeIndex);
}


// Releases the per-query state (including the query tree) via CleanParser().
CQueryParser::~CQueryParser()
{
	this->CleanParser();
}


// Evaluates the query tree built by the last successful parse.
// Returns true after the tree has been evaluated; without a tree the call asserts
// in debug builds and returns false otherwise.
bool	CQueryParser::Evaluate()
{
	assert (m_pQueryTree != NULL);
	if (m_pQueryTree != NULL)
	{
		m_pQueryTree->Evaluate();
		return true;
	}
	return false;
}


// Spelling of the context operator that CQueryParser::ParseQuery searches for first.
const char* ContextStarter =  "#Cntxt";
// All-lowercase spelling; tried only when ContextStarter does not occur in the query.
const char* ContextStarterLower =  "#cntxt";

// Number of entries in SortOperators.
const size_t SortOperatorsCount = 11;
// Names of the sort/filter operators that CQueryParser::ParseSortAndFilterOperators
// removes from the query text; each name is interpreted by
// CQueryParser::ParseQueryOperators.
string SortOperators[SortOperatorsCount] = 
{"#less_by_date", "#greater_by_date", "#is_date",
"#less_by", "#greater_by", 
"#less_by_size", "#greater_by_size", "#is_size",
"#has_field", "#less_by_rank", "#greater_by_rank"};




bool CQueryParser::ParseQueryOperators (const string& OperatorName, const string& ArgumentsStr, CDDCFilterWithBounds& Filter)
{
	vector<string> Args(3);
	StringTokenizer tok(ArgumentsStr.c_str(), ",");
	for (size_t i=0; i < 3 && tok();  i++)
	{
		Args[i] = tok.val();
		Trim(Args[i]);
	};

	if (OperatorName == "#less_by_date")
	{
		Filter.m_FilterType = LessByDate;
		if (!Args[0].empty())
		{
			CBibliography B;
			B.m_DateStr = Args[0];
			if (!B.ConvertDateToInt(Filter.m_LevelStart)) return false;

		}
		if (!Args[1].empty())
		{
			CBibliography B;
			B.m_DateStr = Args[1];
			if (!B.ConvertDateToInt(Filter.m_LevelEnd)) return false;
			Filter.m_LevelEnd++;
		}
	}
	else
	if (OperatorName == "#is_date")
	{
		Filter.m_FilterType = LessByDate;
		if (Args[0].empty()) return false;
		CBibliography B;
		B.m_DateStr = Args[0];
		if (!B.ConvertDateToInt(Filter.m_LevelStart)) return false;
		Filter.m_LevelEnd = Filter.m_LevelStart+1;
	}
	else
	if (OperatorName == "#greater_by_date")
	{
		Filter.m_FilterType = GreaterByDate;
		if (!Args[0].empty())
		{
			CBibliography B;
			B.m_DateStr = Args[0];
			if (!B.ConvertDateToInt(Filter.m_LevelStart)) return false;
			Filter.m_LevelStart = -Filter.m_LevelStart;
		}
		if (!Args[1].empty())
		{
			CBibliography B;
			B.m_DateStr = Args[1];
			if (!B.ConvertDateToInt(Filter.m_LevelEnd)) return false;
			Filter.m_LevelEnd = -Filter.m_LevelEnd+1;
		}
	}
	else
	if (OperatorName == "#greater_by_rank")
	{
		Filter.m_FilterType = GreaterByRank;
	}
	else
	if (OperatorName == "#less_by_rank")
	{
		Filter.m_FilterType = LessByRank;
	}
	else
	if ( (OperatorName == "#less_by") || (OperatorName == "#greater_by") )
	{
		bool bGreaterOperator = (OperatorName == "#greater_by");
		Filter.m_FilterType = bGreaterOperator ? GreaterByFreeBiblField: LessByFreeBiblField;
		if (Args[0].empty()) return false;
		Filter.m_FreeBiblAttribName = Args[0];
		if (!Args[1].empty())
		{
			if (!m_pHolder->m_pIndexator->m_Bibl.GetValueFromBiblSet(m_pHolder->m_pIndexator->GetRegExpTables(), Args[1], Filter))
				return false;
			if (bGreaterOperator)
				Filter.m_LevelStart = -Filter.m_LevelStart;
		};

		if (!Args[2].empty())
		{
			CDDCFilterWithBounds TempFilter;
			TempFilter.m_FreeBiblAttribName = Args[0];
			if (!m_pHolder->m_pIndexator->m_Bibl.GetValueFromBiblSet(m_pHolder->m_pIndexator->GetRegExpTables(), Args[2], TempFilter))
				return false;

			if (bGreaterOperator)
				Filter.m_LevelStart = -TempFilter.m_LevelStart;
			else
				Filter.m_LevelEnd = TempFilter.m_LevelEnd;
		};
		// regular expressions in #less_by operator are prohibited 
		// since it is contradictory (to my mind)
		Filter.m_SatisfiedValues.clear();
	}
	else
	if (OperatorName == "#less_by_size")
	{
		Filter.m_FilterType = LessBySize;
		if (!Args[0].empty())
			Filter.m_LevelStart = atoi(Args[0].c_str());
		if (!Args[1].empty())
			Filter.m_LevelEnd = atoi(Args[1].c_str())+1;
	}
	else
	if (OperatorName == "#is_size")
	{
		Filter.m_FilterType = LessBySize;
		if (Args[0].empty()) return false;
		Filter.m_LevelStart = atoi(Args[0].c_str());
		Filter.m_LevelEnd = Filter.m_LevelStart+1;
	}
	else
	if (OperatorName == "#greater_by_size")
	{
		Filter.m_FilterType = GreaterBySize;
		if (!Args[0].empty())
			Filter.m_LevelStart = -atoi(Args[0].c_str());
		if (!Args[1].empty())
			Filter.m_LevelEnd = -atoi(Args[1].c_str())+1;
	}
	else
	if (OperatorName == "#has_field")
	{
		if (Args[0].empty()) return false;
		if (Args[1].empty()) return false;
		Filter.m_FreeBiblAttribName = Args[0];
		Filter.m_FilterType = NoSort;
		if (!m_pHolder->m_pIndexator->m_Bibl.GetValueFromBiblSet(m_pHolder->m_pIndexator->GetRegExpTables(), Args[1], Filter))
			return false;
	}
	else
	{
		assert (false);
		return false;
	};

	return true;
};


// Removes every sort/filter operator (see SortOperators) together with its optional
// "[...]" argument list from QueryStr and turns each of them into an entry of
// m_Filters.
//   QueryStr - the query text; modified in place
// An operator is recognised only if its name is followed by the end of the string,
// a blank, a tab, '[' or '#'.
// Returns false as soon as ParseQueryOperators rejects an operator; QueryStr and
// m_Filters are then left partially processed.
// After a successful scan the last filter whose type is not NoSort is swapped to
// the end of m_Filters.
bool CQueryParser::ParseSortAndFilterOperators(string& QueryStr)
{
	m_Filters.clear();

	size_t pos = 0;
	while (pos < QueryStr.length())
	{
		// Is there an operator name that starts exactly at pos and is followed by a delimiter?
		size_t OperatorNo = SortOperatorsCount;
		for (size_t j=0; j < SortOperatorsCount; j++)
		{
			const size_t NameLength = SortOperators[j].length();
			// compare() looks at the characters in place, no temporary substring is built
			if (QueryStr.compare(pos, NameLength, SortOperators[j]) != 0)
				continue;

			const size_t NameEnd = pos + NameLength;
			if (		(NameEnd == QueryStr.length())
					||	(QueryStr[NameEnd] == ' ')
					||	(QueryStr[NameEnd] == '\t')
					||	(QueryStr[NameEnd] == '[')
					||	(QueryStr[NameEnd] == '#') // another operator
				)
			{
				OperatorNo = j;
				break;
			}
		}

		if (OperatorNo == SortOperatorsCount)
		{
			pos++;
			continue;
		}

		QueryStr.erase(pos, SortOperators[OperatorNo].length());

		// An argument list is taken only if '[' is the first non-blank character after
		// the operator and a closing ']' exists; then ']' necessarily lies behind '['.
		const size_t start = QueryStr.find_first_not_of(' ', pos);
		const size_t end = QueryStr.find(']', pos);
		string FilterValue;
		if (		(start != string::npos)
				&&	(QueryStr[start] == '[')
				&&	(end != string::npos)
			)
		{
			FilterValue = QueryStr.substr(start+1, end-start-1);
			QueryStr.erase(start, end-start+1);
			Trim(FilterValue);
		}

		CDDCFilterWithBounds F;
		if ( !ParseQueryOperators(SortOperators[OperatorNo], FilterValue, F) )
			return false;
		m_Filters.push_back(F);

		// pos is deliberately not advanced: the text that has moved to pos may itself
		// start with an operator (e.g. "#greater_by_rank#less_by_rank"), and it has to
		// be checked against the whole operator table again. Every match shortens
		// QueryStr, so the loop terminates.
	}

	// finding the last sort operator and pushing it to the end of the filter list in order
	// to present the result hit set in the right order
	for (size_t i = m_Filters.size(); i > 0; i--)
		if (m_Filters[i-1].m_FilterType != NoSort)
		{
			if (i != m_Filters.size())
				swap(m_Filters[i-1],  m_Filters.back());
			break;
		}

	return true;
}


// It is very important that our Lex/Yacc code is reentrant.
// We use the bison declaration %pure-parser and compile the flex scanner
// with %option c++, so we believe that
// CQueryParser::ParseQuery is thread-safe.


// Parses a query string and builds the query tree for it.
//   src - the query text; NULL is rejected
// Before the grammar is run, the following parts are cut out of the query:
//   * the context operator ("#Cntxt N" or "#cntxt N", last occurrence), only if
//     the indexator reports HasContextOperator(); N must be an unsigned decimal
//     number between 1 and 5 and is stored in m_ContextSentencesCount;
//   * "#file_names" (last occurrence), which switches bibliography output off;
//   * "#debug_rank" (last occurrence), which switches rank debugging on;
//   * the sort/filter operators, see ParseSortAndFilterOperators;
//   * the hit type and the text area number, which are processed by the indexator.
// Returns true if the remaining text is not empty and yyparse accepts it.
// On every call the previous parser state is discarded first. When the context
// operator is malformed, m_ContextSentencesCount stays 0.
bool CQueryParser::ParseQuery(const char* src)
{
	m_bEnableBibliographyForThisQuery = true;
	m_bDebugRank = false;
	m_ContextSentencesCount = 0;

	CleanParser();
	//yydebug = 1;
	if (src == NULL) return false;

	string  Query = src;
	if (m_pHolder->m_pIndexator->HasContextOperator() )
	{
		const long MaxContextSentencesCount = 5;

		size_t start = Query.rfind(ContextStarter);
		if (start == string::npos)
			start = Query.rfind(ContextStarterLower);
		if (start != string::npos)
		{
			// both spellings of the operator have the same length
			const size_t NumberStart = Query.find_first_not_of(" \t", start + strlen(ContextStarter));
			if (NumberStart == string::npos)
				return false;

			// The operator must be followed by plain digits. A sign or any other
			// character is rejected: otherwise a negative count would slip through
			// the upper-bound check and the number would stay in the query text.
			const size_t NumberEnd = Query.find_first_not_of("0123456789", NumberStart);
			if (NumberEnd == NumberStart)
				return false;

			// strtol saturates on overflow, so an over-long number is simply out of range
			const long Count = strtol(Query.c_str() + NumberStart, NULL, 10);
			if (Count < 1 || Count > MaxContextSentencesCount)
				return false;
			m_ContextSentencesCount = (int)Count;

			if (NumberEnd == string::npos)
				Query.erase(start);
			else
				Query.erase(start, NumberEnd - start);
		}
	}

	const char OnlyFileNamesOperator[] = "#file_names";
	const size_t FileNamesPos = Query.rfind(OnlyFileNamesOperator);
	if (FileNamesPos != string::npos)
	{
		Query.erase(FileNamesPos, strlen(OnlyFileNamesOperator));
		m_bEnableBibliographyForThisQuery = false;
	}

	const char DebugRank[] = "#debug_rank";
	const size_t DebugRankPos = Query.rfind(DebugRank);
	if (DebugRankPos != string::npos)
	{
		Query.erase(DebugRankPos, strlen(DebugRank));
		m_bDebugRank = true;
	}

	if (!ParseSortAndFilterOperators(Query))
		return false;

	m_HitTypeStr = m_pHolder->m_pIndexator->ProcessHitTypeStrInQueryStr(Query);

	m_TextAreaNo = m_pHolder->m_pIndexator->m_Bibl.ProcessTextAreaNoInQueryStr(Query);

	Trim(Query);
	if (Query.empty()) return false;

	istringstream 			QueryStream (Query.c_str());

	// We must reset the scanner start condition, i.e., call BEGIN(INITIAL).
	// For example, if the previous call was halted in a state which was not "INITIAL"
	// and returned "Parse error", then right now the scanner is still in that last
	// state, which is not "INITIAL".
	yy_start = 0;
	yy_init = 1;

	switch_streams((istream*)(&QueryStream), 0);

	if (yyparse((void*)this) != 0)
	{
		// drop whatever was built before the parser gave up
		CleanParser();
		return false;
	}

	return true;
}


// Discards the state that belongs to a single query: the collected labels, the
// current sequence objects and distances, the query tree, the filters and the
// node index counter. Called from the destructor and from ParseQuery (at its start
// and after a failed yyparse).
void CQueryParser::CleanParser()
{
	// yylex(void*) hands out pointers to the elements of m_Labels
	m_Labels.clear();
	m_CurrSequenceObj.clear();
	m_CurrDistances.clear();
	// deletes m_pQueryTree (if any) and sets it to NULL
	DeleteTree();
	m_Filters.clear();
	// GetNewNodeIndex() starts again from 0
	m_CurrentNodeIndex = 0;
}

// Deletes the query tree, if there is one, and leaves m_pQueryTree set to NULL.
void CQueryParser::DeleteTree()
{
	if (m_pQueryTree == NULL)
		return;

	delete m_pQueryTree;
	m_pQueryTree = NULL;
}



int CQueryParser::yylex(void* valp)
{
	int iRetVal = yylex();
	const char* __s = YYText();
	int len = YYLeng();

	if	(		(iRetVal == SIM_NAME) 
			||	(iRetVal == REG_EXPR_NAME) 
			||	(iRetVal == INDEX_NAME) 
			||	(iRetVal == CHUNK_NAME) 
		)
	{
		assert (YYLeng() < 10000-1);
		char buff[10000];
		strncpy (buff, YYText(), YYLeng());
		buff[YYLeng()] = 0;
		string s = buff;
		m_Labels.push_back(s);
		((YYSTYPE *)valp)->m_LabelPtr = & (m_Labels.back());
	}
	else
		((YYSTYPE *)valp)->m_LabelPtr = 0;

	return (iRetVal);
}
