#include "StdConc.h"
#include "IndexSet.h"
#include "ConcIndexator.h"

//==============================================
//================== CStringIndexSet ==========
//===============================================

// Constructs a string index set for the given parent indexator.
// pParent is forwarded to the CIndexSetForQueryingStage base and must not be null.
CStringIndexSet::CStringIndexSet(const CStringIndexator* pParent)
	: CIndexSetForQueryingStage(pParent)
{
	assert (pParent != 0);

	// no storage file is open right after construction
	m_StorageFile = 0;
}


// Stores the configuration of this index set:
//   Name, ShortName    - the long and the short name of the index set;
//   bCreateItemStorage - copied to m_bUseItemStorage;
//   bArchive           - copied to m_bArchiveOccurrences.
void CStringIndexSet::InitIndexSet(string Name, string ShortName, bool bCreateItemStorage, bool bArchive)
{
	// flags
	m_bArchiveOccurrences = bArchive;
	m_bUseItemStorage = bCreateItemStorage;

	// names
	m_ShortName = ShortName;
	m_Name = Name;
}


// The destructor only has to close the storage file.
CStringIndexSet::~CStringIndexSet()
{
	this->CloseStorageFile();
}

// Returns a copy of the name set by InitIndexSet.
string CStringIndexSet::GetName() const
{
	return this->m_Name;
}





void CStringIndexSet::QueryTokenList (const string& WordForm, vector<DWORD>& MatchWords)	const
{
	LessIndexString1<CIndexItem> Less(&m_StringBuffer);

	vector<CIndexItem>::const_iterator it = lower_bound (m_Index.begin(), m_Index.end(), WordForm.c_str(), Less);
	
	if (MatchWords.size() > m_pParent->m_MaxRegExpExpansionSize)
			return;

	if (   
				( it != m_Index.end() )
			&&	( !strcmp(GetIndexItemStr(*it),WordForm.c_str())   )
		)
		MatchWords.push_back( it -  m_Index.begin() );
};


void CStringIndexSet::QueryTokenListWithRightTruncation (const string& WordForm, vector<DWORD>& MatchWords)	const
{
	LessIndexString1<CIndexItem> Less(&m_StringBuffer);
	int len =  WordForm.length();
	vector<CIndexItem>::const_iterator it = lower_bound (m_Index.begin(), m_Index.end(), WordForm.c_str(), Less);

	while ( 		( it != m_Index.end() )
			&&	( !strncmp(GetIndexItemStr(*it),WordForm.c_str(), len)   )
			)
	{
		if (MatchWords.size() > m_pParent->m_MaxRegExpExpansionSize)
			break;

		MatchWords.push_back( it -  m_Index.begin() );

		it++;
	};
};

// Appends to MatchWords the positions (in m_Index) of all items whose string is
// accepted by RegExp.PartialMatch. All items of m_Index are tested one by one.
// The scan stops as soon as MatchWords contains more than
// m_pParent->m_MaxRegExpExpansionSize elements.
void CStringIndexSet::QueryTokenListUsingRegExp (RML_RE &RegExp, vector<DWORD>& MatchWords)	const
{
	const size_t Count = m_Index.size();
	for (size_t i = 0; i < Count; i++)
	{
		// Nothing can be added after the limit is exceeded, so stop before matching:
		// there is no need to run the regular expression over the rest of the index.
		if (MatchWords.size() > m_pParent->m_MaxRegExpExpansionSize)
			break;

		if (RegExp.PartialMatch(GetIndexItemStr(m_Index[i])))
			MatchWords.push_back( static_cast<DWORD>(i) );
	}
}


// Loads the index set (LoadIndexSet), reads m_StringBuffer from the file named by
// GetFileNameForInfos() and, if the item storage is used, opens the storage file.
// Returns false only if the storage file is needed and cannot be opened.
bool	CStringIndexSet::ReadFromTheDisk()
{
	LoadIndexSet();
	ReadVector(GetFileNameForInfos().c_str(), m_StringBuffer);

	// the storage file is opened only for index sets that use the item storage
	if (m_bUseItemStorage && !OpenStorageFile())
		return false;

	return true;
}

// Destroys the index set: calls the base class implementation, clears m_StringBuffer
// and deletes the storage file if it exists.
// Returns false if the base class fails or if the storage file cannot be removed.
bool	CStringIndexSet::DestroyIndexSet()
{
	const bool bBaseDestroyed = CIndexSetForQueryingStage::DestroyIndexSet ();
	if (!bBaseDestroyed)
		return false;

	ClearVector(m_StringBuffer);

	// no storage file on the disk: nothing more to do
	if (!FileExists(GetStorageFileName().c_str()))
		return true;

	// the file must be closed before it is removed
	CloseStorageFile();
	return remove(GetStorageFileName().c_str()) == 0;
}

// Builds the name of the storage file: MakeFName is applied to the path of the parent
// and to "_storage_" followed by the name of this index set.
// The path of the parent must be already set (it must differ from "#empty_path").
string	CStringIndexSet::GetStorageFileName() const
{
	assert (m_pParent->m_Path != "#empty_path");

	const string StorageSuffix = "_storage_" + GetName();
	return MakeFName(m_pParent->m_Path, StorageSuffix);
}


// Reads the storage file of this index set from the beginning as a sequence of DWORDs,
// maps each value through Old2New and writes the mapped value to out_fp.
//   out_fp  - the output file, opened for binary writing by the caller;
//   Old2New - the mapping from the values stored in this storage file to the values to write.
// Returns false if one of the files is not open, if the storage file cannot be rewound
// or read, if a value has no entry in Old2New, or if writing fails.
bool	CStringIndexSet::SaveOnePartOfUnionTokenStorage(FILE * out_fp, const map<DWORD, DWORD>& Old2New) const
{
	if (!m_StorageFile || !out_fp)
		return false;

	if (!FSeek(m_StorageFile, 0, SEEK_SET))
		return false;

	DWORD Old;
	while (fread(&Old, sizeof(DWORD), 1, m_StorageFile) == 1)
	{
		map<DWORD,DWORD>::const_iterator it = Old2New.find(Old);
		if (it == Old2New.end())
			return false;

		// the record is written with the same size that is used for reading
		if (fwrite(&(it->second), sizeof(DWORD), 1, out_fp) != 1)
			return false;
	}

	// fread returns less than 1 both at the end of the file and on a read error
	return ferror(m_StorageFile) == 0;
}

// Creates the storage file of this index set as the concatenation of the storages of
// First and Second, where each stored value is remapped through First2Result
// and Second2Result respectively. Then the new storage file is reopened with OpenStorageFile.
// Both First and Second must use the item storage.
// Returns false if the file cannot be created, if saving one of the parts fails,
// if the file cannot be closed properly, or if it cannot be reopened.
bool	CStringIndexSet::CreateUnionTokenStorage(
	const CStringIndexSet& First, 
	const CStringIndexSet& Second,
	const map<DWORD, DWORD>& First2Result, 
	const map<DWORD, DWORD>& Second2Result)
{
	assert  (First.m_bUseItemStorage);
	assert  (Second.m_bUseItemStorage);
	CloseStorageFile();

	FILE * out_fp = fopen(GetStorageFileName().c_str(), "wb");
	if (!out_fp) return false;

	// the second part is appended only if the first one was written successfully
	const bool bSaved =		First.SaveOnePartOfUnionTokenStorage(out_fp, First2Result)
						&&	Second.SaveOnePartOfUnionTokenStorage(out_fp, Second2Result);

	// the file is closed on every path; fclose flushes the buffered data, so its result matters
	const bool bClosed = (fclose (out_fp) == 0);

	if (!bSaved || !bClosed)
		return false;

	return OpenStorageFile();
}


// Builds this index set as the union of First and Second.
// Both indexes are traversed in parallel in the order defined by LessIndexString2:
//   - an item that is found only in First is copied with its occurrences;
//   - an item that is found only in Second is copied, its occurrences are increased by EndToken1;
//   - an item that is found in both is written once, the occurrences from Second
//     (increased by EndToken1) are appended to the occurrences from First.
// Each resulting item is passed to AddOneIndexItem together with the occurrences file.
// If the item storage is used, the union storage is built with CreateUnionTokenStorage.
//   EndToken1, EndToken2 - the token counts of the first and of the second corpus
//                          (NOTE(review): inferred from usage, confirm with the callers).
// Returns true if both indexes are empty (nothing is written in this case).
// Returns false if the sum of the sizes of the occurrences files is 0x7fffffff or more,
// if the output file cannot be created or closed, or if InitOccurs, AddOneIndexItem,
// WritePeriodsDivision or CreateUnionTokenStorage fails.
bool	CStringIndexSet::UnionIndexSet(const CStringIndexSet& First, const CStringIndexSet& Second, const CTokenNo EndToken1, const CTokenNo EndToken2)
{
	LessIndexString2<CIndexItem> Pred (&First.m_StringBuffer,&Second.m_StringBuffer);
	m_Index.clear();
	vector<CIndexItem>::const_iterator i1 = First.m_Index.begin();
	vector<CIndexItem>::const_iterator i2 = Second.m_Index.begin();
	if (First.m_Index.empty() && Second.m_Index.empty()) return true;
	QWORD size = (QWORD)(First.GetOccurrsFileSize())+(QWORD)(Second.GetOccurrsFileSize());
	if (size >= 0x7fffffff)
	{
		ErrorMessage("The size of index cannot be more than 2 Gb\n");
		return false;
	}

	FILE* res_fp = fopen(GetOccursFileName().c_str(), "wb");
	if (!res_fp ) return false;

	CItemIndexForLoading M;
	if (!M.InitOccurs())
	{
		// do not leak the output file
		fclose(res_fp);
		return false;
	}

	size_t CurrPositionInResFile = 0;
	// the progress is reported for the larger index
	const bool FirstIsLarger = First.m_Index.size()> Second.m_Index.size();
	const size_t Count = (FirstIsLarger) ? First.m_Index.size() : Second.m_Index.size();
	size_t Curr = 0;
	
	map<DWORD, DWORD> First2Result, Second2Result;

	while	(		(i1 !=  First.m_Index.end()) 
				||	(i2 !=  Second.m_Index.end()) 
			)
	{
		// an exhausted iterator must never be dereferenced
		const bool bHas1 = (i1 !=  First.m_Index.end());
		const bool bHas2 = (i2 !=  Second.m_Index.end());
		const size_t pos1 =  i1-First.m_Index.begin();
		const size_t pos2 =  i2-Second.m_Index.begin();
		Curr = (FirstIsLarger) ? pos1 : pos2;

		// the arguments are converted to int to match the "%i" conversion
		if (!(Curr%1000))
			fprintf (stderr, "%i/%i\r", static_cast<int>(Curr), static_cast<int>(Count));

		//  setting the pointer to data
		M.SetIndexItemOffset(m_StringBuffer.size());
		// the index of the first occurrence that comes from the second corpus
		size_t AddDeltaIndex;

		if	(		!bHas2
				||	(	bHas1 && Pred(*i1, *i2)	)
			)
		{
			// the item is only in the first index
			// (if the second index is exhausted, the loop condition guarantees bHas1)
			const char* IndexItemStr1 = First.GetIndexItemStr(*i1);

			// adding data to buffer			
			AddItemStrToBuffer(IndexItemStr1, strlen(IndexItemStr1));

			// reading occurrences
			First.ReadAllOccurrences(pos1, *M.GetOccurs());
			AddDeltaIndex = M.GetOccursSize();

			First2Result[pos1] = m_Index.size();

			i1++;
		}
		else
			if	(		!bHas1
					||	(	bHas2 &&	Pred.Greater(*i1, *i2)	)
				)
			{
				// the item is only in the second index
				const char* IndexItemStr2 = Second.GetIndexItemStr(*i2);
				AddItemStrToBuffer(IndexItemStr2, strlen(IndexItemStr2));

				// reading occurrences, all of them get the delta
				Second.ReadAllOccurrences(pos2, *M.GetOccurs());
				AddDeltaIndex = 0;				
				Second2Result[pos2] = m_Index.size();
				i2++;
			}
			else
			{
				// the item is in both indexes (here both iterators are valid)
				const char* IndexItemStr1 = First.GetIndexItemStr(*i1);

				First2Result[pos1] = Second2Result[pos2] = m_Index.size();
				First.ReadAllOccurrences(pos1, *M.GetOccurs());
				AddDeltaIndex = M.GetOccursSize();
				vector<CTokenNo> Q;
				Second.ReadAllOccurrences(pos2, Q);
				
				M.GetOccurs()->insert(M.GetOccurs()->end(), Q.begin(), Q.end());
				
				AddItemStrToBuffer(IndexItemStr1, strlen(IndexItemStr1));
				i1++;
				i2++;
			};

		// adding delta to the occurrences  from the second corpus 
		for (size_t  i = AddDeltaIndex; i < M.GetOccursSize();  i++)
			(*M.GetOccurs())[i] += EndToken1;

		// writing data  and add IndexItem to Result.m_Index
		if (!AddOneIndexItem(M, res_fp, CurrPositionInResFile, EndToken1+EndToken2))
		{
			// release everything that was acquired above before reporting the failure
			M.FreeOccurs();
			fclose(res_fp);
			return false;
		};
	};
	fprintf (stderr, "%i/%i\n", static_cast<int>(Curr), static_cast<int>(Count));
	M.FreeOccurs();

	// fclose flushes the buffered data, a failure here means that the file is incomplete
	if (fclose(res_fp) != 0) return false;

	if (!WritePeriodsDivision()) return false;

	if (m_bUseItemStorage)
		if (!CreateUnionTokenStorage(First, Second, First2Result, Second2Result))
			return false;

	return true;
}



bool CStringIndexSet::GetTokensFromStorage(const size_t start_offset,  const size_t end_offset, vector<COutputToken>& Tokens) const
{
	if (!m_bUseItemStorage) return false;
	size_t count  = end_offset-start_offset;
	Tokens.resize(count);
	if (count == 0) return true;
	file_off_t read_offset =  (DWORD)start_offset*(DWORD)sizeof(DWORD);
	
	if (!FSeek (m_StorageFile, read_offset, SEEK_SET)) return false;
	for (size_t i=0; i < count; i++)
	{
		DWORD u;
		if (fread(&(u),sizeof(u), 1, m_StorageFile) != 1) return false;
		if (u > m_Index.size()) return false;
		Tokens[i].m_TokenStr  = GetIndexItemStr(m_Index[u]);
		Tokens[i].m_bHighlight  = false;
	};
	return true;
};

// Prints the strings of all tokens of the storage file to stdout, one string per line.
// The storage file is read from the beginning as a sequence of DWORDs, each of them
// is an index in m_Index.
// Returns false if the item storage is not used (a message is printed to stderr),
// if the storage file is not open or cannot be rewound, or if it contains a value
// that is not a valid index in m_Index.
bool CStringIndexSet::DumpStorage() const
{
	if (!m_bUseItemStorage) 
	{
		fprintf(stderr, "Error! This index has no item storage!\n");
		return false;
	};
	if (!m_StorageFile) return false;
	if (!FSeek (m_StorageFile, 0, SEEK_SET)) return false;

	DWORD u;
	while (fread(&u,sizeof(u), 1, m_StorageFile) == 1)
	{
		// a value from the file must not be used as an index without a check
		if (u >= m_Index.size()) return false;

		printf ("%s\n", GetIndexItemStr(m_Index[u]));
	};
	return  true;
}


