#include "StdConc.h"
#include "../ConcordLib/ConcIndexator.h"
#include "../ConcordLib/IndexSetForLoadingStage.h"
#include "ConcordAlgorithm.h"

#ifdef DETECT_MEMORY_LEAK
	#ifdef _DEBUG
	#define new DEBUG_NEW
	#undef THIS_FILE
	static char THIS_FILE[] = __FILE__;
	#endif
#endif



// Produces in Out the stored form of the occurrence list In:
// when bEnabled is true, In is passed through ConvertToVariableInteger,
// otherwise Out becomes a plain copy of In.
void ArchiveOccurrences(const vector<DWORD>& In,  vector<DWORD>& Out, bool bEnabled)
{
	if (bEnabled)
	{
		ConvertToVariableInteger(In, Out);
		return;
	}

	// archiving is switched off: keep the occurrences as they are
	Out = In;
}


// Takes the occurrences Body[Start, End), passes them through ArchiveOccurrences
// (bArchiveOccurrences is forwarded as its bEnabled argument) and appends
// the result to the end of Archive.
void	ArchivePortion (vector<DWORD>& Archive, const vector<DWORD>& Body, DWORD Start, DWORD End, bool bArchiveOccurrences)
{
	// the slice of Body that has to be archived
	const vector<CTokenNo> Slice(Body.begin()+Start, Body.begin()+End);

	vector<CTokenNo> ArchivedSlice;
	ArchiveOccurrences(Slice, ArchivedSlice, bArchiveOccurrences);

	Archive.insert(Archive.end(), ArchivedSlice.begin(), ArchivedSlice.end());
}



/*
	This procedure archives the sequence of occurrences of IndexItem (IndexItem.GetOccurs())
	and calculates the borders between the search periods for this sequence.

	For example, let the sequence of occurrences be a = "1 34 45 46 89" and the search periods be "20 50 90";
	then a should be divided into 3 parts according to the search periods, i.e.
	" 1 | 34 45 46 | 89 |". These three borders (offsets in the archived sequence) are saved
	to m_EndPeriodOffsets under the key IndexItem.GetIndexItemOffset().

	If the number of occurrences does not exceed OccurBufferSize, no period division is built:
	the whole sequence is archived in one piece and bIsLongOccurrList is set to false.
	Otherwise the division is built and bIsLongOccurrList is set to true.
	In both cases the occurrences of IndexItem are replaced by their archived version.

	Returns false (after writing a diagnostic to stderr) if there are no search periods or
	if some occurrence is not less than the upper bound of the last search period. In that case
	IndexItem, m_EndPeriodOffsets and bIsLongOccurrList are left untouched.
*/
bool	CStringIndexSet::BuildPeriodsDivisionAndArchive (CItemIndexForLoading& IndexItem, bool& bIsLongOccurrList)
{
	const size_t PeriodsCount = m_pParent->GetSearchPeriodsCount();
	if (PeriodsCount == 0)
	{
		fprintf (stderr, "Error! There are no search periods\n");
		return false;
	}
	const size_t count = IndexItem.GetOccursSize();

	vector<CTokenNo> Archive;

	// only archiving if it is necessary
	if (OccurBufferSize >= count) 
	{
		// the number of occurrences is too small for building  period division
		ArchivePortion(Archive, (*IndexItem.GetOccurs()), 0, IndexItem.GetOccurs()->size(), m_bArchiveOccurrences);

		// writing back to IndexItem archived version of occurrences
		(*IndexItem.GetOccurs()) = Archive;
		bIsLongOccurrList = false;
		return true;
	}

	// read-only view of the occurrences; they are overwritten only at the very end
	const vector<CTokenNo>& Occurs = (*IndexItem.GetOccurs());

	vector<DWORD> EndOffsets;
	EndOffsets.resize(PeriodsCount);

	size_t CurrentPeriod = 0;
	size_t PrevStart = 0;

	for (size_t j = 0; j < count; j++)
	{
		// Close every search period whose upper bound is not greater than Occurs[j].
		// The period index is validated BEFORE it is used to look up the search period,
		// so GetSearchPeriod is never called with an index past the last period.
		while (		(CurrentPeriod < PeriodsCount)
				&&	(Occurs[j] >= m_pParent->GetSearchPeriod(CurrentPeriod))
			  )
		{
			if (j > PrevStart)
				ArchivePortion(Archive, Occurs, PrevStart, j, m_bArchiveOccurrences);
			// else: no occurrences in this search period

			PrevStart = j;
			EndOffsets[CurrentPeriod] = static_cast<DWORD>(Archive.size());
			CurrentPeriod++;
		}

		if (CurrentPeriod >= PeriodsCount)
		{
			// if we are here, it means that Occurs[j] is not less than
			// the upper bound of the last search period
			// (size_t values are cast explicitly, since the format strings use %i)
			fprintf (stderr, "Error while writing occurrences periods for \"%s\": CurrentPeriod >= m_pParent->m_pSearchPeriods.size()\n", GetName().c_str());
			fprintf (stderr, "CurrentPeriod = %i\n", static_cast<int>(CurrentPeriod));
			fprintf (stderr, "m_pParent->m_pSearchPeriods.size() = %i\n", static_cast<int>(PeriodsCount));
			fprintf (stderr, "Occurrence =%i, Number of occurrences = %i\n", static_cast<int>(Occurs[j]), static_cast<int>(count));
			for (size_t p = 0; p < PeriodsCount; p++)
				fprintf (stderr, "CurrentPeriod = %i, EndTokenNo = %i\n", static_cast<int>(p), static_cast<int>(m_pParent->GetSearchPeriod(p)));

			return false;
		}
	}

	// writing the last non-empty search period, if there are some occurrences left
	if (count > PrevStart)
	{
		// every iteration of the loop above has left CurrentPeriod inside the valid range
		assert (CurrentPeriod < PeriodsCount);
		ArchivePortion(Archive, Occurs, PrevStart, count, m_bArchiveOccurrences);
	}
	// else: no occurrences in the last search period

	// all remaining search periods end where the archive ends
	for(; CurrentPeriod < PeriodsCount; CurrentPeriod++)
		EndOffsets[CurrentPeriod] = static_cast<DWORD>(Archive.size());

	assert (EndOffsets.size() == m_pParent->GetSearchPeriodsCount());

	m_EndPeriodOffsets[IndexItem.GetIndexItemOffset()] = EndOffsets;

	// writing back to IndexItem archived version of occurrences
	(*IndexItem.GetOccurs()) = Archive;

	bIsLongOccurrList = true;

	return true;
}

// Builds one CIndexItem for the loaded item M and appends it to m_Index.
//
// The occurrences of M are first validated with M.CheckOccurrences(EndTokeNo).
// An item with exactly one occurrence keeps that occurrence directly in its
// end-occurrence offset and gets the flag TheOnlyOccurIsInEndOccurNo.
// For any other item the occurrences are processed by BuildPeriodsDivisionAndArchive
// and written to res_fp; CurrPositionInResFile is advanced by M.GetOccursSize()
// (queried after that processing) and its new value becomes the end-occurrence offset.
//
// Returns false if the validation, the archiving or the writing fails;
// in that case nothing is appended to m_Index.
bool	CStringIndexSet::AddOneIndexItem (CItemIndexForLoading& M, FILE* res_fp, size_t& CurrPositionInResFile, const CTokenNo EndTokeNo)
{
	CIndexItem NewItem;
	NewItem.SetItemIndexFlags( 0 );
	NewItem.SetIndexItemOffset(M.GetIndexItemOffset());

	if (!M.CheckOccurrences(EndTokeNo))
	{
		ErrorMessage(Format("CheckOccurrences failed!\n Index Name: %s\nCount of occurrences=%i\n IndexItemOffset=%i\nItemStr=%s", 
			GetName().c_str(), 
			M.GetOccursSize(), 
			M.GetIndexItemOffset(),
			GetIndexItemStr(M)
			));
		return false;
	}

	// the only occurrence is stored in the index item itself
	if (M.GetOccursSize() == 1)
	{
		NewItem.SetEndOccurOffset( (*M.GetOccurs())[0] );
		NewItem.SetItemIndexFlags ( NewItem.GetItemIndexFlags() | TheOnlyOccurIsInEndOccurNo );
		m_Index.push_back(NewItem);
		return true;
	}

	bool bIsLongOccurrList;
	if (!BuildPeriodsDivisionAndArchive(M, bIsLongOccurrList))
		return false;

	// NOTE(review): this call re-sets the flags to their current value, so nothing
	// is recorded for a long occurrence list - confirm whether a flag constant is missing here
	if (bIsLongOccurrList)
		NewItem.SetItemIndexFlags ( NewItem.GetItemIndexFlags()  );

	// the item ends where the result file will end after its occurrences are written
	CurrPositionInResFile += M.GetOccursSize();
	NewItem.SetEndOccurOffset( CurrPositionInResFile );

	if (!M.WriteOccurrences(res_fp))
		return false;

	m_Index.push_back(NewItem);
	return true;
}

bool	CStringIndexSet::WritePeriodsDivision ()
{
	string FName = GetPeriodsDevisionFileName();
	FILE * fp = fopen (FName.c_str(), "wb");
	if (!fp) return false;
	size_t count = 0;
	for (PeriodsDivisionMap::const_iterator it = m_EndPeriodOffsets.begin(); it != m_EndPeriodOffsets.end();it++)
	{
		size_t t = it->first;
		if (fwrite(&t,sizeof(t),1, fp) != 1)
		{
			fclose (fp);
			return false;
		}
		if (!WriteVectorInner(fp, it->second))
		{
			fclose (fp);
			return false;
		}
		count++;
	};
	fclose(fp);
	printf ("To file  %s %i vectors were written \n", FName.c_str(), count);
	return true;

};

bool	CStringIndexSet::ConvertLoadIndexToWorkingIndex ()
{

	FILE* main_fp = fopen(m_MainOccurTempFileName.c_str(), "rb");	
	if (!main_fp) return false;
	
	string OccursFileName = GetOccursFileName();
	FILE* res_fp = fopen(OccursFileName.c_str(), "wb");
	if (!res_fp ) 
	{
		fclose (main_fp);
		return false;
	};
	assert(	m_Index.empty() );
	m_Index.clear();
	const CTokenNo EndTokenNo = m_pParent->GetSearchPeriod(m_pParent->GetSearchPeriodsCount() - 1);
	
	CItemIndexForLoading M;
	if (!M.InitOccurs())
		return false;

	size_t CurrPositionInResFile = 0;
	while (!feof(main_fp))
	{
		if (!M.ReadFromTemporalFile(main_fp))
		{
			// the last index
			break;
		}
		if (!AddOneIndexItem(M,res_fp,CurrPositionInResFile, EndTokenNo))
		{
			fclose (main_fp);
			fclose(res_fp);
			return false;
		};
	};

	M.FreeOccurs();
	fclose(main_fp);
	fclose(res_fp);

	DeleteTempFiles();

	return  WritePeriodsDivision();
};

// Closes m_StorageFile if it is open and resets the handle to 0;
// does nothing when the handle is already 0.
void CStringIndexSet::CloseStorageFile()
{
	if (!m_StorageFile)
		return;

	fclose(m_StorageFile);
	m_StorageFile = 0;
}

// (Re)opens the storage file named by GetStorageFileName() for binary reading.
// A previously opened handle is closed first.
// Returns true if the file has been opened.
bool CStringIndexSet::OpenStorageFile()
{
	// drop the old handle, if any
	CloseStorageFile();

	m_StorageFile = fopen(GetStorageFileName().c_str(), "rb");
	if (m_StorageFile == 0)
		return false;

	return true;
}

// Saves the index set to disk. The steps are performed in this order and
// the first failing step makes the function return false:
//   1. if m_bUseItemStorage is set, the temporary item storage is converted to
//      the file GetStorageFileName(), which is then opened via OpenStorageFile();
//   2. if bAfterLoading is set, ConvertLoadIndexToWorkingIndex() is run;
//   3. m_Index is written to the file GetOccHdrFileName();
//   4. m_StringBuffer is written to the file GetFileNameForInfos().
bool CStringIndexSet::WriteToFile(bool bAfterLoading)
{
	if (m_bUseItemStorage)
	{
		const bool bStorageReady =		ConvertTempStorageToPersistent(GetStorageFileName())
									&&	OpenStorageFile();
		if (!bStorageReady)
			return false;
	}

	if (bAfterLoading && !ConvertLoadIndexToWorkingIndex())
		return false;

	// the second vector is written only if the first one has been written
	return		WriteVector(GetOccHdrFileName().c_str(), m_Index)
			&&	WriteVector(GetFileNameForInfos().c_str(), m_StringBuffer);
}





