// ==========  This file is under  LGPL, the GNU Lesser General Public Licence
// ==========  Copyright by Alexey Sokirko, 2002-2003
// ==========  www.dwds.de, www.aot.ru


#include "StdConc.h"
#include "ConcCommon.h"
#include "ConcIndexator.h"
#include "ConcordAlgorithm.h"

#ifdef DETECT_MEMORY_LEAK
	#ifdef _DEBUG
	#define new DEBUG_NEW
	#undef THIS_FILE
	static char THIS_FILE[] = __FILE__;
	#endif
#endif




// Loads the project stored in FileName.
// The steps are performed in this order: source files and options, corpus files,
// hit borders (from m_Path), indices from the disk, bibliography (m_Bibl).
// Returns false if loading of source files/options or of corpus files fails,
// if the number of file breaks differs from the number of corpus files
// (an error message is issued in this case), or if the bibliography cannot be loaded.
// NOTE: nothing returned by LoadHitBorders and ReadIndicesFromTheDisk is checked here.
bool CConcIndexator::LoadProject(string FileName)
{
	// the corpus files are loaded only after the source files and options were read successfully
	if (		!LoadSourceFilesAndOptions(FileName)
			||	!LoadCorpusFiles()
		)
		return false;

	AssertHasPath();
	LoadHitBorders(m_Path);
	ReadIndicesFromTheDisk();

	// the index must contain exactly one file break per corpus file
	const bool bFileCountMismatch = GetFileBreaks().size() !=  m_CorpusFiles.size();
	if (bFileCountMismatch)
	{
		ErrorMessage (Format("The number of files in %s is not equal to the number of files in the index",FileName.c_str()));
		return false;
	}

	// the bibliography is loaded last, it gets the number of file breaks
	if (m_Bibl.LoadBibl(FileName, GetFileBreaks().size()))
		return true;

	return false;
}







// Sorts "occurrences" in ascending order (in place).
// SortChunkCount is used only as a hint for choosing the sorting algorithm
// (FindOccurrences passes the number of index items whose occurrences were collected):
//  - if there are few occurrences (< 30), or very many chunks (> 1000), or less than
//    three occurrences per chunk on average, or no chunks at all, then std::sort is used;
//  - otherwise the ends of the ascending runs found in "occurrences" are collected
//    and passed to SortWithLists (presumably it merges these runs).
void SortOccurrences (vector<CTokenNo>& occurrences, size_t SortChunkCount)
{
	// size_t is used to avoid narrowing and signed/unsigned comparisons
	const size_t size = occurrences.size();

	// SortChunkCount == 0 must be tested before the division below,
	// otherwise "size/SortChunkCount" would be a division by zero
	if (	(size < 30)
		||	(SortChunkCount == 0)
		||	(SortChunkCount > 1000)
		||	(size/SortChunkCount <  3)
	   ) // if occurrences are very defragmented then use simple sort
	{
		sort(occurrences.begin(), occurrences.end());
		return;
	}

	// else use sort based on list unions:
	// Borders[j] is the index just after the end of the j-th ascending run
	DwordVector Borders;
	Borders.reserve( size ); // upper bound: there cannot be more runs than elements

	// here size >= 30, so "i+1 < size" visits every adjacent pair
	for (size_t i = 0; i+1 < size; i++)
		if (occurrences[i] > occurrences[i+1])
			Borders.push_back(i+1);

	// the last run ends at the end of the vector
	Borders.push_back(size);

	SortWithLists(occurrences, Borders);
}


void CStringIndexSet::FindOccurrences (const vector<DWORD>& IndexItems, const size_t PeriodNo, vector<CTokenNo>& occurrences,  CMyTimeSpanHolder& Profiler, CShortOccurCacheMap* pCaches, vector<int>& CacheIds)	const
{
	vector<CTokenNo> TempBuffer;

	CShortOccurCache*  pCacheByIndexSet = 0;
	if (pCaches)
		pCacheByIndexSet = &(*pCaches)[GetName()]; 	

	size_t Count = IndexItems.size();

	if (CacheIds.empty())
		CacheIds.resize(Count, -1);

	assert (CacheIds.size() == Count);

	for( size_t i=0; i<Count; i++ )
	{
		const char * _debug_view = GetIndexItemStr(m_Index[ IndexItems[i] ]);
		AddOccurs(IndexItems[i], occurrences, PeriodNo, TempBuffer, pCacheByIndexSet, CacheIds[i]);
	}

	SortOccurrences(occurrences, Count);

};


void CStringIndexSet::FindChunkOccurrences (const vector<DWORD>& IndexItems, vector<CTokenNo>& occurrences,  vector<size_t>& ChunkLengths, size_t PeriodNo, CMyTimeSpanHolder& Profiler, CShortOccurCacheMap* pCaches, vector<int>& CacheIds)	const
{
		
	vector<CTokenNo> TempBuffer;

	vector<pair<CTokenNo, size_t> > pos_and_lengths;

	size_t Count = IndexItems.size();

	CShortOccurCache*  pCacheByIndexSet = 0;
	if (pCaches)
		pCacheByIndexSet = &(*pCaches)[GetName()]; 	

	if (CacheIds.empty())
		CacheIds.resize(Count, -1);

	assert (CacheIds.size() == Count);


	for( size_t i=0; i<Count; i++ )	
	{
		int chunk_len;
		{
			const char * chunk_str = GetIndexItemStr(m_Index[ IndexItems[i] ]);
			const char* offset = strchr(chunk_str, ',');
			if (offset == 0) 
			{
				assert (false);
				continue;
			};
			chunk_len = atoi(offset+1);
			if (chunk_len<=0) continue;
		};
		vector<CTokenNo> curr_positions;	
		AddOccurs(IndexItems[i], curr_positions, PeriodNo, TempBuffer, pCacheByIndexSet, CacheIds[i]);
		{
			vector<pair<CTokenNo, size_t> > curr_pos_and_lengths;
			for (size_t k=0; k < curr_positions.size(); k++)
				curr_pos_and_lengths.push_back(make_pair(curr_positions[k], chunk_len));

			//add curr_pos_and_lengths to pos_and_lengths
			vector<pair<CTokenNo, size_t> > new_pos_and_lengths;
			new_pos_and_lengths.resize(curr_pos_and_lengths.size() + pos_and_lengths.size());
			vector<pair<CTokenNo, size_t> >::iterator it = 
				set_union(curr_pos_and_lengths.begin(), curr_pos_and_lengths.end(), pos_and_lengths.begin(), pos_and_lengths.end(), new_pos_and_lengths.begin());
			new_pos_and_lengths.resize(it - new_pos_and_lengths.begin());
			new_pos_and_lengths.swap(pos_and_lengths);
		}
	};

	Count = pos_and_lengths.size();
	for (size_t i=0; i < Count; i++)
	{
		size_t l = pos_and_lengths[i].second;
		for (int k = 0; k<l; k++)
		{
			CTokenNo LastPositionOfChunk = pos_and_lengths[i].first;
			CTokenNo FirstPositionOfChunk = LastPositionOfChunk - l +1;
			occurrences.push_back(FirstPositionOfChunk+k);
		}
		ChunkLengths.push_back(l);
	};
	
};

