#include "StdConc.h"
#include "../common/util_classes.h"
#include "../common/DwdsThesaurus.h"
#include "ConcCommon.h"
#include "ConcIndexator.h"
#include "sys/stat.h"
#include "time.h"
#include "limits.h"
#include "ConcordAlgorithm.h"
#include "../tinyxml/tinyxml.h"
#include "../RmlTar/RmlTar.h"
#include "../GraphanLib/HTMLConv.h"




#ifdef DETECT_MEMORY_LEAK
	#ifdef _DEBUG
	#define new DEBUG_NEW
	#undef THIS_FILE
	static char THIS_FILE[] = __FILE__;
	#endif
#endif




//===========================================
//================== Common functions =======
//===========================================


// Per-language flags. They are set to true once loading of the corresponding
// morphology has been attempted (see GetLemmatizerByLanguage,
// GetGramtabByLanguage and InitConcordDicts), whether or not loading succeeded.
bool bEnglishMorph = false;
bool bRussianMorph = false;
bool bGermanMorph = false;
// Lemmatizers created by InitMorphologySystem; 0 until loaded or if loading failed.
CLemmatizerEnglish* engLemmatizer = 0;
CLemmatizerRussian* rusLemmatizer = 0;
CLemmatizerGerman* gerLemmatizer = 0;

// Gramtabs created by InitMorphologySystem; 0 until loaded or if loading failed.
CEngGramTab* engGramTab = 0;
CRusGramTab*  rusGramTab = 0;
CGerGramTab*  gerGramTab = 0;


template <class  T, class Y>
void InitMorphologySystem(T*& Lemmatizer, Y*& Gramtab)
{
	Lemmatizer = new T;
	if (!Lemmatizer)
		return;
	string l = GetStringByLanguage(Lemmatizer->GetLanguage());
	fprintf (stderr, "   dictionary\n");
	string strError;
	if (!Lemmatizer->LoadDictionariesRegistry(strError))
	{
		ErrorMessage(Format("Cannot load %s  morphological dictionary: %s\n", l.c_str(), strError.c_str()));
		delete Lemmatizer;
		Lemmatizer = 0;

	};

	fprintf (stderr, "   gramtab\n");
	Gramtab = new Y;
	if (!Gramtab) return;
	if (!Gramtab->LoadFromRegistry())
	{
   		ErrorMessage(Format("Cannot load %s  gramtab\n", l.c_str()) );
		delete Gramtab;
		Gramtab = 0;

	};

};


// Returns the lemmatizer for the given language, loading the morphology of
// that language on first use. Returns 0 for any other language, or when the
// lemmatizer pointer is 0 (e.g. its dictionaries could not be loaded).
const CLemmatizer* GetLemmatizerByLanguage (MorphLanguageEnum Langua)
{
	switch (Langua)
	{
		case morphEnglish:
			if (!bEnglishMorph)
			{
				InitMorphologySystem(engLemmatizer, engGramTab);
				bEnglishMorph = true;
			}
			return engLemmatizer;

		case morphRussian:
			if (!bRussianMorph)
			{
				InitMorphologySystem(rusLemmatizer, rusGramTab);
				bRussianMorph = true;
			}
			return rusLemmatizer;

		case morphGerman:
			if (!bGermanMorph)
			{
				InitMorphologySystem(gerLemmatizer, gerGramTab);
				bGermanMorph = true;
			}
			return gerLemmatizer;

		default:
			return 0;
	}
}


// Returns the gramtab for the given language, loading the morphology of that
// language on first use. Returns 0 for any other language, or when the
// gramtab pointer is 0 (e.g. it could not be loaded).
const CAgramtab* GetGramtabByLanguage (MorphLanguageEnum Langua)
{
	switch (Langua)
	{
		case morphEnglish:
			if (!bEnglishMorph)
			{
				InitMorphologySystem(engLemmatizer, engGramTab);
				bEnglishMorph = true;
			}
			return engGramTab;

		case morphRussian:
			if (!bRussianMorph)
			{
				InitMorphologySystem(rusLemmatizer, rusGramTab);
				bRussianMorph = true;
			}
			return rusGramTab;

		case morphGerman:
			if (!bGermanMorph)
			{
				InitMorphologySystem(gerLemmatizer, gerGramTab);
				bGermanMorph = true;
			}
			return gerGramTab;

		default:
			return 0;
	}
}



bool InitConcordDicts()
{
	try
	{	
		bRussianMorph = true;
		fprintf (stderr, "loading Russian morphology\n");
		InitMorphologySystem(rusLemmatizer, rusGramTab);

		bEnglishMorph = true;
		fprintf (stderr, "loading English morphology\n");
		InitMorphologySystem(engLemmatizer, engGramTab);

		bGermanMorph = true;
		fprintf (stderr, "loading German morphology\n");
		InitMorphologySystem(gerLemmatizer, gerGramTab);

		return true;
	}
	catch (CExpc c)
	{
		fprintf (stderr, "Exception %s \n",c.m_strCause.c_str());
		return false;
	}
	catch (...)
	{
		return false;
	}

}

// Destroys all lemmatizers and gramtabs and resets the global pointers to 0,
// so that no dangling pointer can be handed out afterwards and calling this
// function twice is harmless.
// The b*Morph flags are deliberately left untouched: for a language whose flag
// is already set, the getters return 0 after this call instead of reloading.
void FreeConcordDicts()
{
	// deleting a null pointer is a no-op, so no explicit checks are needed
	delete rusLemmatizer;	rusLemmatizer = 0;
	delete engLemmatizer;	engLemmatizer = 0;
	delete gerLemmatizer;	gerLemmatizer = 0;
	delete rusGramTab;		rusGramTab = 0;
	delete engGramTab;		engGramTab = 0;
	delete gerGramTab;		gerGramTab = 0;
}



void concord_daemon_log(const string&  t)
{
	FILE * fp = 0;
	try {
		
		string log_path  = GetRegistryString( "Software\\Dialing\\Logs\\Main" );
		struct tm today = RmlGetCurrentTime();

		char tmpbuf[255];
		strftime( tmpbuf, 255,"%d%B%Y", &today );

		string FileName = log_path + Format("/Logs/concord/%s.log", tmpbuf);

		strftime( tmpbuf, 255,"%H:%M:%S", &today );
		fp = fopen (FileName.c_str(), "a");
		if (!fp) return;
		fprintf (fp, "%s > %s\n", tmpbuf, t.c_str());
		fclose(fp);

	}

	catch (...) {
		if (fp) fclose(fp);
	};
}

// Here is the definition of a token
// True if the graphematical line GraLine carries the OLLE or the ORLE descriptor.
inline bool IsWord (const CGraphmatFile*	piGraphmat, long GraLine)	 
{
	if (piGraphmat->HasDescr(GraLine, OLLE))
		return true;
	return piGraphmat->HasDescr(GraLine, ORLE);
}

// True if the graphematical line GraLine carries the ODC or the ODSC descriptor.
inline bool IsDigit (const CGraphmatFile*	piGraphmat, long GraLine)	 
{
	if (piGraphmat->HasDescr(GraLine, ODC))
		return true;
	return piGraphmat->HasDescr(GraLine, ODSC);
}

// True if the graphematical line GraLine carries the OSentEnd descriptor.
inline bool IsSentenceEnd (const CGraphmatFile*	piGraphmat, long GraLine)	 
{
	const bool bSentenceEnd = piGraphmat->HasDescr(GraLine, OSentEnd);
	return bSentenceEnd;
}




// Builds the list of morphological annotations of a word form.
//   GraLine  - the word form, passed to CLemmatizer::GetAllAncodesQuick
//   Langua   - language whose lemmatizer and gramtab are used
//   bCapital - passed to the lemmatizer; for German, noun readings (gSUB, gEIG)
//              are dropped when it is false
// The ancode buffer filled by the lemmatizer is read in two-character steps:
// the first code of each group supplies the common grammems ('?' means none),
// ';' ends a group, every other code is one interpretation. For every kept
// interpretation the part-of-speech string (if known) and the strings of all
// set grammems are appended in sorted order, followed by MorphAnnotationsDelim.
// Returns an empty vector if the lemmatizer or the gramtab is not available.
vector<string> GetGramInfosFromWord(const char* GraLine, MorphLanguageEnum Langua, bool bCapital)
{

	const CLemmatizer* Lemmatizer = GetLemmatizerByLanguage(Langua);
	if (!Lemmatizer) return vector<string>();

	// the gramtab is loaded independently of the dictionaries and may be
	// missing even when the lemmatizer is present
	const CAgramtab* A = GetGramtabByLanguage(Langua);
	if (!A) return vector<string>();

	char SrcCodesBuffer[2000];
	// defensive: keeps strlen() below well-defined even if the lemmatizer writes nothing
	SrcCodesBuffer[0] = 0;
	
	Lemmatizer->GetAllAncodesQuick((BYTE*)GraLine, bCapital, (BYTE*)SrcCodesBuffer);

	size_t len = strlen(SrcCodesBuffer);
	vector<string> Result;
	QWORD CommonGrammems = 0;
	bool bReadCommonGrammems = false;
	for (size_t i=0; i < len; i+=2)
	{
		if (!bReadCommonGrammems)
		{
			// first code of a group: grammems shared by all its interpretations
			if (SrcCodesBuffer[i] == '?')
				CommonGrammems = 0;
			else
				A->GetGrammems(SrcCodesBuffer+i, CommonGrammems);
			bReadCommonGrammems = true;
			continue;
		};
		if (SrcCodesBuffer[i] == ';')
		{
			// the delimiter is one character wide: together with the loop's
			// i+=2 this advances by exactly one position
			i--;
			bReadCommonGrammems = false;
			continue;
		};

		const size_t ResultSavedSize = Result.size();
		{
			BYTE pos = A->GetPartOfSpeech(SrcCodesBuffer+i);
			// all nouns in German are uppercase
			if (Langua == morphGerman)
				if (!bCapital)
					if ( (pos == gSUB) || (pos == gEIG) )
						continue;

			if (pos != UnknownPartOfSpeech)
				Result.push_back(A->GetPartOfSpeechStr(pos));
		};
		
		QWORD Grammems;
		A->GetGrammems(SrcCodesBuffer+i, Grammems);

		const int GrammemsCount = (int)A->GetGrammemsCount();
		for (int k = 0; k <GrammemsCount; k++)
			if (_QM(k) & (Grammems|CommonGrammems))
				Result.push_back(A->GetGrammemStr(k));

		// only the entries of the current interpretation are sorted
		sort(Result.begin()+ResultSavedSize, Result.end());

		Result.push_back(MorphAnnotationsDelim);
	}

	
	return Result;
};
//==============================================================
//=======================  CSourceFileHolder ===================
//==============================================================


// A freshly constructed holder is not marked as modified.
CSourceFileHolder::CSourceFileHolder() 
	: m_bModifiedListOfFiles(false)
{
}

// Writes the list of source files to FileName, one file name per line.
// Returns false if the file cannot be opened or if writing or closing it
// fails; the "modified" flag is cleared only after a fully successful write.
bool CSourceFileHolder::SaveSourceFileList(string FileName)
{
	FILE* fp = fopen(FileName.c_str(), "wb");
	if (!fp) return false;

	bool bWritten = true;
	for (size_t i=0; i < m_SourceFiles.size(); i++)
		if (fprintf(fp, "%s\n",m_SourceFiles[i].c_str()) < 0)
		{
			bWritten = false;
			break;
		}

	// buffered data is flushed by fclose, so its result must be checked as well
	if (fclose(fp) != 0)
		bWritten = false;

	if (!bWritten)
		return false;

	m_bModifiedListOfFiles  = false;
	return true;
}

// Replaces the list of source files with the lines of FileName.
// Every line is trimmed and empty lines are skipped. Lines of any length are
// supported: a line longer than the read buffer is assembled from several
// chunks instead of being split into several entries.
// Returns false if the file cannot be opened (the current list is then kept).
bool CSourceFileHolder::ReadSourceFileList(string FileName)
{
	FILE* fp = fopen(FileName.c_str(), "rb");
	if (!fp) return false;
	char chunk[1024];
	string line;
	m_SourceFiles.clear();
	while (fgets(chunk, sizeof(chunk), fp))
	{
		line += chunk;
		// fgets stops when the buffer is full: keep reading until the end of
		// the line (or the end of the file) has been reached
		const bool bLineComplete = (!line.empty() && line[line.length()-1] == '\n') || feof(fp);
		if (!bLineComplete) continue;

		Trim(line);
		if (!line.empty())
			m_SourceFiles.push_back(line);
		line.clear();
	};

	// an unterminated last line whose length is a multiple of the chunk size
	Trim(line);
	if (!line.empty())
		m_SourceFiles.push_back(line);

	fclose(fp);
	return true;
}



// Appends FileName to the list of source files and marks the list as modified.
void CSourceFileHolder::AddSourceFile(const char* FileName)
{
	m_bModifiedListOfFiles = true;
	m_SourceFiles.push_back(FileName);
}

// Appends all source files of X to this holder and marks the list as modified.
void CSourceFileHolder::AddSourceFilesFrom(const CSourceFileHolder& X)
{
	m_SourceFiles.insert(
		m_SourceFiles.end(),
		X.m_SourceFiles.begin(),
		X.m_SourceFiles.end());

	m_bModifiedListOfFiles = true;
}

// Reports whether the list of source files has been changed since it was
// constructed or last saved with SaveSourceFileList.
bool CSourceFileHolder::IsModified	( ) const
{
	const bool bModified = m_bModifiedListOfFiles;
	return bModified;
}

// Removes the source file with the zero-based index ItemNo and marks the list
// as modified. An index outside the list is ignored (nothing is removed and
// the "modified" flag is left unchanged) instead of invoking undefined behaviour.
void CSourceFileHolder::DeleteSourceFile(long ItemNo) 
{
	if (ItemNo < 0 || (size_t)ItemNo >= m_SourceFiles.size())
		return;

	m_SourceFiles.erase(m_SourceFiles.begin()+ItemNo);
	m_bModifiedListOfFiles = true;
};

// Empties the list of source files and marks it as modified.
void CSourceFileHolder::DeleteAllSourceFiles() 
{
	m_bModifiedListOfFiles = true;
	m_SourceFiles.clear();
}

// Number of source files currently held.
size_t CSourceFileHolder::GetSourceFilesCount()  const 
{
	const size_t Count = m_SourceFiles.size();
	return Count;
}

// Returns a copy of the source file name with the zero-based index FileNo.
// The index is not range-checked.
string  CSourceFileHolder::GetSourceFile(size_t FileNo)  const
{
	const string& Name = m_SourceFiles[FileNo];
	return Name;
}

// Looks for the first source file that cannot be accessed for reading
// (access() with mode 04). Returns its zero-based index, or -1 if every
// source file is accessible.
int CSourceFileHolder::FoundNotExistedFile	( ) const
{
	const size_t Count = m_SourceFiles.size();
	for (size_t FileNo = 0; FileNo < Count; FileNo++)
	{
		const bool bReadable = (access (m_SourceFiles[FileNo].c_str(), 04) == 0);
		if (!bReadable)
			return (int)FileNo;
	}

	return -1;
}



//==============================================================
//=======================  CHighlightTags ======================
//==============================================================
// The tags start out as "not read from a string", i.e. not user-defined.
CHighlightTags::CHighlightTags()
	: m_bWasReadFromString(false)
{
}

bool		CHighlightTags::ReadFromString(const string& s)
{
	string q = s; 
	StringTokenizer tok(Trim(q).c_str(), ";");
	vector<string> tokens;
	while (tok())
	{
		string l = tok.val();
		Trim(l);
		tokens.push_back(l);
	};
	if (tokens.size() != 4) 
		return false;
	m_FirstOpener = tokens[0];
	m_FirstCloser = tokens[1];
	m_RestOpener = tokens[2];
	m_RestCloser = tokens[3];
	m_bWasReadFromString = true; 
	return true;
}

// Serializes the four tags as "first opener;first closer;rest opener;rest closer",
// the layout that ReadFromString parses.
string		CHighlightTags::ToString() const
{
	const char* FirstOpener = m_FirstOpener.c_str();
	const char* FirstCloser = m_FirstCloser.c_str();
	const char* RestOpener = m_RestOpener.c_str();
	const char* RestCloser = m_RestCloser.c_str();

	return Format("%s;%s;%s;%s", FirstOpener, FirstCloser, RestOpener, RestCloser);
}

//==============================================================
//=======================  CConcIndexator ======================
//==============================================================

// Default size of the left and the right KWIC context.
const int DefaultKwicContextSize = 4;

// Sets every option to its default value. "#empty_path" marks the index path
// as not yet set (see AssertHasPath).
// NOTE(review): m_MaxRegExpExpansionSize is not assigned here although
// SaveOptionsToString always writes it - confirm it is initialized elsewhere.
CConcIndexator::CConcIndexator()  
{
	// location and kind of the index
	m_Path = "#empty_path";
	m_IndexType = DWDS_Index;
	m_bArchiveIndex = false;
	m_Language = morphUnknown;

	// graphematical analysis
	m_bUseParagraphTagToDivide = false;
	m_bEmptyLineIsSentenceDelim = true;
	m_bUseIndention = true;

	// indexing
	m_bIndexMorphPatterns = false;
	m_bUseDwdsThesaurus = false;
	m_bIndexPunctuation = true;
	m_bIndexChunks = false;
	m_bResumeOnIndexErrors = false;
	m_UserMaxTokenCountInOnePeriod = 0;
	m_bUserMaxTokenCountInOnePeriod = false;

	// querying
	m_bQueryOnlyFiles = false;
	m_bDisableDefaultQueryLexicalExpansion = false;
	m_bCaseSensitive = true;
	m_bNoContextOperator = false;

	// output
	m_bOutputBibliographyOfHits = false;
	m_bDwdsCorpusInterface = false;
	m_bGutenbergInterface = false;
	m_bShowNumberOfRelevantDocuments = false;

	// highlighting of hits in html output
	m_HtmlHighlighting.m_FirstOpener = "<STRONG><FONT COLOR=red>";
	m_HtmlHighlighting.m_FirstCloser = "</FONT></STRONG>";
	m_HtmlHighlighting.m_RestOpener = "<STRONG><FONT COLOR=red>";
	m_HtmlHighlighting.m_RestCloser = "</FONT></STRONG>";

	// highlighting of hits in plain text output
	m_TextHighlighting.m_FirstOpener = "&&";
	m_TextHighlighting.m_FirstCloser = "&&";
	m_TextHighlighting.m_RestOpener = "_&";
	m_TextHighlighting.m_RestCloser = "&_";

	// KWIC output
	m_LeftKwicContextSize = DefaultKwicContextSize;
	m_RightKwicContextSize = DefaultKwicContextSize;
	m_NumberOfKwicLinesInSnippets = 10;

	// ranking
	m_TfIdfRank = 0.3;
	m_NearRank = 0.3;
	m_PositionRank = 0.3;

	m_InterpDelimiter = "#";
}


// Nothing to release explicitly.
CConcIndexator::~CConcIndexator()
{
}


// Concatenates the token properties into one string, each property wrapped in
// '@' characters. With bRegexp the result starts with ".*" and every property
// is followed by MorphAnnotationsDelimRegExp.
string CConcIndexator::GetIndexItemSetByVectorString (const vector<string>& TokenProperties, bool bRegexp )
{
	string Result;
	if (bRegexp) 
		Result = ".*";

	for (vector<string>::const_iterator it = TokenProperties.begin(); it != TokenProperties.end(); ++it)
	{
		Result += '@';
		Result += *it;
		Result += '@';
		if (!bRegexp)
			continue;
		Result += MorphAnnotationsDelimRegExp;
	}

	return Result;
}


// Verifies that the index path has been set, i.e. that it no longer holds the
// "#empty_path" placeholder. Otherwise it asserts (debug builds), reports the
// error and throws CExpc with errUnknownPath.
void CConcIndexator::AssertHasPath() const
{
	assert(m_Path != "#empty_path");

	const bool bPathIsSet = !(m_Path == "#empty_path");
	if (bPathIsSet)
		return;

	ErrorMessage("Uninitialized path");
	throw CExpc("Exception: Uninitialized path for index files", errUnknownPath);
}

//! Graphematical definition of a token: a sentence end, a word, a digit
//! sequence or - only if punctuation is indexed - a punctuation mark (OPun).
bool	CConcIndexator::IsDWDSToken (const CGraphmatFile*	piGraphmat, long GraLine)	 const 
{
	if (IsSentenceEnd(piGraphmat, GraLine))
		return true;

	if (IsWord(piGraphmat, GraLine))
		return true;

	if (IsDigit(piGraphmat, GraLine))
		return true;

	if (!m_bIndexPunctuation)
		return false;

	return piGraphmat->HasDescr(GraLine, OPun);
}






// True if the path is not empty and the search periods file of the index can
// be accessed for reading (access() with mode 04).
bool	CConcIndexator::WasIndexed() const
{
	if (m_Path.empty())
		return false;

	const int AccessResult = access(GetSearchPeriodsFileName().c_str(), 04);
	return AccessResult == 0;
}

// Configures a graphematical analyser for this index: the language and the
// sentence division options are copied from the index options, the remaining
// two settings are fixed.
void CConcIndexator::InitGraphanProperties (CGraphmatFile* piGraphmat) const
{
	// fixed settings
	piGraphmat->m_bConvertRussianJo2Je = true;
	piGraphmat->m_bFilterUnprintableSymbols = false;

	// settings taken from the index options
	piGraphmat->m_Language = m_Language;
	piGraphmat->m_bUseParagraphTagToDivide = m_bUseParagraphTagToDivide;
	piGraphmat->m_bEmptyLineIsSentenceDelim = m_bEmptyLineIsSentenceDelim;
	piGraphmat->m_bUseIndention = m_bUseIndention;
}

// Returns the name of the current index type as used in the options file.
// An index type outside the three known values asserts (debug builds) and
// yields "unknown".
const char* CConcIndexator::GetIndexTypeStr () const
{
	if (m_IndexType == DWDS_Index)
		return "DWDS_Index";

	if (m_IndexType == MorphXML_Index)
		return "MorphXML_Index";

	if (m_IndexType == Free_Index)
		return "Free_Index";

	assert (false);
	return "unknown";
}

// Sets the index type from its name ("DWDS_Index", "MorphXML_Index" or
// "Free_Index"). Returns false, leaving the index type unchanged, for any
// other string.
bool  CConcIndexator::ReadIndexTypeFromStr (const string& s)
{
	if (s == "DWDS_Index") 
	{
		m_IndexType =  DWDS_Index;
		return true;
	}

	if (s == "MorphXML_Index") 
	{
		m_IndexType =  MorphXML_Index;
		return true;
	}

	if (s == "Free_Index") 
	{
		m_IndexType =  Free_Index;
		return true;
	}

	return false;
}



// Serializes the options into the line-based text format that
// LoadOptionsFromString reads: one option per line, the option name first,
// followed by its value if it has one. Most options are written only when
// they differ from the defaults set in the constructor; IndexType and
// MaxRegExpExpansionSize are always written.
// HasEqualOptions compares two indexators by comparing these strings.
string CConcIndexator::SaveOptionsToString()  const
{
	string Result;
	if (!m_LocalPathPrefix.empty())
		Result += Format("LocalPathPrefix %s\n", m_LocalPathPrefix.c_str());

	if (!m_InternetPathPrefix.empty())
		Result += Format("InternetPathPrefix %s\n", m_InternetPathPrefix.c_str());

	// the language is written as a line consisting of the language name only
	if (m_Language != morphUnknown)
		Result += GetStringByLanguage(m_Language)+"\n";


	if (!m_bCaseSensitive)
		Result += Format("CaseInsensitive\n");

	if (m_bShowNumberOfRelevantDocuments)
		Result += Format("ShowNumberOfRelevantDocuments\n");
	

	if (m_bUseParagraphTagToDivide)
		Result += Format("UseParagraphTagToDivide\n");
				
	if (!m_bEmptyLineIsSentenceDelim)
		Result += Format("EmptyLineIsNotSentenceDelim\n");

				
	if (!m_bUseIndention)
		Result += Format("DontUseIndention\n");
			
	if (m_bUseDwdsThesaurus)
		Result += Format("UseDwdsThesaurus\n");
			
	if (m_bIndexPunctuation)
		Result += Format("IndexPunctuation\n");

	if (m_bResumeOnIndexErrors)
		Result += Format("ResumeOnIndexErrors\n");
	

	if (m_bQueryOnlyFiles)
		Result += Format("QueryOnlyFiles\n");

	if (m_bDisableDefaultQueryLexicalExpansion)
		Result += Format("DisableDefaultQueryLexicalExpansion\n");

	if (m_LeftKwicContextSize != DefaultKwicContextSize)
		Result += Format("LeftKwicContextSize %i\n", m_LeftKwicContextSize);

	if (m_NumberOfKwicLinesInSnippets != 10)
		Result += Format("NumberOfKwicLinesInSnippets %i\n", m_NumberOfKwicLinesInSnippets);

	if (m_TfIdfRank != 0.3)
		Result += Format("TfIdfRank %f\n", m_TfIdfRank);
	if (m_NearRank != 0.3)
		Result += Format("NearRank %f\n", m_NearRank);
	if (m_PositionRank != 0.3)
		Result += Format("PositionRank %f\n", m_PositionRank);
	if (m_InterpDelimiter != "#")
	{
		Result += Format("InterpDelimiter  %s\n", m_InterpDelimiter.c_str()  );
		
	}
	// the default (a single index number 0) is not written;
	// indices are stored zero-based but written one-based
	if (m_IndicesToShow.size() > 1 || (m_IndicesToShow.size() == 1 &&  m_IndicesToShow[0] != 0))
	{
		string s = "IndicesToShow ";
		for(size_t i=0; i <m_IndicesToShow.size(); i++)
			s += Format("%i ",m_IndicesToShow[i]+1);
		Result += s+"\n";
	}

	if (m_RightKwicContextSize != DefaultKwicContextSize)
		Result += Format("RightKwicContextSize %i\n", m_RightKwicContextSize);

	if (m_bArchiveIndex)
		Result += Format("ArchiveIndex\n");
				
	if (m_bIndexMorphPatterns)
		Result += Format("IndexMorphPatterns\n");

	// highlighting tags are written only if they were user-defined
	if (m_HtmlHighlighting.m_bWasReadFromString)
		Result += Format("HtmlHighlighting %s\n", m_HtmlHighlighting.ToString().c_str());

	// must be written under its own key, otherwise the text highlighting
	// would be read back as html highlighting
	if (m_TextHighlighting.m_bWasReadFromString)
		Result += Format("TextHighlighting %s\n", m_TextHighlighting.ToString().c_str());


	Result += Format("IndexType %s\n", GetIndexTypeStr() );

	if (m_bOutputBibliographyOfHits)
		Result += Format("OutputBibliographyOfHits\n");

	if (m_bIndexChunks)
		Result += Format("IndexChunks\n");

	if (m_bUserMaxTokenCountInOnePeriod)
		Result += Format("UserMaxTokenCountInOnePeriod %i\n", m_UserMaxTokenCountInOnePeriod);			

	Result += Format("MaxRegExpExpansionSize %i\n", m_MaxRegExpExpansionSize);			

	if (m_bDwdsCorpusInterface)
		Result += Format("DwdsCorpusInterface\n");


	if (m_bGutenbergInterface)
		Result += Format("GutenbergInterface\n");
	

	if (m_bNoContextOperator)
		Result += Format("NoContextOperator\n");

	string IndicesStr = GetIndicesString();
	if (!IndicesStr.empty())
		Result += "Indices " + IndicesStr + "\n";

	IndicesStr = GetBorderIndicesString();
	if (!IndicesStr.empty())
		Result += "HitBorders " + IndicesStr + "\n";
	
	Result += m_Bibl.GetFreeBibiAttributesDescr();


	return Result;
};

// Name of the file that holds the list of corpus files: built by MakeFName
// from the index path and the extension "_con".
string CConcIndexator::GetFileNameForCorpusFileNames() const 
{
	string CorpusListFile = MakeFName(m_Path, "_con");
	return CorpusListFile;
}

// Writes the serialized options (see SaveOptionsToString) to the file built
// by MakeFName from FileName and the extension "opt".
// Returns false only if that file cannot be opened for writing.
bool CConcIndexator::SaveOptions(string FileName) const
{
	const string OptionsFile = MakeFName(FileName, "opt");

	FILE* Out = fopen(OptionsFile.c_str(), "w");
	if (Out == 0)
		return false;

	const string OptionsText = SaveOptionsToString();
	if (!OptionsText.empty())
		fprintf (Out,"%s", OptionsText.c_str());

	fclose(Out);
	return true;
}


bool CConcIndexator::SaveCorpusFileList() const
{
	FILE* fp = fopen(GetFileNameForCorpusFileNames().c_str(), "wb");
	if (!fp) return false;
	fprintf(fp, "%s",DDCVersion.c_str()); // should be with /n
	size_t cnt = m_CorpusFiles.size();
	for (long i=0;i< cnt; i++)
		fprintf(fp, "%s\n",m_CorpusFiles[i].c_str());
	fclose(fp);
	return true;
}



bool	CConcIndexator::HasEqualOptions(const CConcIndexator&  X) const
{
	string s1  = SaveOptionsToString();
	string s2  = X.SaveOptionsToString();
	return s1 == s2;
};


// Parses the contents of an options file and configures the indexator.
//
// Options holds one option per line: the first blank-separated field is the
// option name, the rest of the line its value (if any). A line
//     #include "file"
// is replaced by the contents of that file. A line consisting of a language
// name sets the language. Unknown option names are an error.
//
// After all lines are read, the function builds the PCRE character tables,
// registers the string indices (predefined for MorphXML_Index and DWDS_Index,
// taken from the "Indices" option for Free_Index), the optional chunk index,
// the free bibliographical attributes, the text areas and the hit borders, and
// finally checks that every entry of IndicesToShow refers to an existing index
// with item storage.
//
// Returns false on any syntax error, if no language is defined, or if one of
// the registration steps fails; diagnostics go to stderr or ErrorMessage.
// Options that were successfully read before the failure remain applied.
bool CConcIndexator::LoadOptionsFromString(string Options)
{
	StringTokenizer lines (Options.c_str(), "\n\r");
	MorphLanguageEnum langua;
	string IndicesStr, HitTypesStr, FreeBiblDescr, TextAreasDescr;
	m_Language = morphUnknown;
	m_IndicesToShow.resize(1,0);
	

	while (lines())
	{
		// the line is kept in a std::string: option lines may be arbitrarily
		// long, so a fixed-size buffer filled with strcpy could overflow
		const string Line = lines.val();
		const char* s = Line.c_str();
		
		StringTokenizer fields(s, " \t\r\n");
		if (!fields()) continue;
		string q = fields.val();
		string q_lower = q;
		RmlMakeLower(q_lower, morphEnglish);
		
		if (q == "#include")
		{
			q = fields.get_rest();
			Trim(q);
			if ((q.length() < 2) || (q[0] != '\"') || (q[q.length() -1] != '\"'))
			{
				fprintf (stderr, "bad syntax in opt-file : %s!\n", s);
				return false;
			};
			string AddOptions;
			string IncFileName = q.substr(1,q.length()-2);
			if (!LoadFileToString(IncFileName , AddOptions))
			{
				fprintf (stderr, "cannot load file \"%s\"!\n", IncFileName.c_str());
				return false;
			};
			// continue with the included text followed by the not yet processed lines
			Options = AddOptions+"\n"+lines.get_rest();
			lines.reinitialize(Options.c_str(), "\n\r");
		}
		else
		if (q == "LocalPathPrefix")
		{
			q = fields.next_token();
			if (q.empty() )
			{
				fprintf (stderr, "bad syntax in opt-file : %s!\n", s);
				return false;
			};
			m_LocalPathPrefix = q;
		}
		else
		if (q == "InternetPathPrefix")
		{
			q = fields.next_token();
			if (q.empty() )
			{
				fprintf (stderr, "bad syntax in opt-file : %s!\n", s);
				return false;
			};

			m_InternetPathPrefix = q;
		}
		else
		if (GetLanguageByString(q, langua))
		{
			fprintf (stderr,"%s\n", GetStringByLanguage (langua).c_str());
			m_Language = langua;
		}
		else
		if (q == "CaseInsensitive")
		{
			m_bCaseSensitive = false;
			fprintf (stderr,"CaseInsensitive\n");
		}
		else
		if (q == "HtmlHighlighting")
		{
			if (!m_HtmlHighlighting.ReadFromString(fields.get_rest()))
			{
				fprintf (stderr,"Cannot parse HtmlHighlighting field\n");
				return false;
			};
			fprintf (stderr,"HtmlHighlighting is user-defined\n");
		}
		else
		if (q == "TextHighlighting")
		{
			if (!m_TextHighlighting.ReadFromString(fields.get_rest()))
			{
				fprintf (stderr,"Cannot parse TextHighlighting field\n");
				return false;
			};
			fprintf (stderr,"TextHighlighting is user-defined\n");
		}
		else
		if (q == "ShowNumberOfRelevantDocuments")
		{
			m_bShowNumberOfRelevantDocuments = true;
			fprintf (stderr,"ShowNumberOfRelevantDocuments\n");
		}
		else
		if (q == FreeBiblAttribOptionFieldName)
		{
			// collected here, parsed after the loop by RegisterFreeBiblAttributes
			FreeBiblDescr += string(s) + "\n";
		}
		else
		if (q_lower == TextAreaOptionFieldName)
		{
			// collected here, parsed after the loop by RegisterTextAreas
			TextAreasDescr += string(s) + "\n";
		}
		else
		if (q == "UseParagraphTagToDivide")
		{
			m_bUseParagraphTagToDivide = true;
		}
		else
		if (q == "QueryOnlyFiles")
		{
			m_bQueryOnlyFiles = true;
			fprintf (stderr,"QueryOnlyFiles\n");
		}
		else
		if (q == "NoContextOperator" )
		{
			m_bNoContextOperator = true;
		}
		else
		if (q == "EmptyLineIsNotSentenceDelim")
		{
			m_bEmptyLineIsSentenceDelim = false;
		}
		else
		if (q == "DontUseIndention")
		{
			m_bUseIndention = false;
		}
		else
		if (q == "UseDwdsThesaurus")
		{
			m_bUseDwdsThesaurus = true;
			fprintf (stderr,"UseDwdsThesaurus\n");
		}
		else
		if (q == "MaxRegExpExpansionSize")
		{
			// unlike the other numeric options, a missing value is not an error here
			q = fields.next_token();
			if (!q.empty()) 
			{
				m_MaxRegExpExpansionSize = atoi(q.c_str());
				fprintf (stderr,"MaxRegExpExpansionSize = %i\n", m_MaxRegExpExpansionSize);
			}
		}
		else
		if (q == "RightKwicContextSize")
		{
			q = fields.next_token();
			if (!q.empty()) 
			{
				m_RightKwicContextSize = atoi(q.c_str());
				fprintf (stderr,"RightKwicContextSize = %i\n", m_RightKwicContextSize);
			}
			else
				return false;
		}
		else
		if (q == "LeftKwicContextSize")
		{
			q = fields.next_token();
			if (!q.empty()) 
			{
				m_LeftKwicContextSize = atoi(q.c_str());
				fprintf (stderr,"LeftKwicContextSize = %i\n", m_LeftKwicContextSize);
			}
			else
				return false;
		}
		else
		if (q == "NumberOfKwicLinesInSnippets")
		{
			q = fields.next_token();
			if (!q.empty()) 
			{
				m_NumberOfKwicLinesInSnippets = atoi(q.c_str());
				fprintf (stderr,"NumberOfKwicLinesInSnippets = %i\n", m_NumberOfKwicLinesInSnippets);
			}
			else
				return false;
		}
		else
		if (q == "TfIdfRank")
		{
			q = fields.next_token();
			if (!q.empty()) 
			{
				m_TfIdfRank = atof(q.c_str());
				fprintf (stderr,"TfIdfRank = %f\n", m_TfIdfRank);
			}
			else
				return false;
		}
		else
		if (q == "PositionRank")
		{
			q = fields.next_token();
			if (!q.empty()) 
			{
				m_PositionRank = atof(q.c_str());
				fprintf (stderr,"PositionRank = %f\n", m_PositionRank);
			}
			else
				return false;
		}
		else
		if (q == "InterpDelimiter")
		{
			q = fields.next_token();
			if (q.empty())  
			{
				fprintf (stderr,"InterpDelimiter is not set!\n");
				return false;
			}
			// assign first, so that the new value (not the previous one) is logged
			m_InterpDelimiter = q;
			fprintf (stderr,"InterpDelimiter = %s\n", m_InterpDelimiter.c_str());
		}
		else
		if (q == "IndicesToShow")
		{
			// a blank-separated list of one-based index numbers, stored zero-based
			q = fields.next_token();
			if (q.empty())  return false;
			m_IndicesToShow.clear();
			while (!q.empty())
			{
				if (q == "0") 
				{
					fprintf (stderr,"Error! IndicesToShow must start from 1\n");
					return false;
				}
				int a = atoi(q.c_str());
				if (!a)
				{
					fprintf (stderr,"Error! Bad format of IndicesToShow, it must be a blank separated list of integers (started from 1!)!\n");
					return false;
				}
				for (size_t j=0; j<q.length(); j++)
					if (!isdigit((BYTE)q[j]))
					{
						fprintf (stderr,"Error! Bad format of IndicesToShow, it must be a blank separated list of integers (started from 1!)!\n");
						return false;
					}

				m_IndicesToShow.push_back(a-1);
				q = fields.next_token();
			}
			// size() is a size_t: convert explicitly to match the %i conversion
			fprintf (stderr,"IndicesToShow is initialized with %i values\n", (int)m_IndicesToShow.size());
		}
		else
		if (q == "NearRank")
		{
			q = fields.next_token();
			if (!q.empty()) 
			{
				m_NearRank = atof(q.c_str());
				fprintf (stderr,"NearRank = %f\n", m_NearRank);
			}
			else
				return false;
		}
		else
		if (q == "IndexType")
		{
			q = fields.next_token();
			if (!ReadIndexTypeFromStr(q)) 
			{
				fprintf (stderr,"Unknown value for IndexType\n");
				return false;
			}
			fprintf (stderr,"IndexType %s\n", GetIndexTypeStr());
		}
		else
		if (q == "GutenbergInterface")
		{
			m_bGutenbergInterface = true;
			fprintf (stderr,"GutenbergInterface\n");
		}
		else
		if (q == "DwdsCorpusInterface")
		{
			m_bDwdsCorpusInterface = true;
			fprintf (stderr,"DwdsCorpusInterface\n");
		}
		else
		if (q == "Indices")
		{
			IndicesStr = fields.get_rest();
		}
		else
		if (q == "HitBorders")
		{
			HitTypesStr = fields.get_rest();
		}
		else
		if (q == "IndexPunctuation")
		{
			m_bIndexPunctuation = true;
			fprintf (stderr,"IndexPunctuation\n");
		}
		else
		if (q == "ResumeOnIndexErrors")
		{
			m_bResumeOnIndexErrors = true;
			fprintf (stderr,"ResumeOnIndexErrors\n");
		}
		else
		if (q == "IndexMorphPatterns")
		{
			m_bIndexMorphPatterns = true;
			fprintf (stderr,"IndexMorphPatterns\n");
		}
		else
		if (q == "DisableDefaultQueryLexicalExpansion")
		{
			m_bDisableDefaultQueryLexicalExpansion = true;
			fprintf (stderr,"DisableDefaultQueryLexicalExpansion\n");
		}
		else
		if (q == "ArchiveIndex")
		{
			m_bArchiveIndex = true;
			fprintf (stderr,"ArchiveIndex\n");
		}
		else
		if (q == "OutputBibliographyOfHits")
		{
			m_bOutputBibliographyOfHits = true;
			fprintf (stderr,"OutputBibliographyOfHits\n");
		}
		else
		if (q == "IndexChunks")
		{
			m_bIndexChunks = true;
			fprintf (stderr,"IndexChunks\n");
		}
		else
		if (q == "UserMaxTokenCountInOnePeriod")
		{
			q = fields.next_token();
			if (!q.empty()) 
			{
				m_bUserMaxTokenCountInOnePeriod = true;
				m_UserMaxTokenCountInOnePeriod = atoi(q.c_str());
				

				fprintf (stderr,"UserMaxTokenCountInOnePeriod = %i\n", m_UserMaxTokenCountInOnePeriod);
				// NOTE(review): only 0 is rejected although the message
				// mentions a lower bound of 65000 - confirm the intended limit
				if (m_UserMaxTokenCountInOnePeriod == 0)
				{
					fprintf (stderr,"It cannot be less than 65000!\n");
					return false;
				};
			};
			
		}
		else
		{
			fprintf (stderr, "bad syntax in opt-file : %s!\n", q.c_str());
			return false;
		};
	};


	if (m_Language == morphUnknown)
	{
		ErrorMessage ("There is no language definition in the options file");
		return false;
	};

	RmlPcreMakeTables(m_PcreCharacterTables, m_Language);

	
	
	const char* ArchiveType = (m_bArchiveIndex) ? "archive" : "normal";
	
	// the index definition is predefined for all index types except Free_Index,
	// for which it must come from the "Indices" option
	switch (m_IndexType) 
	{
		case MorphXML_Index: 	IndicesStr = Format("[Token w %s];[MorphPattern m %s];[Lemma l %s]", 
									ArchiveType,
									ArchiveType,
									ArchiveType);
								break;

		case DWDS_Index:	IndicesStr = Format("[Token w %s];",ArchiveType);
							if (m_bIndexMorphPatterns)
								IndicesStr += Format("[MorphPattern m %s];",ArchiveType);
							if (m_bUseDwdsThesaurus)
								IndicesStr += Format("[Thes t %s];",ArchiveType);
							break;

		case Free_Index:	if ( IndicesStr.empty() )
							{
								fprintf (stderr, "Error! Cannot find an index definition in the options file.\n");
								return false;
							};
							break;
	};

	if (!RegisterStringIndices(IndicesStr))
	{
		fprintf (stderr," Cannot parse index defintion: %s\n", IndicesStr.c_str());
		return false;
	};

	if (m_bIndexChunks)
		if (!RegisterChunkIndex())
		{
			fprintf (stderr, "cannot register predefined chunk index!\n");
			return false;
		};

	fprintf (stderr," indices = %s\n", GetIndicesString().c_str() );
	
	// the hit borders are predefined as well, except for Free_Index
	// (and for DWDS_Index with QueryOnlyFiles, where HitTypesStr is left as read)
	switch (m_IndexType) {
		case MorphXML_Index:	HitTypesStr = "[s:sentence:default]";
								break;

		case DWDS_Index:	if (!m_bQueryOnlyFiles)
								HitTypesStr = "[s:sentence:default]";
							break;

		case Free_Index:	if ( HitTypesStr.empty() )
							{
								fprintf (stderr, "Error! Cannot find a hit types definition in the options file.\n");
								return false;
							};
							break;
	};


	string ErrorStr;
	if (!m_Bibl.RegisterFreeBiblAttributes(FreeBiblDescr, ErrorStr))
	{
		ErrorMessage( Format("Cannot parse %s\n (%s)", FreeBiblDescr.c_str(), ErrorStr.c_str()));
		return false;
	}
	fprintf (stderr,"%s", m_Bibl.GetFreeBibiAttributesDescr().c_str());

	if (!m_Bibl.RegisterTextAreas(TextAreasDescr, ErrorStr))
	{
		ErrorMessage( Format("Cannot parse %s\n (%s)", TextAreasDescr.c_str(), ErrorStr.c_str()));
		return false;
	}
	fprintf (stderr,"%s", m_Bibl.GetTextAreasDescr().c_str());

	if (!RegisterBorderIndices(HitTypesStr.c_str()))
	{
		fprintf (stderr," Cannot parse %s\n", HitTypesStr.c_str());
		return false;
	};
	fprintf (stderr," HitBorders = %s\n", GetBorderIndicesString().c_str());

	// every index to show must exist and must have an item storage
	for(size_t i=0; i < m_IndicesToShow.size(); i++)
	{
		if (m_IndicesToShow[i] >= m_Indices.size())
		{
			fprintf (stderr,"Error! index %i specified in IndicesToShow is out  of range\n", m_IndicesToShow[i]);
			return false;
		}
		if (!m_Indices[m_IndicesToShow[i]]->m_bUseItemStorage)
		{
			fprintf (stderr,"Error! index %i has no storage\n", m_IndicesToShow[i]);
			return false;
		}
	}


	return true;

};


// Reads the list of corpus files from the corpus list file
// (see GetFileNameForCorpusFileNames).
// The first line of the file must be equal to DDCVersion, otherwise the index
// is rejected as created by a different version. Every following non-empty
// (trimmed) line is one corpus file. While reading, m_CommonFilePrefix is set
// to the longest common prefix of all corpus file names; it keeps the
// placeholder "$init" if the list is empty.
// Returns false if the file cannot be opened, has no first line, or has a
// wrong version line.
bool CConcIndexator::LoadCorpusFiles()
{
	m_CorpusFiles.clear();
	// reading corpus files from *.con file
	FILE* fp = fopen(GetFileNameForCorpusFileNames().c_str(), "rb");
	if (!fp) return false;
	char s[1024];
	// (the file is opened for reading only, so nothing is written to it here)
	if (!fgets(s,1024, fp))
	{
		// do not leak the file handle on this error path
		fclose (fp);
		return false;
	};
	if (s != DDCVersion)
	{
		ErrorMessage (Format("The index was created by a different version of DDC: %s", m_Path.c_str()));
		fclose (fp);
		return false;
	};

	m_CommonFilePrefix = "$init";
	while (fgets(s,1024,fp))
	{
			string q = s;
			Trim(q);
			if (q.empty()) continue;
			if (m_CommonFilePrefix == "$init")
				m_CommonFilePrefix = q;
			else
			{
				// cut the common prefix at the first position where it
				// differs from the current file name
				const size_t Limit = min(q.length(), m_CommonFilePrefix.length());
				size_t i = 0;
				for (; i < Limit; i++)
					if (q[i] != m_CommonFilePrefix[i])
						break;
				m_CommonFilePrefix.erase(i);
			};


			m_CorpusFiles.push_back(q);
	};
	fclose(fp);
	return true;
};

// Loads the options and the list of source files of a project.
// The path prefixes and the user token limit flag are reset first. The options
// are read from the file built by MakeFName from FileName and the extension
// "opt"; if that file cannot be loaded, the options are left as they are.
// Returns false if the options cannot be parsed or if the list of source files
// cannot be read from FileName.
bool CConcIndexator::LoadSourceFilesAndOptions(string FileName)
{
	m_LocalPathPrefix = "";
	m_InternetPathPrefix = "";
	m_Path = FileName;
	m_bUserMaxTokenCountInOnePeriod = false;

	// reading options  from *.opt file
	string OptionsFile = MakeFName(FileName, "opt");
	string OptionsText;
	const bool bHasOptionsFile = LoadFileToString(OptionsFile, OptionsText);
	if (bHasOptionsFile && !LoadOptionsFromString(OptionsText))
		return false;

	SetPath(FileName);

	// reading source files
	return ReadSourceFileList(FileName);
}



bool	CConcIndexator::DestroyIndex()
{	
	AssertHasPath();

	if (!RemoveHitBordersFileAndClear(m_Path))
		return false;

	if (!DestroyIndices())
		return false;

	if (FileExists(GetSearchPeriodsFileName().c_str()))
		if (remove(GetSearchPeriodsFileName().c_str()))
			return false;

	if (FileExists(GetFileNameForCorpusFileNames().c_str()))
		if (remove(GetFileNameForCorpusFileNames().c_str()))
			return false;
	m_CorpusFiles.clear();

	return true;

};







// Returns the user-defined token limit of one period if one was set
// (option UserMaxTokenCountInOnePeriod), otherwise 5000000.
DWORD CConcIndexator::GetMaxTokenCountInOnePeriod() const
{
	if (!m_bUserMaxTokenCountInOnePeriod)
		return 5000000;

	return m_UserMaxTokenCountInOnePeriod;
}

// Appends the plain text of an xml subtree to Result.
//  - text nodes are appended as they are; if Result is still empty, a page
//    break with UnknownPageNumber is written first;
//  - a "pb" element becomes "\n<pb N>\n", where N is its attribute "n";
//  - the children of any other element are processed recursively, and the
//    end of a "p" element is marked with "\n</p>\n";
//  - comments are skipped.
// Returns false (after reporting the error) for a "pb" element without the
// attribute "n" and for any other node type.
bool	GetTextFromXMLRecursive(TiXmlNode* parent, string& Result)
{
	const int NodeType = parent->Type();

	if (NodeType == TiXmlNode::TEXT)
	{
		if ( Result.empty() )
			Result += Format("\n<pb %u>\n",UnknownPageNumber);

		Result += parent->Value();
		return true;
	}

	if (NodeType == TiXmlNode::COMMENT)
	{
		// skipping comments
		return true;
	}

	if (NodeType != TiXmlNode::ELEMENT)
	{
		ErrorMessage (Format("Unknown xml-node:%s. Some parts of file cannot be parsed!", parent->Value()));
		return false;
	}

	string ElementName = parent->Value();
	Trim(ElementName);

	if (ElementName == "pb")
	{
		const char* PageNumber = static_cast<TiXmlElement*>(parent)->Attribute("n");
		if (PageNumber == 0)
		{
			ErrorMessage ("Cannot parse page break element");
			return false;
		}
		Result += Format("\n<pb %s>\n",PageNumber);
	}
	else
	{
		TiXmlNode* Child = parent->FirstChild();
		while (Child != 0)
		{
			if (!GetTextFromXMLRecursive(Child, Result)) 
				return false;
			Child = Child->NextSibling();
		}
	}

	if (ElementName == "p")
		Result += "\n</p>\n";

	return true;
}

// read the input xml that is in CWB-format (corpus work bench format)
bool	GetCWBFormattedStringRecursive(const TiXmlNode* parent, string& Result)
{
	if (parent->Type() == TiXmlNode::TEXT)
	{
		Result += parent->Value()+string("\n");
	}
	else
		if (parent->Type() == TiXmlNode::ELEMENT)
		{
			string type = parent->Value();
			Trim(type);
			bool bNotLineBreak = (type != PredefinedTableLineTag);
			if (type == "pb")
			{
				const char*	page_number = parent->ToElement()->Attribute("n");
				if (page_number)
				{
					Result += Format("<pb %s>\n", page_number);
					return true;
				}
				else
				{
					ErrorMessage("bad page break");						
					return false;
				};
			}
			else
				if (bNotLineBreak)
					Result += Format("\n<%s>\n", type.c_str());




			for (TiXmlNode* child = parent->FirstChild();  child;  child = child->NextSibling())
			{
				string R;
				if (!GetCWBFormattedStringRecursive(child, Result)) 
					return false;
			};

			if (bNotLineBreak)
				Result += Format("\n</%s>\n", type.c_str());
			
		}
		else
		{
			ErrorMessage (Format("Unknown xml-node:%s. Some parts of file cannot be parsed!", parent->Value()));
			return false;
		};

	return true;
};


bool	CConcIndexator::LoadXmlFile(string FileName, const char* pFileBuffer, CGraphmatFile* piGraphmat, CBibliography& Bibl, string& strError)
{
	if (m_Bibl.GetTextAreasCount() == 0)
	{
		strError = Format("Error! Cannot index xml files, since no text areas defined in the options file!");
		return false;
	}

	TiXmlDocument doc( FileName.c_str() );
	
	if (!m_Bibl.LoadXmlAndReadBibliography(doc, pFileBuffer, Bibl, strError)) 
		return false;

	vector<TiXmlElement*> TextAreas;
	if (!m_Bibl.GetTextAreaElements(doc, TextAreas, strError))
		return false;

	string AllTexts;
	for (size_t TextAreaNo = 0; TextAreaNo< TextAreas.size(); TextAreaNo++)
	{
		TiXmlElement* body = TextAreas[TextAreaNo];

		string SourceText;

		if (Bibl.m_StartPageInfo != UnknownPageNumber)
			SourceText = Format("\n<pb %u>\n",Bibl.m_StartPageInfo);
	
		if (!GetTextFromXMLRecursive(body, SourceText)) 
		{
			ErrorMessage (Format("Cannot get text from file %s", FileName.c_str()));
			return false;
		};

		AllTexts += SourceText;
		AllTexts += "</textarea>\n\n";
	}

	bool bResult = piGraphmat->LoadStringToGraphan(AllTexts);
	if (!bResult)
		ErrorMessage (Format("piGraphmat->LoadStringToGraphan failed for file %s, Error:%s", FileName.c_str(), piGraphmat->GetLastError().c_str() ));

	return bResult;
};

// Returns true if FileName has a non-empty stem and ends with ".xml" or
// ".XML" (mixed-case extensions are not recognized).
bool IsXmlFile(const string& FileName)
{
	const size_t ExtensionLength = 4;
	if (FileName.length() <= ExtensionLength)
		return false;

	const string Extension = FileName.substr(FileName.length() - ExtensionLength);
	return (Extension == ".xml") || (Extension == ".XML");
};

// Returns true if FileName has a non-empty stem and ends with ".tar" or
// ".TAR" (mixed-case extensions are not recognized).
bool IsTarFile(const string& FileName)
{
	const size_t ExtensionLength = 4;
	if (FileName.length() <= ExtensionLength)
		return false;

	const string Extension = FileName.substr(FileName.length() - ExtensionLength);
	return (Extension == ".tar") || (Extension == ".TAR");
};





// Rebuilds m_SearchPeriods. A period boundary is put at the start of the
// first file that lies more than MaxTokenCountInOnePeriod tokens behind the
// start of the current period; the corpus end is always the last boundary.
// Does nothing if there are no file breaks.
void CConcIndexator::CalculateSearchPeriods	(DWORD MaxTokenCountInOnePeriod )
{
	if (GetFileBreaks().empty()) 
		return;

	m_SearchPeriods.clear();

	CTokenNo PeriodStart = 0;
	for (size_t FileNo = 0; FileNo < m_CorpusFiles.size(); ++FileNo)
	{
		const CTokenNo FileStart = GetFileStartTokenNo(FileNo);

		// the file still fits into the current period
		if (FileStart - PeriodStart <= MaxTokenCountInOnePeriod)
			continue;

		m_SearchPeriods.push_back(FileStart);
		PeriodStart = FileStart;
	};

	m_SearchPeriods.push_back(GetCorpusEndTokenNo());
};


bool	CConcIndexator::IndexOneTableTextArea(const string& Text, const CPageNumber& StartPageFromHeader, size_t& page_breaks_count, CTokenNo& NewCorpusEndTokenNo,	string& strError)
{
	StringTokenizer tok(Text.c_str(), "\r\n");
	deque<pair<string,CTokenNo> >	start_chunk_tags_str_and_pos;
	StartTextAreaBorders();
	try {
		while (tok())
		{
			string q = tok.val();
			Trim(q);
			if (q.empty()) 
			{
				strError = "Empty lines in CWB format!";
				return false;
			}
			size_t len = q.length();
			if	(		(len < 100)
					&&	(len > 2)
					&&	(q[0] == '<') 
					&&	(q[len-1] == '>') 
				)
			{
				if (q[1] != '/')
				{
					string tag_name = q.substr(1,len-2); // ge the tag name, for example "vp" from "</vp>"
					if (tag_name.length() > 3 && (tag_name.substr(0,3) == "pb ")) // page break
					{
						int page_number = atoi(tag_name.c_str() + 3);
						if (page_number != 0)
						{
							// add the first page break from the header, if it is not specified in the body
							if (page_breaks_count == 0)
								if ( (NewCorpusEndTokenNo - StartPageFromHeader.m_StartTokenNo) > 0)
									AddPageBreak(StartPageFromHeader);

							CPageNumber Page;
							Page.m_PageNumber = page_number;
							Page.m_StartTokenNo = NewCorpusEndTokenNo;
							AddPageBreak(Page);
							page_breaks_count++;
						}
						else
						{
							strError = Format("bad page break (%s)",  q.c_str());						
							return false;
						};
					}
					else
						if ( !IsRegisteredBreak(tag_name) ) 
						{
							if (m_bIndexChunks)
								start_chunk_tags_str_and_pos.push_back(make_pair(tag_name, NewCorpusEndTokenNo));
						};


				}
				else // an end tag
				{

					string tag_name = q.substr(2,len-3); // ge the tag name, for example "vp" from "</vp>"
					
					if (!AddBreakByName(tag_name, NewCorpusEndTokenNo)) // if it is not a registered break
						if (m_bIndexChunks)
						{
							if (		start_chunk_tags_str_and_pos.empty() 
									||	(start_chunk_tags_str_and_pos.back().first != tag_name)
								)
							{
								strError  = Format("Error! An end tag without a start tag is found (%s<>%s)",
													start_chunk_tags_str_and_pos.back().first.c_str(), tag_name.c_str() ); 
								return false;
							};
							if (NewCorpusEndTokenNo > 0)
							{
								string tag = Format("%s,%i",start_chunk_tags_str_and_pos.back().first.c_str(), 
															NewCorpusEndTokenNo-start_chunk_tags_str_and_pos.back().second);
								start_chunk_tags_str_and_pos.pop_back();
								assert (m_pChunkIndex);
								m_pChunkIndex->InsertToInputLoadIndex(tag.c_str(),tag.length(), vector<CTokenNo>(1, NewCorpusEndTokenNo-1) );
							}
							else
							{
								// An empty element at the beginning of the corpus is ignored; 
							}
						};
				};

			}
			else
			{
				if ( !IndexOneToken(q.c_str(), NewCorpusEndTokenNo))
				{
					strError = Format("Cannot index line: %s ", q.c_str());
					return false;
				};
				
				NewCorpusEndTokenNo++;
			}
		};
		if (!EndTextAreaBorders(NewCorpusEndTokenNo))
		{
			strError = Format("Cannot add end text area breaks");
			return false;
		};
	}
	catch (CExpc c)
	{
		strError = Format("Exception %s while processing \"%s\"\n", c.m_strCause.c_str(), tok.val());
		return false;
	}
	catch (...)
	{
		strError = Format("Exception while processing \"%s\"\n", tok.val());
		return false;
	};
	return true;
}

// Return value: return true, if the input XML is valid and it fits the index structure
bool	CConcIndexator::IndexTable(string FileName, const char* pFileBuffer, 	CTokenNo& NewCorpusEndTokenNo,	string& strError)
{

	
	CBibliography Bibliography;
		
	TiXmlDocument doc( FileName.c_str() );
	if (!m_Bibl.LoadXmlAndReadBibliography(doc, pFileBuffer, Bibliography, strError)) return false;

	vector<TiXmlElement*> TextAreas;
	if (!m_Bibl.GetTextAreaElements(doc, TextAreas, strError))
	{	
		return false;
	}

	CPageNumber StartPageFromHeader;
	StartPageFromHeader.m_PageNumber = Bibliography.m_StartPageInfo;
	StartPageFromHeader.m_StartTokenNo = NewCorpusEndTokenNo;
	size_t page_breaks_count = 0;
	
	//iterate through all text areas 
	for (size_t TextAreaNo = 0; TextAreaNo< TextAreas.size(); TextAreaNo++)
	{
		TiXmlElement* textarea = TextAreas[TextAreaNo];
		string Text;
		// get text from xml
		if (!GetCWBFormattedStringRecursive(textarea,  Text))
			return false;

		// index one text area
		if (!IndexOneTableTextArea(Text, StartPageFromHeader, page_breaks_count, NewCorpusEndTokenNo, strError))
			return false;
	}
	
	if (NewCorpusEndTokenNo != StartPageFromHeader.m_StartTokenNo)
	{
		// if at least one token is found, then add a bibliographical record
		m_Bibl.AddIndexItem(Bibliography);
	};
	
	return true;
};

bool	CConcIndexator::LoadFileIntoGraphan(string FileName,  const char* pFileBuffer,	CGraphmatFile* piGraphmat, CBibliography& Bibl, string& strError)
{
	Bibl.CleanBibliography();
	try
	{
		if	(IsXmlFile(FileName))
		{
			if (!LoadXmlFile(FileName, pFileBuffer, piGraphmat, Bibl, strError)) 
			{
				strError += "\nCConcIndexator::LoadXmlFile has crushed!";
				return false;
			}
		}
		else
		{
			if (!piGraphmat->LoadStringToGraphan(pFileBuffer)) 
			{
				if (!piGraphmat->GetLastError().empty())
					strError = piGraphmat->GetLastError();
				else
					strError = "CGraphmat::LoadStringToGraphan has crushed! ";
				return false;
			}
			m_Bibl.SetFreeBiblAttribsEmpty(Bibl);
		};
		

	}
	catch (CExpc c)
	{
		strError = Format("Exception %s in CConcIndexator::LoadFileIntoGraphan\n", c.m_strCause.c_str());
		return false;
	}
	catch(...)
	{
		strError = "CConcIndexator::LoadFileIntoGraphan has crushed! (general exception)";
		ErrorMessage ("An exception occurred in CConcIndexator::LoadFileIntoGraphan while processing " + FileName);
		return false;
	}

	return true;
}



// Indexes one text, html or (non-table) xml file.
// The file is loaded into piGraphmat, then each unit is visited:
//  - every DWDS token is written to the index as "token<delimiter>"; if the
//    thesaurus is used and the token is capitalized, a second delimiter and
//    the thesaurus interpretations of the uppercase token are appended;
//  - sentence ends, text area ends and page breaks are registered as breaks.
// NewCorpusEndTokenNo is incremented for each indexed token. A bibliographical
// record is added if at least one token was indexed.
// Returns true for an empty file. Returns false and fills strError if the file
// cannot be loaded, if a token is empty, starts with the field delimiter or
// cannot be indexed, or if the number of found text areas differs from the
// number of text areas declared in the options.
bool	CConcIndexator::IndexTextOrHtmlFile	(
	CGraphmatFile* piGraphmat, 
	string FileName, 
	const char* pFileBuffer,
	const CDwdsThesaurus* pDwdsThesaurus,
	CTokenNo& NewCorpusEndTokenNo,
	string& strError)
{
	CBibliography Bibl;
	if (!LoadFileIntoGraphan(FileName, pFileBuffer, piGraphmat, Bibl, strError))
	{
		if (strError.empty())
			strError = "CConcIndexator::LoadFileIntoGraphan has crushed!";
		return false;
	}

	long GraLinesCount = piGraphmat->GetTokensCount();
	if (GraLinesCount <= 1)  
	{
		// the file is empty
		return true;
	};


	CTokenNo StartTokenNo = NewCorpusEndTokenNo; 
	CTokenNo CurrSentenceBreak = NewCorpusEndTokenNo;

	size_t FileTokensCount = 0;
	int SentNo = 0;
	// a string instead of a fixed-size buffer: the length of thesaurus interpretations is not bounded
	string OneIndexLine;
	bool bTheFirstPageBreakIsFound = false;
	
	int TextAreaNo = 0;
	int TextAreasCount = m_Bibl.GetTextAreasCount();
	size_t TextAreaStarter = UINT_MAX;

	for (long GraLineNo = 1; GraLineNo<GraLinesCount; GraLineNo++)
	{
		bool bSentenceEnd = false;
		bool bToken = IsDWDSToken(piGraphmat, GraLineNo);
		if	( bToken )
		{
 			BYTE TokenLen = piGraphmat->GetUnits()[GraLineNo].GetTokenLength();
			if (TokenLen == 0)
			{
				strError = Format("Fatal error: An empty word is found!\nGraph.Word No %li from %li \n", GraLineNo, GraLinesCount);
				return false;
			};

			const char* Token  = piGraphmat->GetUnits()[GraLineNo].GetToken();
			if ((BYTE)Token[0] == globalFieldDelimeter)
			{
				strError = Format("Error! An input word(LineNo=%li) cannot contain symbol \"%c\"(%i) \n", GraLineNo, (int)globalFieldDelimeter, (unsigned int)globalFieldDelimeter);
				return false;
			};

			const char* UpperToken  = piGraphmat->GetUppercaseToken(GraLineNo);

			bool bCapital =		(piGraphmat->HasDescr(GraLineNo, OUp))
							||  (piGraphmat->HasDescr(GraLineNo, OUpLw));

			// "token<delimiter>" or "token<delimiter><delimiter>interpretations"
			OneIndexLine.assign(Token, TokenLen);
			OneIndexLine += (char)globalFieldDelimeter;
		
			if (m_bUseDwdsThesaurus && bCapital)
			{
				OneIndexLine += (char)globalFieldDelimeter;
				OneIndexLine += pDwdsThesaurus->GetAllThesInterpetations(UpperToken);
			};

			if (!IndexOneToken(OneIndexLine.c_str(), NewCorpusEndTokenNo))
			{
				strError = Format("Cannot index line: %s ", OneIndexLine.c_str());
				return false;
			};

			
			if(	IsSentenceEnd(piGraphmat, GraLineNo) )
			{
				// "1" is for this token
				CurrSentenceBreak = NewCorpusEndTokenNo+1;
				
				AddBreakByName("s", CurrSentenceBreak);

				SentNo++;

				bSentenceEnd = true;

			};

		} // IsDWDSToken == true

		if (piGraphmat->GetUnits()[GraLineNo].IsTextAreaEnd() || TextAreaStarter == NewCorpusEndTokenNo)
		{
			AddBreakByName(PredefinedTextAreaBreakName, NewCorpusEndTokenNo+1);
			TextAreaNo++;
		}


		if	(		(bToken && (FileTokensCount == 0) && !bTheFirstPageBreakIsFound) 
				||	piGraphmat->GetUnits()[GraLineNo].IsPageBreak()
			)
		{
			bTheFirstPageBreakIsFound = true;
			CPageNumber Page;
			Page.m_PageNumber = piGraphmat->GetPageNumber(GraLineNo);
			//If we have read end of sentence and after it we have read a page break, and
			//there is no  tokens between this end of sentence and  this page break, then we should
			//shift this page break backward to the end of sentence. Otherwise the first sentence of the  next
			//page will be considered as a sentence from the previous  page.
			//For example we have
			//    <pb 13> Sentence1 <end-of-sentence> <pb 14> Sentence2.
			//Sentence2 starts when Sentence1 terminates, then a page break occurs inside(!) of Sentence2,
			//and it means that Sentence2 starts at page 13 and terminates at page 14. That is not true and 
			//we fix this problem.
			

			if	(		bSentenceEnd
					||	(SentNo == 0)
				)
				Page.m_StartTokenNo = NewCorpusEndTokenNo; // normal case
			else
				Page.m_StartTokenNo = CurrSentenceBreak; //  shifting page break to the last end of sentence
				
			AddPageBreak(Page);
		};

		if (bToken)
		{
			NewCorpusEndTokenNo++;

			FileTokensCount++;
		};
	};

	// write the last sentence if it contains at least one token
	if (CurrSentenceBreak < NewCorpusEndTokenNo)
	{	
		fprintf (stderr,"The last end of sentence is not found!\n");
		CurrSentenceBreak = NewCorpusEndTokenNo;
		AddBreakByName("s", NewCorpusEndTokenNo);
		SentNo++;

	};

	if (NewCorpusEndTokenNo != StartTokenNo)
	{
		// if at least one token is found, then add a bibliographical record
		m_Bibl.AddIndexItem(Bibl);
	};

	if ( TextAreaNo != TextAreasCount )
	{
		strError = Format("Some text areas are not present in this document\n");
		return false;
	}
	piGraphmat->FreeTable();
	return true;
};

// Returns the name of corpus file No posFile without the first
// m_CommonFilePrefix.length() characters.
string CConcIndexator::GetShortFilename(size_t posFile) const 
{
	const size_t PrefixLength = m_CommonFilePrefix.length();
	return m_CorpusFiles[posFile].substr(PrefixLength);
};

// Builds an html anchor for corpus file No posFile. If the file name starts
// with m_LocalPathPrefix, the prefix is cut off from the shown name and the
// link points to "http://" + m_InternetPathPrefix + the shown name;
// otherwise the file name itself is used both as the link and as the shown name.
string CConcIndexator::GetHtmlReference(size_t posFile) const 
{
	const size_t LocalPrefixLength = m_LocalPathPrefix.length();
	const bool bUnderLocalPrefix = (m_CorpusFiles[posFile].substr(0, LocalPrefixLength) == m_LocalPathPrefix);

	string ShownName = m_CorpusFiles[posFile];
	string Link = ShownName;
	if (bUnderLocalPrefix)
	{
		ShownName = m_CorpusFiles[posFile].substr(LocalPrefixLength);
		Link = "http://"+m_InternetPathPrefix + ShownName;
	};

	return Format("<a href=\"%s\">%s</a>", Link.c_str(),ShownName.c_str());
};



// Rebuilds the "MorphPattern" index from the "Token" index.
// Does nothing and returns true if m_bIndexMorphPatterns is not set.
// For each token of the corpus language that has morphological
// interpretations, all occurrences of the token are inserted into the morph
// index under the string built from these interpretations. The accumulated
// occurrences are flushed to disk after more than 3*GetMaxTokenCountInOnePeriod()
// of them are collected and after the last token.
// Returns false if one of the indices cannot be found, loaded, cleaned or saved.
bool CConcIndexator::CreateMorphIndex()
{
	if (!m_bIndexMorphPatterns)
		return true;

	fprintf(stderr,"CreateMorphIndex...\n");
	CStringIndexSet* pMorphIndexSet = GetIndexByName("MorphPattern");
	if (!pMorphIndexSet )
	{
		ErrorMessage ("Cannot find MorphPattern index");
		return false;
	};
	CStringIndexSet* pTokensIndexSet = GetIndexByName("Token");
	if (!pTokensIndexSet )
	{
		ErrorMessage ("Cannot find Token index");
		return false;
	};
	
	if (!pTokensIndexSet->LoadIndexSet()) // if the index was just created, then Token index is not loaded
	{
		ErrorMessage ("Cannot load Token index");
		return false;
	};

	if (!pMorphIndexSet->DestroyIndexSet())
	{
		ErrorMessage ("Cannot destroy morph. index set (some files cannot be deleted)");
		return false;
	};
	pMorphIndexSet->CreateTempFiles(m_Path);
	const size_t CountOfTokens = pTokensIndexSet->m_Index.size();
	size_t OccursCount =0;
	const DWORD MaxTokenCountInOnePeriod = GetMaxTokenCountInOnePeriod();
	// computed in size_t, so that the product is not truncated to DWORD
	const size_t FlushThreshold = (size_t)3 * MaxTokenCountInOnePeriod;

	for (size_t i = 0; i< CountOfTokens; i++)
	{
		// size_t values are cast explicitly to the type of the format specifier
		if ( (i % 1000) == 0)
				fprintf(stderr, "%lu/%lu (%lu/%lu)             \r", 
							(unsigned long)i, (unsigned long)CountOfTokens, 
							(unsigned long)OccursCount, (unsigned long)FlushThreshold);

		const char* sToken = pTokensIndexSet->GetIndexItemStr(pTokensIndexSet->m_Index[i]);
		if (CheckLanguage(sToken, m_Language)) 
		{
			vector<string> TokenProperties =  GetGramInfosFromWord(sToken, m_Language, is_upper_alpha((BYTE)sToken[0], m_Language));
			
			if (!TokenProperties.empty()) 
			{
					string GramInfoStr = GetIndexItemSetByVectorString(TokenProperties,false);
				
					vector<CTokenNo> occurrences;
					pTokensIndexSet->ReadAllOccurrences(i, occurrences);
					pMorphIndexSet->InsertToInputLoadIndex (GramInfoStr.c_str(), GramInfoStr.length(), occurrences);	
					OccursCount += occurrences.size();
			}
		}


		if	(		( OccursCount > FlushThreshold)
				||	(i+1== CountOfTokens)
			)
		{
	
			pMorphIndexSet->SortInputAndMemoryIndices();
			
			if (!pMorphIndexSet->AddInputLoadIndexToMemoryLoadIndex())
			{
					fprintf (stderr, "AddInputLoadIndexToMemoryLoadIndex returned false in CConcIndexator::CreateMorphIndex\n");
					return false;
			};

			if (!pMorphIndexSet->SaveMemoryLoadIndex())
			{
					fprintf (stderr, "cannot save memory indices to disk; probably no space left on device\n");
					return false;
			};

			if (!pMorphIndexSet->AddMemoryLoadIndexToMainLoadIndex()) 
			{
				fprintf (stderr, "AddMemoryLoadIndexToMainLoadIndex returned false in CConcIndexator::CreateMorphIndex\n");
				return false;
			}
			OccursCount = 0;
		};
	};
	fprintf(stderr, "%lu/%lu                         \n", (unsigned long)CountOfTokens, (unsigned long)CountOfTokens);

	fprintf(stderr, "final writing morph index\n");
	if ( !pMorphIndexSet->WriteToFile(true) ) return false;

	fprintf(stderr,"done\n");
	return true;
};


// Starts indexing in m_Path for the string indices, the hit borders and the
// bibliography, in this order. Returns false as soon as one of them fails.
bool	CConcIndexator::StartIndexing()
{
	if (!CStringIndexator::StartIndexing(m_Path)) 
		return false;

	if (!CHitBorders::StartIndexing(m_Path)) 
		return false;

	if (m_Bibl.Start(m_Path))
		return true;

	fprintf (stderr, "Cannot start bibl indexator");
	return false;
};

// Abnormal end of indexing: terminates the string indices, ends indexing of
// borders and exits the bibliography without saving.
// All three steps are always performed, so that a failure of one part does not
// leave the other parts unterminated. Returns false if terminating the string
// indices or the borders has failed.
bool	CConcIndexator::TerminateIndexing()
{
	bool bResult = true;

	if (!CStringIndexator::TerminateIndexing()) 
		bResult = false;

	if (!BordersEndIndexing(m_Path)) 
		bResult = false;

	m_Bibl.ExitWithoutSave();

	return bResult;
};

// Normal end of indexing: performs the final save of the bibliography.
// Prints a message to stderr and returns false if the save has failed.
bool	CConcIndexator::NormalEndIndexing()
{
	if (m_Bibl.FinalSaveBibliography())
		return true;

	fprintf(stderr, "Cannot terminate bibl indexator");
	return false;
};

// Indexes one file with the procedure that corresponds to m_IndexType:
// IndexMorphXml, IndexTextOrHtmlFile or IndexTable.
// An unknown index type asserts and returns false.
bool	CConcIndexator::IndexOneFile(CGraphmatFile* piGraphmat, string FileName, const char* pFileBuffer, const CDwdsThesaurus* pDwdsThesaurus,  CTokenNo& CorpusEndTokenNo,string& strError)
{
	if (m_IndexType == MorphXML_Index)
		return IndexMorphXml(FileName, pFileBuffer, CorpusEndTokenNo, strError);

	if (m_IndexType == DWDS_Index)
		return IndexTextOrHtmlFile(piGraphmat, FileName, pFileBuffer, pDwdsThesaurus, CorpusEndTokenNo, strError);

	if (m_IndexType == Free_Index)
		return IndexTable(FileName, pFileBuffer, CorpusEndTokenNo, strError);

	assert(false);
	return false;
};



//===============================================
//================== CConcIndexatorInvoker =======
//===============================================
// Initializes the invoker: no processing is running, all flags are switched
// off and the file counters are zero.
CConcIndexatorInvoker::CConcIndexatorInvoker()
{
	// progress of the current run
	m_CurrentSourceFileNo = 0;
	m_SourceFilesNumber = 0;
	m_bCorporaProcessing = false;
	m_bStoppedByUser = false;

	// options
	m_bStdout = false;
	m_bOnlyReindexMorphology = false;
	m_bSkipInitialFileChecking = false;

	// profiling takes 3 percent of working time, if we trust 
	// DevPartner or gprof, that's why by default it should be switched off.
	m_Profiler.m_bTimeSpanHolderEnabled = false;
};

// Stores Message as the current message; if m_bStdout is set, the message
// is also printed to stderr, followed by a line break.
void CConcIndexatorInvoker :: SetCurrMessage( string  Message)  const 
{
	m_CurrMessage = Message;

	if (!m_bStdout)
		return;

	fprintf (stderr, "%s\n", m_CurrMessage.c_str());
};

// Returns the name of the time statistics file: the result of MakeFName
// for Path and "_time_statistics".
string CConcIndexatorInvoker::GetTimeStatisticsFileName(string Path) const 
{
	const string StatisticsFileName = MakeFName(Path, "_time_statistics");
	return StatisticsFileName;
};

// Returns the name of the error log file: the result of MakeFName
// for Path and "_error_log".
string CConcIndexatorInvoker::GetErrorLogFileName(string Path) const 
{
	const string ErrorLogFileName = MakeFName(Path, "_error_log");
	return ErrorLogFileName;
};


bool ReadSourceFile	(const string& SourceFileName, string& CorpusFileName, TAR* pTar, vector<char>& Buffer, bool& bError)
{
	bError = false;
	if (!pTar)
	{
		Buffer.clear();
		CorpusFileName = SourceFileName;
		ReadVector(SourceFileName,Buffer);
	}
	else
	{
		if (!tar_get_next_file(pTar, CorpusFileName, Buffer, bError))
			return false;

		CorpusFileName = SourceFileName+":"+CorpusFileName;
	};
	size_t len = Buffer.size();
	for (size_t i=0; i<len; i++)
		if (Buffer[i] == 0)
			Buffer[i] = ' ';

	int new_len = Buffer.size();
	for (int  i=new_len-1; i>=0 && isspace((BYTE)Buffer[i]); i--)
		Buffer.erase(Buffer.begin() +i);

	if (IsHtmlFile(SourceFileName) && !Buffer.empty() )
	{
		HTML html;
		string Plain = html.GetTextFromHTMLBuffer(&(Buffer[0]), Buffer.size());
		Buffer.clear();
		Buffer.insert(Buffer.end(), Plain.begin(), Plain.end());
	};

	Buffer.push_back(0);
	return true;
};


// Stops the timer "Indexing" and writes the profiler report, the indexing
// speed, MaxTokenCountInOnePeriod, the number of subcorpora and the number
// of running tokens to the time statistics file of the index.
// A failure to open the file is reported to stderr; an exception is reported
// by SetCurrMessage.
void CConcIndexatorInvoker::WriteTimeStatistics	(const CConcIndexator& Indexator, DWORD CorpusEndTokenNo, DWORD MaxTokenCountInOnePeriod) const 
{
	try
	{
		double AllClocksCount = m_Profiler.EndTimer("Indexing");

		FILE * fp  = fopen (GetTimeStatisticsFileName(Indexator.m_Path).c_str(), "w");
		if (!fp)
		{
			fprintf (stderr,"Cannot write time statistics to file!\n");
			return;
		};

		fprintf (fp, "%s", m_Profiler.GetStrRepresentation(AllClocksCount).c_str());	

		// the speed is reported as 0 if indexing took less than one second
		int AllSeconds = (int)(AllClocksCount/CLOCKS_PER_SEC);
		int TokensPerSecond = 0;
		if (AllSeconds > 0)
			TokensPerSecond = (int)((CorpusEndTokenNo)/AllSeconds);

		fprintf (fp, "Speed (Tokens per second) = %i\n",TokensPerSecond);
		fprintf (fp, "MaxTokenCountInOnePeriod = %i (tokens in one subcorpus)\n",MaxTokenCountInOnePeriod);
		fprintf (fp, "Count of subcorpora = %i\n",Indexator.GetSearchPeriodsCount());
		fprintf (fp, "Count of running tokens  = %i\n",CorpusEndTokenNo);

		fclose(fp);
	}
	catch(...)
	{
		SetCurrMessage("An exception occurred while saving time_statistsics files");
	}
};

// Finishes a successful indexing run: saves the list of corpus files, checks
// the limit of running tokens, ends indexing of borders, calculates search
// periods, flushes and saves all indices, saves the bibliography and creates
// the morphological index.
// Returns false if one of these steps fails; the reason, if known, is set by
// SetCurrMessage.
bool CConcIndexatorInvoker::FinalizeIndex	(CConcIndexator& Indexator, DWORD CorpusEndTokenNo, DWORD MaxTokenCountInOnePeriod) const 
{
	if (!Indexator.SaveCorpusFileList())
	{
		printf ("Cannot save corpus file list %s\n", Indexator.GetFileNameForCorpusFileNames().c_str());
		return false;
	};		

	if (CorpusEndTokenNo >= CIndexItem::GetMaximalNumberOfRunningTokens())
	{
		// the limit is printed via an explicit cast, since its exact integer type is not known here
		SetCurrMessage( Format ("Error! Number of running tokens cannot be more than %lu", 
						(unsigned long)CIndexItem::GetMaximalNumberOfRunningTokens()) );
		return false;
	};

	m_Profiler.StartTimer("Final Save to Disc",1);

	if (!Indexator.BordersEndIndexing(Indexator.m_Path)) return false;
	if (!Indexator.LoadHitBorders(Indexator.m_Path)) return false;
	assert (Indexator.GetCorpusEndTokenNo() == CorpusEndTokenNo);
	Indexator.CalculateSearchPeriods(MaxTokenCountInOnePeriod);

	SetCurrMessage(  Format("Tokens Count = %i\n", CorpusEndTokenNo)  );

	if (!Indexator.AddInputLoadIndexToMemoryLoadIndex()) return false;
	if (!Indexator.SaveMemoryLoadIndex())  
	{
		SetCurrMessage(  "Cannot save memory indices to disk; probably no space left on device!");
		return false;
	}
	if (!Indexator.AddMemoryLoadIndexToMainLoadIndex())  return false;
	if (!Indexator.FinalSaveAllIndices(true)) return false;
	SetCurrMessage("finished final saving");
	if (!Indexator.NormalEndIndexing()) return false;
	SetCurrMessage("finished deleting temp. files");
	m_Profiler.EndTimer("Final Save to Disc");

	m_Profiler.StartTimer("CreateMorphIndex",1);
	if (!Indexator.CreateMorphIndex()) 
	{
		SetCurrMessage("Error! Cannot create morph index!");
		return false;
	}
	m_Profiler.EndTimer("CreateMorphIndex");

	SetCurrMessage("");
	return  true;
};

// Loads the project ProjectFile and rebuilds only its morphological index.
// The result is reported by SetCurrMessage. Returns false if the project
// cannot be loaded or the morphological index cannot be created.
bool CConcIndexatorInvoker::BuildOnlyMorphIndex	(string ProjectFile) const 
{
	CConcIndexator Indexator;
	if(!Indexator.LoadProject(ProjectFile))
	{
		SetCurrMessage(Format("cannot load index of project %s\n", ProjectFile.c_str()));
		return false;
	};

	if (Indexator.CreateMorphIndex())
	{
		SetCurrMessage ("reindexing was successful!\n");
		return true;
	};

	SetCurrMessage ("cannot index morphology index !\n");
	return false;
};

// The handler installed by set_new_handler in BuildIndex: reports the failure
// of memory allocation to stderr and throws bad_alloc.
void  newhandler( )
{
   fprintf (stderr,"Cannot allocate memory (newhandler is called); terminate the process...\n");

   // the function never returns normally
   throw bad_alloc( );
}

// Builds the index of the project ProjectFile from scratch.
//  1. Loads options and the list of source files; if m_bOnlyReindexMorphology
//     is set, only the morphological index is rebuilt (BuildOnlyMorphIndex).
//  2. Destroys the existing index and starts indexing.
//  3. Indexes all source files; a tar archive is processed member by member.
//     The input load index is moved to the memory index after more than
//     MaxInputIndexSize new tokens and the memory index is saved to disk after
//     more than MaxTokenCountInOnePeriod tokens.
//  4. Finalizes the index (FinalizeIndex), writes the error log (if errors
//     were skipped because of m_bResumeOnIndexErrors) and time statistics.
// On any failure, and when the user stops the process, indexing is
// terminated and the index is destroyed. The function returns true on
// success and when stopped by the user, otherwise false; messages are
// reported by SetCurrMessage.
bool CConcIndexatorInvoker::BuildIndex	(string ProjectFile)
{
	set_new_handler (newhandler);

	if (m_bOnlyReindexMorphology)
		return BuildOnlyMorphIndex(ProjectFile);

	const size_t MaxInputIndexSize = 400000;
	CConcIndexator Indexator;

	if (!Indexator.LoadSourceFilesAndOptions(ProjectFile)) 
	{
		SetCurrMessage(Format("cannot load options or source files of project %s\n", ProjectFile.c_str()));
		return false;
	};

	m_SourceFilesNumber = Indexator.GetSourceFilesCount();

	if ( m_SourceFilesNumber == 0 ) 
	{
		SetCurrMessage("No files in the project!");
		return false;
	};

	SetCurrMessage(Format("Files %i\n", m_SourceFilesNumber) );
	if (!m_bSkipInitialFileChecking)
	{
		int FileNo = Indexator.FoundNotExistedFile();
		if (FileNo != -1)
		{
			ErrorMessage(Format("File %s was not found or it is empty, cannot load the project\n", Indexator.GetSourceFile(FileNo).c_str()) );
			return false;
		};
	};

	///======================
	//Indexator.m_Indices[0]->CreateTempFilesDebug(Indexator.m_Path);
	//ReadVector(MakeFName(Indexator.m_Path, "debug_string_buffer").c_str(), Indexator.m_Indices[0]->m_StringBuffer);
	//if (!Indexator.AddMemoryLoadIndexToMainLoadIndex()) 
	//{
	//	SetCurrMessage("Fatal Error! Indexator.AddMemoryLoadIndexToMainLoadIndex returned false\n");
	//	goto error_label;
	//};
	//exit(1);
	///======================

	if (!Indexator.DestroyIndex())
	{
		fprintf (stderr,"Error! Cannot destroy index, this corpus is possibly already used!\n");
		ErrorMessage("Error! Cannot destroy index, this corpus is possibly already used");
		return false;
	};
	if (!Indexator.StartIndexing() )
	{
		fprintf (stderr,"Cannot create temp files in the catalog of the corpus!");
		return false;
	};
	

	// a reference to DWDS thesaurus if applicable
	const CDwdsThesaurus* pDwdsThesaurus = 0;
	if ( Indexator.UseDwdsThesaurus() )
		pDwdsThesaurus = LoadDwdsThesaurus();

	

	const DWORD MaxTokenCountInOnePeriod = Indexator.GetMaxTokenCountInOnePeriod();
	SetCurrMessage( Format ("MaxTokenCountInOnePeriod = %i", MaxTokenCountInOnePeriod));

	m_Profiler.ClearTimers();
	m_Profiler.StartTimer("Indexing", 0);

	CGraphmatFile*	piGraphmat = 0;
	bool bResult = false;

	CTokenNo CorpusEndTokenNo = 0;

	// the currently open tar archive (0, if no archive is open); it is declared
	// here and not in the loop, so that it can be closed at error_label
	TAR *pTar = 0;

	TiXmlBase::SetCondenseWhiteSpace(false);
	time_t speed_start_time;
	time (&speed_start_time);
	try
	{
		piGraphmat = new CGraphmatFile;
		if (!piGraphmat)
		{
			SetCurrMessage("Cannot load graphan");
			goto error_label;
		};
		//  first we should init properties, for example, language,
		//  because  loading dictionaries of graphan depends upon the language
		Indexator.InitGraphanProperties(piGraphmat);
		if (!piGraphmat->LoadDicts())
		{
			SetCurrMessage(Format("Graphan::LoadDicts failed! Error: %s",piGraphmat->GetLastError().c_str()));
			goto error_label;
		}
		assert(piGraphmat != NULL);
		
		size_t InputLoadIndexSize = 0;
		size_t MemoryLoadIndexSize = 0;
		vector<char> FileBuffer;
		vector<string> m_ErrorCorpusFiles;
		for (m_CurrentSourceFileNo = 0; m_CurrentSourceFileNo < m_SourceFilesNumber ; m_CurrentSourceFileNo++)
		{
			if (m_bStoppedByUser)
			{
				bResult =  true;
				goto error_label;
			};
			
			m_CurrentSourceFileName = Indexator.GetSourceFile(m_CurrentSourceFileNo);
			pTar = 0;
			if (IsTarFile(m_CurrentSourceFileName))
			{
				tar_open(&pTar, m_CurrentSourceFileName.c_str());
				if (!pTar)
				{
					bResult =  false;
					SetCurrMessage( Format("Error! Cannot open %s", m_CurrentSourceFileName.c_str()));
					goto error_label;
				};
			};

			do // reading files in one tar-archive
			{
				DWORD SaveCorpusEndTokenNo = CorpusEndTokenNo;
				if ( InputLoadIndexSize > MaxInputIndexSize)
				{
					time_t speed_curr_time;
					time (&speed_curr_time);
					size_t dur = speed_curr_time-speed_start_time;
					// less than one second can have passed, then the speed is reported as 0
					size_t  speed  = (dur > 0) ? (SaveCorpusEndTokenNo/dur) : 0;
					SetCurrMessage( Format ("File  %s", m_CurrentSourceFileName.c_str()));
					SetCurrMessage( Format ("(%i from  %i)", m_CurrentSourceFileNo+1, m_SourceFilesNumber));
					SetCurrMessage( Format ("Processed tokens = %i;Speed(tokens per sec)=%u", SaveCorpusEndTokenNo, (unsigned int)speed));
					m_Profiler.StartTimer("AddInputLoadIndexToMemoryLoadIndex",1);
					if (!Indexator.AddInputLoadIndexToMemoryLoadIndex()) 
					{
			
						SetCurrMessage( Format("Fatal error! Cannot AddInputLoadIndexToMemoryLoadIndex for %s", m_CurrentSourceFileName.c_str()));
						bResult =  false;
						goto error_label;
					};
					m_Profiler.EndTimer("AddInputLoadIndexToMemoryLoadIndex");

					MemoryLoadIndexSize += InputLoadIndexSize;
					InputLoadIndexSize = 0;
				};

				string strError;
				m_Profiler.StartTimer("Indexator.IndexOneFile", 1);
				string CorpusFileName;
				bool bError;
				if (!ReadSourceFile(m_CurrentSourceFileName, CorpusFileName, pTar, FileBuffer, bError))
				{
					assert (pTar);
					tar_close(pTar);
					pTar = 0;
					break;
				};

				if (bError)
				{
					SetCurrMessage( Format("Error! Cannot read %s", m_CurrentSourceFileName.c_str()));
					bResult =  false;
					goto error_label;
				};

				if ( FileBuffer.empty() )
				{
					string err = Format("skipping empty %s", CorpusFileName.c_str());
					SetCurrMessage( err );
					m_ErrorCorpusFiles.push_back(err);	
					continue;
				};

				if (!Indexator.IndexOneFile(piGraphmat, CorpusFileName, &(FileBuffer[0]), pDwdsThesaurus, CorpusEndTokenNo, strError))
				{
					string err = Format ("Error! %s; Indexator.IndexOneFile returned false (file %s)",
						strError.c_str(), 
						CorpusFileName.c_str());
					SetCurrMessage( err );
					if (!Indexator.m_bResumeOnIndexErrors)
					{
						bResult =  false;
						goto error_label;
					}
					else
					{
						m_ErrorCorpusFiles.push_back(err);	
						SetCurrMessage( Format("skipping %s", CorpusFileName.c_str()));
						continue;
					}
				};

				m_Profiler.EndTimer("Indexator.IndexOneFile");
				if (CorpusEndTokenNo == SaveCorpusEndTokenNo)
				{
					string err = Format("skipping empty %s", CorpusFileName.c_str());
					SetCurrMessage( err );
					m_ErrorCorpusFiles.push_back(err);	
					continue;
				};

				if (CorpusEndTokenNo >= CIndexItem::GetMaximalNumberOfRunningTokens())
				{
					bResult =  false;
					// the limit is printed via an explicit cast, since its exact integer type is not known here
					SetCurrMessage( Format ("Error! Number of running tokens cannot be more than %lu", 
									(unsigned long)CIndexItem::GetMaximalNumberOfRunningTokens()) );
					goto error_label;
				};


				InputLoadIndexSize += CorpusEndTokenNo-SaveCorpusEndTokenNo;

				if (m_bStoppedByUser)
				{
					bResult =  true;
					goto error_label;
				};
				

				// Setting File address
				Indexator.AddBreakByName(PredefinedFileBreakName, CorpusEndTokenNo);	
				Indexator.m_CorpusFiles.push_back(CorpusFileName);

				if (MemoryLoadIndexSize + InputLoadIndexSize > MaxTokenCountInOnePeriod)
				{
					SetCurrMessage( "Saving index to disk" );
					m_Profiler.StartTimer("AddInputLoadIndexToMemoryLoadIndex",1);
					if (!Indexator.AddInputLoadIndexToMemoryLoadIndex()) 
					{
						SetCurrMessage( Format("Fatal Error! Cannot AddInputLoadIndexToMemoryLoadIndex"));
						goto error_label;
					};
					m_Profiler.EndTimer("AddInputLoadIndexToMemoryLoadIndex");

					m_Profiler.StartTimer("Intermediate Save to Disc",1);

					if (!Indexator.SaveMemoryLoadIndex()) 
					{
						SetCurrMessage("Cannot save indices to disk; probably no space left on device!\n");
						goto error_label;
					};
					if (!Indexator.AddMemoryLoadIndexToMainLoadIndex()) 
					{
						SetCurrMessage("Fatal Error! Indexator.AddMemoryLoadIndexToMainLoadIndex returned false\n");
						goto error_label;
					};
					m_Profiler.EndTimer("Intermediate Save to Disc");
					
					MemoryLoadIndexSize = 0;
					InputLoadIndexSize = 0;


				};

			}	while  (pTar);
		
		}; //for each(m_SourceFilesNumber)

		if (CorpusEndTokenNo == 0)
		{
			SetCurrMessage("Cannot index empty corpus!");
			goto error_label;
		}

		if (!FinalizeIndex(Indexator, CorpusEndTokenNo, MaxTokenCountInOnePeriod))
		{
			SetCurrMessage("Cannot save index to disk, probably no space left!");
			goto error_label;
		};

		remove (GetErrorLogFileName(Indexator.m_Path).c_str());
		if (Indexator.m_bResumeOnIndexErrors && !m_ErrorCorpusFiles.empty())
		{
			FILE* fp = fopen (GetErrorLogFileName(Indexator.m_Path).c_str(), "w");
			if (fp)
			{
				for (size_t i=0; i < m_ErrorCorpusFiles.size(); i++)
					fprintf (fp, "%s\n", m_ErrorCorpusFiles[i].c_str());
				fclose(fp);
			}
			SetCurrMessage(Format("%i error(s) were written into %s", (int)m_ErrorCorpusFiles.size(), GetErrorLogFileName(Indexator.m_Path).c_str()) );
		};
	}
	catch(const CExpc& e)
	{
		SetCurrMessage(e.m_strCause);
		goto error_label;
	}
	catch(...)
	{
		SetCurrMessage("An exception occurred");
		goto error_label;
	}

	WriteTimeStatistics(Indexator, CorpusEndTokenNo, MaxTokenCountInOnePeriod);

	m_bCorporaProcessing = false;	
	if (piGraphmat) delete piGraphmat;

	if (pDwdsThesaurus)
		FreeDwdsThesaurus(pDwdsThesaurus);
	return true;


error_label:
	// an archive can be still open, if the process was interrupted in the middle of it
	if (pTar) 
	{
		tar_close(pTar);
		pTar = 0;
	};
	if (piGraphmat) delete piGraphmat;
	if (!Indexator.TerminateIndexing())
	{
		SetCurrMessage("Cannot normally terminate indexing!");
	};
	m_bCorporaProcessing = false;
	Indexator.DestroyIndex();
	if (pDwdsThesaurus)
		FreeDwdsThesaurus(pDwdsThesaurus);
	return bResult;
	
};


