// ==========  This file is under  LGPL, the GNU Lesser General Public Licence
// ==========  Dialing Syntax Analysis (www.aot.ru)
// ==========  Copyright by Dmitry Pankratov, Igor Nozhov, Alexey Sokirko



#include "stdafx.h"
#include "Sentence.h"
#include "Word.h"
#include "assert.h"
#undef NDEBUG
#include "SynPlmLine.h"
#include "../SimpleGrammarLib/SimpleGrammar.h"
#include "../AgramtabLib/ger_consts.h"

//===============================================
//================= CLASS CHomonym ==============
//===============================================

// Creates a homonym that belongs to sentence pSent, with all flags cleared and
// all indices set to their "not assigned" values.
// pSent is dereferenced right away, so it must not be null.
CHomonym::CHomonym(const CSentence* pSent)
{
	m_pSent = pSent;

	// the conjunction count is the value this file compares against to detect
	// "is not a coordinating conjunction" (see CWord::ProcessGraphematicalDescriptors)
	m_CoordConjNo = pSent->GetOpt()->GetCoordConjCount();

	// lemma and morphological description
	m_iCmpnLen = 0;
	m_bCmplLem = false;
	m_bUnkGramcodes = false;
	m_iGrammems = 0;
	m_iPoses = 0;
	m_iTagID = UnknownPartOfSpeech;
	m_lPradigmID = -1;
	m_lFreqHom = 0;

	// bookkeeping flags
	m_bDelete = false;
	m_bGoodHomonym = false;

	// oborot marks
	m_bOborot1 = false;
	m_bOborot2 = false;
	m_bInOb = false;
	m_OborotNo = -1;

	// lexical and syntactic properties
	m_bAdvAdj = false;
	m_bRussianOdin = false;
	m_bMonth = false;
	m_bCanSynDependOnAdj = false;
	m_bCanSynDependOnAdv = false;
	m_bCanSubdueInfinitive = false;
	m_bCanSubdueInstr = false;
	m_bNounHasAdjectiveDeclination = false;
	m_bPassive = false;
	m_bPerfectAnomalie = false;
	m_bInfinitiveConstruction = false;
	m_bAdjWithActiveValency = false;
}

// Returns the syntax options of the sentence this homonym belongs to.
const	CSyntaxOpt* CHomonym::GetOpt() const
{
	const CSyntaxOpt* pOptions = m_pSent->GetOpt();
	return pOptions;
}

// Stores Lemma, passed through GerMakeUpper(), as the lemma of this homonym.
void	CHomonym::SetLemma(string Lemma)
{
	GerMakeUpper(Lemma);
	// Lemma is a by-value copy, so its buffer can simply be taken over
	m_strLemma.swap(Lemma);
}



// Returns true if at least one ancode of this homonym contains every grammem
// of the bit set Grammems.
// m_GramCodes is walked in steps of two characters; each position is handed to
// the grammar table, which yields the grammem set of that ancode.
bool	CHomonym::HasSetOfGrammemsExact(QWORD Grammems) const
{
	const char* const pAncodes = m_GramCodes.c_str();
	for (size_t i = 0; i < m_GramCodes.length(); i += 2)
	{
		// initialised so that a failed lookup cannot leave an indeterminate value
		// behind when assert() is compiled out
		QWORD g = 0;
		if (!GetOpt()->GetGramTab()->GetGrammems(pAncodes + i, g))
		{
			assert (false);
		}
		if ((g & Grammems) == Grammems)
			return true;
	}

	return false;
}



bool CHomonym::IsOb1() const
{ 
	return m_bOborot1;	
}

bool CHomonym::IsOb2() const
{ 
	return m_bOborot2;	
}

bool CHomonym::IsIsOb() const
{
	return m_bInOb;	
}


// Returns true if lemma is non-null and equals the stored lemma exactly
// (case-sensitive; the stored lemma was upper-cased by SetLemma).
bool CHomonym::IsLemma(const char* lemma) const
{
	return (lemma != 0) && (m_strLemma.compare(lemma) == 0);
}

// Asks the grammar table whether this homonym (parts of speech + lemma)
// counts as a syntactic noun.
bool CHomonym::IsSynNoun() const
{
	const bool bSynNoun = GetOpt()->GetGramTab()->IsSynNoun(m_iPoses, m_strLemma.c_str());
	return bSynNoun;
}

// Asks the grammar table whether the parts of speech of this homonym make it
// a morphological noun.
bool  CHomonym::IsMorphNoun() const
{
	const bool bMorphNoun = GetOpt()->GetGramTab()->IsMorphNoun(m_iPoses);
	return bMorphNoun;
}
// Asks the grammar table whether this homonym (parts of speech + grammems)
// is a left noun modifier.
bool	CHomonym::IsLeftNounModifier() const
{
	const bool bModifier = GetOpt()->GetGramTab()->is_left_noun_modifier(m_iPoses, m_iGrammems);
	return bModifier;
}


// Returns the textual form of the grammems, as produced by
// GetGrammemsByAncodes() with the current grammar table.
string	CHomonym::GetGrammemsStr() const
{
	string Result = GetGrammemsByAncodes(GetOpt()->GetGramTab());
	return Result;
}

// Returns the name of the part of speech stored in m_iTagID, or an empty
// string when the part of speech is unknown.
string	CHomonym::GetPartOfSpeechStr() const
{
	if (m_iTagID != UnknownPartOfSpeech)
		return GetOpt()->GetGramTab()->GetPartOfSpeechStr(m_iTagID);

	return string();
}



// Parses the morphological section of a line. The space-separated fields are
// read in this order:
//   lem-sign (3 chars: '+', '-' or '?' followed by the common gramcode),
//   lemma, ancodes, paradigm id, frequency.
// Returns true for an empty section (nothing to parse) and after a complete
// parse; returns false when the lem-sign is malformed (an error message is
// reported) or when a field is missing. Fields read before a failure stay
// assigned.
bool CHomonym::ProcessLemmaAndGrammems(const char* CurrStr)
{
	StringTokenizer tok(CurrStr," ");

	// no tokens at all: nothing morphological to read (e.g. a punctuation mark)
	if( !tok() )
		return true;

	// field 1: lem-sign
	const char* pSign = tok.val();
	const bool bKnownSign = (pSign[0] == '+') || (pSign[0] == '-') || (pSign[0] == '?');
	if ((strlen(pSign) != 3) || !bKnownSign)
	{
		ErrorMessage( Format("Bad lem-sign in \"%s\" ",CurrStr) );
		return false;
	}
	m_LemSign = pSign[0];
	m_CommonGramCode = pSign+1;

	// field 2: lemma
	if( !tok() )
		return false;
	SetLemma(tok.val());
	if( !m_strLemma.empty() )
	{
		// length of the part before the first hyphen
		m_iCmpnLen = strcspn (m_strLemma.c_str(),"-");
		m_bCmplLem = ((BYTE)m_iCmpnLen != m_strLemma.length());
	}

	// field 3: ancodes
	if( !tok() )
		return false;
	m_GramCodes = tok.val();
	m_bUnkGramcodes = ((unsigned char)m_GramCodes[0] == '?');

	// field 4: paradigm id
	if( !tok() )
		return false;
	m_lPradigmID = atoi(tok.val());

	// field 5: frequency; zero is replaced by one
	if( !tok() )
		return false;
	m_lFreqHom = atoi(tok.val());
	if (m_lFreqHom == 0)
		m_lFreqHom = 1;

	return true;
}




bool CHomonym::CompareWithPredefinedWords(const SDatItems& DatItems) const
{
	if (!(DatItems.m_Poses & m_iPoses)) 
		return false;

	string  T = m_strLemma;
	RmlMakeLower(T, GetOpt()->m_Language);
	return binary_search(DatItems.m_vectorDatItems.begin(), DatItems.m_vectorDatItems.end(), T);
	
}


// Clears every oborot mark of this homonym and resets the oborot number to -1.
void CHomonym::DeleteOborotMarks()
{
	m_OborotNo = -1;
	m_bInOb = false;
	m_bOborot2 = false;
	m_bOborot1 = false;
}

//===============================================
//================= CLASS WORD ==================
//===============================================

// Creates a word that belongs to sentence pSent; all other state is brought
// to its defaults by Reset().
CWord::CWord(CSentence* pSent)
{
	this->m_pSent = pSent;
	this->Reset();
}

// Returns the syntax options of the sentence this word belongs to.
const	CSyntaxOpt* CWord::GetOpt() const
{
	const CSyntaxOpt* pOptions = m_pSent->GetOpt();
	return pOptions;
}


bool CWord::HasDes(const char* strWhat) const
{
	return m_GraphemDescriptors.find(strWhat) != string::npos;
}



// Byte value 160 (0xA0). A word whose first byte has this value is treated as
// a space by CWord::ProcessGraphematicalDescriptors.
// NOTE(review): presumably the non-breaking space of the single-byte code page in use - confirm.
#define   StupidSymbol1 160
// Defined elsewhere. This file treats a result of -1 as "the line has no
// morphological section".
extern int GetMorphSignPosition(const char* LineStr);

// Parses the graphematical descriptors of a line and derives the token type,
// the register and the marker flags of the word from them.
//
// LineStr  text starting at the descriptors. Everything before the position
//          reported by GetMorphSignPosition() is stored as the descriptor
//          text; when that function returns -1 the whole string is stored.
// Returns the end point of the graphematical descriptors, i.e. the offset in
// LineStr at which the morphological section starts.
//
// m_strWord and m_strUpperWord are read here, so the word text has to be set
// before this call. m_TokenType keeps its previous value when no token-type
// descriptor is present in the line.
int CWord::ProcessGraphematicalDescriptors(const char* LineStr)
{
	int MorphSignPos = GetMorphSignPosition(LineStr);
	if (MorphSignPos == -1)
		MorphSignPos = strlen(LineStr);

	m_GraphemDescriptors = string (LineStr, MorphSignPos);
	m_bSpace =			HasDes("SPC") 
					||	HasDes("EOLN")		   
					||	((BYTE)m_strWord[0] == StupidSymbol1)
					||	( ((BYTE)m_strWord[0] == '_') && (m_strWord.length() == 1));

	// the first token type whose name occurs among the descriptors wins
	for (int k=(int)RLE; k < OTHER_TOKEN_TYPE; k++)
		if (HasDes (TokenTypeToString((MainTokenTypeEnum)k).c_str() ))
		{
			m_TokenType = (MainTokenTypeEnum)k;
			break;
		};
	m_bLastInSent = HasDes ("SENT_END");
	m_bWord  = (m_TokenType==RLE) || (m_TokenType==LLE); 

	// start/end markers of graphematical pairs
	m_bFileName1 = HasDes ("FILE1"); 
	m_bFileName2 = HasDes ("FILE2"); 
	m_bKeyb1    = HasDes ("KEY1");
	m_bKeyb2    = HasDes ("KEY2");
	m_bFio1    = HasDes ("FAM1");
	m_bFio2    = HasDes ("FAM2");
	m_bDate1 = HasDes ("DT1"); 
	m_bDate2 = HasDes ("DT2"); 

	// register descriptors are tested in this fixed order
	if (HasDes ("Aa"))
		m_Register = UpLow;
	else
	if (HasDes ("AA"))
		m_Register = UpUp;
	else
	if (HasDes ("aa"))
		m_Register = LowLow;		
	else
		m_Register = AnyRegister;		


	m_bSimilarConj = m_pSent->GetCoordConjNo(m_strUpperWord.c_str()) != GetOpt()->GetCoordConjCount();

	m_bComma = (m_strWord.length() == 1)  && (m_strWord[0] == ',');	
	m_bDash = (m_strWord.length() == 1)  && (m_strWord[0] == '-');	

	bool bRomanNumber = is_roman_number(m_strWord.c_str(), m_strWord.length() );
	// keep the result of find() in its own unsigned type, so that the
	// comparison with npos involves no narrowing to int
	const string::size_type hyphen_occur = m_strWord.find('-');
	if ((hyphen_occur != string::npos) && (hyphen_occur != 0))
	{
		// a word with a hyphen that is not its first character: only the part
		// before the first hyphen decides (this overrides the whole-word test)
		bRomanNumber = is_roman_number(m_strWord.c_str(), hyphen_occur);
	};
	if (bRomanNumber)
		m_TokenType = ROMAN_NUM;
	return MorphSignPos;

}

// Strips trailing whitespace from s in place.
// s    buffer holding at least *len characters
// len  in: current length of s; out: length after trimming
// Every removed character is overwritten with a terminating zero.
void rtrim (char* s,int* len)
{
	for (int last = *len - 1; last >= 0 && isspace((unsigned char)s[last]); last--)
	{
		s[last] = 0;
		*len = last;
	}
}


// Parses a continuation line (one that starts with two spaces) and appends
// the homonym it describes to this word.
// The line is right-trimmed in place. The first field and the run of
// digits/spaces/minus signs after it are skipped; the rest is parsed as
// graphematical descriptors followed by the morphological section.
// Note that the descriptor parsing updates the word-level flags again.
// Returns false if the morphological section cannot be parsed; in that case
// no homonym is added.
bool CWord::AddNextHomonym(char* strPlmLine)
{
	assert (strPlmLine[0] == ' ');
	assert (strPlmLine[1] == ' ');
	assert (!m_Homonyms.empty() );

	char* pLine = strPlmLine + 2;

	int LineLen = strlen(pLine);
	rtrim(pLine, &LineLen);

	const int WordFieldLen = strcspn(pLine, " ");
	const int NumberFieldsLen = strspn(pLine + WordFieldLen, " -1234567890");

	CHomonym Homonym(m_pSent);

	pLine += WordFieldLen + NumberFieldsLen;

	const int MorphSectionOffset = ProcessGraphematicalDescriptors(pLine);
	if (!Homonym.ProcessLemmaAndGrammems(pLine + MorphSectionOffset))
		return false;

	m_pSent->InitHomonymLanguageSpecific(Homonym, this);
	m_Homonyms.push_back(Homonym);

	return true;
}
void CWord::InitSubordConjNoSlot()
{
	m_SubordinateConjNo = -1;
	for(int i = 0 ; i < m_Homonyms.size() ; i++ )
	{
		{
			CHomonym& pHomonym = m_Homonyms[i];
			if (		!m_bPredicted 
					&&	(			(GetOpt()->m_Language != morphGerman)
							||		!pHomonym.HasPos(gPRP) // "bis", "als" and  so on
						)
				)

				m_SubordinateConjNo = GetOpt()->m_pOborDic->FindSubConj(pHomonym.m_strLemma.c_str() );
		}
	}

};

bool  CWord::ProcessGramCodes()
{
	if(m_Homonyms.size() == 0)
		return false;

	assert (m_Homonyms.size() != 0);

	/*
		       .       
		  ,       . 
		
		-                                             89 15 RLE aa HYP  ?? -1
		                                                        89 8 RLE aa HP1 ++   111409 # 0
		  -                                                             97 1 PUN HYP
		                                                          98 6 RLE aa HP2 ++   117398 # 0
			  .
	*/
	if (m_Homonyms[0].m_strLemma.empty())
		if (m_Homonyms.size() > 1)
			m_Homonyms.erase(m_Homonyms.begin());


	for(int i = 0 ; i < m_Homonyms.size() ; i++ )
		if	(		( m_bWord && (GetOpt()->m_Language == morphRussian))
				||	( (m_TokenType == LLE) && (GetOpt()->m_Language != morphRussian))
			)
		{
			CHomonym& pHomonym = m_Homonyms[i];

			m_pSent->InitHomonymMorphInfo(pHomonym);

			pHomonym.m_CoordConjNo = m_pSent->GetCoordConjNo(pHomonym.m_strLemma.c_str());
		}

	InitSubordConjNoSlot();
	sort(m_Homonyms.begin(), m_Homonyms.end());

	return true;
}




bool CWord::ProcessPlmLineForTheFirstHomonym(char* strPlmLine)
{
	CHomonym Homonym (m_pSent);
	
	//    ,   
	int iPlmLineLen = strlen(strPlmLine);
	rtrim(strPlmLine,&iPlmLineLen);

	int iFirstFieldLen = strcspn(strPlmLine," ");
	char WordBuffer[CriticalTokenLength+1];
	strncpy(WordBuffer, strPlmLine, iFirstFieldLen);
	WordBuffer[iFirstFieldLen] = '\0';
	SetWordStr(WordBuffer);


	int iSomeUnusefulDigitsLen = strspn(strPlmLine + iFirstFieldLen," -1234567890");

	//  reading file position of an item from graline
	int dummy;
	if (sscanf(strPlmLine + iFirstFieldLen, "%i %i", &m_GraphematicalUnitOffset, &dummy) != 2)
		return false;

	strPlmLine += iFirstFieldLen + iSomeUnusefulDigitsLen;

	int MorphSectionOffset = ProcessGraphematicalDescriptors(strPlmLine);

	if( m_bSpace )
		return true;

	if( MorphSectionOffset == strlen(strPlmLine) )
		Homonym.SetLemma(m_strWord);
	else
		if (!Homonym.ProcessLemmaAndGrammems(strPlmLine+MorphSectionOffset))
			return false;

	m_pSent->InitHomonymLanguageSpecific(Homonym, this);

	if	(		(m_strWord == "\"")
			||	(m_strWord == "'")
		)
		m_TokenType = OTHER_TOKEN_TYPE;

	m_Homonyms.push_back(Homonym);


	if (Homonym.m_LemSign != '+')
			m_bPredicted = true;

	return true;
}



class CHomonymPartOfSpeech_eq : public unary_function<CWord,bool>
{
	int m_iPartOfSpeech;

public:
	CHomonymPartOfSpeech_eq(int iPartOfSpeech): m_iPartOfSpeech(iPartOfSpeech) {};
	bool operator() (const CHomonym& homonym)
	{ return homonym.m_iTagID == m_iPartOfSpeech; }

};


// Searches the homonyms, starting at index iStartHom, for the first one whose
// m_iTagID equals iPartOfSpeech.
// iHomNum  out: index of the homonym found, or -1 if the search found nothing.
//          It is left untouched when iStartHom is out of range.
// Returns true if such a homonym exists.
bool CWord::IsThisPartOfSpeech(int iPartOfSpeech,int& iHomNum, int iStartHom) const
{
	const int HomCount = (int)m_Homonyms.size();
	if ((iStartHom < 0) || (iStartHom >= HomCount))
		return false;

	for (int HomNo = iStartHom; HomNo < HomCount; HomNo++)
		if (m_Homonyms[HomNo].m_iTagID == iPartOfSpeech)
		{
			iHomNum = HomNo;
			return true;
		}

	iHomNum = -1;
	return false;
}


// Marks every homonym except number iHom for deletion; homonym iHom itself
// gets its deletion mark cleared.
void CWord::SetAllOtherHomsDel(int iHom)
{
	const int HomCount = (int)m_Homonyms.size();
	for (int HomNo = 0; HomNo < HomCount; HomNo++)
	{
		const bool bKeep = (HomNo == iHom);
		m_Homonyms[HomNo].m_bDelete = !bKeep;
	}
}





// Returns true if this word carries the opening mark of any graphematical
// pair: date, file name, "GDC1" descriptor, oborot, keyboard or FIO.
// The checks run in that order and stop at the first hit.
bool CWord::IsFirstOfGraPair() const
{
	if (m_bDate1 || m_bFileName1)
		return true;

	if (HasDes("GDC1"))
		return true;

	if (IsOborot1())
		return true;

	return m_bKeyb1 || m_bFio1;
}

// Returns true if this word carries the closing mark of any graphematical
// pair: date, file name, "GDC2" descriptor, oborot, keyboard or FIO.
// The checks run in that order and stop at the first hit.
bool CWord::IsSecondOfGraPair() const
{
	if (m_bDate2 || m_bFileName2)
		return true;

	if (HasDes("GDC2"))
		return true;

	if (IsOborot2())
		return true;

	return m_bKeyb2 || m_bFio2;
}

// Returns true if this word carries the opening mark of a graphematical pair
// of the given type. Types other than Date, WebAddr, Oborot, Keyb and
// GermanDividedCompound always yield false.
bool CWord::IsFirstOfGraPair(EGraPairType type) const
{
	if (type == Date)
		return m_bDate1;

	if (type == WebAddr)
		return m_bFileName1;

	if (type == Oborot)
		return IsOborot1();

	if (type == Keyb)
		return m_bKeyb1;

	if (type == GermanDividedCompound)
		return HasDes("GDC1");

	return false;
}


// Returns true if this word carries the closing mark of a graphematical pair
// of the given type. Types other than Date, WebAddr, Oborot, Keyb and
// GermanDividedCompound always yield false.
bool CWord::IsSecondOfGraPair(EGraPairType type) const
{
	if (type == Date)
		return m_bDate2;

	if (type == WebAddr)
		return m_bFileName2;

	if (type == Oborot)
		return IsOborot2();

	if (type == Keyb)
		return m_bKeyb2;

	if (type == GermanDividedCompound)
		return HasDes("GDC2");

	return false;
}

// Brings the word back to its initial state: containers are emptied, flags
// are cleared, and indices/offsets are set to -1. m_pSent, the word strings
// and the descriptor text are not touched.
void CWord::Reset()
{
	// containers
	m_Homonyms.clear();
	m_MainVerbs.clear();

	// token classification
	m_TokenType = OTHER_TOKEN_TYPE;
	m_Register = AnyRegister;
	m_bWord = false;
	m_bSpace = false;
	m_bComma = false;
	m_bDash = false;
	m_bLastInSent = false;
	m_bHasSpaceBefore = false;
	m_bSmallNumber = false;
	m_bPredicted = false;

	// marks of graphematical pairs
	m_bKeyb1 = false;
	m_bKeyb2 = false;
	m_bFio1 = false;
	m_bFio2 = false;
	m_bFileName1 = false;
	m_bFileName2 = false;
	m_bDate1 = false;
	m_bDate2 = false;

	// termin (thesaurus) data
	m_ThesType = NoneThes;
	m_bInTermin = false;
	m_bFirstWordInTermin = false;
	m_bLastWordInTermin = false;
	m_iTerminID = -1;

	// conjunction data
	m_bSimilarConj = false;
	m_SubordinateConjNo = -1;

	// everything else
	m_iClauseNo = -1;
	m_bArtificialCreated = false;
	m_iReduplication = 0;
	m_bBadParenthesis = false;
	m_GraphematicalUnitOffset = -1;
	m_bDeleted = false;
	m_TrennbarePraefixWordNo = -1;
}


// Returns a copy of *pHomonym with the part of speech replaced by iTagID and
// the grammems replaced by iGrammems. The part-of-speech mask gets the single
// bit of iTagID, or becomes empty when iTagID is UnknownPartOfSpeech.
CHomonym CWord::CloneHomonymByAnotherHomonym(const CHomonym* pHomonym, QWORD iGrammems, BYTE iTagID) const 
{
	CHomonym Clone(*pHomonym);

	Clone.m_iGrammems = iGrammems;
	Clone.m_iTagID = iTagID;
	Clone.m_iPoses = (iTagID == UnknownPartOfSpeech) ? 0 : (1 << iTagID);

	return Clone;
}



void CWord::CloneHomonymForOborot()
{
	assert(m_Homonyms.size() > 0);

	CHomonym H = CloneHomonymByAnotherHomonym(&m_Homonyms.back(), 0, UnknownPartOfSpeech);
	
	if( IsOborot1() )
	{
		//nim :  .. =      GF 
		if (!m_bBadParenthesis)
			if ( GetOborotPtr()->HasPartOfSpeech(GetOpt()->m_RusParenthesis) )
				H.m_iTagID = GetOpt()->m_RusParenthesis;
		
	}
	H.m_lPradigmID = -1;

	//         
	DeleteOborotMarks();
	
	//        ,       CWord::GetOborotNo()
	m_Homonyms.insert(m_Homonyms.begin(), H);
}












// for German 
// Predicates defined elsewhere. CWord::InitializePlmLine stores their results
// in the flags fl_article_for_weak_declination and
// fl_article_for_mixed_declination.
extern bool is_article_for_weak_declination(const CHomonym& H);
extern bool is_article_for_mixed_declination(const CHomonym& H);
// ===

// Fills pPlmWord with the data of this word and of its homonym number HomonymNo.
// Returns false, leaving pPlmWord untouched, when HomonymNo is out of range.
// pPlmWord receives pointers into this word and into the chosen homonym
// (lemma, word strings, gramcodes, preposition list), so it must not be used
// after the word or its homonym list has changed.
bool CWord::InitializePlmLine(CSynPlmLine& pPlmWord, int HomonymNo)  const
{
	if( (HomonymNo < 0) || (HomonymNo >= m_Homonyms.size()) )
		return false;

	// morphological data and string pointers of the selected homonym
	const CHomonym& pActiveHomonym = m_Homonyms[HomonymNo];
	pPlmWord.SetGrammems(pActiveHomonym.m_iGrammems) ;
	pPlmWord.m_lemma = pActiveHomonym.m_strLemma.c_str();
	pPlmWord.SetPoses(pActiveHomonym.m_iPoses);
	pPlmWord.tag_id = pActiveHomonym.m_iTagID;
	pPlmWord.m_word =  m_strWord.c_str();
	pPlmWord.m_word_upper = m_strUpperWord.c_str();

	// an oborot number of -1 means that no oborot is attached
	pPlmWord.m_pOborot = (pActiveHomonym.m_OborotNo==-1)? 0 : pActiveHomonym.GetOborotPtr();
	pPlmWord.m_gramcodes = pActiveHomonym.m_GramCodes.c_str();

	// flags derived from the token type and from the homonym
	// (fl_digit and fl_digits are both set from the same condition)
	pPlmWord.SetFlag(fl_comma, m_bComma);
	pPlmWord.SetFlag(fl_digit, m_TokenType == NUM);
	pPlmWord.SetFlag(fl_ile , m_TokenType == LLE);
	pPlmWord.SetFlag(fl_le , m_bWord);
	pPlmWord.SetFlag(fl_month , pActiveHomonym.m_bMonth);
	pPlmWord.SetFlag(fl_digits, m_TokenType == NUM);
	pPlmWord.SetFlag(fl_oborot1 , pActiveHomonym.m_bOborot1);
	pPlmWord.SetFlag(fl_oborot2 , pActiveHomonym.m_bOborot2);
	pPlmWord.SetFlag(fl_punct , m_TokenType == PUNCTUAT);
	pPlmWord.SetFlag(fl_small_number , pActiveHomonym.m_bSmallNumber);
	pPlmWord.SetFlag(fl_unk_gramcodes , pActiveHomonym.m_bUnkGramcodes);
	pPlmWord.SetFlag(fl_adv_adj , pActiveHomonym.m_bAdvAdj);
	pPlmWord.SetFlag(fl_ambiguous, m_Homonyms.size() > 1);
	
	// syntactic properties of the homonym
	pPlmWord.m_CoordConjNo = pActiveHomonym.m_CoordConjNo;
	pPlmWord.SetFlag(fl_can_syn_depend_on_adj , pActiveHomonym.m_bCanSynDependOnAdj);
	pPlmWord.SetFlag(fl_can_syn_depend_on_adv , pActiveHomonym.m_bCanSynDependOnAdv);
	pPlmWord.SetFlag(fl_can_subdue_infinitive , pActiveHomonym.m_bCanSubdueInfinitive);
	pPlmWord.SetFlag(fl_can_subdue_instr , pActiveHomonym.m_bCanSubdueInstr);
	pPlmWord.SetFlag(fl_noun_has_adj_declination , pActiveHomonym.m_bNounHasAdjectiveDeclination);
	
	
	
	// marks of graphematical pairs and remaining token-level flags
	pPlmWord.SetFlag(fl_file_name1,  m_bFileName1);
	pPlmWord.SetFlag(fl_file_name2 ,  m_bFileName2);
	pPlmWord.SetFlag(fl_keyb1 , m_bKeyb1);
	pPlmWord.SetFlag(fl_keyb2 , m_bKeyb2);
	pPlmWord.SetFlag(fl_dg_ch , m_TokenType == NUM_CHAR);
	pPlmWord.SetFlag(fl_dt_fork , (m_bDate1 || m_bDate2));
	pPlmWord.SetFlag(fl_russian_odin , pActiveHomonym.m_bRussianOdin);
	pPlmWord.SetFlag(fl_in_oborot , pActiveHomonym.IsIsOb());

	pPlmWord.SetFlag(fl_standard_param_abbr , GetOpt()->GetGramTab()->IsStandardParamAbbr(m_strUpperWord.c_str()));
	pPlmWord.SetFlag(fl_ranknoun , m_pSent->IsProfession(pActiveHomonym));
	// is_single_punct() is evaluated on pPlmWord itself
	// NOTE(review): it presumably relies on fields assigned above - keep these
	// two calls after those assignments.
	pPlmWord.SetFlag(fl_dash, pPlmWord.is_single_punct('-') );
	pPlmWord.SetFlag(fl_fullstop , pPlmWord.is_single_punct('.') );
	pPlmWord.SetFlag(fl_bracket, HasDes("OPN") || HasDes("CLS") );
	
	pPlmWord.SetFlag(fl_morph_noun, GetOpt()->GetGramTab()->IsMorphNoun(pActiveHomonym.m_iPoses));
	pPlmWord.SetFlag(fl_syn_noun, pActiveHomonym.IsSynNoun());
	pPlmWord.m_pSimplePrepNos = &pActiveHomonym.m_SimplePrepNos;

	// German article classification (predicates are defined elsewhere)
	pPlmWord.SetFlag(fl_article_for_weak_declination,is_article_for_weak_declination(pActiveHomonym));
	pPlmWord.SetFlag(fl_article_for_mixed_declination, is_article_for_mixed_declination(pActiveHomonym));

	
	pPlmWord.SetFlag(fl_has_space_before, m_bHasSpaceBefore);

	// FIO marks and register
	pPlmWord.SetFlag(fl_fam1, m_bFio1);
	pPlmWord.SetFlag(fl_fam2, m_bFio2);
	pPlmWord.SetFlag(fl_first_char_is_upper, (m_Register == UpLow) || (m_Register == UpUp));
	pPlmWord.SetFlag(fl_register_BB, (m_Register == UpUp));
	
	pPlmWord.SetFlag(fl_morph_predicted, m_bPredicted);
	

	
	
	return true;
}

void CWord::DeleteOborotMarks()
{
	for (int i = 0; i < m_Homonyms.size(); i++)
	{
		m_Homonyms[i].DeleteOborotMarks();
	};
}

int		CWord::GetOborotNo() const
{
	assert ( !m_Homonyms.empty() );
	return m_Homonyms[0].m_OborotNo;
};


bool		CWord::IsOborot1() const
{
	assert ( !m_Homonyms.empty() );
	return m_Homonyms[0].m_bOborot1;
};

bool		CWord::IsOborot2() const
{
	assert ( !m_Homonyms.empty() );
	return m_Homonyms[0].m_bOborot2;
};

// Returns the oborot pointer of the first homonym; the word must have at
// least one homonym.
const COborotForSyntax*	CWord::GetOborotPtr() const
{
	assert ( !m_Homonyms.empty() );
	const CHomonym& First = m_Homonyms.front();
	return First.GetOborotPtr();
}


bool		CWord::IsInOborot() const
{
	assert ( !m_Homonyms.empty() );
	return m_Homonyms[0].m_bInOb;
};


bool  CWord::CanBeSynNoun() const
{
	for( int i = 0 ; i <m_Homonyms.size() ; i++ )
		if( m_Homonyms[i].IsSynNoun() )
			return true;	

	return false;
}




// Erases the first homonym whose m_iTagID equals iPartOfSpeech.
// A word with exactly one homonym is left alone, so that it never loses its
// only reading.
void CWord::KillHomonymOfPartOfSpeech(int iPartOfSpeech)
{
	const size_t HomCount = m_Homonyms.size();
	if (HomCount == 1)
		return;

	size_t HomNo = 0;
	while ((HomNo < HomCount) && (m_Homonyms[HomNo].m_iTagID != iPartOfSpeech))
		HomNo++;

	if (HomNo < HomCount)
		EraseHomonym((int)HomNo);
}
void CWord::SetHomonymsDel(bool Value)
{
	for(int i = 0 ; i < m_Homonyms.size() ; i++ )
	{
		m_Homonyms[i].m_bDelete = Value;
	}
}

void CWord::EraseHomonym(int iHom)
{
	//assert (m_pSent->m_Clauses.empty());

	m_Homonyms.erase(m_Homonyms.begin() + iHom);

	BuildTerminalSymbolsByWord();
}

void CWord::DeleteMarkedHomonymsBeforeClauses()
{
	for(int i = m_Homonyms.size() - 1 ; i >= 0 ; i-- )
	{
		if( m_Homonyms[i].m_bDelete )
			EraseHomonym(i);			
	}

}

// Sets the word form, and keeps an upper-cased copy (upper-casing follows the
// current language) in m_strUpperWord.
void	CWord::SetWordStr (string NewValue)
{
	m_strUpperWord = m_strWord = NewValue;
	RmlMakeUpper(m_strUpperWord, GetOpt()->m_Language);
}



// Returns true if at least one homonym has exactly the lemma strLemma.
bool CWord::FindLemma(string strLemma)  const
{
	const char* const pLemma = strLemma.c_str();

	for (vector<CHomonym>::const_iterator it = m_Homonyms.begin(); it != m_Homonyms.end(); ++it)
		if (it->IsLemma(pLemma))
			return true;

	return false;
}

// Returns the index of the first homonym that has the part of speech POS,
// or -1 if there is none.
int  CWord::GetHomonymByPOS(BYTE POS) const
{
	const int HomCount = (int)m_Homonyms.size();
	int HomNo = 0;
	while (HomNo < HomCount)
	{
		if (m_Homonyms[HomNo].HasPos(POS))
			return HomNo;
		HomNo++;
	}

	return -1;
}

// Returns the index of the first homonym that has the given grammem,
// or -1 if there is none.
int CWord::GetHomonymByGrammem(BYTE grammem) const
{
	const int HomCount = (int)m_Homonyms.size();
	int HomNo = 0;
	while (HomNo < HomCount)
	{
		if (m_Homonyms[HomNo].HasGrammem(grammem))
			return HomNo;
		HomNo++;
	}

	return -1;
}
// Returns the index of the first homonym that has both the given grammem and
// the part of speech POS, or -1 if there is none.
int CWord::GetHomonymByPOSandGrammem(BYTE POS, BYTE grammem) const
{
	const int HomCount = (int)m_Homonyms.size();
	for (int HomNo = 0; HomNo < HomCount; HomNo++)
	{
		const CHomonym& Hom = m_Homonyms[HomNo];
		// the grammem is tested first, the part of speech second
		if (!Hom.HasGrammem(grammem))
			continue;
		if (Hom.HasPos(POS))
			return HomNo;
	}

	return -1;
}



bool CheckGrammems(const CHomonym& L, const CGrammarItem& I)
{
	if (I.m_MorphPattern.m_SearchStatus != AnyStatus)
	{
		if ((I.m_MorphPattern.m_SearchStatus == FoundInDictionary) == (L.m_lPradigmID == -1))
			return false;
	};

	//  nor punctuation marks neither abbreviations can match a morphological pattern
	if (!L.m_iPoses)
		return false;
	
	if	((I.m_MorphPattern.m_Grammems & L.m_iGrammems) != I.m_MorphPattern.m_Grammems) 
		return false;

	return		(I.m_MorphPattern.m_Poses== 0) 
			||	(I.m_MorphPattern.m_Poses & L.m_iPoses) > 0; 

};

bool CWord::IsEqualToGrammarItem(const CHomonym& L, const CGrammarItem& I)
{
	if (!I.m_MorphPattern.m_GrmAttribute.empty())
	{
		if (!CheckGrammems(L,I))
			return false;
	};
	
	if (I.m_TokenType != m_TokenType)
		return false;


	if (!I.m_Token.empty())
		if	(			(L.m_strLemma != I.m_Token) // equal lemmas
					&&	(		(I.m_Token[0]!= '*') // or equality with right truncation
							||	(L.m_strLemma.length() <= I.m_Token.length())
							||	(I.m_Token.substr(1) != L.m_strLemma.substr(L.m_strLemma.length()-I.m_Token.length() + 1)) 
						)
			)
			return false;

	if (I.m_pListFile != NULL)
	{
		
		const StringSet& PossibleLemmas = I.m_pListFile->m_PossibleLemmas;
		if (PossibleLemmas.find(L.m_strLemma) == PossibleLemmas.end()) // check lemma
			if	(		(L.m_strLemma == m_strUpperWord) // check the token itself
					||	!m_bPredicted
					||	PossibleLemmas.find(m_strUpperWord) == PossibleLemmas.end() 
				)
			{
				int hyphen = L.m_strLemma.find('-'); // check the postfix after the last hyphen, if there is a hyphen 
				if	(		(hyphen == string::npos)
						|| ( PossibleLemmas.find(L.m_strLemma.substr(hyphen+1)) == PossibleLemmas.end() )
					)
				return false;
			};
	};

	
	return true;
};



// we should try to call this procedure only on the first stage, 
// not each time when it is needed
void CWord::BuildTerminalSymbolsByWord()							
{
	if (GetOpt()->m_Language != morphGerman) return;


	const CWorkGrammar& G = GetOpt()->m_FormatsGrammar;
	const vector<CGrammarItem>&	TerminalSymbols = G.m_UniqueGrammarItems;

	// adding an end of stream symbol to each word
	assert (!m_Homonyms.empty());
	m_AutomatSymbolInterpetationUnion.clear();
	m_AutomatSymbolInterpetationUnion.insert(CInputSymbol(G.GetEndOfStreamSymbol(), "", ""));

	for (size_t j=0; j<m_Homonyms.size(); j++)
		m_Homonyms[j].m_AutomatSymbolInterpetation.clear();

	if (m_bDeleted) 
		return;

	for (size_t i=0; i<TerminalSymbols.size(); i++)
	{
		const CGrammarItem& I = TerminalSymbols[i];

		if (I.m_bMeta) continue;

		if (!I.m_bCanHaveManyHomonyms && (m_Homonyms.size() > 1)) continue;

		if (I.m_Register != AnyRegister)
		{
				if (I.m_Register != m_Register)
					continue;
		};
				

		for (size_t j=0; j<m_Homonyms.size(); j++)
			if (IsEqualToGrammarItem(m_Homonyms[j], I))
			{
				CInputSymbol N(i, m_Homonyms[j].m_GramCodes, m_Homonyms[j].m_CommonGramCode);
				m_Homonyms[j].m_AutomatSymbolInterpetation.insert(N);
				m_AutomatSymbolInterpetationUnion.insert(N);
			};
	}

};
