// ==========  This file is under  LGPL, the GNU Lesser General Public Licence
// ==========  Dialing Postmorphological Module (www.aot.ru)
// ==========  Copyright by Dmitry Pankratov, Alexey Sokirko (1999-2002)

#include "MAPostMain.h"
#include "../common/PlmLine.h"
#include "../common/utilit.h"
#include "../common/rus_numerals.h"

// Appends the message `s` as one line to the file named by m_LogFileName.
// Nothing happens while m_LogFileName is empty. If the file cannot be opened
// the name is reset to "", so every later call returns immediately.
void CMAPost::log(string s)
{
	if (m_LogFileName == "")
		return;

	try
	{
		FILE* out = fopen(m_LogFileName.c_str(), "a");
		if (out)
		{
			fprintf (out, "%s\n", s.c_str());
			fclose(out);
		}
		else
		{
			// the log file is not writable: switch logging off
			m_LogFileName = "";
		}
	}
	catch (...)
	{
		// logging is best effort; failures are deliberately ignored
	}
}


// Runs all post-morphological rules over m_Lines in a fixed order.
// The name of every rule is written to the log before the rule starts, so the
// last log line names the rule that was running if something goes wrong.
// Rules placed under m_bCanChangeInputText are run only when that flag is set.
// Any exception thrown by a rule is reported through ErrorMessage and swallowed.
void CMAPost::RunRules()
{
	try
	{
		log("Rule_FilterProperName");
		Rule_FilterProperName();

		log("Rule_AdverbFromAdjectives");
		Rule_AdverbFromAdjectives();

		log("Odnobuk");
		Odnobuk();

		if (m_bCanChangeInputText)
		{
			log("Cifrdef");
			Cifrdef();

			log("ILeDefLe");
			ILeDefLe();

			log("Defslo");
			Defslo();
		}

		log("ParticipleAndVerbInOneForm");
		ParticipleAndVerbInOneForm();

		log("PronounP_Pronoun_Homonymy");
		PronounP_Pronoun_Homonymy();

		if (m_bCanChangeInputText)
		{
			log("FixedCollocations");
			FixedCollocations();
		}

		log("CorrectOborots");
		CorrectOborots();

		log("SemiAdjectives");
		SemiAdjectives();

		// the label now carries the same name as the function, like all others
		log("Rule_Interjections");
		Rule_Interjections();

		log("SemiNouns");
		SemiNouns();

		log("Rule_UZHE");
		Rule_UZHE();

		log("Rule_Ideclinable");
		Rule_Ideclinable();

		log("Rule_DeadPlurals");
		Rule_DeadPlurals();

		log("Rule_RelationalAdjective");
		Rule_RelationalAdjective();

		// disabled rules
		//log("Rule_Surnames");
		//Rule_Surnames();

		//log("Rule_FemineSurnames");
		//Rule_FemineSurnames();

		log("Rule_Fio");
		Rule_Fio();

		if (m_bCanChangeInputText)
		{
			log("Rule_NumeralAdjectives");
			Rule_NumeralAdjectives();
		}

		log("Rule_No");
		Rule_No();

		if (m_bCanChangeInputText)
		{
			log("Rule_QuoteMarks");
			Rule_QuoteMarks();
		}

		log("Rule_ILE");
		Rule_ILE();

		if (m_bCanChangeInputText)
		{
			log("Rule_KAK_MOZHNO");
			Rule_KAK_MOZHNO();

			log("Rule_Redublication");
			Rule_Redublication();

			log("Rule_CHTO_ZA");
			Rule_CHTO_ZA();

			log("Rule_VOT_UZHE");
			Rule_VOT_UZHE();

			// disabled rule
			//log("Memlem");
			//Memlem();
		}

		log("Rule_UnknownNames");
		Rule_UnknownNames();

		log("Rule_SOROK");
		Rule_SOROK();

		// the last two rules were the only ones that ran without a log record
		log("Rule_Abbreviation");
		Rule_Abbreviation();

		log("Rule_ChangePatronymicLemmas");
		Rule_ChangePatronymicLemmas();
	}
	catch(...)
	{
		ErrorMessage("Mapost", "Mapost has crashed!");
		return;
	}
}



// Loads the morphology output lines from piInTextItems into m_Lines, runs all
// rules (RunRules) and writes the resulting lines to piOutTextItems.
// Returns false if piInTextItems is null, if a line cannot be parsed (the
// offending line is reported through ErrorMessage) or if any other exception
// is thrown; returns true otherwise.
bool CMAPost::ProcessData(const CPlmLineCollection *piInTextItems, CPlmLineCollection& piOutTextItems)
{
	// a null input used to be dereferenced unconditionally
	if (!piInTextItems)
		return false;

	try
	{
		m_Lines.clear();
		size_t i = 0;
		try 
		{
			for (; i < piInTextItems->m_Items.size(); i++)
			{
				CPlmLine P;
				if (!P.LoadPlmLineFromString(piInTextItems->m_Items[i].c_str(), (i == 0), m_pRusGramTab))
					throw CExpc (" a parse error");
				
				m_Lines.push_back(P);
			};
		}
		catch(...)
		{
			// i still indexes the line being processed when the exception was thrown;
			// "%i" expects an int, so the index is converted explicitly
			ErrorMessage ("MAPOST", Format("Cannot read line %i (\"%s\") from morphology", (int)i, piInTextItems->m_Items[i].c_str()));
			return false;
		}


		RunRules();

		ClearVector(piOutTextItems.m_Items);
		for (CLineIter it=m_Lines.begin(); it !=  m_Lines.end(); it++)
		{
			piOutTextItems.m_Items.push_back(it->GetStr());
		};
		m_Lines.clear();
		return true;

	}
	catch(...)
	{
		return false;
	}
}


void CMAPost::SetDeleteFalse()
{
 	for (CLineIter it=m_Lines.begin(); it !=  m_Lines.end(); it++)
	{
		 it->m_bToDelete = false;
	};
};

// Erases the line `it` from m_Lines and returns the iterator to the line that
// now stands in its place (m_Lines.end() if nothing follows).
//  - If bRemoveSpaceAfterDeletedWord is set and the following line starts with a
//    graphematical space, that line is erased too and the one after it is returned.
//  - Otherwise, if the erased line was not a homonym but the following line is,
//    the following line loses its homonym flag.
CLineIter CMAPost::Remove(CLineIter it, bool bRemoveSpaceAfterDeletedWord)
{
	const bool bErasedWasHomonym = it->m_bHomonym;

	CLineIter following_it = it;
	following_it++;

	m_Lines.erase(it);

	if (following_it == m_Lines.end())
		return following_it;

	if (bRemoveSpaceAfterDeletedWord && is_gra_space((unsigned char)following_it->GetWord()[0]))
	{
		CLineIter after_space_it = following_it;
		after_space_it++;
		m_Lines.erase(following_it);
		return after_space_it;
	}

	if (!bErasedWasHomonym && following_it->m_bHomonym)
	{
		following_it->m_bHomonym = false;
	}

	return following_it;
}

//================================================
//======               ===================
//================================================


// Helper macros for the rule loops below. All of them expect a CLineIter named
// `it` to be in scope (next_next_iter additionally expects `next_it`).
//  next_iter()      declares next_it = it + 1
//  prev_iter()      declares prev_it = it - 1 (no check against m_Lines.begin())
//  next_next_iter() declares next_next_it = next_it + 1
// NOTE: next_iter() and next_next_iter() contain a hidden `continue` that fires
// when the new iterator equals m_Lines.end(); they may therefore be used only
// inside a loop, and only where `continue` still advances `it`.
#define next_iter() CLineIter next_it=it;next_it++;if(next_it==m_Lines.end())continue;
#define prev_iter() CLineIter prev_it=it;prev_it--;
#define next_next_iter() CLineIter next_next_it=next_it;next_next_it++;if(next_next_it==m_Lines.end())continue;


/*
         , 
    ,    .
*/

void CMAPost::Odnobuk()    
{
	for (CLineIter it=m_Lines.begin(); it !=  m_Lines.end(); it++)
	{
		if (it->m_TokenType != RLE) continue;

		const string& GramCodes = it->GetGramCodes();

		//        , ,   
		int i=0;
		for (; i < GramCodes.length(); i+=2)
			if (GramCodes[i]!='?')
			{
				BYTE pos = m_pRusGramTab->GetPartOfSpeech(GramCodes.substr(i, 2).c_str());
				if (   (pos != PREP)
					&& (pos != CONJ)
					&& (pos != INTERJ)
					&& (pos != PARTICLE)
					)
					break;
			};
		//         

		if (i >= GramCodes.length()) 
		{ 
			//  ,  .- 
			next_iter()
				CPlmLine& Debug = *it;
			if (    
				(it->GetWord().length() == 1) 
				&&  (next_it->GetWord() == ".") 
				&&   it->m_bFirstUpperAlpha
				&&  (it->m_bOborot1 ==  it->m_bOborot2) // ".." -  ,    
				)
			{
				const string& debug = it->m_GraphDescr;
				it->SetMorphUnknown();
				it->SetGramCodes( m_DURNOVOGramCode, m_pRusGramTab);

				it->DeleteOb1();
				it->DeleteOb2();

			}
		}
	}
}

/*
 1.      ,      
 2.    ""  ""         ,
	      ,
	       .
 
*/

void CMAPost::Rule_Interjections()    
{
    for (CLineIter it=m_Lines.begin(); it !=  m_Lines.end(); it++)
	{
		CPlmLine& P  = *it;
		if (P.m_Pos == INTERJ)
		{
			next_iter();
			CLineIter the_close_next_it = next_it;
			for(;(next_it != m_Lines.end()) && next_it->m_bHomonym;next_it++);

			if (		(next_it != m_Lines.end())
					&&	(next_it->m_TokenType != PUNCTUAT)
					&&	( it->m_bHomonym || ((the_close_next_it != m_Lines.end()) &&  the_close_next_it->m_bHomonym) )
				)
			{
				it = Remove(it, false);
			};
		}
	
		/*if (		( (P.m_Pos == CONJ) ||  (P.m_Pos == PARTICLE))
				&&	(	(P.m_Lemma == "") || (P.m_Lemma == "")
			)
			{
				CLineIter prev_it = it;
				if (it != m_Lines.begin()
					prev_it--;

				if (P.m_Pos == PARTICLE)
				{
					if (		(it == m_Lines.begin())
							||	(prev_it->m_TokenType == PUNCTUAT)
						)
						it = Remove(it, false);
				}
				else
					if (		(it > m_Lines.begin())
							&& (prev_it->m_TokenType != PUNCTUAT)
						)
						it = Remove(it, false);
			};*/

	};

};



//
/*
        LLE HYP ,   -  ,  
       ,  .  LLE  HYP
        ,    LLE  HYP .
    :
     ftp-
	 ftp-
*/
void CMAPost::ILeDefLe()
{
    for (CLineIter it=m_Lines.begin(); it !=  m_Lines.end(); it++)
	if ( it->m_TokenType == LLE ) 
	{
		next_iter();
		if( next_it->m_GraphDescr.find(" HYP") != string::npos)
		{
			
			next_it++;
			bool bFound = false;
			CLineIter save_it = next_it;
			while(		(      next_it->m_bHomonym 
					      && ( next_it != m_Lines.end() ) 
						)
					||	(save_it == next_it)
				 )
			{
				if( next_it->m_TokenType != RLE)
				{
					if (save_it == next_it)
						break;
					next_it++;
					continue;
				}
				if(    (next_it->m_TokenType == RLE)
					&& !next_it->IsFoundInMorphology()
				  )
				{
					if (save_it == next_it)
						break;
					next_it++;
					continue;
				}
				if (		(next_it->m_Pos != ADJ_FULL) //    
						&&	(next_it->m_Pos != NOUN) 
				   )
				{
					if (save_it == next_it)
						break;
					next_it++;
					continue;
				}					
				next_it->SetWord( it->GetWord()+ string("-") +next_it->GetWord() );

				next_it->m_bHomonym = false;

				/*  */
				CLineIter tmp_it = next_it;
				for (tmp_it++; tmp_it != m_Lines.end(); )
				{
					if (!tmp_it->m_bHomonym) break;
					tmp_it = Remove(tmp_it, false);
				};
			   /*  */
				for (tmp_it = it; tmp_it != next_it;  )
				{
						tmp_it = Remove(tmp_it, true);
						if (tmp_it == m_Lines.end()) break;
				}

				it = next_it;
				it--;
				break;
			}
			
		}
	}
}

// Returns the concatenated ancodes of all forms of the numeral `Lemma` that end
// with `Flexia` and are strictly longer than it.
// The first paradigm of Lemma whose first ancode is NUMERAL or NUMERAL_P is used.
// Returns "" if Lemma is empty, if no such paradigm exists or if no form matches.
string CMAPost::GetSimilarNumAncode (const string&  Lemma, const string&  Flexia) 
{
	if (Lemma.length() == 0) return "";
	vector<CFormInfo> Paradigms;
	string h = Lemma;
	m_pRusLemmatizer->CreateParadigmCollection(false, h, false, Paradigms);

	// look for a numeral paradigm
	size_t ParadigmNo = 0;
	for (; ParadigmNo < Paradigms.size(); ParadigmNo++)
	{
	  string AnCode = Paradigms[ParadigmNo].GetAncode(0);
	  BYTE POS = m_pRusGramTab->GetPartOfSpeech(AnCode.c_str() );
	  if ( (POS == NUMERAL) ||  (POS == NUMERAL_P))
		  break;
	};

	// This used to be guarded by an assert only, so a release build indexed
	// Paradigms out of bounds. Callers already treat "" as "nothing found".
	if (ParadigmNo == Paradigms.size()) return "";
	const CFormInfo& P = Paradigms[ParadigmNo];

	// collect the ancodes of the forms that have the requested ending
	string AnCodes;
	for (long k=0; k < P.GetCount(); k++)
	{
		  string Form = P.GetWordForm(k);
		  EngRusMakeLower(Form);

		  if (Form.length() > Flexia.length())
			  if (Flexia == Form.substr (Form.length()-Flexia.length()))
				  AnCodes += P.GetAncode(k);
	};

	return AnCodes;
}


// Turns a number written in digits together with a letter ending into one RLE
// line that carries numeral gram codes. Two input shapes are handled:
//   case 1: a NUM line, a line whose word is "-" and an RLE line (the ending);
//   case 2: a single NUM_CHAR line "digits-letters".
// The numeral is found by the longest-first scan of the NumeralToNumber table
// (the table is walked from its last entry to the first) for an entry whose
// number is a suffix of the digits; gram codes are taken from the cardinal and,
// if that gives nothing, from the ordinal forms that end with the ending.
void CMAPost::Cifrdef()    
{
	
	// Case 1, input (example lines kept from the original comment):
	//	1848                            4 0 4 DC -1
	//  -                               1 4 1 PUN HYP -1
	//	<ending>                        2 5 2 RLE aa ?? -1
	// Case 2, input: one line such as
	// 1960-<ending>                    2 6 DSC

	// Output shape according to the original comment:
	//	1848-<ending>                   4 0 4 RLE -= 1848  -1 #0

	for (CLineIter it=m_Lines.begin(); it !=  m_Lines.end(); it++)
	{
		// 1 or 2, see above; the initial -1 is never used because every other path continues
		size_t CaseNo = -1;
        string NumWordForm;
		string Flexia;
		CLineIter next_it;
		CLineIter next_next_it;
		

		if (	it->m_TokenType == NUM) 
		{
			CaseNo = 1;

			next_it=it;
			next_it++;
			if(next_it==m_Lines.end())continue;
			if (next_it->GetWord()  != "-") continue;

			next_next_it=next_it;
			next_next_it++;
			if(next_next_it==m_Lines.end())continue;
			if (next_next_it->m_TokenType != RLE)  continue;

	        NumWordForm = it->GetUpperWord(); 
			Flexia = next_next_it->GetWord();
			
		}
		else
			if (it->m_TokenType == NUM_CHAR)
			{
				CaseNo = 2;

				// NOTE(review): find() returns size_t; the value is stored in an int and
				// then compared with string::npos - confirm this is intended
				int hyphen_occur = it->GetWord().find("-");
				if ((hyphen_occur == string::npos) || (hyphen_occur ==0)) continue;
				// the word must start with a digit and end with a Russian letter
				if (!isdigit((BYTE)it->GetWord()[0])) continue;
				if (!is_russian_alpha((BYTE)it->GetWord()[it->GetWord().length() - 1])) continue;
		        NumWordForm = it->GetWord().substr(0, hyphen_occur); 
				Flexia = it->GetWord().substr(hyphen_occur+1); ;
				

			}
			else
				continue;



		// find the table entry whose number is a suffix of the digits
		int i = NumeralToNumberCount - 1;
		for(; i >= 0;  i--)
		{
			string NumValue;
			// NOTE(review): only NUM and NUM_CHAR lines reach this point, so the
			// ROMAN_NUM branch looks unreachable - confirm
			if (it->m_TokenType == ROMAN_NUM)
				NumValue = NumeralToNumber[i].m_RomanNumber;
			else
				NumValue = IntToStr(NumeralToNumber[i].m_Number);
			
			if (NumValue.length() > 0)
				if (NumWordForm.length() >= NumValue.length())
					if (NumValue  == NumWordForm.substr(NumWordForm.length() - NumValue.length()) )
						break;
		};
		if (i < 0) continue;
		EngRusMakeLower(Flexia);
		// try the cardinal numeral first, then the ordinal one
		string AnCodes = GetSimilarNumAncode(NumeralToNumber[i].m_Cardinal, Flexia);
		if  (AnCodes.length() == 0)
			AnCodes = GetSimilarNumAncode(NumeralToNumber[i].m_Ordinal, Flexia );
		if  (AnCodes.length() == 0) continue;


		// rewrite the current line as an RLE line with the numeral gram codes
		CPlmLine& P  = *it;
		if (CaseNo == 1)
			P.SetWord("-" + Flexia);
		P.m_TokenType = RLE;
		P.m_Register = LowLow;
		P.SetMorphUnknown();
		P.m_Lemma =  NumWordForm;
		P.SetGramCodes( AnCodes, m_pRusGramTab);
		

		if (CaseNo == 1)
		{
			// keep the m_bSent2 mark of the two lines that are about to be removed
			if  (    next_it->m_bSent2
				  || next_next_it->m_bSent2 
			 ) 
			 P.m_bSent2 = true;
			// remove the "-" line and then the line that follows it
			next_it = Remove(next_it, true);
			next_it = Remove(next_it, true);
		}
	};

};






/*

      .  Z0 = <L,M> -   ,  
  L - ,   -  . ,  
        Zi    <L,M>, 
  Zi  ,        Z0.
      , "" (/), "" (/)
   ParadigmId  Zi  .
*/

// Merges homonyms of a word that share lemma, common gram code and part of
// speech with the word's first (non-homonym) line: the gram codes of such a
// homonym that the first line lacks are appended to it and the homonym line is
// removed. Lines carrying m_DURNOVOGramCode are never merged.
// Any exception is swallowed. The only call visible in this file (in RunRules)
// is commented out.
void CMAPost::Memlem()   
{
try
	{
	 // NOTE(review): cnt is never used
	 int  cnt = Count();

  	 for (CLineIter it=m_Lines.begin(); it !=  m_Lines.end(); it++)
	 {
	   const string& Lemma = it->m_Lemma;
	   const string& CommonAncode = it->GetCommonGramCode();
	   // only the first line of a homonym group starts a merge
	   if  (it->m_bHomonym) continue;
	   if (it->m_bToDelete) continue;
   
	   /* walk through the homonyms that follow the line */
	   next_iter();
	   for (;   next_it != m_Lines.end(); ) 
	   {
		 if ( !next_it->m_bHomonym )
			break;
		 
		 /*
		 // lines marked HYP are not merged; example kept from the original comment
		 -                       9 0 9 RLE aa HYP CS? ?? -1
		                              4 0 4 RLE aa HP1 -n   -1
		  -                               1 4 1 PUN HYP
		                              4 5 4 RLE aa HP2 CS? SENT_END -n   -1

		 */
		 if (next_it->m_GraphDescr.find (" HYP") != string::npos)  break;


		 if (   (next_it->m_Lemma == Lemma)
			 && (next_it->GetCommonGramCode() == CommonAncode)
			) 
		 {  
		   // same lemma and common gram code: candidate for a merge
		   string GramCodes1 = it->GetGramCodes();
		   BYTE pos1 = it->m_Pos;
		   const string& GramCodes2 = next_it->GetGramCodes();
		   BYTE pos2 = next_it->m_Pos;
			// lines carrying m_DURNOVOGramCode are left as they are
		   if (			(m_DURNOVOGramCode == GramCodes1)
				||		(m_DURNOVOGramCode == GramCodes2)
			   )
		   {
				next_it++;
				continue;
		   };
       
		   /* append the codes of GramCodes2 that are missing in GramCodes1 */
		   if (pos1 == pos2) 
		   {
			for (int i2=0; i2 < GramCodes2.length(); i2+=2) 
			{
				int i1=0;
				for (; i1 < GramCodes1.length(); i1+=2)
				 if (GramCodes1.substr(i1,2) == GramCodes2.substr(i2,2))
					 break;

			 if (i1 == GramCodes1.length()) 
			   GramCodes1 += GramCodes2.substr(i2,2);

		   }

			it->SetGramCodes(GramCodes1, m_pRusGramTab);

		   // Remove returns the line after the erased homonym, so no increment here
		   next_it = Remove(next_it, true);;	
		   continue;
		  }
		 }  // end of the lemma/common-code test
   	     next_it++;
	   }
	 }
	}
	catch(...)
	{
		return ;
	}

}



//    
//   ,       
//  ,       
// , "  +  "
//   ,,,,3,
//   ,,,,,,,
//     
//   +  
//     +  

void CMAPost::ParticipleAndVerbInOneForm() 
{
 for (CLineIter it=m_Lines.begin(); it !=  m_Lines.end(); it++)
 {
   const string& GramCodes = it->GetGramCodes();
   string VerbGramCodes;
   string PartGramCodes;
   if (GramCodes == "??") continue;
   for (long i=0; i < GramCodes.length(); i+=2)
   {
	   string gram = GramCodes.substr(i, 2);
	   BYTE POS = m_pRusGramTab->GetPartOfSpeech(gram.c_str());
	   if (POS == PARTICIPLE)
		   PartGramCodes += gram;
       else 
	    if (POS == VERB)
			VerbGramCodes += gram;
   };
   if (!VerbGramCodes.length() || !PartGramCodes.length()) continue;
   it->SetGramCodes( VerbGramCodes, m_pRusGramTab );

   CPlmLine NewLine = *it;	
   NewLine.SetGramCodes(PartGramCodes, m_pRusGramTab);
   NewLine.m_bHomonym = true;
   it++;
   m_Lines.insert (it, NewLine);
 };
};
/*
    ,   -     =/= "", "", "", "", ""
       - 
   (, "", ""). ,      
  ,  , ,  . ,        .
  ,    .
 , 
   +  
   +  
   -  ,   .
   - .
   -  /  
   -   .
*/
// Resolves PRONOUN_P / PRONOUN homonymy inside one word: when a word has both
// readings and the next non-space word (which must have no homonyms) passes
// GleicheGenderNumberCase against the PRONOUN_P gram codes, the PRONOUN reading
// is removed.
// NOTE(review): the word literals in the skip list below are unreadable in this
// copy of the file (encoding loss); they must be restored from the original.
void CMAPost::PronounP_Pronoun_Homonymy() 
{
 for (CLineIter it=m_Lines.begin(); it !=  m_Lines.end(); it++)
 {
   // words that are never touched by this rule
   if (   (it->GetUpperWord() == "") 
	   || (it->GetUpperWord() == "") 
	   || (it->GetUpperWord() == "") 
	   || (it->GetUpperWord() == "") 
	   || (it->GetUpperWord() == "") 
	   || (it->GetUpperWord() == "Ũ") 
	  )
   continue;
   
   CLineIter pronoun_p_it = m_Lines.end();
   CLineIter pronoun_it = m_Lines.end();

   // start only at the first line of a homonym group
   if( it->m_bHomonym ) 
	   continue;
   
   // walk over the whole homonym group and remember both readings
   string PronounPGramCodes;
   for (;;)
   {
	   BYTE POS = it->m_Pos;
	   if(POS == PRONOUN_P)
	   {
			pronoun_p_it = it;
			PronounPGramCodes = it->GetGramCodes();
	   }
	   else if (POS == PRONOUN)
			pronoun_it = it;
	   	   
	   it++;

	   if(it == m_Lines.end()) 
		   break;
	   
	   if(!it->m_bHomonym) 
		   break;
   };
   // step back to the last line of the group
   it--;
   
   // both readings are required
   if ((pronoun_p_it == m_Lines.end()) || (pronoun_it == m_Lines.end()))
        continue;
   
   // find the next line that does not start with a graphematical space
   next_iter();
   for (; next_it != m_Lines.end();  next_it++)
	   if (!is_gra_space((unsigned char)next_it->GetWord()[0])) 
		   break;

   if ( next_it == m_Lines.end() ) continue;
   {
		// the next word must not have homonyms
		CLineIter next_next_it = next_it;
		next_next_it++;
		if	(		(next_next_it != m_Lines.end())
				&&	(next_next_it->m_bHomonym)
			)
			continue;
   };

   const string& GramCodes = next_it->GetGramCodes();
   // NOTE(review): this `break` (and the one below) leaves the whole loop, so no
   // later word is processed; the neighbouring checks use `continue` - confirm
   if ((GramCodes.length() == 0) || (GramCodes == "??")) break;

   // agreement check between the next word and the PRONOUN_P reading
   if (!m_pRusGramTab->GleicheGenderNumberCase (next_it->GetCommonGramCode().c_str(),GramCodes.c_str(), PronounPGramCodes.c_str()))
	   break;

   // drop the PRONOUN reading and continue from the PRONOUN_P line
   Remove(pronoun_it, true);
   it = pronoun_p_it;
 };

};

//       
// Looks up the paradigms of WordForm and copies into Result the first one whose
// source ancode has part of speech POS and, unless Grammems is 0, shares at
// least one grammem with Grammems.
// Returns true if such a paradigm was found; Result is untouched otherwise.
bool CMAPost::GetParadigmByFormAndPOS(string WordForm, BYTE POS, QWORD Grammems, CFormInfo& Result) const
{
	vector<CFormInfo> Candidates;
	m_pRusLemmatizer->CreateParadigmCollection(false, WordForm, false, Candidates);

	for (long No = 0; No < Candidates.size(); No++)
	{
		string SrcAncode = Candidates[No].GetSrcAncode();
		BYTE SrcPOS = m_pRusGramTab->GetPartOfSpeech(SrcAncode.c_str() );
		QWORD SrcGrammems;
		m_pRusGramTab->GetGrammems(SrcAncode.c_str(), SrcGrammems);

		if (SrcPOS != POS)
			continue;

		// a non-zero grammem mask must intersect the grammems of the ancode
		if ( (Grammems != 0) && !(SrcGrammems & Grammems) )
			continue;

		Result = Candidates[No];
		return true;
	}

	return false;
}

//    ,     
// Takes the first paradigm of WordForm whose first ancode has part of speech
// POS, picks its first form whose grammems contain all of Grammems and passes
// that form to GetParadigmByFormAndPOS.
// Returns false if no paradigm or no form qualifies, otherwise the result of
// GetParadigmByFormAndPOS.
bool CMAPost::GetParadigmByNormAndPOS(string WordForm, BYTE POS, QWORD Grammems, CFormInfo& Result) const
{
	vector<CFormInfo> Paradigms;
	m_pRusLemmatizer->CreateParadigmCollection(false, WordForm, false, Paradigms);

	long k=0;
	for (; k < Paradigms.size(); k++)
	{
		string AnCode = Paradigms[k].GetAncode(0);
		BYTE _POS = m_pRusGramTab->GetPartOfSpeech(AnCode.c_str() );
		if (_POS == POS)
			 break;
	};
    if (k == Paradigms.size()) return false;

	const CFormInfo& P = Paradigms[k];

	for (k=0; k < P.GetCount(); k++)
	{
		// test the k-th form; the old code tested form 0 on every iteration,
		// so no form other than the first could ever be selected
		string AnCode = P.GetAncode(k);
		QWORD Grams =  m_pRusGramTab->GetAllGrammems(AnCode.c_str());
		if ((Grams & Grammems) == Grammems) break;
	};
    if (k == P.GetCount()) return false;
	string  Form = P.GetWordForm(k);
	return GetParadigmByFormAndPOS(Form, POS, Grammems, Result);
}


// Writes the morphology of paradigm P into the line `it` (lemma sign, common
// ancode, paradigm id, lemma = form 0, gram codes = source ancode, homonym
// weight "0") and removes all homonym lines that directly follow it.
void CMAPost::SetParadigmToLineAndDelHomonyms (CLineIter it, const CFormInfo& P)
{
	CPlmLine& Line = *it;
	Line.SetMorph(P.GetLemSign()[0],P.GetCommonAncode(), P.GetParadigmId());
	Line.m_Lemma = P.GetWordForm(0);
	Line.SetGramCodes( P.GetSrcAncode(), m_pRusGramTab);
	Line.m_HomoWeight = "0";

	CLineIter hom_it = it;
	hom_it++;
	while ( (hom_it != m_Lines.end()) && hom_it->m_bHomonym )
		hom_it = Remove(hom_it, true);
}


// Returns true if the token type of the line is RLE, LLE, NUM, ROMAN_NUM or
// NUM_CHAR, i.e. one of the types FixedCollocations matches against.
bool CMAPost::CanBeFixedCollocItem(CLineIter it)  
{
	if (it->m_TokenType == RLE) return true;
	if (it->m_TokenType == LLE) return true;
	if (it->m_TokenType == NUM) return true;
	if (it->m_TokenType == ROMAN_NUM) return true;
	if (it->m_TokenType == NUM_CHAR) return true;
	return false;
}


//          
// Collapses the lines [start_it, end_it) into the single line main_it.
// main_it receives the interface string of collocation CollocNo as its lemma,
// loses the homonym flag and, if HasEndSent is set, gets m_bSent2.
// Lines before main_it are removed together with the space after each of them;
// lines between main_it and end_it are removed without touching spaces.
// Returns the iterator at which the second removal loop stopped.
CLineIter CMAPost::AddCollocation(CLineIter start_it, CLineIter end_it, CLineIter main_it, bool HasEndSent,  int CollocNo) 
{
	main_it->m_Lemma = m_FixedCollocs[CollocNo].m_InterfaceString;
	main_it->m_bHomonym = false;
	if (HasEndSent)
		main_it->m_bSent2 = true;

	// everything in front of the main word
	for (; start_it != main_it; )
	{
		start_it = Remove(start_it, true);
		if (start_it == m_Lines.end())
			break;
	}

	// everything behind the main word; spaces are not removed here
	// (bRemoveSpaceAfterDeletedWord == false)
	CLineIter tail_it = start_it;
	tail_it++;
	for (;;)
	{
		if (tail_it == end_it)
			break;
		tail_it = Remove(tail_it, false);
		if (tail_it == m_Lines.end())
			break;
	}
	return tail_it;
}
/*
     ,     ,   
          CFixedColloc::m_InterfaceString
 ,  "    "   "?"
*/
// Searches the text for the fixed collocations listed in m_FixedCollocs.
// Starting at every non-homonym line that can be a collocation item, the items
// of each collocation are matched one after another by lemma (and by part of
// speech unless the item's m_POS is 255) against the following words, taking
// all homonyms of a word into account. On a full match that does not end inside
// an unfinished oborot the lines are collapsed by AddCollocation.
void CMAPost::FixedCollocations() 
{
	for (CLineIter it=m_Lines.begin(); it !=  m_Lines.end(); it++)
	if (  CanBeFixedCollocItem (it) )	 
	{
		if( it->m_bHomonym ) continue;

		for (long CollocNo =0; CollocNo  < m_FixedCollocs.size(); CollocNo++)
		{
			CLineIter curr_it = it;
			CLineIter main_it = m_Lines.end();
			bool HasEndSent = false;
			long ItemNo=0;

			bool bInOborot = false;
			for (; ItemNo < m_FixedCollocs[CollocNo].m_Lemmas.size(); ItemNo++)
			{
				/* skip the lines that cannot be collocation items */
				for (; curr_it != m_Lines.end(); curr_it++) 
					if (CanBeFixedCollocItem (curr_it)) break;

				if ( curr_it == m_Lines.end() ) break;


				/*
					Track the oborot marks so that a collocation is not built
					across the border of an oborot.
				*/
				assert (!curr_it->m_bHomonym);
				if (curr_it->m_bOborot1) 
						bInOborot = true;

				// note: the `else` below belongs to the inner `if (!bInOborot)`
				if (curr_it->m_bOborot2) 
					if (!bInOborot)
					{
						// an oborot ends here that was not opened inside the match
						bInOborot = true;	
						break; //
					}
					else
						bInOborot = false;

				CLineIter found_it = m_Lines.end();

				// go through the word and all its homonyms
				CLineIter hom_it=curr_it;
				for (; hom_it != m_Lines.end(); hom_it++)
				{
					if (!hom_it->m_bHomonym && (hom_it != curr_it)) break;

					HasEndSent |= hom_it->m_bSent2;

					// compare the lemma
					bool Found = hom_it->m_Lemma == m_FixedCollocs[CollocNo].m_Lemmas[ItemNo].m_Lemma;
					// compare the part of speech unless the item accepts any (255)
					if (Found && (m_FixedCollocs[CollocNo].m_Lemmas[ItemNo].m_POS != 255))
					if (hom_it->m_Pos != m_FixedCollocs[CollocNo].m_Lemmas[ItemNo].m_POS)
						Found = false;

					if (Found)
					{
						found_it = hom_it;
					};

				};
				if (found_it == m_Lines.end()) break;

				// remember the line matched by the main word of the collocation
				if (ItemNo  == m_FixedCollocs[CollocNo].m_MainWordNo)
				main_it = found_it;

				curr_it = hom_it;
			};
			// Matching is over.
			// it       - the first line of the candidate
			// curr_it  - the line after the last matched word (if all items matched)

			if (!bInOborot)
				if (ItemNo == m_FixedCollocs[CollocNo].m_Lemmas.size())
				{
					it = AddCollocation(it, curr_it, main_it, HasEndSent, CollocNo);
					// a collocation was built here, the other collocations are not tried
					break;
				};
		}; // loop over the collocations

	}
};


/*
   EXPR1  EXPR_NOxxx  . ,        
  EXPR2  10 
 */

void CMAPost::CorrectOborots() 
{
	for (CLineIter it=m_Lines.begin(); it !=  m_Lines.end(); it++)
	{
		if (it->m_bHomonym) continue;
		bool Ob1 = it->m_bOborot1;
		bool Ob2 = it->m_bOborot2;
		/*
		     ,    2.
		*/
		if ( Ob1 && !Ob2)
		{
			bool bError = true;;
			next_iter();
			int Count = 0;
			for (; next_it !=  m_Lines.end(); next_it++)
			{
			if (next_it->m_bHomonym) continue;
			Ob1 = next_it->m_bOborot1;
			Ob2 = next_it->m_bOborot2;
			// ,  
			if  (Ob2 && !Ob1)
			{
				bError = false;
				break;
			};

			 // !
			 if  (   (Ob1 && Ob2) 
				  || (Ob1 && !Ob2)
				 )
				 break;

			 // ,       20  
			 if (Count > 20)
				 break;

			 Count++;
		 };
		 /*
		    EXPR1  EXPR_NOxxx  . ,        
		  EXPR2
		 */
		 if (bError)
		 {
			it->DeleteOb1();
			it++;
			for (; (it != m_Lines.end()) &&  it->m_bHomonym; it++)
			 it->DeleteOb1();


			it--;
		 };


		 
 	 };

 };

};

/*
     ,      .
 ,       "-",   ,    
      ,     ,   .
     "#"
*/

// Handles RLE words that are not found in the morphology and start with a fixed
// 4-character prefix: if the rest of the word is a form of a full adjective
// (ADJ_FULL), the line gets the morphology of that adjective, its homonyms are
// removed and " # " is appended to its graphematical descriptor.
// NOTE(review): the prefix literal is unreadable in this copy of the file
// (encoding loss); it must be restored from the original.
void CMAPost::SemiAdjectives() 
{
 for (CLineIter it=m_Lines.begin(); it !=  m_Lines.end(); it++)
 {
	if ( it->m_TokenType != RLE) continue;
	if (it->GetUpperWord().substr(0,4) != "") continue;
	if (it->IsFoundInMorphology()) continue;

	// cut the 4-character prefix and look the rest up as a full adjective
	string WordForm = it->GetUpperWord();
	WordForm.erase(0,4);
    CFormInfo P;
	if (!GetParadigmByFormAndPOS(WordForm, ADJ_FULL, 0, P) )  continue;
	SetParadigmToLineAndDelHomonyms (it, P);
	// mark the line in the graphematical descriptor
	it->m_GraphDescr += " # ";
 };
};


/*
     ,      .
 ,       "-" ("-"),   ,    
      ,     ,   .
     "#".
        ,   
  ., .   .
 ,
    ()
     ()
     ()
         .
*/

/*
  "      "
  " "
   ,	      "-"    
   ,    " " .  :
   " "
   "-   " 
       ,    ,
         .
*/

// Handles RLE words that are not found in the morphology and start with one of
// three fixed prefixes (two of 4 characters, one of 3): if the rest of the word
// (more than 3 characters) is a form of a NOUN or, failing that, of a NUMERAL,
// the line gets that morphology, its homonyms are removed, " # " is appended to
// the graphematical descriptor and every singular gram code is replaced by the
// corresponding plural one.
// NOTE(review): the prefix literals are unreadable in this copy of the file
// (encoding loss); they must be restored from the original.
void CMAPost::SemiNouns() 
{
 for (CLineIter it=m_Lines.begin(); it !=  m_Lines.end(); it++)
 {
	if ( it->m_TokenType != RLE) continue;

	bool b_pol4 = it->GetUpperWord().substr(0,4) == "";
	if (!b_pol4) 
		 b_pol4 = it->GetUpperWord().substr(0,4) == "-";
	bool b_pol3 = it->GetUpperWord().substr(0,3) == "";
	if (!b_pol4 && !b_pol3) continue;

	
	if ( it->IsFoundInMorphology() )	  continue;

	// cut the prefix; the 4-character prefixes win over the 3-character one
	string WordForm = it->GetUpperWord();
	WordForm.erase(0, b_pol4?4:3);
	if (WordForm.length() <= 3) continue;
	bool bNOUN = true; 
    CFormInfo P;
	if (!GetParadigmByFormAndPOS(WordForm, NOUN, 0,  P) )
	{ 
		bNOUN = false;
		if (!GetParadigmByFormAndPOS(WordForm, NUMERAL, 0, P)) continue; //
	};
	SetParadigmToLineAndDelHomonyms (it, P);
	// mark the line in the graphematical descriptor
	it->m_GraphDescr += " # ";

	// convert the gram codes from singular to plural
	string GramCodes = it->GetGramCodes();
	QWORD Grammems;
	bool Found = false;
    for (int i=0; i < GramCodes.length(); i+=2) 
		if (GramCodes[i]!='?')
		{
		  m_pRusGramTab->GetGrammems((GramCodes.c_str() + i), Grammems);
		  // Found: a singular code whose only case is the genitive
		  if (Grammems & _QM(rSingular))
		  {
			if ( (Grammems & rAllCases) == _QM(rGenitiv) )
			 Found = true;
		  }

		  // replace singular by plural
		  if (Grammems & _QM(rSingular))	
		  {
			  Grammems &= (~ _QM(rSingular));
			Grammems |= _QM (rPlural);
			string NewGramCode;
			m_pRusGramTab->GetGramCodeByGrammemsAndPartofSpeechIfCan (bNOUN ? NOUN:NUMERAL, Grammems, NewGramCode);
			
			// NOTE(review): if no code was found NewGramCode is presumably empty and
			// the 2 characters are simply deleted, which shifts the following codes
			// while i still advances by 2 - confirm
			GramCodes.replace(i, 2, NewGramCode);
		  };
 
		};
	if (GramCodes.length() > 0)
		it->SetGramCodes(GramCodes, m_pRusGramTab);

	// build nominative and accusative plural codes when a genitive singular was seen
	if (Found)
	{
		// NOTE(review): Grammems holds the (already modified) value of the last
		// processed code, not necessarily of the genitive one, and it is narrowed
		// from QWORD to long here - confirm
		long Gram  = Grammems & (~ _QM(rGenitiv));
		     Gram  &= (~ _QM(rSingular));
			 Gram  |= _QM(rPlural);

		string NewGramCode;
		m_pRusGramTab->GetGramCodeByGrammemsAndPartofSpeechIfCan (bNOUN ? NOUN:NUMERAL, Gram| _QM(rNominativ), NewGramCode);
		GramCodes.insert(0, NewGramCode);

		m_pRusGramTab->GetGramCodeByGrammemsAndPartofSpeechIfCan (bNOUN ? NOUN:NUMERAL, Gram| _QM(rAccusativ), NewGramCode);
		GramCodes.insert(0, NewGramCode);
		// NOTE(review): GramCodes is a local copy and is not written back to the
		// line after these inserts, so they have no visible effect - confirm
	};


 }

};




/*
          .       
    ,       
   
  :
   +    
   +  
   -   ,  .
   -    .
*/
// Removes the ADJ_FULL reading of one particular word form unless the next word
// (after the word's homonyms and after the lines skipped by PassSpaces) is ","
// or has the genitive grammem.
// NOTE(review): the word literal is unreadable in this copy of the file
// (encoding loss); it must be restored from the original.
void CMAPost::Rule_UZHE() 
{
 for (CLineIter it=m_Lines.begin(); it !=  m_Lines.end(); it++)
 {
	bool IsHom = it->m_bHomonym;
	if (it->GetUpperWord() !=  "") continue;
	// NOTE(review): GramCodes is never used
    const string& GramCodes = it->GetGramCodes();;
	BYTE POS = it->m_Pos;
	if (POS != ADJ_FULL) continue;

	
	// find the first line after `it` that is not a homonym, i.e. the next word
	CLineIter tmp_it = it;
	for (; tmp_it != m_Lines.end(); tmp_it++)
	   if (!tmp_it->m_bHomonym  && (tmp_it != it)) break;

	tmp_it = PassSpaces(tmp_it);

	if ( tmp_it != m_Lines.end() )
	{
		// the adjective reading is kept before a comma or a genitive word
		const CPlmLine& L = *tmp_it;
		if (L.GetWord() == ",") continue;
		if (L.m_Grammems & _QM(rGenitiv)) continue;
			
	}
	it = Remove(it, true);
	if (!IsHom)
    {
	   // the removed line was the first of its group: the line that took its
	   // place is no longer a homonym
	    if (it != m_Lines.end())
	     it->m_bHomonym = false;
    };
	// step back so that the loop increment lands on the line returned by Remove
	// NOTE(review): this decrements m_Lines.begin() if the removed line was the first one - confirm
	it--;
 };

};



// Looks to the left of `it` for a word that makes a plural reading plausible.
// The nearest preceding RLE line is located; then this line and the lines before
// it are inspected for as long as the inspected line is a homonym. The result is
// true if an inspected line is a NUMERAL, or is a plural ADJ_FULL, NUMERAL_P or
// PRONOUN_P. The very first line of m_Lines is never inspected.
bool  CMAPost::NounHasObviousPluralContext(CLineIter it)
{
	if ( it  == m_Lines.begin() ) return false;

	// go left to the nearest RLE line (or to the beginning)
	CLineIter left_it = it;
	left_it--;
	while ( (left_it != m_Lines.begin()) && (left_it->m_TokenType != RLE) )
		left_it--;

	// inspect the readings of that word, from the last one backwards
	for (; left_it != m_Lines.begin(); left_it--)
	{
		const QWORD Grammems = left_it->m_Grammems;
		const BYTE POS = left_it->m_Pos;

		if (POS == NUMERAL) return true;

		const bool bPlural = (Grammems & _QM(rPlural)) > 0;
		const bool bAgreeingPOS =		(POS == ADJ_FULL)
									||	(POS == NUMERAL_P)
									||	(POS == PRONOUN_P);
		if (bPlural && bAgreeingPOS) return true;

		// the first line of the homonym group has been inspected
		if (!left_it->m_bHomonym) break;
	}

	return false;
}

/*
     ,    "-".
       ( ""), ., .  -
    ,    . 
       . 
 - 
 - 
 + 
 + 
 + 
*/
/*
	     ,   
	   ( ""),     "-o".
	        ,
	     "" (""+ "" = "")
*/

// Restricts indeclinable words with a particular last letter to the singular:
// for a line whose grammems contain all cases and all numbers and that has a
// single gram code, the plural grammem is dropped unless
// NounHasObviousPluralContext finds a plural context to the left.
// NOTE(review): the character literal of the last letter is unreadable in this
// copy of the file (encoding loss); it must be restored from the original.
void CMAPost::Rule_Ideclinable() 
{
	for (CLineIter it=m_Lines.begin(); it !=  m_Lines.end(); it++)
	{
		// NOTE(review): IsHom is never used
		bool IsHom = it->m_bHomonym;
		const string& WordForm = it->GetUpperWord();
		if (WordForm.size() == 0) continue;
		if ((unsigned char)WordForm[WordForm.length() - 1] != (unsigned char)'') continue;

		// the word must have all cases and all numbers
		if ( (it->m_Grammems & rAllCases) != rAllCases) continue; 
		if ( (it->m_Grammems & rAllNumbers) != rAllNumbers) continue;

		string GramCodes = it->GetGramCodes();
		// exactly one gram code (two characters) is expected
		if (GramCodes.length() != 2) continue;
		if (!NounHasObviousPluralContext(it))
		{
			// drop the plural
			QWORD Grammems = it->m_Grammems; 
			Grammems &=  ~_QM(rPlural);
			string NewGramCode;
			m_pRusGramTab->GetGramCodeByGrammemsAndPartofSpeechIfCan (NOUN, Grammems, NewGramCode);
			// no gram code for this set of grammems: leave the line as it is
			if (NewGramCode == "")
				continue;
			it->SetGramCodes(NewGramCode, m_pRusGramTab);
		};

	};
};

/*
	    .       ,    
	       "  ", 
	       .
*/
void CMAPost::Rule_DeadPlurals() 
{
	for (CLineIter it=m_Lines.begin(); it !=  m_Lines.end(); it++)
	{
		const CPlmLine& L = *it;
		if	(		((L.m_Grammems & _QM(rDeFactoSingTantum)) > 0)
				&&	!NounHasObviousPluralContext(it)
				&&	L.IsFoundInMorphology()
			)
		{
				
			string GramCodes = it->GetGramCodes();
			for (int i=0; i < GramCodes.length(); i+=2) 
				if (GramCodes[i]!='?')
				{
					QWORD Grammems;
					m_pRusGramTab->GetGrammems ( GramCodes.c_str() + i, Grammems);
					if (   (Grammems & _QM(rPlural))  )
					  {
							  GramCodes.erase(i,2);
								i -= 2;
					};	
				}
			if (GramCodes.length() > 0)
			{
					it->SetGramCodes(GramCodes, m_pRusGramTab);
			}
			else
			{
				//  word has only plural but also has homonyms 
				if (L.m_bHomonym )
				{
					it = Remove(it, true);
					continue;
				}
				else
				{
					next_iter();
					if (next_it->m_bHomonym)
					{
						it = Remove(it, true);
						assert (it == next_it);
						it->m_bHomonym = false;
					};
					continue;
				}

			};

		};

	};

};

/*
	    .   ,    , 
	     . ,   ""
	      "",     
	"".

*/
void CMAPost::Rule_RelationalAdjective() 
{
	for (CLineIter it=m_Lines.begin(); it !=  m_Lines.end(); it++)
	{
		const CPlmLine& L = *it;
		if	(		((L.m_Grammems & _QM(rQualitative)) == 0)
				&&	(		(L.m_Pos == ADJ_FULL)
						||	(L.m_Pos == ADJ_SHORT)
					)
				&&	(		(L.m_Grammems & _QM(rShortForm))
						||	(L.m_Grammems & _QM(rComparative))
					)
			)
		{
			if (L.m_bHomonym)
			{
				it = Remove(it, false);
				it--;
			}
			else
			{
				CLineIter next_it = it;
				next_it++;
				if	(		(next_it != m_Lines.end())
						&&	(next_it->m_bHomonym)
					)
				{
					it = Remove(it, false);
					assert (it == next_it);
					it->m_bHomonym = false;
					it--;
				}
				else
				{
					//     ,    	
					if (L.GetGramCodes().length()  == 2 )
						continue;
					else
					{
						//   .     ,
						//       ,
						// ,  "".
						string GramCodes;
						for (int i=0; i < L.GetGramCodes().length(); i+=2) 
							if (L.GetGramCodes()[i]!='?')
							{
								QWORD Grammems;
								m_pRusGramTab->GetGrammems ( L.GetGramCodes().c_str() + i, Grammems);
								if (   (Grammems & _QM(rComparative))  == 0)
									GramCodes += L.GetGramCodes().substr(i,2);
							};
						it->SetGramCodes(GramCodes, m_pRusGramTab);
					};
				}
				
				
			}
			

		};

	};

};



/*
	     .         
	       CSurnameSuffix::m_Suffix,  
	1.   ,  ;
	2.    ,          
	 M,     .
*/

/*
string CMAPost::GetNormSurnameSuffix(long ParadigmNo) const
{
  for (long i=0; i< m_SurnameSuffixes.size(); i++)
     if (m_SurnameSuffixes[i].m_ParadigmNo == ParadigmNo)
		 return m_SurnameSuffixes[i].m_Suffix;

  assert (false);
  return "";
};

void CMAPost::GetSurnamesHypots(CLineIter it, SurnameHypotsVec& MorphInterps) 
{
	vector<long> Hypots;

	if (!it->m_bFirstUpperAlpha) return;

	const string& WordForm = it->GetUpperWord();
	

	long i=0;
	for (; i< m_SurnameSuffixes.size(); i++)
     if (WordForm.length()+1 > m_SurnameSuffixes[i].m_Suffix.length())
	  if (m_SurnameSuffixes[i].m_Suffix == WordForm.substr(WordForm.length() - m_SurnameSuffixes[i].m_Suffix.length()))
	  {
	    Hypots.push_back(i);
	  };

	DWORD LastParadigmNo  = 0xffffffff;
	for (i=0; i < Hypots.size(); i++)
	{
		if (LastParadigmNo != m_SurnameSuffixes[Hypots[i]].m_ParadigmNo)
		{
		  if (MorphInterps.size() == 19) break;
		  MorphInterps.Add(CLemmaAndCodes());
		  LastParadigmNo = m_SurnameSuffixes[Hypots[i]].m_ParadigmNo;
		  MorphInterps.back().m_Lemma = WordForm.substr(0,  WordForm.length() - m_SurnameSuffixes[Hypots[i]].m_Suffix.length()); 
		  MorphInterps.back().m_Lemma += GetNormSurnameSuffix(LastParadigmNo);
		};

		MorphInterps.back().m_GramCodes += m_SurnameSuffixes[Hypots[i]].m_GramCode;
	};

};

void CMAPost::Rule_Surnames() 
{
	int debug;
	debug = m_Lines.size();
	for (CLineIter it=m_Lines.begin(); it !=  m_Lines.end(); it++)
	{
		CPlmLine& L = *it;
		if (L.m_bHomonym) continue;
		if (L.m_TokenType != RLE) continue;
		if (L.IsFoundInMorphology()) continue;
		
		// ,    ,    .
		// , "-",    ,   
		// "-"   .
		if ( L.m_bHyphenWord ) continue; 

		SurnameHypotsVec MorphInterps;
		GetSurnamesHypots(it, MorphInterps);
		if (MorphInterps.size() == 0) continue;
		
		L.SetMorphUnknown();
		L.m_Lemma = MorphInterps.m_Items[0].m_Lemma;
		L.SetGramCodes( MorphInterps.m_Items[0].m_GramCodes, m_pRusGramTab);
		L.m_HomoWeight = "0";

		//   
		CPlmLine P = *it;

		it++;
		while (it != m_Lines.end()) 
		{
			if( !it->m_bHomonym) break;
			it =  Remove(it, true);
		};
		
		debug = m_Lines.size();

		
		for (long i =1; i <MorphInterps.size(); i++)
		{
			//    
			P.m_bHomonym = true;
			P.m_Lemma = MorphInterps.m_Items[i].m_Lemma;
			P.SetGramCodes(MorphInterps.m_Items[i].m_GramCodes, m_pRusGramTab);
			P.m_ParadigmId = "-1";
			P.m_HomoWeight = "0";
			it = m_Lines.insert (it, P);
		};
		debug = m_Lines.size();
		//  it == m_Lines.end(),  it++     it
		if (it ==  m_Lines.end()) break;
	};
};
*/
/*
     ,     ,   
   ,        , 
    . ,     ,  ""
 .   "",        
  .   ,    .
*/
/*
bool IsFemineSurname (const QWORD& Grammems) 
{
	return  (Grammems & ( _QM(rSurName) | _QM(rFeminum))) == ( _QM(rSurName) | _QM(rFeminum));

};

bool IsMasculineSurname (const QWORD& Grammems) 
{
	return  (Grammems & ( _QM(rSurName) | _QM(rMasculinum))) == ( _QM(rSurName) | _QM(rMasculinum));

};

void CMAPost::Rule_FemineSurnames() 
{
 CLineIter last_not_homonym_it = m_Lines.begin();	

 for (CLineIter it=m_Lines.begin(); it !=  m_Lines.end(); it++)
 {
	if ( !it->m_bHomonym ) last_not_homonym_it = it;
	if (it->m_TokenType != RLE) continue;
	if (!IsMasculineSurname(it->m_Grammems)) continue;

	bool bHasFemine = false;

	for (CLineIter tmp_it = last_not_homonym_it; tmp_it != m_Lines.end(); tmp_it++) 
	{
	  if (tmp_it != last_not_homonym_it)
	    if( !tmp_it->m_bHomonym ) break;

	  if (IsFemineSurname(tmp_it->m_Grammems)) 
		  bHasFemine = true;
	  
	};

	
	//	   -    !
	
	if (bHasFemine) continue;

	CPlmLine P = *it;

	SurnameHypotsVec MorphInterps;
	GetSurnamesHypots(it, MorphInterps);
	for (int i=0; i < MorphInterps.size(); i++)
	{
		const string& GramCodes = 	MorphInterps.m_Items[i].m_GramCodes;
		if (GramCodes.empty() || (GramCodes == "??")) continue;
		QWORD G = m_pRusGramTab->GetAllGrammems(GramCodes.c_str());

		if (IsFemineSurname(G))
		 {
		   P.m_bHomonym = true;
		   P.m_Lemma = MorphInterps.m_Items[i].m_Lemma;
		   P.SetGramCodes(MorphInterps.m_Items[i].m_GramCodes, m_pRusGramTab);
		   P.m_ParadigmId = "-1";
		   P.m_HomoWeight = "0";
		   it++;
		   m_Lines.insert (it, P);
		};
	};

 };
};

*/


/*
       20-.  ,  ,   
	 ,      "$" ("2-", "3-")     ,  , 
	   .   .  
	   ("3-"),  ,    .
	  .
*/
void CMAPost::Rule_NumeralAdjectives()    
{
	//  
	//	20                            4 0 4 DC -1
	//  -                               1 4 1 PUN HYP -1
	//	                              2 5 2 RLE aa ?? -1
	//    
	//	20#                            4 0 4 RLE +=   -1 #0

	for (CLineIter it=m_Lines.begin(); it !=  m_Lines.end(); it++)
	{
		if (it->m_TokenType != NUM) continue;
		next_iter();
		if (next_it->GetWord() != "-") continue;
		next_next_iter();
		if ( next_next_it->m_TokenType != RLE )  continue;
		if (next_next_it->m_bHomonym) continue;

	    string WordForm = "-"+next_next_it->GetWord();
		CFormInfo P;
		if (GetParadigmByFormAndPOS(WordForm, ADJ_FULL, 0, P))
		{
		  SetParadigmToLineAndDelHomonyms (next_next_it, P);
		};

		next_next_it->SetWord( it->GetWord() +'#' + next_next_it->GetWord());

		it = Remove(it, true);
		it = Remove(it, true);
	};

};

// Kinds of markers collected by CMAPost::Rule_QuoteMarks.
enum QuoteMarkEnum
{
	QuoteMark = 0,	// stored for a line whose word is the double quote character
	EOS = 1		// stored for a non-homonym line flagged with m_bSent2
};
// One marker recorded by CMAPost::Rule_QuoteMarks: its kind, the line it
// refers to, and a flag that is false until Rule_QuoteMarks sets it.
struct CQuoteMark
{
	QuoteMarkEnum	m_TokenType;
	CLineIter		m_LineIter;
	bool			m_bFirstBeforeLastQuote;

	// only the flag gets a default; the other members are filled in by the caller
	CQuoteMark() : m_bFirstBeforeLastQuote(false)
	{
	}
};

/*
	Sets m_bQuoteMarks on the lines enclosed in double quotes and then removes
	the quote lines themselves from m_Lines. Works in four passes:
	  1. collect all quote lines and sentence ends into QuoteMarks;
	  2. flag, for every sentence end, the second quote counted backwards from it;
	  3. pair the quotes and mark the lines between them;
	  4. delete the quote lines, keeping the m_bSent2 flag on the preceding line.
*/
void CMAPost::Rule_QuoteMarks()    
{
	// markers in text order
	vector<CQuoteMark> QuoteMarks;
	
	/* Pass 1: record every line whose word is a double quote and every
	   sentence end (a non-homonym line with m_bSent2). */
	for (CLineIter it=m_Lines.begin(); it !=  m_Lines.end(); it++)
	{
		if (it->GetWord() == "\"")
		{
			CQuoteMark Q;
			Q.m_TokenType = QuoteMark;
			Q.m_LineIter = it;
			QuoteMarks.push_back(Q);
		}
		if ( it->m_bSent2 ) 
		{
			// m_bSent2 on a homonym line does not produce a marker
			if (it->m_bHomonym) continue;
			// step over the homonym lines that follow, then one line back, so
			// that the EOS marker points at the last line of the group
			it++;
			while ( it != m_Lines.end() ) 
			{
				if( !it->m_bHomonym ) break;
				it++;
			};
			it--;
			CQuoteMark Q;
			Q.m_TokenType = EOS;
			Q.m_LineIter = it;
			QuoteMarks.push_back(Q);
		};
	};
	if (QuoteMarks.size() == 0) return;

	/*
		Pass 2: for each EOS marker walk back over the quote markers that
		directly precede it; the second one reached gets m_bFirstBeforeLastQuote.
	*/
	long i=QuoteMarks.size() - 1;
	for (; i >=0; i--)
	 if (QuoteMarks[i].m_TokenType == EOS)
		{
		  long Count = 0;
		  for (long j=i-1; (j>=0) && (QuoteMarks[j].m_TokenType == QuoteMark); j--)
		  {
			  Count++;
			  if (Count == 2) 
			  {
				  QuoteMarks[j].m_bFirstBeforeLastQuote = true;
				  break;
			  }
		  };
		};

	// Pass 3: pair an opening quote with the next marker if that is a quote too.
	// The loop has no increment clause: i is advanced by hand on every path.
	for (i=0; i < QuoteMarks.size(); )
	 if (QuoteMarks[i].m_TokenType == QuoteMark)
	 {
		 CLineIter Start =  QuoteMarks[i].m_LineIter;
		 i++;
		 if ( i == QuoteMarks.size()) continue;
		 if ( QuoteMarks[i].m_TokenType != QuoteMark ) continue;
		 CLineIter End = QuoteMarks[i].m_LineIter;
		 // a quote flagged in pass 2 is not used as the closing one; the quote
		 // after it is taken instead
		 if (QuoteMarks[i].m_bFirstBeforeLastQuote)
		 {
			 i++;
			 if ( i == QuoteMarks.size()) continue;
	 		 if (QuoteMarks[i].m_TokenType != QuoteMark) continue;
			 End = QuoteMarks[i].m_LineIter;
		 };
		 // mark [Start, End): the opening quote line is included, End is not
		 for (CLineIter it = Start; it != End; it++)
			it->m_bQuoteMarks = true;
		 i++;

	 }
	 else
		 i++;


	/*
		Pass 4: delete all quote lines, last to first.
	*/
 	for (i=QuoteMarks.size()-1; i >=0; i--)
	 if (QuoteMarks[i].m_TokenType == QuoteMark)
	 {
		CLineIter it = QuoteMarks[i].m_LineIter;
		/*
			If the quote being deleted carries m_bSent2 and is not the first
			line, the flag is set on prev_it.
			NOTE(review): prev_iter() is defined outside this chunk; presumably
			it declares prev_it as the line preceding it - confirm.
		*/
		if (it != m_Lines.begin())
 		 if ( it->m_bSent2 )
		 {
			prev_iter();
			prev_it->m_bSent2 = true;
		 };
		/*
			next_it is computed here but is not used afterwards in this function.
		*/
		CLineIter next_it = it;
		next_it++;

		it = Remove(it, true);
	 };
};


/*
     
*/
void CMAPost::Rule_ILE()    
{

	for (CLineIter it=m_Lines.begin(); it !=  m_Lines.end(); it++)
	{
		if ( it->m_TokenType == LLE ) 
		{
	         it->SetMorphUnknown();
			 it->m_Lemma = it->GetUpperWord();
			 it->SetGramCodes(m_DURNOVOGramCode, m_pRusGramTab);
		     it->m_HomoWeight = "0";
		}
	};
			
};


/*
      ,       [ ],
       ,          [ ]. 
        #_.
*/
/*
	Collapses a three-part construction into one line. The pattern is:
	  - a non-homonym line whose upper-cased word equals the first literal,
	  - after its homonyms and spaces, a line whose word equals the second literal,
	  - after that line's homonyms and spaces, a word group containing a reading
	    that either equals one of two literals or is ADJ_FULL with rComparative.
	On a match the chosen reading is kept, " #_ " is appended to its m_GraphDescr,
	and every other line from it up to that reading, plus the homonym lines that
	follow it, is removed.
	NOTE(review): the compared string literals are empty in this copy of the
	file; presumably non-ASCII text was lost in an encoding conversion - confirm
	against the original source before relying on the comparisons.
*/
void CMAPost::Rule_KAK_MOZHNO()    
{
	for (CLineIter it=m_Lines.begin(); it !=  m_Lines.end(); it++)
	{
		// first word of the pattern: must be the head (non-homonym) line
		if (it->m_bHomonym) continue;
		
		if (it->GetUpperWord() != "") continue;

		// second word of the pattern: skip the homonyms of the first word and any spaces
		// NOTE(review): next_iter() is defined outside this chunk; it is expected to provide next_it - confirm
		next_iter();
		for (;  next_it != m_Lines.end(); next_it++)
 		    if (!next_it->m_bHomonym) 
				break;

		next_it = PassSpaces (next_it);
		if ( next_it == m_Lines.end() ) break;
		if ( next_it->GetUpperWord() != "") continue;
		// third word of the pattern: again skip homonyms and spaces
		next_it++;
		for (;  next_it != m_Lines.end(); next_it++)
			if (!next_it->m_bHomonym) 
				break;
		next_it = PassSpaces (next_it);

		// scan the readings of the third word (first_it and the homonym lines
		// after it) for an acceptable one; best_it stays at end() if none is found
		CLineIter first_it = next_it;
		CLineIter best_it = m_Lines.end();
		for (; next_it != m_Lines.end(); next_it++)
		{
			// a non-homonym line other than first_it ends the group
			if (next_it != first_it)
			  if (!next_it->m_bHomonym ) break;

			// accepted by word form
			if  (    (next_it->GetUpperWord() == "")
				  || (next_it->GetUpperWord() == "")
				)
			{
				best_it = next_it;
				break;
			};


			// accepted as a comparative of a full adjective
			if (next_it->m_Pos != ADJ_FULL) continue;
			if ( (next_it->m_Grammems & _QM(rComparative)) == 0) continue; 
			best_it = next_it;
			break;
		};

		if (best_it != m_Lines.end()) 
		{
			/*
				remove the readings of the third word that precede the chosen one
			*/
			for (next_it=first_it; next_it != best_it;)
			{   
				next_it = Remove(next_it, true);
				if (next_it == m_Lines.end()) break;
			};

			next_it=best_it;

			// remove the homonym lines that follow the chosen reading
			for (next_it++; next_it != m_Lines.end(); )
				{
			        if ( !next_it->m_bHomonym ) break;
				    
					next_it = Remove(next_it, true);
				}

			/*
				tag the surviving line in its graphematical descriptor
			*/
			best_it->m_GraphDescr += " #_ ";

			/*
				remove everything from the first word of the pattern up to the chosen reading
			*/
			for (next_it = it; next_it != best_it; ) 
			{

				next_it = Remove(next_it, true);
				if (next_it == m_Lines.end()) break;
			};

			// it was removed above; continue the outer loop from the surviving line
			it = best_it;

		};

		
		
	}


};

/*
	Tells whether the upper-cased word of the given line is one of the four
	strings accepted as a separator between the repeated words handled by
	Rule_Redublication. The comparisons are kept in their original order.
*/
bool CMAPost::CanBeDubleDelimiter(CLineIter it)
{
	const string& Upper = it->GetUpperWord();

	if (Upper == "")	return true;
	if (Upper == ",")	return true;
	if (Upper == "")	return true;
	return Upper == "-";
}

/*
	Collapses a word repeated two or more times, with the repetitions separated
	by delimiters accepted by CanBeDubleDelimiter, into its first occurrence.
	The removed span starts at the first delimiter; " # " is appended to
	m_GraphDescr of the surviving word and of its homonym lines, and m_bSent2 is
	carried over if any removed line had it.
	NOTE(review): one compared string literal is empty in this copy of the file;
	presumably non-ASCII text was lost in an encoding conversion - confirm.
*/
void CMAPost::Rule_Redublication()    
{
	for (CLineIter it=m_Lines.begin(); it !=  m_Lines.end(); it++)
	{
		if (it->m_bHomonym) continue;
		if  ( it->m_TokenType != RLE ) continue;
		// words that carry an m_bOborot1 or m_bOborot2 mark are left alone
		if (		it->m_bOborot1
				 ||	it->m_bOborot2
			)	
			continue;
		// move to the first non-homonym line after the word, then past spaces
		// NOTE(review): next_iter() is defined outside this chunk; it is expected to provide next_it - confirm
		next_iter();
		for (;  next_it != m_Lines.end(); next_it++)
 		    if (!next_it->m_bHomonym) break;

		next_it = PassSpaces (next_it);

		if ( next_it == m_Lines.end() ) break;
		// the removal below starts at this line
		CLineIter first_delimiter_iter = next_it;
		if (!CanBeDubleDelimiter(next_it)) continue;

		/* step over the delimiter and the homonym lines that follow it */
		
		for (next_it++;  next_it != m_Lines.end(); next_it++)
 		       if (!next_it->m_bHomonym) 
				   break;

		next_it = PassSpaces (next_it);

		// count how many times the word occurs in a row; the first occurrence is it itself
		const string& WordForm = it->GetUpperWord();
		long CountDuble = 1;
		for (; next_it != m_Lines.end(); )
		{
			 string s = next_it->GetUpperWord();
			 // a line matching this literal is stepped over and the line after it is compared instead
			 if (s == "")
			 {
			   next_it++;  
			   if ( next_it == m_Lines.end() ) break;
			   s = next_it->GetUpperWord();
			 };

			 // the chain ends at the first word that differs from the original one
			 if (s != WordForm) break;

			 // a repetition that carries an m_bOborot1 or m_bOborot2 mark ends the chain
			 if (		next_it->m_bOborot1
					 ||	next_it->m_bOborot2
				)	
				break;

			 CountDuble++;
			 for (next_it++;  next_it != m_Lines.end(); next_it++)
 		       if (!next_it->m_bHomonym) 
				   break;
			 next_it = PassSpaces (next_it); 
			 if ( next_it == m_Lines.end() ) break;
			 if (!CanBeDubleDelimiter(next_it)) break;

			 // a delimiter that carries an m_bOborot1 or m_bOborot2 mark ends the chain
			 if (		next_it->m_bOborot1
					 ||	next_it->m_bOborot2
				)	
				break;
	 	 	 /* step over the delimiter and the homonym lines that follow it */
		 	 for (next_it++;  next_it != m_Lines.end(); next_it++)
 		       if (!next_it->m_bHomonym) 
				   break;

		};

		/*
			decide where the removed span ends
		*/

		// by default the span ends at the first non-space line at or after next_it
		CLineIter last_it = PassSpaces(next_it);
		// BackSpaces steps back from next_it over lines that start with a space
		// character; if it lands on a delimiter, the span ends there instead, so
		// that delimiter is kept
		CLineIter it2 =  BackSpaces (next_it);
		if (CanBeDubleDelimiter(it2)) 
			last_it = it2;
				
		if (CountDuble < 2) continue;
		// remove [first_delimiter_iter, last_it), remembering whether any removed line had m_bSent2
		bool HasPRD2 = false;
		for (next_it=first_delimiter_iter; next_it != last_it; )
		{
			if  ( next_it->m_bSent2 )
				HasPRD2 = true;
 			next_it = Remove(next_it, true);
			if (next_it == m_Lines.end()) break;
		};

		/* tag the surviving word and all its homonym lines */
		it->m_GraphDescr += " # ";
		if (HasPRD2) it->m_bSent2 = true;
		next_it = it;
		for (next_it++;  next_it != m_Lines.end(); next_it++)
		{
 		    if (!next_it->m_bHomonym) break;
			next_it->m_GraphDescr += " # ";
			if (HasPRD2) next_it->m_bSent2 = true;;
		};
		
	};


};

/*
	Moves forward from it while the first character of the line's word
	satisfies is_gra_space. Returns the first line that does not, or
	m_Lines.end() if there is none.
*/
CLineIter CMAPost::PassSpaces(CLineIter it)
{
	while (		(it != m_Lines.end())
			&&	is_gra_space((unsigned char)it->GetWord()[0])
		  )
		it++;

	return it;
}

/*
	Moves backwards from it while the first character of the line's word
	satisfies is_gra_space. If it is m_Lines.end(), the walk starts from the
	last line. The walk stops at m_Lines.begin() at the latest, and that line is
	returned without being tested.
	For an empty m_Lines the iterator is returned unchanged, because there is
	no line to step back onto (decrementing begin() would be undefined).
*/
CLineIter CMAPost::BackSpaces(CLineIter it) 
{
	// empty container: begin() == end(), so "it--" must not be executed
	if (m_Lines.begin() == m_Lines.end()) return it;

	if (it == m_Lines.end()) it--;

	while (it != m_Lines.begin())
	{
		if (!is_gra_space((unsigned char)it->GetWord()[0])) break;
		it--;
	};
	return it;
	
};

/*
	Replaces a two-word construction by a single PRONOUN_P line.
	The pattern is: a non-homonym RLE line equal to the first literal with
	m_bOborot1 == m_bOborot2, then (after homonyms and spaces) a line equal to
	the second literal, then a word whose gram codes contain rAccusativ or
	rNominativ. The first line receives the paradigm found by
	GetParadigmByNormAndPOS for the nominative, extended with the ancode found
	for the accusative, and the lines between it and the third word are removed.
	NOTE(review): the string literals used here are empty in this copy of the
	file; presumably non-ASCII text was lost in an encoding conversion - confirm.
*/
void CMAPost::Rule_CHTO_ZA() 
{
	for (CLineIter it=m_Lines.begin(); it !=  m_Lines.end(); it++)
	{
		if (it->m_bHomonym) continue;
		if  (it->m_TokenType != RLE ) continue;
		if (it->GetUpperWord() != "") continue;
		// the two oborot marks must be both set or both clear
		if( it->m_bOborot1 != it->m_bOborot2)  continue;
		// second word: skip the homonyms of the first word and any spaces
		// NOTE(review): next_iter() is defined outside this chunk; it is expected to provide next_it - confirm
		next_iter();
		for (; next_it != m_Lines.end(); next_it++)
			if (!next_it->m_bHomonym) break;

		next_it = PassSpaces(next_it);
        if (next_it ==m_Lines.end()) return;  

		if (next_it->GetUpperWord() != "") continue;

		// rejected when m_bOborot1 is set on the second word without m_bOborot2
		if( next_it->m_bOborot1 && !next_it->m_bOborot2)  continue;

		// third word: skip the homonyms of the second word and any spaces
		for (next_it++; next_it != m_Lines.end(); next_it++)
			if (!next_it->m_bHomonym) 
				break;
		

		next_it = PassSpaces(next_it);
        if (next_it ==m_Lines.end()) return;  

		// all grammems of the third word's gram codes
		const string& CurrGramCodes = next_it->GetGramCodes();
		QWORD Grammems =     m_pRusGramTab->GetAllGrammems(CurrGramCodes.c_str());

		if ((Grammems  & (  _QM(rAccusativ) | _QM(rNominativ) )) > 0)
		 {
			 // number/gender requested from the pronoun: the genders of the
			 // third word when it has no plural reading, plural otherwise
			 QWORD KAK_Grams =0;
			 CFormInfo P;
			 string AuxGramcodes;
	 		 if ( (Grammems  & _QM(rPlural)) == 0)
			 	 KAK_Grams = (Grammems & rAllGenders);
			 else
				 KAK_Grams = _QM(rPlural);

			 // the accusative ancode is remembered, then P is overwritten by the nominative paradigm
			 if (!GetParadigmByNormAndPOS("", PRONOUN_P, _QM(rAccusativ)|KAK_Grams, P))  continue;
			 AuxGramcodes = P.GetSrcAncode();
			 if ( !GetParadigmByNormAndPOS("", PRONOUN_P, _QM(rNominativ)| KAK_Grams, P) ) continue;

			 // (four example sentences stood here; their text is lost in this copy of the file)
			 
			 
			 // SaveCount is not used after this line
			 long SaveCount = Count();
			 SetParadigmToLineAndDelHomonyms(it, P);
			 it->SetGramCodes( it->GetGramCodes() + AuxGramcodes, m_pRusGramTab);
			 it->DeleteOb1();
			 it->DeleteOb2();
			 it->SetWord("");
			  
			 // remove every line between the rewritten first word and the third word
			 it++;
			 while (it != next_it)
			 {
				 it = Remove(it, true);
				 if (it == m_Lines.end()) break;
			 };
		 }
	};


};

/*
	Builds a non-homonym line for the word "," with graph descriptor "PUN",
	empty lemma and gram codes, paradigm id "-1" and homonym weight "0", and
	inserts it into m_Lines in front of it.
*/
void CMAPost::InsertComma(CLineIter it)
{
	CPlmLine Comma;

	// position and length recorded for the inserted token
	Comma.m_FilePosition = 0;
	Comma.m_TokenLengthInFile = 1;

	// the statements stay in their original order: the setters below may
	// depend on the fields assigned before them
	Comma.m_bHomonym = false;
	Comma.m_GraphDescr = "PUN"; 
	Comma.m_HomoWeight = "0";
	Comma.m_Lemma = "";
	Comma.SetWord(",");
	Comma.SetGramCodes("", m_pRusGramTab);
	Comma.m_ParadigmId = "-1";

	m_Lines.insert(it, Comma);
}
/*
     "  ...  ...",
 , "      ".
      
	"    ,   ".
*/

/*
	Handles the construction "<w1> <w2> ... <w3>": a non-homonym RLE line equal
	to the first literal, followed (after homonyms and spaces) by a line equal
	to the second literal, followed within ten lines by a line equal to the
	third literal, with no VERB, INFINITIVE or ADVERB_PARTICIPLE line in
	between. When the pattern is found, a synthesized verb line and a comma are
	inserted in front of <w3>.
	A candidate for which <w3> is not found is skipped and the scan goes on with
	the next line; previously the whole rule stopped at the first such candidate.
	NOTE(review): the string literals used here are empty in this copy of the
	file; presumably non-ASCII text was lost in an encoding conversion - confirm.
*/
void CMAPost::Rule_VOT_UZHE() 
{
	for (CLineIter it=m_Lines.begin(); it !=  m_Lines.end(); it++)
	{
		if (it->m_bHomonym) continue;
		if  (it->m_TokenType != RLE) continue;
		if (it->GetUpperWord() != "") continue;

		// second word: skip the homonyms of the first word and any spaces
		// NOTE(review): next_iter() is defined outside this chunk; it is expected to provide next_it - confirm
		next_iter();
		for (; next_it != m_Lines.end(); next_it++)
			if (!next_it->m_bHomonym) break;
		next_it = PassSpaces(next_it);
        if ( next_it == m_Lines.end() ) return;  

		if ( next_it->GetUpperWord() != "" ) continue;

		// skip the homonyms of the second word and any spaces
		for (next_it++; next_it != m_Lines.end(); next_it++)
			if (!next_it->m_bHomonym) 
				break;
		next_it = PassSpaces(next_it);
		if ( next_it == m_Lines.end() ) return;  

		/*
			look for the third word within the next ten lines
		*/
		bool bFound =  false;
		int WordCount =0;
		for (; (WordCount < 10) && (next_it != m_Lines.end()); next_it++)
		{
			if (next_it->GetUpperWord() == "") 
			{	
				bFound = true;
				break;
			};

			// a verbal form before the third word cancels the pattern; m_Pos is
			// only consulted for lines that have gram codes
			if (next_it->GetGramCodes().size() > 0)
			{
	  			BYTE pos = next_it->m_Pos;
				if (   (pos == VERB)  
					|| (pos == INFINITIVE)  
					|| (pos == ADVERB_PARTICIPLE)  
				   )
				   break;
			};

			WordCount ++;
		};

		// this candidate failed; later candidates must still be examined
		if (!bFound) continue;
		
		// the verb line to insert; it has no counterpart in the input, hence zero position and length
		CPlmLine P;
		P.m_FilePosition = 0;
		P.m_TokenLengthInFile = 0;
		P.m_bHomonym = false;
		P.m_TokenType = RLE;
		P.m_Register = LowLow;
		P.m_HomoWeight = "1";
		P.m_Lemma = "";
		P.SetWord("");
		CFormInfo Paradigm;
		if (!GetParadigmByFormAndPOS("",VERB, 0, Paradigm)) continue;
		P.SetGramCodes(Paradigm.GetSrcAncode(), m_pRusGramTab);
		P.SetMorph(Paradigm.GetLemSign()[0],Paradigm.GetCommonAncode(), Paradigm.GetParadigmId());
		P.m_bQuoteMarks = next_it->m_bQuoteMarks;

		// both inserts go in front of next_it, so the result is: verb, comma, <w3>
		m_Lines.insert(next_it,P);
		InsertComma(next_it);
	};
}


/*
    Aa  AA,          
     .
*/

// True when the part of speech stored in the line (m_Pos) is NOUN.
bool CMAPost::IsNoun(const CPlmLine& P) const
{
	return P.m_Pos == NOUN;
}


/*
     ,       ( Aa), 
          : ,, , ,, ,
  , , , , , , , , ,  
        
	:
	 +    
	 +  -
	 -  -    
*/

/*
	For an RLE word that is not found in the morphology and starts with an
	upper-case letter (m_bFirstUpperAlpha), keeps only the readings for which
	IsNoun() holds, provided at least one such reading exists. The line that
	directly follows a line flagged m_bSent2 is skipped.
	All leading non-noun readings are now removed; previously only the head line
	was, so a non-noun homonym that took its place survived as the main reading.
*/
void CMAPost::Rule_UnknownNames() 
{	
	bool bPrevWasSent2 = false;
	for (CLineIter it=m_Lines.begin(); it !=  m_Lines.end(); it++)
	{
		// the line right after an m_bSent2 line is never processed
		if ( bPrevWasSent2 ) 
		{
			bPrevWasSent2 = false;
			continue;
		};
		bPrevWasSent2 = it->m_bSent2;

		if (it->m_bHomonym) continue;
		if ( it->m_TokenType != RLE ) continue;
		if ( it->IsFoundInMorphology() ) continue;
		if ( !it->m_bFirstUpperAlpha) continue;
		

		/*
			First pass over the word group: mark every non-noun reading in
			m_bToDelete and remember whether any noun reading exists.
		*/
		it->m_bToDelete = !IsNoun(*it);
		bool bHasNoun = !it->m_bToDelete;

		CLineIter next_it = it;
		next_it++;

		for (; next_it != m_Lines.end(); next_it++)
		{
			if (!next_it->m_bHomonym) break;
			next_it->m_bToDelete  = !IsNoun(*next_it);
			bHasNoun |= !next_it->m_bToDelete ;
		};

		// without a noun reading nothing is deleted
		if (!bHasNoun) continue;

		// Remove the marked readings at the head of the group. bHasNoun
		// guarantees an unmarked reading further on in the same group, so the
		// loop stops inside it. Like the original single removal, this relies on
		// Remove() returning the line that follows the removed one.
		while (it->m_bToDelete)
			it = Remove(it, true);

		// remove the marked readings among the remaining homonym lines
		next_it = it;
		next_it++;
		for (; next_it != m_Lines.end(); )
		{
			if (!next_it->m_bHomonym) break;

			if ( next_it->m_bToDelete)
				next_it = Remove(next_it, true);
			else
				next_it++;

		};

		// the first surviving reading becomes the head of the group
		it->m_bHomonym = false;
	};
};


/*
	   "",     
	,      ""   
	+   
	+  
	?     ( ()  )
*/


/*
	Removes the readings with a particular lemma from a word group when the
	word that follows is a numeral (NUMERAL or NUMERAL_P) or has a particular
	lemma itself.
	NOTE(review): all lemma literals compared here are empty in this copy of the
	file; presumably non-ASCII text was lost in an encoding conversion. Whether
	the lemma tested at the top of the loop differs from the lemma removed
	further down cannot be seen from here - confirm against the original source.
*/
void CMAPost::Rule_SOROK() 
{	
	for (CLineIter it=m_Lines.begin(); it !=  m_Lines.end(); it++)
	{
		// only lines with the trigger lemma start the rule
		if (it->m_Lemma != "") continue;

		// walk back to the head (non-homonym) line of the group, or to the first line of the text
		CLineIter start_it = it;
		for (;
			 (start_it != m_Lines.begin()) && start_it->m_bHomonym; 
			 start_it--)
		;
		
		// walk forward to the first line after the group
		CLineIter end_it = start_it;

		if (!start_it->m_bHomonym) end_it++;

		for (; end_it != m_Lines.end(); end_it++)
		{
			if (!end_it->m_bHomonym) break;
		};

		end_it = PassSpaces(end_it);

		if ( end_it == m_Lines.end() ) break;

		// the following word must have gram codes
		if (end_it->GetGramCodes().size() == 0) continue;


		// ... and be a numeral or have the given lemma
		if (   (end_it->m_Pos != NUMERAL) 
			&& (end_it->m_Pos != NUMERAL_P)
			&& (end_it->m_Lemma != "")
			) continue;

		/*
			remove every line between start_it and end_it whose lemma equals the literal
		*/

		for (CLineIter i = start_it; i != end_it; )
		{
			if (i->m_Lemma == "")
				i = Remove(i, true);
			else
				i++;

			if (i == m_Lines.end()) break;
		};

		// NOTE(review): it is dereferenced after the removal loop; this is only
		// valid if the line it points to was not among the removed ones - confirm
		it->m_bHomonym = false;
	};
};



/*
	  ,       , 
	        "",     .
	,   ""    "" ("  "),        
	 .
*/


/*
	Drops the rInitialism reading of an RLE word that is not written in
	upper case (m_Register != UpUp) when the word has another reading:
	  - an initialism reading stored on a homonym line is removed;
	  - an initialism reading stored on the head line is removed only if a
	    homonym line follows, and that line becomes the new head.
	The end of m_Lines is now checked before next_it is dereferenced and before
	the loop advances after a removal.
*/
void CMAPost::Rule_Abbreviation() 
{	
	for (CLineIter it=m_Lines.begin(); it !=  m_Lines.end(); it++)
	{
		if (it->m_TokenType != RLE) continue;
		if (it->m_Register == UpUp) continue;
		if ( (it->m_Grammems &  _QM(rInitialism) ) == 0) continue;

		if (it->m_bHomonym)
		{
			it = Remove(it, false);
			// the removed line was the last one: the loop increment must not run on end()
			if (it == m_Lines.end()) break;
			// NOTE(review): as before, the loop increment now steps over the
			// line returned by Remove(), so that line is not examined - confirm
			// this is intended
			continue;
		};

		CLineIter next_it = it;
		next_it++;
		// a head line at the very end of the text has no homonym to promote
		if ( (next_it != m_Lines.end()) && next_it->m_bHomonym )
		{
			next_it->m_bHomonym = false;
			it = Remove(it, false);
		};
	};
};


/*
	     "-", "-", ("" +  )
	    ,      ,
	,   .
*/


/*
	Turns an out-of-dictionary hyphenated word with a given three-character
	prefix and one of two given two-character endings into an adverb, provided
	the part after the prefix (with a suffix appended for one of the endings) is
	recognised as a full adjective. Homonym lines of the word are removed, the
	lemma becomes the upper-cased word and the gram codes are those returned for ADV.
	NOTE(review): the prefix, ending and suffix literals are empty in this copy
	of the file; presumably non-ASCII text was lost in an encoding conversion.
	Which ending belongs to which literal is given by position only - confirm
	against the original source.
*/
void CMAPost::Rule_AdverbFromAdjectives() 
{	
	for (CLineIter it=m_Lines.begin(); it !=  m_Lines.end(); it++)
	{
		const CPlmLine& L = *it;
		// NOTE(review): "length() - 2" wraps around for words shorter than two
		// characters; the prefix test before it is the only guard - confirm the
		// prefix literal is at least two characters long in the original
		if(    !L.IsFoundInMorphology()
			&& L.m_bHyphenWord
			&& (L.GetUpperWord().substr(0,3) == "-")
			&&	(		(L.GetUpperWord().substr(L.GetUpperWord().length() - 2)  == "")
					||	(L.GetUpperWord().substr(L.GetUpperWord().length() - 2)  == "")
				)
				
		  )
		{
			// build the adjective form to look up: the word without its first
			// three characters, plus a suffix for one of the two endings
			string S = it->GetUpperWord();
			if (S.substr(S.length() - 2)  == "")
				S = S.substr(3) + "";	 // ending that needs the suffix appended
			else
				S = S.substr(3);	 // ending that is looked up as it is

			// the rule applies only if that form is known as a full adjective
			CFormInfo FormInfo;
			if (!GetParadigmByFormAndPOS(S, ADJ_FULL, 0, FormInfo) ) 
				continue;

			// remove all homonym lines that follow the word
			CLineIter next_it = it;
			next_it++;
			while	(		(next_it != m_Lines.end())
						&&	next_it->m_bHomonym
					)
			{
				next_it = Remove(next_it, false);
			};
			// re-interpret the word as an adverb with no paradigm ('-', "??", -1)
			it->m_Lemma = it->GetUpperWord();
			string NewGramCodes;
			m_pRusGramTab->GetGramCodeByGrammemsAndPartofSpeechIfCan (ADV, 0, NewGramCodes);
			it->SetGramCodes(NewGramCodes, m_pRusGramTab);
			it->SetMorph('-',"??", -1);
		};
	};
};



/*
	Writes every line of m_Lines, as returned by GetStr(), to the file named s,
	one per text line. The file is opened in "w" mode. If it cannot be opened,
	the assert fires in debug builds and the function returns without writing.
*/
void CMAPost::SaveToFile(string s)
{
	FILE* out = fopen(s.c_str(), "w");
	assert(out);
	if (!out) return;

	CLineIter line = m_Lines.begin();
	while (line != m_Lines.end())
	{
		fprintf(out, "%s\n", line->GetStr().c_str());
		line++;
	}

	fclose(out);
}


/*
	      ,       
	   
*/
/*
	Removes proper-name readings (m_bMorphName, m_bMorphSurname, or any of the
	grammems rSurName, rName, rPatronymic, rToponym) from words written entirely
	in lower case (m_Register == LowLow), as long as the word keeps another
	reading: a head line is removed only when a homonym line follows it.
	After a removal the loop continues with the iterator returned by Remove().
	The former "it--" followed by the loop's "it++" gave the same position but
	was undefined when the removed line was the first one in m_Lines.
*/
void CMAPost::Rule_FilterProperName()
{
	for (CLineIter it=m_Lines.begin(); it !=  m_Lines.end(); )
	{
		if (it->m_Register != LowLow)
		{
			it++;
			continue;
		}
		const CPlmLine& L = *it;

		const bool bProperName =
					L.m_bMorphName 
				||	L.m_bMorphSurname
				||	((L.m_Grammems & (_QM(rSurName) | _QM(rName) | _QM(rPatronymic) | _QM(rToponym))) != 0);
		if (!bProperName)
		{
			it++;
			continue;
		}

		// a head line is the only reading unless a homonym line follows; the only reading is kept
		if (!it->m_bHomonym)
		{
			CLineIter next_it = it;
			next_it++;
			if (	(next_it == m_Lines.end())
					||	(!next_it->m_bHomonym)
				)
			{
				it++;
				continue;
			}
		}

		// no increment here: the line that took the place of the removed one is examined next
		it = Remove(it, false);
	}
	
}

/*
	    ,     ,
	  -> 
	   ""     .
    ,  -> 
	            -> 
*/
/*
	Re-computes the lemma of every RLE line that carries rPatronymic. The word
	is passed to the lemmatizer; in the first paradigm whose source ancode
	equals the line's gram codes, the first form that is patronymic, nominative,
	singular and shares a gender with the line becomes the new lemma. If no
	such form exists the lemma is left unchanged.
*/
void CMAPost::Rule_ChangePatronymicLemmas()
{
	for (CLineIter it=m_Lines.begin(); it !=  m_Lines.end(); it++)
	{
		if (it->m_TokenType != RLE) continue;
		if ((it->m_Grammems & _QM(rPatronymic) ) == 0) continue;

		vector<CFormInfo> Paradigms;
		string Word = it->GetWord();
		m_pRusLemmatizer->CreateParadigmCollection(false, Word, true, Paradigms);
		for (size_t k=0; k < Paradigms.size(); k++)
		{
			// only the paradigm that produced the line's current gram codes is of interest
			string AnCode = Paradigms[k].GetSrcAncode();
			if (!(AnCode == it->GetGramCodes())) continue;

			for (size_t j=0; j<Paradigms[k].GetCount(); j++)
			{
				// start from zero so that the tests below never read an
				// indeterminate value if GetGrammems() leaves g untouched
				QWORD g = 0;
				m_pRusGramTab->GetGrammems(Paradigms[k].GetAncode(j).c_str(), g);
				if	(		(g &  _QM(rPatronymic))
						&&  (g & _QM(rNominativ)) 
						&&  (g & _QM(rSingular)) 
						&&	(rAllGenders & g & it->m_Grammems)
					)
				{
					it->m_Lemma = Paradigms[k].GetWordForm(j);
					break;
				}
			};

			// later paradigms are not examined once the matching one has been handled
			break;
		}
	}

};

/*
	Resolves the homonymy of one particular word in two steps.
	  1. When the word follows a comma, only its CONJ reading is kept.
	  2. For any occurrence of the word, the line after its word group is
	     inspected: if that line is "?", the CONJ readings are removed,
	     otherwise the PARTICLE readings are removed.
	prev_it tracks the most recent line whose token type is not OTHER_TOKEN_TYPE.
	NOTE(review): the literal the word is compared with is empty in this copy of
	the file; presumably non-ASCII text was lost in an encoding conversion -
	confirm against the original source.
*/
void CMAPost::Rule_No()
{
	CLineIter prev_it = m_Lines.begin();
	for (CLineIter it=m_Lines.begin(); it !=  m_Lines.end(); it++)
	{
		// step 1: the word right after a comma
		if (		(it->m_TokenType == RLE)  
				&&	(it->GetUpperWord() == "" ) 
				&&	(prev_it->GetUpperWord() == ",")
			)
		{
			if (it->m_Pos == CONJ)
			{
				// the current reading is already CONJ: remove the homonym lines that follow
				CLineIter next_it = it;
				next_it++;
				while (next_it != m_Lines.end() && next_it->m_bHomonym)
					next_it = Remove(next_it, false);		
			}
			else
			{
				// walk through the word group, removing every reading that is not CONJ;
				// it ends up on the first line after the group
				do {
					if (it->m_Pos != CONJ)
						it = Remove(it, false);		
					else
						it++;
				}
				while (it != m_Lines.end() && it->m_bHomonym);

				if (it == m_Lines.end()) break;
			};
		};


		// step 2: choose between PARTICLE and CONJ by looking at what follows the word group
		if (		(it->m_TokenType == RLE)  
				&&	(it->GetUpperWord() == "" ) 
			)
		{
			// next_it: the first non-homonym line after the word group
			CLineIter next_it = it;
			next_it++;
			for(; next_it != m_Lines.end() && next_it->m_bHomonym; next_it++);
			bool bParticle = 		(next_it != m_Lines.end())
								&&	(next_it->GetUpperWord() == "?" );

			{
				// remove the readings that contradict bParticle; it ends up on next_it
				for(; it != m_Lines.end() && it != next_it; )
				{
					if ( (it->m_Pos == PARTICLE) && !bParticle)
						it = Remove(it, false);		
					else
						if ( (it->m_Pos == CONJ) && bParticle)
							it = Remove(it, false);	
						else
						it++;

				};
				if (it == m_Lines.end()) break;
				
				
			};
		};


		// lines of type OTHER_TOKEN_TYPE do not count as the previous word
		if (it->m_TokenType != OTHER_TOKEN_TYPE) 
			prev_it = it;

	}

};

