// ==========  This file is under  LGPL, the GNU Lesser General Public Licence
// ==========  Dialing Posmorphological Module (www.aot.ru)
// ==========  Copyright by Dmitry Pankratov, Alexey Sokirko (1999-2002)

#include "MAPostMain.h"
#include "../common/PlmLine.h"



// Small fixed-size collection of line iterators: the candidate lines gathered
// for one position of a FIO format (the template argument 5 is presumably
// the capacity -- confirm against CSmallVector).
typedef CSmallVector<CLineIter,5> SmallHomonymsVec;

// Kind of one token in a FIO format description; see IsPartFio for the
// exact test applied to a text line for each kind.
enum FioItemTypeEnum 
{
	fiName,			// line has m_bMorphName set
	fiSurname,		// line has m_bMorphSurname set
	fiMiddle,		// capitalised word, at least 3 characters, with one of the listed 2-character endings
	fiAbbr,			// one-character capitalised token whose token type is not RLE
	fiStop,			// the token "."
	fiRoman,		// token type is ROMAN_NUM
	fiProbName,		// capitalised word flagged m_bName whose gram codes equal CMAPost::m_DURNOVOGramCode
	fiString,		// any other format token: compared with the lemma of the line
	fiOrdinal		// part of speech is NUMERAL_P
};

// One parsed token of a format string: its kind and the token text itself.
struct CFIOItem 
{
	FioItemTypeEnum m_fiType;
	string			m_ItemStr;
};


struct CFIOFormat 
{
	string m_FormatStr;
	bool   m_GleicheCase;
	vector<CFIOItem> m_Items;
	CFIOFormat () {};

	CFIOFormat (string  FormatStr, bool GleicheCase) 
	{
		m_FormatStr = FormatStr;
		m_GleicheCase = GleicheCase;
		StringTokenizer tok(FormatStr.c_str(), " ");
		while (tok()) 
		{
			string s = tok.val();
			FioItemTypeEnum t;
		    if  (s == "")
				t = fiName;
			else
			if (s == "")
				t = fiSurname;
			else
			if (s == "")
				t = fiMiddle;
		    else
			if (s == "")
				t = fiAbbr;	
			else
			if (s == "")
				t = fiStop;
		    else
			if (s == "_")
				t = fiRoman;
			else
		 	if (s == "NAM?")
				t = fiProbName;
			else
		 	if (s == "_")
				t = fiOrdinal;
		    else
			{
		       t = fiString;
			};
			CFIOItem I;
			I.m_fiType = t;
			I.m_ItemStr = s;
			m_Items.push_back(I);
		};	
	};
};


static void GetCommonVariants(const vector<SmallHomonymsVec>&  Parents, 
									   SmallHomonymsVec&   V,
							  vector<SmallHomonymsVec>&  Variants, 
								long       Position)
{ 
    if (Variants.size() > 1000) return;
	if (Position < V.size())
	for (long i=0; i< Parents[Position].size(); i++)
	{
			V.m_Items[Position] = Parents[Position].m_Items[i];
			GetCommonVariants(Parents, V, Variants, Position+1);
	}
	else
			Variants.push_back(V);

};


static bool IsPartFio(const CMAPost& C, const CFIOItem& I, const CPlmLine& P)  
{

  if ( P.m_bFI1 || P.m_bFI2  )    return false;

  if  (I.m_fiType == fiName)
	  return P.m_bMorphName;
  else
  if (I.m_fiType == fiSurname)
	  return P.m_bMorphSurname;
  else
	 if (I.m_fiType == fiMiddle)
	 {
		if (!P.m_bFirstUpperAlpha) return false;
		if (P.GetWord().length() < 3) return false;
		const char* suffix = P.GetUpperWord().c_str()+ P.GetUpperWord().length() - 2;
		return (   !strcmp(suffix,"")
				|| !strcmp(suffix,"")
				|| !strcmp(suffix,"")
				);
	 }
	 else
	 if (I.m_fiType == fiAbbr)
	 {
		 if (!P.m_bFirstUpperAlpha) return false;
		 if (P.GetWord().size() != 1) return false;
		 if (P.m_TokenType == RLE) return false;
		 return true;
	 }
	 else
	 if (I.m_fiType == fiStop)
	 {
		 if (P.GetWord().size() != 1) return false;
		 return P.GetWord() == ".";
	 }
	 else
	 if (I.m_fiType == fiRoman)
	 {
		 return P.m_TokenType == ROMAN_NUM;
	 }
 	 else
	 if (I.m_fiType == fiOrdinal)
	 {
		 return P.m_Pos == NUMERAL_P;
	 }
 	 if (I.m_fiType == fiProbName)
	 {
		 if (!is_upper_alpha((unsigned char)P.GetWord()[0], morphRussian)) return false;
		 if (!P.m_bName) return false;
		 // 
		 if (P.GetGramCodes() != C.m_DURNOVOGramCode) return false;

		 return true;
	 }
	 else
	 {
		 return P.m_Lemma == I.m_ItemStr;
	 };


   assert (false);
   return false;
};




// Tries to match Format against the lines of C.m_Lines starting at start_it.
//   C        - owner of the line list; also passed on to IsPartFio
//   Format   - the pattern to match
//   start_it - first line of the candidate sequence; must be dereferenceable
//   Lines    - on success receives one line iterator per format item (the first
//              surviving combination); left untouched on failure
// Returns true when at least one combination of candidate lines fits the
// format (and, if Format.m_GleicheCase is set, shares a grammem from rAllCases).
static bool CheckFioFormat (CMAPost& C, const CFIOFormat& Format, CLineIter start_it, vector<CLineIter>& Lines) 
{
   // A format without items would "match" with an empty Lines, and callers
   // use Lines[0] / Lines.back() after a successful match, so reject it.
   if (Format.m_Items.empty()) return false;

   // matching never starts on a line flagged as a homonym
   if (start_it->m_bHomonym) return false;

   // Hypots[k] collects the lines that fit the k-th format item
   vector<SmallHomonymsVec> Hypots;
   Hypots.resize(Format.m_Items.size());
   int CountOfVariants = 1;
   for (long ItemNo = 0; ItemNo < Format.m_Items.size(); ItemNo++)
   {
			CLineIter it=start_it;
			for (; !(it == C.m_Lines.end()); it++)
			{
				// the first line not flagged as a homonym (other than start_it
				// itself) ends the scan for this item
				if (  !it->m_bHomonym
					&& !(it == start_it) 
					) 
				break;

				// lines inside a non-single "oborot" cannot take part in a FIO
				if (it->IsPartOfNonSingleOborot()) return false;

				//  passing spaces (lines whose token type is OTHER_TOKEN_TYPE)
				for (; !(it == C.m_Lines.end()); it++)
				{
					if (it->m_TokenType != OTHER_TOKEN_TYPE)
						break;

					if (it->IsPartOfNonSingleOborot()) return false;
				};

				// ran off the end of the text while skipping
				if (it == C.m_Lines.end())
					break;

				if (IsPartFio(C, Format.m_Items[ItemNo], *it))
					Hypots[ItemNo].Add(it);
			};
			if (Hypots[ItemNo].size() == 0) return false;
			CountOfVariants *= Hypots[ItemNo].size();
			// the next item is looked for from where this scan stopped
			start_it = it;
   };

   // enumerate every combination "one candidate per item"
   SmallHomonymsVec V;
   vector<SmallHomonymsVec> Variants;
   Variants.reserve(CountOfVariants);
   V.m_ItemsCount = Hypots.size();
   GetCommonVariants(Hypots, V, Variants, 0);

   // drop the combinations whose lines have no common grammem within rAllCases
   if (Format.m_GleicheCase)
	for (long VarNo=0; VarNo < Variants.size(); VarNo++)
	{
		UINT Grammems = rAllCases;
		for (long i=0; i < Variants[VarNo].size(); i++)
		{
	 		  Grammems  &=   Variants[VarNo].m_Items[i]->m_Grammems;
		};
		if (Grammems  == 0)
		{
			Variants.erase(Variants.begin()+VarNo);
			// re-examine the element that moved into this slot
			VarNo--;
		};

	};

   if (Variants.size() == 0) return false;

   // report the first surviving combination
   Lines.resize(Variants[0].m_ItemsCount);

   for (long i=0;i < Variants[0].m_ItemsCount; i++)
	   Lines[i]  = Variants[0].m_Items[i];
   return true;

}

// Marks the lines in FioLines as one FIO group.
// For every chosen line all other readings of the same word are removed from
// C.m_Lines, the Ob1/Ob2 marks are deleted and the m_bHomonym flag is cleared.
// The first line then gets m_bFI1 and the last one m_bFI2.
//   C        - owner of the line list the iterators point into
//   FioLines - the chosen lines, in text order; an empty vector is a no-op
static void SetFio(CMAPost& C, vector<CLineIter>& FioLines) 
{
	// nothing to mark; also protects FioLines[0] / FioLines.back() below
	if (FioLines.empty()) return;

	bool HasSent2 = false;
	for (long i=0; i <FioLines.size(); i++)
	{
		// remember and clear the m_bSent2 flag of every FIO line
		HasSent2 |= FioLines[i]->m_bSent2;
		FioLines[i]->m_bSent2 = false;
			
		// The chosen line is flagged as a homonym: delete the lines in front of
		// it, up to and including the first one that is not flagged.
		// The loop condition is tested on the chosen line rather than on the
		// line being removed, so a reading sitting at m_Lines.begin() is
		// removed as well.
		if (FioLines[i]->m_bHomonym)
		{
			while (FioLines[i] != C.m_Lines.begin())
			{
				CLineIter prev = FioLines[i];
				prev--;
				bool IsHom = prev->m_bHomonym;
				C.Remove(prev, true);
				if (!IsHom) break;
			};
		}

		// delete the homonym-flagged lines that directly follow the chosen line
		CLineIter next = FioLines[i];
		next++;
		while (next != C.m_Lines.end() && next->m_bHomonym)
			next = C.Remove(next, true);

		FioLines[i]->DeleteOb1();
		FioLines[i]->DeleteOb2();
		FioLines[i]->m_bHomonym = false;
	};


	FioLines[0]->m_bFI1 = true;
	FioLines.back()->m_bFI2 = true;

	// One of the FIO lines carried m_bSent2 (cleared above). If no line from
	// the last FIO line to the end of the text carries it, put the flag back
	// on the last FIO line so that it is not lost. (The original comment
	// here refers to SENT_END.)
	if (HasSent2)
	{
		CLineIter it = FioLines.back();
		for (; it != C.m_Lines.end(); it++)
			if ( it->m_bSent2 )
				break;

		if (it == C.m_Lines.end())
			FioLines.back()->m_bSent2 = true;
	}

}

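/*
  Rule_Fio (below) looks for person-name (FIO) sequences in the text and
  marks their first and last lines.
  NOTE(review): the list of example name patterns that used to be in this
  comment is unreadable in this copy (only "II" survives) -- restore it from
  a correctly encoded version of the file.
*/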

// Finds FIO (person name) sequences in m_Lines.
// Step 1 builds the list of known formats, step 2 flags capitalised lines
// that carry the rName / rSurName grammems, step 3 walks the text and applies
// the first format that matches at each position.
void CMAPost::Rule_Fio() 
{
	// Format string plus the "same case required" flag, in priority order;
	// the entry with a null string terminates the table.
	// NOTE(review): the token names inside these literals are not legible in
	// this copy (only spaces, '?', '_' remain); they are reproduced unchanged.
	struct FormatDef { const char* m_Str; bool m_bSameCase; };
	static const FormatDef FormatTable[] = 
	{
		{"  ", true},
		{"  ", true},
		{" ", true},
		{" ", true},
		{" ", true},
		{"  ?", false},
		{"  ", false},
		{"  _", false},
		{" _", false},
		{" _", false},
		{"  _", false},
		{" ", false},
		{0, false}
	};

	vector<CFIOFormat> FioFormats;
	for (const FormatDef* pDef = FormatTable; pDef->m_Str != 0; pDef++)
		FioFormats.push_back(CFIOFormat(pDef->m_Str, pDef->m_bSameCase));

	// flag capitalised lines that can be a first name or a surname
	CLineIter it = m_Lines.begin();
	while (it != m_Lines.end())
	{
		if (it->m_bFirstUpperAlpha)
		{
			if (it->m_Grammems & _QM(rName))
				it->m_bMorphName = true;
			if (it->m_Grammems & _QM(rSurName))
				it->m_bMorphSurname = true;
		}
		it++;
	}

	// try every format at every line; the first one that fits wins
	for (it = m_Lines.begin(); it != m_Lines.end(); it++)
	{
		vector<CLineIter> Lines;

		for (long FormatNo = 0; FormatNo < FioFormats.size(); FormatNo++)
		{
			if (!CheckFioFormat(*this, FioFormats[FormatNo], it, Lines))
				continue;

			SetFio(*this, Lines);
			// resume scanning right after the recognised group
			it = Lines.back();
			break;
		}
	}
}

