// ==========  This file is under  LGPL, the GNU Lesser General Public Licence
// ==========  Dialing Syntax Analysis (www.aot.ru)
// ==========  Copyright by Dmitry Pankratov, Igor Nozhov, Alexey Sokirko

#ifndef synan_word_h
#define synan_word_h

#if _MSC_VER > 1000
#pragma once
#endif // _MSC_VER > 1000


#include "stdafx.h"
#include "SyntaxInit.h"
#include "Word.h"
#include "SynPlmLine.h"
#include "../SimpleGrammarLib/InputSymbol.h"
#include "AncodePattern.h"

#pragma warning(disable:4786) 


// Shorthand for a sequence of int values.
typedef vector<int> CIntVector;

// Kinds of graphematical pairs; this is the argument type of
// CWord::IsFirstOfGraPair(EGraPairType) and CWord::IsSecondOfGraPair(EGraPairType).
// NOTE(review): presumably each kind corresponds to a pair of begin/end marks on
// words (e.g. Date to m_bDate1/m_bDate2, Keyb to m_bKeyb1/m_bKeyb2) - the mapping
// is implemented outside this header, confirm there.
enum EGraPairType {Date, WebAddr, Oborot, Keyb,  GermanDividedCompound, UnknownPairType };

// Forward declarations: the classes below only store pointers to CSentence,
// so its full definition is not required at this point.
class CWord;
class CSentence;

// One morphological interpretation (homonym) of a word. Extends CAncodePattern
// with a lemma, oborot marks and a number of language-specific flags.
// NOTE(review): all flags are filled in by code outside this header; comments
// on individual slots either restate the original remarks or are hedged
// readings of the slot names.
class CHomonym : public CAncodePattern
{
public:
	// ---------------- morphology ----------------
	string m_strLemma;

	// corpus frequency information
	long m_lFreqHom;

	// ---------------- oborots ----------------
	// Index into GetOpt()->m_pOborDic->m_Entries (see GetOborotPtr);
	// GetOborotPtr asserts that it is not -1 before using it.
	int m_OborotNo;
	bool m_bOborot1;
	bool m_bOborot2;
	bool m_bInOb;

	// Returns the address of the oborot dictionary entry selected by m_OborotNo.
	// Must not be called while m_OborotNo is -1 (checked by assert only).
	const COborotForSyntax* GetOborotPtr() const
	{
		assert(m_OborotNo != -1);
		const COborotForSyntax& entry = GetOpt()->m_pOborDic->m_Entries[m_OborotNo];
		return &entry;
	}

	// NOTE(review): presumably the id of the morphological paradigm - confirm.
	long m_lPradigmID;
	bool m_bAdvAdj;
	bool m_bCanSynDependOnAdj;
	bool m_bCanSynDependOnAdv;
	bool m_bCanSubdueInfinitive;
	bool m_bCanSubdueInstr;
	bool m_bNounHasAdjectiveDeclination;

	BYTE m_CoordConjNo;

	bool m_bSmallNumber;
	int m_iCmpnLen;
	bool m_bCmplLem;
	bool m_bUnkGramcodes;
	bool m_bRussianOdin;

	bool m_bMonth;

	// is used for German verbs only
	bool m_bPassive;

	// A special slot marking the homonyms which are to be deleted.
	// NOTE(review): the original remark names CWord::DeleteMarkedHomonymsWithClauses,
	// while CWord declares DeleteMarkedHomonymsBeforeClauses - confirm which one is meant.
	bool m_bDelete;

	// A special slot, currently used only in CSentence::FindTermins, to select
	// all homonyms which suit the found termin.
	bool m_bGoodHomonym;

	// German verbs which require the special Perfect word order in subclauses,
	// for example:
	//   Ich weiss, dass du die Gefahr hast kommen sehen
	bool m_bPerfectAnomalie;

	// German verbs which can subdue an infinitive without "zu":
	//   Ich bleibe stehen.
	bool m_bInfinitiveConstruction;

	// German adjectives which can subdue at least one NP:
	//   "arm an+D"
	bool m_bAdjWithActiveValency;

	// all possible interpretations in the formats grammar
	set<CInputSymbol> m_AutomatSymbolInterpetation;

	// the sentence handed to the constructor (not owned, as far as this header shows)
	const CSentence* m_pSent;

	CHomonym(const CSentence* pSentence);
	const CSyntaxOpt* GetOpt() const;

	// Orders homonyms by lemma only; no other slot takes part in the comparison.
	bool operator<(const CHomonym& hom) const
	{
		return m_strLemma.compare(hom.m_strLemma) < 0;
	}

	bool HasSetOfGrammemsExact(QWORD Grammems) const;
	bool IsOb1() const;
	bool IsOb2() const;
	bool IsIsOb() const;
	bool IsLemma(const char* lemma) const;
	bool IsSynNoun() const;
	bool IsMorphNoun() const;
	bool IsLeftNounModifier() const;
	void DeleteOborotMarks();

	bool ProcessLemmaAndGrammems(const char* strLemma);
	bool CompareWithPredefinedWords(const SDatItems& arr) const;
	void SetLemma(string Lemma);
	string GetGrammemsStr() const;
	string GetPartOfSpeechStr() const;
};

class CWord  
{
public:

	// ======= Graphematics ======================	

	// input word form
	string m_strWord;

	// input word uppercase form 
	string m_strUpperWord;

	// is graphematical space 
	bool m_bSpace;

	// graphematical descriptors in one string
	string m_GraphemDescriptors;

	//      
	bool m_bLastInSent;	

	// token type
	MainTokenTypeEnum	m_TokenType;

	// graphematical descriptor RLE or LLE
	bool m_bWord; 

	// a single comma
	bool m_bComma;

	// a single hyphen
	bool m_bDash;

	//  graphematical descriptors FILE1-FILE2
	bool m_bFileName1;
	bool m_bFileName2;

	//  graphematical descriptors KEY1-KEY2
	bool m_bKeyb1;
	bool m_bKeyb2;

	//  graphematical descriptors FAM1-FAM2
	bool m_bFio1;
	bool m_bFio2;

	//  graphematical descriptors DT1-DT2
	bool m_bDate1;
	bool m_bDate2;


	//  a reference to a divided prefix (for German)
	int		m_TrennbarePraefixWordNo;

	// graphematical register 
	RegisterEnum m_Register;

	//  offset in the graphematcil buffer (= in the input file for text files)
	int	 m_GraphematicalUnitOffset;


	// true, if this word has a space before ot it is at the beginning of the sentence.
	bool	m_bHasSpaceBefore;

	//  true if the word was deleted and should be ignored 
	bool	m_bDeleted;

	

	// ========================  Morphology ========================

	vector<CHomonym> m_Homonyms;

	// is morphologically predicted
	bool m_bPredicted;

	// is a simple coordinating conjunction
	bool m_bSimilarConj;

	// the result of the search of subordinating  conjunction list via GetOpt()->m_pOborDic->FindSubConj
	int  m_SubordinateConjNo;


	// =============          Thesaurus interpretation =================
	// the type of thesaurus of the domain collocation (applicable only for the first word of the collocation)
	EThesType m_ThesType;

	// is between m_bFirstWordInTermin  m_bLastWordInTermin (inside a domain collocation)
	bool m_bInTermin;

	// the first word of a domain collocation
	bool m_bFirstWordInTermin;

	// the last word of a domain collocation
	bool m_bLastWordInTermin;

	//   ,      
	int	 m_iTerminID;



	// ========================  Russian Language  ========================
	//       ("", "")
	bool m_bSmallNumber;

	//   ,     GF=,       
	bool m_bBadParenthesis;


	int m_iReduplication;

	// the main verb of an auxiliary verb
	vector<int>	m_MainVerbs;

	

	// =============          Special slots =================

	// the holder of this Word
	CSentence* m_pSent;

	// m_iClauseNo is used for semantics interface
	int	 m_iClauseNo;
	

	// is created by syntax
	bool m_bArtificialCreated;


	set<CInputSymbol> m_AutomatSymbolInterpetationUnion;

	CWord(CSentence* pSent); 
	const	CSyntaxOpt* GetOpt() const; 
	

	
	void DeleteOborotMarks();
	

	bool AddNextHomonym(char* strPlmLine);
	bool ProcessPlmLineForTheFirstHomonym(char* strPlmLine);
	bool HasDes(const char* strWhat) const;
	
	void	Reset();
	int		ProcessGraphematicalDescriptors(const char* LineStr);
	bool	ProcessGramCodes();
	bool	IsThisPartOfSpeech(int iPartOfSpeech, int& iHomNum, int iStartHom) const ;
	void	SetWordStr (string NewValue);


	bool	FindLemma(string strLemma) const;	
	int		GetHomonymByPOS(BYTE POS) const;
	int		GetHomonymByGrammem(BYTE grammem) const;
	int		GetHomonymByPOSandGrammem(BYTE POS, BYTE grammem) const;
	bool	IsWordUpper(const char* s)	const  {return m_strUpperWord == s; };

	void	SetAllOtherHomsDel(int iHom);
	int		GetHomonymsCount() const	{ return m_Homonyms.size(); }
	bool	InitializePlmLine(CSynPlmLine& pPlmWord, int HomonymNo) const;
	CHomonym CloneHomonymByAnotherHomonym(const CHomonym* pHomonym, QWORD iGrammems, BYTE iTagID) const;


	const CHomonym& GetHomonym(int i) const	{ return m_Homonyms[i]; }

	CHomonym& GetHomonym(int i) 	{ return m_Homonyms[i]; }

	void SetGoodHomonym(int i )
	{  
	   m_Homonyms[i].m_bGoodHomonym = true;
	   m_Homonyms[i].m_bDelete	 = false;
	}

	void EraseHomonym(int iHom);
	void DeleteMarkedHomonymsBeforeClauses();
	void SetHomonymsDel(bool Value);

	bool IsFirstOfGraPair(EGraPairType type) const;
	bool IsFirstOfGraPair() const;
	bool IsSecondOfGraPair(EGraPairType type) const;
	bool IsSecondOfGraPair() const;

	void	CloneHomonymForOborot();
	void	TryBuildVerbLemmaWithKa();

	int		GetOborotNo() const;
	const COborotForSyntax*	GetOborotPtr() const;
	bool	IsOborot1() const;
	bool	IsOborot2() const;
	bool	IsInOborot() const;
	void	AddDash(CHomonym& DashHom);
	bool	CanBeSynNoun() const;
	void	KillHomonymOfPartOfSpeech(int iPartOfSpeech);
	void	BuildTerminalSymbolsByWord();
	bool	IsEqualToGrammarItem(const CHomonym& L, const CGrammarItem& I);
	void	InitSubordConjNoSlot();
};

// Shorthand for a sequence of CWord objects.
typedef vector<CWord> CWordVector;







#endif 
