/*
	Copyright (c) 2000-2007 Michael Pozhidaev<msp@altlinux.org>
   This file is part of the VOICEMAN speech system.

   VOICEMAN speech system is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   VOICEMAN speech system is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.
*/

#include"voiceman.h"
#include"config.h"
#include"languages.h"
#include"eng.h"

// Characters treated as English letters: the 26 lower-case letters
// followed by the 26 upper-case ones.
#define ENG_LETTERS L"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
// Word used for a zero value; engOnes[0] is the empty string, so zero
// has to be spelled through this constant.
#define ENG_ZERO L"zero"

// Names of the digits, indexed by digit value (index 0 is empty).
static const wchar_t* const engOnes[10] = {
  L"",
  L"one", L"two", L"three",
  L"four", L"five", L"six",
  L"seven", L"eight", L"nine"
};

// Names of the numbers 10..19, indexed by their units digit.
static const wchar_t* const engTens[10] = {
  L"ten", L"eleven", L"twelve", L"thirteen", L"fourteen",
  L"fifteen", L"sixteen", L"seventeen", L"eighteen", L"nineteen"
};

// Names of the round tens 20..90, indexed by the tens digit;
// indices 0 and 1 are empty.
static const wchar_t* const engDecimals[10] = {
  L"", L"",
  L"twenty", L"thirty", L"forty", L"fifty",
  L"sixty", L"seventy", L"eighty", L"ninety"
};

// Scale words; in each table index 0 holds the singular form
// and index 1 the plural form.
static const wchar_t* const engMlrds[2] = { L"milliard", L"milliards" };

static const wchar_t* const engMlns[2] = { L"million", L"millions" };

static const wchar_t* const engThnds[2] = { L"thousand", L"thousands" };

static const wchar_t* const engHundreds[2] = { L"hundred", L"hundreds" };

// Classifies a character: LOWCASE for 'a'..'z', UPCASE for 'A'..'Z',
// OTHER for everything else.
int EngLang::getCharType(wchar_t c) const
{
  const bool isLower = (c >= L'a' && c <= L'z');
  const bool isUpper = (c >= L'A' && c <= L'Z');
  if (isLower)
    return LOWCASE;
  if (isUpper)
    return UPCASE;
  return OTHER;
}

// Returns the letters listed in ENG_LETTERS as a string.
std::wstring EngLang::getAllChars() const
{
  return std::wstring(ENG_LETTERS);
}

// Compares two characters after folding both to lower case, so the
// letters 'A'..'Z' compare equal to their lower-case counterparts.
bool EngLang::equalChars(wchar_t c1, wchar_t c2) const
{
  const wchar_t first = toLower(c1);
  const wchar_t second = toLower(c2);
  return first == second;
}

// Converts 'a'..'z' to the matching upper-case letter; any other
// character is returned unchanged.
wchar_t EngLang::toUpper(wchar_t ch) const
{
  if (getCharType(ch) != LOWCASE)
    return ch;
  return static_cast<wchar_t>(L'A' + (ch - L'a'));
}

// Converts 'A'..'Z' to the matching lower-case letter; any other
// character is returned unchanged.
wchar_t EngLang::toLower(wchar_t ch) const
{
  if (getCharType(ch) != UPCASE)
    return ch;
  return static_cast<wchar_t>(L'a' + (ch - L'A'));
}

std::wstring EngLang::toUpper(const std::wstring& str) const
{
  std::wstring newStr;
  for(int i=0;i<str.length();i++)
    newStr+=toUpper(str[i]);
  return newStr;
}

std::wstring EngLang::toLower(const std::wstring& str) const
{
  std::wstring newStr;
  for(int i=0;i<str.length();i++)
    newStr+=toLower(str[i]);
  return newStr;
}

std::wstring EngLang::processHundred(const std::wstring& inStr, const wchar_t* const items[]) const
{
  std::wstring s;
  std::wstring str = inStr;
  int i;
  assert(str.length() <= 3);
  for(i=0;i<str.length();i++)
    {
      assert(str[i] >= '0' && str[i] <= '9');
      if (str[i] != '0')
	break;
    }
  if (i==str.length())
    return std::wstring();
  while(str.length() < 3)
    str = L'0'+str;
  if (str[0] != '0')
    {
      attachString<std::wstring>(s, engOnes[str[0]-'0']);
      if (str[0] == '1')
	attachString<std::wstring>(s, engHundreds[0]); else
	attachString<std::wstring>(s, engHundreds[1]);
      if (str[1] != '0' || str[2] != '0')
	attachString<std::wstring>(s, L"and");
    }
  if (str[1] != '0' && str[1] != '1')
    attachString<std::wstring>(s, engDecimals[str[1]-'0']);
  if (str[1] == '1')
    attachString<std::wstring>(s, engTens[str[2]-'0']); else
    {
      attachString<std::wstring>(s, engOnes[str[2]-'0']);
    }
  if (!items)
    return s;
  if (str[1] == '1')
    attachString<std::wstring>(s, items[1]); else
    if (str[2] == '0')
      attachString<std::wstring>(s, items[1]); else
      if (str[2] == '1')
	attachString<std::wstring>(s, items[0]); else
	attachString<std::wstring>(s, items[1]);
  return s;
}

// Converts a non-empty string of decimal digits into words.
//
// Leading zeros are ignored; a string made only of zeros yields
// ENG_ZERO. The remaining digits are cut into groups of three, counted
// from the right, and each group is spelled by processHundred() with
// the scale table matching its position: thousands, millions,
// milliards. Groups above the milliards are spelled without any scale
// word.
std::wstring EngLang::digitsToWords(const std::wstring& inStr) const
{
  assert(!inStr.empty());
  // Find the first significant (non-zero) character.
  std::wstring::size_type firstSignificant = 0;
  while(firstSignificant < inStr.length())
    {
      assert(inStr[firstSignificant] >= '0' && inStr[firstSignificant] <= '9');
      if (inStr[firstSignificant] != '0')
        break;
      ++firstSignificant;
    }
  if (firstSignificant == inStr.length())
    return ENG_ZERO;
  const std::wstring digits = inStr.substr(firstSignificant);
  // Collect the groups, least significant first; only the last one
  // collected (the most significant) may be shorter than three digits.
  std::vector<std::wstring> groups;
  std::wstring::size_type remaining = digits.length();
  while(remaining > 0)
    {
      const std::wstring::size_type width = remaining >= 3 ? 3 : remaining;
      groups.push_back(digits.substr(remaining - width, width));
      remaining -= width;
    }
  // Scale table for each group position; position 0 is the units group.
  static const wchar_t* const* const scales[4] = {NULL, engThnds, engMlns, engMlrds};
  std::wstring words;
  for(std::vector<std::wstring>::size_type k=groups.size();k>0;--k)
    {
      const std::vector<std::wstring>::size_type position = k - 1;
      const wchar_t* const* scale = position < 4 ? scales[position] : NULL;
      attachString<std::wstring>(words, processHundred(groups[position], scale));
    }
  return words;
}

// Replaces the decimal digits found in str with words, in place.
//
// With singleDigits set every digit is spelled on its own ("zero",
// "one", ...). Otherwise each run of consecutive digits is handed to
// digitsToWords() as one number. In both modes a space is appended
// right after a number when a non-digit character follows it; all
// non-digit characters are copied unchanged.
void EngLang::expandNumbers(std::wstring& str, bool singleDigits) const
{
  const std::wstring source = str;
  str.erase();
  if (singleDigits)
    {
      bool afterDigit = false;
      for(std::wstring::const_iterator it=source.begin();it!=source.end();++it)
        {
          const wchar_t ch = *it;
          if (ch < '0' || ch > '9')
            {
              if (afterDigit)
                str += ' ';
              afterDigit = false;
              str += ch;
              continue;
            }
          afterDigit = true;
          if (ch == '0')
            attachString<std::wstring>(str, ENG_ZERO);
          else
            attachString<std::wstring>(str, engOnes[ch-'0']);
        }
      return;
    }
  // Digits of the number currently being read; non-empty exactly while
  // the scan is inside a run of digits.
  std::wstring number;
  for(std::wstring::const_iterator it=source.begin();it!=source.end();++it)
    {
      const wchar_t ch = *it;
      if (ch >= '0' && ch <= '9')
        {
          number += ch;
          continue;
        }
      if (!number.empty())
        {
          attachString<std::wstring>(str, digitsToWords(number));
          number.erase();
          str += ' ';
        }
      str += ch;
    }
  // A number that runs up to the end of the text.
  if (!number.empty())
    attachString<std::wstring>(str, digitsToWords(number));
}

// Returns a copy of text with a space inserted before every upper-case
// letter that directly follows a lower-case one ("fooBar" -> "foo Bar").
std::wstring EngLang::separate(const std::wstring& text) const
{
  std::wstring result;
  for(std::wstring::size_type pos=0;pos<text.length();++pos)
    {
      const bool caseBoundary = pos > 0
        && getCharType(text[pos]) == UPCASE
        && getCharType(text[pos-1]) == LOWCASE;
      if (caseBoundary)
        result += ' ';
      result += text[pos];
    }
  return result;
}

bool EngLang::checkCapList(const std::wstring& str, int pos, std::wstring& result) const
{
  std::list<CAPITEM>::const_iterator i;
  for(i=m_capItems.begin();i!=m_capItems.end();i++)
    {
      std::wstring s = toLower(i->str);
      if (str.length()-pos < s.length())
	continue;
      int j;
      for(j=0;j<s.length();j++)
	if (str[pos+j] != s[j])
	  break;
      if (j<s.length())
	continue;
      if (!i->before && pos>0 && getCharType(str[pos-1])!=OTHER)
	continue;
      if (!i->after && pos+s.length()<str.length() && getCharType(str[pos+s.length()])!=OTHER)
	continue;
      result=i->str;
      return 1;
    } // for;
  return 0;
}

// Scans str for entries of the capitalization list and, for every
// entry found, sets the marks of those positions where the entry, as
// stored, has an upper-case letter.
//
// str   - text to scan;
// marks - one flag per character of str; only flags inside matched
//         entries are touched, and they are only ever set.
//
// The text is lower-cased once before the scan. After a match the scan
// continues right behind the matched entry. A zero-length match is
// treated as no match so that the scan always moves forward.
void EngLang::processCapList(const std::wstring& str, std::vector<bool>& marks) const
{
  const std::wstring lowered = toLower(str);
  std::wstring::size_type pos = 0;
  while(pos < str.length())
    {
      std::wstring matched;
      if (!checkCapList(lowered, static_cast<int>(pos), matched) || matched.empty())
        {
          ++pos;
          continue;
        }
      for(std::wstring::size_type k=0;k<matched.length();++k)
        if (getCharType(matched[k]) == UPCASE)// Capital letter;
          marks[pos+k] = 1;
      pos += matched.length();
    } // while;
}

void EngLang::markCapitals(const std::wstring& text, std::vector<bool>& marks) const
{
  assert(text.length()==marks.size());
  StringIterator<std::wstring> i(text, getAllChars());
  while(i.next())
    {
      if (i.str().length()<2)
	continue;
      if (!contains<std::wstring>(i.str(), L"eEuUiIoOaAyY"))
	{
	  int j;
	  for(j=i.start();j<i.end();j++)
	    marks[j]=1;
	}
    }
  processCapList(text, marks);
}

// Reads the capitalization list from fileName and appends its entries
// to m_capItems.
//
// Every line is trimmed first. A leading '+' sets the first flag of
// the entry and a trailing '+' sets the second one; the text between
// them becomes the entry value, with every character that is not an
// English letter dropped.
//
// Throws ConfigurationException for a line that leaves no value: an
// empty line, a line made only of the '+' markers, or a line without
// any English letter. The empty line is checked before any character
// of it is inspected, and an entry with an empty value is never
// stored, because it would match at every position of a text.
void EngLang::load(const std::string& fileName)
{
  DelimitedFile f;
  f.read(fileName);
  for(int k=0;k<f.getLineCount();k++)
    {
      const std::string s = trim(f.getRawLine(k));
      const bool leadingPlus = !s.empty() && s[0] == '+';
      const bool trailingPlus = !s.empty() && s[s.length()-1] == '+';
      // Half-open range [first, last) of the characters between the markers.
      const std::string::size_type first = leadingPlus ? 1 : 0;
      const std::string::size_type last = trailingPlus ? s.length()-1 : s.length();
      if (last <= first)
        throw ConfigurationException("Caps file contains the illegal line \'"+s+"\'.");
      std::wstring value;
      for(std::string::size_type t=first;t<last;++t)
        {
          if (getCharType(s[t]) == OTHER)
            continue;
          value += static_cast<wchar_t>(s[t]);
        }
      if (value.empty())
        throw ConfigurationException("Caps file contains the illegal line \'"+s+"\'.");
      m_capItems.push_back(CAPITEM(value, leadingPlus, trailingPlus));
    }
}
