/*
	Copyright (c) 2000-2006 Michael Pozhidaev<msp@altlinux.org>. 
   This file is part of the VOICEMAN speech system.

   VOICEMAN speech system is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   VOICEMAN speech system is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.
*/

#include"voiceman.h"
#include"langs.h"

// Global vm_language_mapper object defined in this translation unit
// (presumably declared extern in one of the included headers -- confirm).
vm_language_mapper language_mapper;

// Compares two characters by wrapping each one in a one-character string
// and delegating to equal_strings(), so both comparisons follow the same rules.
bool vm_lang::equal_chars(vm_char c1, vm_char c2) const
{
  vm_string first;
  first += c1;
  vm_string second;
  second += c2;
  const bool same = equal_strings(first, second);
  return same;
}

// Returns true when the two strings are equal after both have been passed
// through low_case().
bool vm_lang::equal_strings(const vm_string &s1, const vm_string &s2) const
{
  if (low_case(s1) == low_case(s2))
    return true;
  return false;
}

// Words for the digits 1-9, indexed by the digit value. Index 0 is an empty
// string: the code in this file emits zero through ENG_ZERO instead.
// NOTE(review): the element type is a non-const vm_char*; if WSTR() expands to
// a string literal these entries must never be written through -- confirm.
static vm_char *eng_ones[10] = {WSTR(""),
				WSTR("one"),
				WSTR("two"),
				WSTR("three"),
				WSTR("four"),
				WSTR("five"),
				WSTR("six"),
				WSTR("seven"),
				WSTR("eight"),
				WSTR("nine")};

// Words for the numbers 10-19, indexed by the units digit
// (process_hundred() uses this table when the tens digit is '1').
static vm_char *eng_tens[10] = {WSTR("ten"),
				WSTR("eleven"),
				WSTR("twelve"),
				WSTR("thirteen"),
				WSTR("fourteen"),
				WSTR("fifteen"),
				WSTR("sixteen"),
				WSTR("seventeen"),
				WSTR("eighteen"),
				WSTR("nineteen")};

// Words for the multiples of ten 20-90, indexed by the tens digit.
// Indexes 0 and 1 are empty placeholders; process_hundred() never reads them.
static vm_char *eng_decimals[10] = {WSTR(""), WSTR(""),
				    WSTR("twenty"),
				    WSTR("thirty"),
				    WSTR("forty"),
				    WSTR("fifty"),
				    WSTR("sixty"),
				    WSTR("seventy"),
				    WSTR("eighty"),
				    WSTR("ninety")
};

// Word used for the digit zero and for any input consisting only of zeros.
#define ENG_ZERO WSTR("zero")

// Scale-name tables. Each one holds two forms: index 0 is the singular and
// index 1 the plural; process_hundred() chooses between them.

// Name of the fourth three-digit group (10^9).
static vm_char *eng_mlrds[2] = {
  WSTR("milliard"),
  WSTR("milliards")};

// Name of the third three-digit group (10^6).
static vm_char *eng_mlns[2] = {
  WSTR("million"),
  WSTR("millions")};

// Name of the second three-digit group (10^3).
static vm_char *eng_thnds[2] = {
  WSTR("thousand"),
  WSTR("thousands")};

// Word following the hundreds digit: index 0 after "one", index 1 otherwise.
static vm_char *eng_hundreds[2] = {
  WSTR("hundred"),
  WSTR("hundreds")};

// Global instance of the English language handler whose methods are
// implemented below.
vm_eng_lang eng_lang;

// Returns the set of characters belonging to this language, as given by the
// ENG_LETTERS macro (defined outside this file).
vm_string vm_eng_lang::get_chars() const
{
  vm_string letters = ENG_LETTERS;
  return letters;
}

// Classifies a character:
//   1 - lowercase Latin letter 'a'..'z';
//   2 - uppercase Latin letter 'A'..'Z';
//   0 - anything else.
uint vm_eng_lang::get_char_type(vm_char c) const
{
  const bool is_lower = (WSTR('a') <= c) && (c <= WSTR('z'));
  const bool is_upper = (WSTR('A') <= c) && (c <= WSTR('Z'));
  if (is_lower)
    return 1;
  return is_upper ? 2 : 0;
}

// Returns a copy of str in which every lowercase Latin letter is replaced by
// its uppercase counterpart; all other characters are copied unchanged.
vm_string vm_eng_lang::up_case(const vm_string &str) const
{
  vm_string result;
  uint pos = 0;
  while(pos < str.length())
    {
      if (get_char_type(str[pos]) != 1)
        result += str[pos];
      else
        // Keep the letter's offset in the alphabet, switch the base to 'A'.
        result += (str[pos] - WSTR('a')) + WSTR('A');
      pos++;
    }
  return result;
}

// Returns a copy of str in which every uppercase Latin letter is replaced by
// its lowercase counterpart; all other characters are copied unchanged.
vm_string vm_eng_lang::low_case(const vm_string &str) const
{
  vm_string result;
  uint pos = 0;
  while(pos < str.length())
    {
      if (get_char_type(str[pos]) != 2)
        result += str[pos];
      else
        // Keep the letter's offset in the alphabet, switch the base to 'a'.
        result += (str[pos] - WSTR('A')) + WSTR('a');
      pos++;
    }
  return result;
}

// Appends word to str, first inserting a single space when str is non-empty
// and does not already end with one. The separator is inserted even when
// word itself is empty.
void attach_word(vm_string &str, vm_string word)
{
  const bool needs_separator = !str.empty() && (str[str.length()-1] != WSTR(' '));
  if (needs_separator)
    str += WSTR(' ');
  str += word;
}

// Converts one group of up to three decimal digits into words.
//
// in_str - the digit group (at most three characters, digits only;
//          anything else is reported through VM_STOP);
// items  - optional two-element table {singular, plural} with the scale name
//          to append after the group, or NULL for no scale name.
//
// Returns an empty string when every digit of the group is zero. Otherwise
// returns the words for the group: hundreds (followed by "and" when tens or
// units are present), then tens and units, then the scale name if requested.
vm_string vm_eng_lang::process_hundred(const vm_string &in_str, vm_char *items[]) const
{
  vm_string group = in_str;
  if (group.length() > 3)
    VM_STOP("bad input group");
  bool has_nonzero = false;
  for(uint pos = 0; pos < group.length(); pos++)
    {
      const bool is_digit = (group[pos] >= WSTR('0')) && (group[pos] <= WSTR('9'));
      if (!is_digit)
        VM_STOP("bad digit");
      if (group[pos] != '0')
        has_nonzero = true;
    }
  if (!has_nonzero)
    return WSTR("");
  // Left-pad with zeros so the three positions can be addressed uniformly.
  while(group.length() < 3)
    group = WSTR('0')+group;
  const vm_char hundreds_digit = group[0];
  const vm_char tens_digit = group[1];
  const vm_char units_digit = group[2];
  vm_string words;
  if (hundreds_digit != WSTR('0'))
    {
      attach_word(words, eng_ones[hundreds_digit-WSTR('0')]);
      // "hundred" after one, "hundreds" after two..nine.
      attach_word(words, eng_hundreds[(hundreds_digit == WSTR('1')) ? 0 : 1]);
      if ((tens_digit != WSTR('0')) || (units_digit != WSTR('0')))
        attach_word(words, WSTR("and"));
    }
  if (tens_digit == WSTR('1'))
    {
      // 10..19 have dedicated words selected by the units digit.
      attach_word(words, eng_tens[units_digit-WSTR('0')]);
    }
  else
    {
      if (tens_digit != WSTR('0'))
        attach_word(words, eng_decimals[tens_digit-WSTR('0')]);
      // For a zero units digit this attaches the empty eng_ones[0] entry.
      attach_word(words, eng_ones[units_digit-WSTR('0')]);
    }
  if (!items)
    return words;
  // The singular form is used only when the group ends in 1 but not in 11.
  const bool singular = (tens_digit != WSTR('1')) && (units_digit == WSTR('1'));
  attach_word(words, items[singular ? 0 : 1]);
  return words;
}

// Converts a string of decimal digits into English words.
//
// in_str - non-empty string consisting only of the characters '0'..'9';
//          an empty string or any other character is reported via VM_STOP.
//
// Returns ENG_ZERO when every digit is zero. Otherwise leading zeros are
// dropped, the digits are split into groups of three starting from the least
// significant end, and each group is rendered with process_hundred(). The
// second, third and fourth groups get the thousand / million / milliard scale
// names; groups above the fourth are rendered without any scale name.
// An all-zero group produces no words, but attach_word() still inserts its
// separator, so the result may contain a trailing space.
vm_string vm_eng_lang::digits_to_words(const vm_string &in_str) const
{
  if (in_str.empty())
    VM_STOP("empty input");
  uint i;
  bool nonzero_seen = false;
  for(i=0;i<in_str.length();i++)
    {
      if (!((in_str[i] >= WSTR('0')) && (in_str[i] <= WSTR('9'))))
        VM_STOP("bad input digits");
      if (in_str[i] != WSTR('0'))
        nonzero_seen = true;
    }
  if (!nonzero_seen)
    return ENG_ZERO;

  // Copy the input without its leading zeros.
  vm_string digits;
  bool significant = false;
  for(i=0;i<in_str.length();i++)
    {
      if (in_str[i] != WSTR('0'))
        significant = true;
      if (significant)
        digits += in_str[i];
    }

  // Cut groups of up to three digits off the end; groups[0] is the least
  // significant group.
  std::vector<vm_string> groups;
  while(digits.length() > 0)
    {
      uint take = 3;
      if (digits.length() < 3)
        take = digits.length();
      vm_string group;
      for(i=digits.length()-take;i<digits.length();i++)
        group += digits[i];
      groups.push_back(group);
      digits.resize(digits.size()-take);
    }

  // The index must be a signed type wide enough for any group count: plain
  // char may be unsigned, which would make the "j >= 0" test always true.
  vm_string result;
  for(int j=static_cast<int>(groups.size())-1;j>=0;j--)
    {
      vm_char **scale = NULL;
      switch(j)
        {
        case 3:
          scale = eng_mlrds;
          break;
        case 2:
          scale = eng_mlns;
          break;
        case 1:
          scale = eng_thnds;
          break;
        default:
          break;
        }
      attach_word(result, process_hundred(groups[j], scale));
    }
  return result;
}

// Replaces the decimal digits found in in_str with English words and returns
// the resulting text; non-digit characters are copied through unchanged.
//
// single_digits - when true every digit is spoken on its own ("one two");
//                 when false each run of consecutive digits is converted as
//                 one number by digits_to_words().
//
// In both modes a space is appended after the words of a digit run that is
// followed by a non-digit character.
vm_string vm_eng_lang::insert_digits(const vm_string &in_str, bool single_digits) const
{
  vm_string result;
  if (!single_digits)
    {
      // Digits collected since the last non-digit character.
      vm_string run;
      for(uint pos = 0; pos < in_str.length(); pos++)
        {
          const bool is_digit = (in_str[pos] >= WSTR('0')) && (in_str[pos] <= WSTR('9'));
          if (is_digit)
            {
              run += in_str[pos];
              continue;
            }
          if (!run.empty())
            {
              attach_word(result, digits_to_words(run));
              run.erase();
              result += WSTR(' ');
            }
          result += in_str[pos];
        }
      // The input may end with a digit run that has not been flushed yet.
      if (!run.empty())
        attach_word(result, digits_to_words(run));
      return result;
    }
  // True while the previously processed character was a digit.
  bool after_digit = false;
  for(uint pos = 0; pos < in_str.length(); pos++)
    {
      const bool is_digit = (in_str[pos] >= WSTR('0')) && (in_str[pos] <= WSTR('9'));
      if (!is_digit)
        {
          if (after_digit)
            {
              result += WSTR(' ');
              after_digit = false;
            }
          result += in_str[pos];
          continue;
        }
      after_digit = true;
      // eng_ones[0] is empty, so zero needs its own word.
      if (in_str[pos] == WSTR('0'))
        attach_word(result, ENG_ZERO);
      else
        attach_word(result, eng_ones[in_str[pos]-WSTR('0')]);
    }
  return result;
}

void vm_eng_lang::mark_caps(const vm_string &text, std::vector<bool> &mask)
{
  assert(text.length()==mask.size());
  vm_string::iterator i=text.create_iterator(get_chars());
  while(i.next())
    {
      if (i.str().length()<2)
	continue;
      if (!i.str().contains(WSTR("eEuUiIoOaAyY")))
	{
	  int j;
	  for(j=i.start();j<i.end();j++)
	    mask[j]=1;
	}
    }
  process_cap_list(text, mask);
}

bool vm_eng_lang::check_cap_list(const vm_string &str, uint pos, vm_string& result) const
{
  std::list<vm_cap_item>::const_iterator i;
  for(i=m_cap_items.begin();i!=m_cap_items.end();i++)
    {
      vm_string s = low_case(i->m_str);
      if (str.length()-pos < s.length())
	continue;
      int j;
      for(j=0;j<s.length();j++)
	if (str[pos+j] != s[j])
	  break;
      if (j<s.length())
	continue;
      if (!i->m_before && pos>0 && get_char_type(str[pos-1])>0)
	continue;
      if (!i->m_after && pos+s.length()<str.length() && get_char_type(str[pos+s.length()])>0)
	continue;
      result=i->m_str;
      return 1;
    } // for;
  return 0;
}

void vm_eng_lang::process_cap_list(const vm_string& str, std::vector<bool>& mask) const
{
  int i;
  for(i=0;i<str.length();i++)
    {
      int j;
      vm_string s;
      if (!check_cap_list(low_case(str), i, s))
	continue;
      for(j=0;j<s.length();j++)
	if (get_char_type(s[j])==2)// Capital letter;
	  mask[i+j]=1;
      i+=s.length()-1;
    } // for;
}

// Returns a copy of text with a space inserted at every position where a
// lowercase Latin letter is immediately followed by an uppercase one.
// All output goes through vm_string::space_undup_attach() (judging by the
// name it suppresses repeated spaces -- confirm against vm_string).
vm_string vm_eng_lang::space_separation(const vm_string& text) const
{
  vm_string result;
  int pos = 0;
  while(pos<text.length())
    {
      const bool lower_to_upper = pos!=0 && get_char_type(text[pos])==2 && get_char_type(text[pos-1])==1;
      if (lower_to_upper)
        result.space_undup_attach(WSTR(' '));
      result.space_undup_attach(text[pos]);
      pos++;
    }
  return result;
}
