/*
	Copyright (c) 2000-2006 Michael Pozhidaev<msp@altlinux.org>. 
   This file is part of the VOICEMAN speech system.

   VOICEMAN speech system is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   VOICEMAN speech system is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.
*/

#include"voiceman.h"

// Name of the I/O character set; assigned by primary_init() and passed to
// iconv_open() by strings_init().
static std::string cur_io;
// iconv descriptors opened by strings_init(), one per conversion direction:
// I/O charset <-> wide string and UTF-8 <-> wide string.
static iconv_t iconv_io2wstring, iconv_wstring2io, iconv_utf82wstring, iconv_wstring2utf8;

// iconv encoding name used for UTF-8 text.
#define ICONV_UTF8_ID "utf8"
// iconv encoding name used for the contents of wchar_t buffers.
// NOTE(review): this presumes wchar_t is a 32-bit little-endian unit - confirm for every target platform.
#define ICONV_WSTRING_ID "utf32le"
// Capacity, in output elements, of the buffer handed to a single iconv() call.
#define ICONV_BLOCK_SIZE 50
// Placeholders appended to the result where input could not be converted.
#define WSTRING_BAD_CHAR L'?'
#define STRING_BAD_CHAR '?'

// Converts a byte string in the I/O encoding (descriptor iconv_io2wstring)
// into a wide string.
//
// The input is converted block by block, ICONV_BLOCK_SIZE wide characters at
// a time:
//  - an invalid byte sequence (EILSEQ) yields WSTRING_BAD_CHAR and one input
//    byte is skipped;
//  - a truncated sequence at the end of the input (EINVAL) yields
//    WSTRING_BAD_CHAR and stops the conversion;
//  - any other iconv failure stops the conversion and returns what has been
//    converted so far (previously this case looped forever).
wstring io2wstring(const string &s)
{
  wstring res;
  // iconv() takes char** for historical reasons but does not modify the input.
  char *in = const_cast<char*>(s.data());
  size_t in_left = s.length();
  wchar_t block[ICONV_BLOCK_SIZE];

  while(in_left)
    {
      char *out = (char*)block;
      size_t out_left = sizeof(block);
      const size_t rc = iconv(iconv_io2wstring, &in, &in_left, &out, &out_left);
      // Save errno before any library call below has a chance to change it.
      const int err = errno;
      // Whatever the outcome, keep the characters produced by this call.
      res.append(block, (sizeof(block) - out_left) / sizeof(wchar_t));
      if (rc != (size_t)(-1))
	continue;
      if (err == E2BIG)
	continue; // output block was full, go on with the next one
      if (err == EILSEQ)
	{
	  res += WSTRING_BAD_CHAR;
	  in++;
	  in_left--;
	  continue;
	}
      if (err == EINVAL)
	res += WSTRING_BAD_CHAR;
      break; // truncated input or unexpected error: nothing more can be done
    } // while
  return res;
}

// Converts a wide string into a byte string in the I/O encoding (descriptor
// iconv_wstring2io).
//
// The input is converted block by block, ICONV_BLOCK_SIZE bytes of output at
// a time:
//  - a character that cannot be converted (EILSEQ) yields STRING_BAD_CHAR and
//    exactly one wchar_t of input is skipped. The original code advanced the
//    pointer but never decreased the remaining size ("bsize-sizeof(wchar_t);"
//    had no effect), so iconv() could read past the end of the buffer;
//  - an incomplete input unit (EINVAL) yields STRING_BAD_CHAR and stops;
//  - any other iconv failure stops the conversion and returns what has been
//    converted so far (previously this case looped forever).
string wstring2io(const wstring &s)
{
  string res;
  // iconv() takes char** for historical reasons but does not modify the input.
  char *in = (char*)const_cast<wchar_t*>(s.data());
  size_t in_left = s.length()*sizeof(wchar_t);
  char block[ICONV_BLOCK_SIZE];

  while(in_left)
    {
      char *out = block;
      size_t out_left = sizeof(block);
      const size_t rc = iconv(iconv_wstring2io, &in, &in_left, &out, &out_left);
      // Save errno before any library call below has a chance to change it.
      const int err = errno;
      // Whatever the outcome, keep the bytes produced by this call.
      res.append(block, sizeof(block) - out_left);
      if (rc != (size_t)(-1))
	continue;
      if (err == E2BIG)
	continue; // output block was full, go on with the next one
      if (err == EILSEQ && in_left >= sizeof(wchar_t))
	{
	  res += STRING_BAD_CHAR;
	  in += sizeof(wchar_t);
	  in_left -= sizeof(wchar_t);
	  continue;
	}
      if (err == EILSEQ || err == EINVAL)
	res += STRING_BAD_CHAR;
      break; // truncated input or unexpected error: nothing more can be done
    } // while
  return res;
}

// Converts a wide string into a UTF-8 byte string (descriptor
// iconv_wstring2utf8).
//
// The input is converted block by block, ICONV_BLOCK_SIZE bytes of output at
// a time:
//  - a character that cannot be converted (EILSEQ) yields STRING_BAD_CHAR and
//    exactly one wchar_t of input is skipped. The original code advanced the
//    pointer but never decreased the remaining size ("bsize-sizeof(wchar_t);"
//    had no effect), so iconv() could read past the end of the buffer;
//  - an incomplete input unit (EINVAL) yields STRING_BAD_CHAR and stops;
//  - any other iconv failure stops the conversion and returns what has been
//    converted so far (previously this case looped forever).
string wstring2utf8(const wstring &s)
{
  string res;
  // iconv() takes char** for historical reasons but does not modify the input.
  char *in = (char*)const_cast<wchar_t*>(s.data());
  size_t in_left = s.length()*sizeof(wchar_t);
  char block[ICONV_BLOCK_SIZE];

  while(in_left)
    {
      char *out = block;
      size_t out_left = sizeof(block);
      const size_t rc = iconv(iconv_wstring2utf8, &in, &in_left, &out, &out_left);
      // Save errno before any library call below has a chance to change it.
      const int err = errno;
      // Whatever the outcome, keep the bytes produced by this call.
      res.append(block, sizeof(block) - out_left);
      if (rc != (size_t)(-1))
	continue;
      if (err == E2BIG)
	continue; // output block was full, go on with the next one
      if (err == EILSEQ && in_left >= sizeof(wchar_t))
	{
	  res += STRING_BAD_CHAR;
	  in += sizeof(wchar_t);
	  in_left -= sizeof(wchar_t);
	  continue;
	}
      if (err == EILSEQ || err == EINVAL)
	res += STRING_BAD_CHAR;
      break; // truncated input or unexpected error: nothing more can be done
    } // while
  return res;
}

// Converts a UTF-8 byte string into a wide string (descriptor
// iconv_utf82wstring), replacing what cannot be decoded.
//
// The input is converted block by block, ICONV_BLOCK_SIZE wide characters at
// a time:
//  - an invalid sequence (EILSEQ) appends three WSTRING_BAD_CHAR marks and
//    stops the conversion, the rest of the input is dropped;
//  - a truncated sequence at the end of the input (EINVAL) appends one
//    WSTRING_BAD_CHAR and stops;
//  - any other iconv failure stops the conversion and returns what has been
//    converted so far (previously this case looped forever).
wstring utf82wstring(const string &s)
{
  wstring res;
  // iconv() takes char** for historical reasons but does not modify the input.
  char *in = const_cast<char*>(s.data());
  size_t in_left = s.length();
  wchar_t block[ICONV_BLOCK_SIZE];

  while(in_left)
    {
      char *out = (char*)block;
      size_t out_left = sizeof(block);
      const size_t rc = iconv(iconv_utf82wstring, &in, &in_left, &out, &out_left);
      // Save errno before any library call below has a chance to change it.
      const int err = errno;
      // Whatever the outcome, keep the characters produced by this call.
      res.append(block, (sizeof(block) - out_left) / sizeof(wchar_t));
      if (rc != (size_t)(-1))
	continue;
      if (err == E2BIG)
	continue; // output block was full, go on with the next one
      if (err == EILSEQ)
	{
	  res += WSTRING_BAD_CHAR;
	  res += WSTRING_BAD_CHAR;
	  res += WSTRING_BAD_CHAR;
	}
      else if (err == EINVAL)
	res += WSTRING_BAD_CHAR;
      break; // invalid or truncated input, or unexpected error
    } // while
  return res;
}

bool utf82wstring1(const string &s, wstring &res)
{
  res.erase();
  int i;
  char *b = new char[s.length()];
  char *bb=b;
  for(i=0;i<s.length();i++)
    b[i]=s[i];
  size_t bsize = s.length();

  while(bsize)
    {
      wchar_t *r = new wchar_t[ICONV_BLOCK_SIZE];
      char *rr=(char*)r;
      size_t rsize=ICONV_BLOCK_SIZE*sizeof(wchar_t);
      if (iconv(iconv_utf82wstring, &b, &bsize, &rr, &rsize) == (size_t)(-1))
	{
	  if (errno == EILSEQ)
	    {
	      delete[] r;
	      delete[] bb;
	      return 0;
	    } // bad sequence
	  if (errno == EINVAL)
	    {
	      delete[] r;
	      delete[] bb;
	      break;
	    }
	  if (errno == E2BIG)
	    {
	      for(i=0;i<ICONV_BLOCK_SIZE-rsize/sizeof(wchar_t);i++)
		res+=r[i];
	      delete[] r;
	      continue;
	    }
	} // (size_t)(-1)
      
      for(i=0;i<ICONV_BLOCK_SIZE-rsize/sizeof(wchar_t);i++)
	res+=r[i];
      delete[] r;
    } // while
  delete[] bb;
  return 1;
}

// Opens one iconv descriptor converting from encoding "from" to encoding "to"
// and stores it in cd. On failure the error is reported on std::cerr and
// false is returned.
static bool open_iconv_descriptor(iconv_t &cd, const std::string &to, const std::string &from)
{
  cd = iconv_open(to.c_str(), from.c_str());
  if (cd != (iconv_t)-1)
    return 1;
  std::cerr << "ERROR:Could not create iconv descriptor for conversions from " << from << " to " << to << "." << std::endl;
  return 0;
}

// Opens the four file-level iconv descriptors (I/O charset <-> wide string,
// UTF-8 <-> wide string). The descriptors are opened in a fixed order and the
// first failure stops the sequence; returns true only if all four were opened.
static bool strings_init()
{
  return open_iconv_descriptor(iconv_io2wstring, ICONV_WSTRING_ID, cur_io)
    && open_iconv_descriptor(iconv_wstring2io, cur_io, ICONV_WSTRING_ID)
    && open_iconv_descriptor(iconv_utf82wstring, ICONV_WSTRING_ID, ICONV_UTF8_ID)
    && open_iconv_descriptor(iconv_wstring2utf8, ICONV_UTF8_ID, ICONV_WSTRING_ID);
}

bool primary_init()
{
  setlocale(LC_ALL, "");
  string lang;
  char *l=getenv("LANG");
  if (!l)
    lang="POSIX"; else
      lang=l;

  int d=-1;
  uint i;
  for(i=0;i<lang.length();i++)
    if (lang[i]=='.')
      d=i;
  if (d < 0)
    {
      cur_io="US-ASCII";
      return strings_init();
      return 0;
    }

  string cp;
  for(i=d+1;i<lang.length();i++)
    cp += lang[i];
  cur_io = cp;
  return strings_init();
}

std::ostream &operator <<(std::ostream &s, const wstring &ws)
{
  s << wstring2io(ws);
  return s;
}

// Replaces the content of the string with the single character s.
// Returns a reference to this string; the original body had no return
// statement, which is undefined behaviour for a function returning a
// reference.
const vm_string &vm_string::operator =(const vm_char s)
{
  erase();
  operator +=(s);
  return *this;
}

// Returns a copy of the string without leading and trailing spaces (code 32)
// and tabs (code 9). An empty or all-blank string gives an empty result.
vm_string vm_string::trim() const
{
  vm_string result;
  if (empty())
    return result;
  const vm_string &self=*this;

  // First position that is neither a space nor a tab.
  int first=0;
  while(first<self.length() && (self[first]==32 || self[first]==9))
    first++;

  // One past the last position that is neither a space nor a tab.
  int last=self.length();
  while(last>first && (self[last-1]==32 || self[last-1]==9))
    last--;

  for(int pos=first;pos<last;pos++)
    result += self[pos];
  return result;
}

// Returns a copy of the string in which the Latin letters 'a'..'z' are
// replaced by 'A'..'Z'; every other character is left untouched.
vm_string vm_string::upper_case() const
{
  vm_string result=*this;
  for(int pos=0;pos<result.length();pos++)
    {
      if (result[pos]<WSTR('a') || result[pos]>WSTR('z'))
	continue;
      result[pos] = WSTR('A') + (result[pos] -WSTR('a'));
    }
  return result;
}

// Returns a copy of the string in which the Latin letters 'A'..'Z' are
// replaced by 'a'..'z'; every other character is left untouched.
vm_string vm_string::lower_case() const
{
  vm_string result=*this;
  for(int pos=0;pos<result.length();pos++)
    {
      if (result[pos]<WSTR('A') || result[pos]>WSTR('Z'))
	continue;
      result[pos]= WSTR('a')+(result[pos] - WSTR('A'));
    }
  return result;
}

// Checks whether the string, trimmed and lower-cased, is one of the boolean
// spellings understood by to<bool>(): yes/no, true/false, 1/0.
template<> bool vm_string::is<bool>() const
{
  vm_string value=trim().lower_case();
  // Spellings of "true".
  if (value==WSTR("yes") || value==WSTR("true") || value==WSTR("1"))
    return 1;
  // Spellings of "false".
  if (value==WSTR("no") || value==WSTR("false") || value==WSTR("0"))
    return 1;
  return 0;
}

// Checks whether the trimmed string is an unsigned decimal number: an
// optional leading '+' followed by at least one digit and nothing else.
template<> bool vm_string::is<uint>() const
{
  vm_string text=trim();
  if (text.empty())
    return 0;
  int pos=(text[0]==WSTR('+'))?1:0;
  // A sign alone is not a number.
  if (pos>=text.length())
    return 0;
  while(pos<text.length())
    {
      if (text[pos]<WSTR('0') || text[pos]>WSTR('9'))
	return 0;
      pos++;
    }
  return 1;
}

// Checks whether the trimmed string is a signed decimal number: an optional
// leading '+' or '-' followed by at least one digit and nothing else.
template<> bool vm_string::is<int>() const
{
  vm_string text=trim();
  if (text.empty())
    return 0;
  int pos=(text[0]==WSTR('+') || text[0]==WSTR('-'))?1:0;
  // A sign alone is not a number.
  if (pos>=text.length())
    return 0;
  while(pos<text.length())
    {
      if (text[pos]<WSTR('0') || text[pos]>WSTR('9'))
	return 0;
      pos++;
    }
  return 1;
}

// Converts the string, trimmed and lower-cased, to a boolean:
// "true", "yes" and "1" give true, "false", "no" and "0" give false.
// Any other content is reported through VM_STOP("Type mismatch").
template<> bool vm_string::to<bool>() const
{
  vm_string value=trim().lower_case();
  if (value==WSTR("false") || value==WSTR("no") || value== WSTR("0"))
    return 0;
  if (value==WSTR("true") || value==WSTR("yes") || value==WSTR("1"))
    return 1;
  VM_STOP("Type mismatch");
}

// Converts the trimmed string to int. Accepted form: an optional '+' or '-'
// followed by at least one decimal digit. Anything else is reported through
// VM_STOP("Type mismatch"). No overflow check is performed.
template<> int vm_string::to<int>() const
{
  vm_string text=trim();
  if (text.empty())
    VM_STOP("Type mismatch");
  const bool negative=(text[0]==WSTR('-'));
  // Skip the sign character, if there is one.
  int pos=(negative || text[0]==WSTR('+'))?1:0;
  if (pos>=text.length())
    VM_STOP("Type mismatch");
  int value=0;
  while(pos<text.length())
    {
      if (text[pos]<WSTR('0') || text[pos] > WSTR('9'))
	VM_STOP("Type mismatch");
      value *= 10;
      value += text[pos]-WSTR('0');
      pos++;
    }
  if (negative)
    value *= -1;
  return value;
}

// Converts the trimmed string to uint. Accepted form: an optional '+'
// followed by at least one decimal digit. Anything else is reported through
// VM_STOP("Type mismatch"). No overflow check is performed.
template<> uint vm_string::to<uint>() const
{
  vm_string text=trim();
  if (text.empty())
    VM_STOP("Type mismatch");
  // Skip the leading plus sign, if there is one.
  int pos=(text[0]==WSTR('+'))?1:0;
  if (pos>=text.length())
    VM_STOP("Type mismatch");
  uint value=0;
  while(pos<text.length())
    {
      if (text[pos]<WSTR('0') || text[pos] > WSTR('9'))
	VM_STOP("Type mismatch");
      value *= 10;
      value += text[pos]-WSTR('0');
      pos++;
    }
  return value;
}

// Checks whether the trimmed string is a decimal number of the form
//   [+|-] digits [ '.' digits ]
// At least one digit is required before the optional fraction, and a decimal
// point must be followed by at least one digit.
//
// A lone "+" or "-" is rejected. The original state machine put the sign and
// the first digit into the same state and therefore accepted a bare sign,
// which later failed the assert(!s1.empty()) check in to_double().
bool vm_string::is_double() const
{
  enum {START, SIGN, INT_PART, POINT, FRACTION};
  vm_string s=trim();
  int state=START;
  for(int i=0;i<s.length();i++)
    {
      const bool digit=(s[i]>=WSTR('0') && s[i]<=WSTR('9'));
      switch(state)
	{
	case START:
	  if (digit)
	    state=INT_PART;
	  else if (s[i]==WSTR('-') || s[i]==WSTR('+'))
	    state=SIGN;
	  else
	    return 0;
	  break;
	case SIGN:
	  // A sign must be followed by a digit.
	  if (!digit)
	    return 0;
	  state=INT_PART;
	  break;
	case INT_PART:
	  if (digit)
	    break;
	  if (s[i]!=WSTR('.'))
	    return 0;
	  state=POINT;
	  break;
	case POINT:
	case FRACTION:
	  if (!digit)
	    return 0;
	  state=FRACTION;
	  break;
	}
    }// for
  return (state==INT_PART || state==FRACTION);
}


double vm_string::to_double() const
{
  assert(is_double());
  vm_string s1, s2, s=trim();
  bool b=0, sign=0;
  int i;
  for(i=0;i<s.length();i++)
    {
      if (s[i]==WSTR('.'))
	{
	  b=1;
	  continue;
	}
      if (s[i]==WSTR('-'))
	{
	  sign=1;
	  continue;
	}
      if (s[i]<WSTR('0') || s[i]>WSTR('9'))
	continue;
      if (!b)
	s1+=s[i];else 
	  s2+=s[i];
    }
  assert(!s1.empty());
  double d1=0, d2=0;
  for(i=0;i<s1.length();i++)
    {
      d1*=10;
      d1+=s1[i]-WSTR('0');
    }
  if (!s2.empty())
    {
      int z=s2.length()-1;
      while(z>=0 && s2[z]==WSTR('0')) z--;
      if (z>=0)
	{
	  for(i=0;i<=z;i++)
	    {
	      d2*=10;
	      d2+=s2[i]-WSTR('0');
	    }
	  for(i=0;i<=z;i++)
	    d2/=10;
	}
    }
  d1+=d2;
  if (sign)
    d1*= -1;
  return d1;
}

// Builds the decimal text of f with exactly p digits after the decimal point
// (p must not exceed 10, checked with assert). The value is scaled by 10^p
// and truncated towards zero, not rounded. With p==0 no decimal point is
// written.
vm_string::vm_string(double f, unsigned long p)
{
  assert(p<=10);
  double scaled=f;
  for(int left=p;left>0;left--)
    scaled*=10;
  long long magnitude=(long long)scaled;
  const bool negative=magnitude<0;
  if (negative)
    magnitude*=-1;

  std::wostringstream digits_stream;
  digits_stream<<magnitude;
  vm_string digits=digits_stream.str();
  // Pad with leading zeros so that at least one digit precedes the point.
  while(digits.length() < p+1)
    digits = WSTR('0')+digits;

  vm_string text;
  if (negative)
    text+=WSTR('-');
  // Index of the first fractional digit inside "digits".
  const unsigned long point_at=digits.length()-p;
  for(unsigned long pos=0;pos<digits.length();pos++)
    {
      if (p!=0 && pos==point_at)
	text+=WSTR('.');
      text+=digits[pos];
    }
  operator =(text);
}

// Returns true when this string holds at least one of the characters listed
// in s. An empty s, or an empty string, gives false.
bool vm_string::contains(const vm_string &s) const
{
  int mine=0;
  while(mine<length())
    {
      int theirs=0;
      while(theirs<s.length())
	{
	  if (operator [](mine)==s[theirs])
	    return 1;
	  theirs++;
	}
      mine++;
    }
  return 0;
}

// Appends the character c, except that a space is dropped when the string is
// empty or already ends with a space.
void vm_string::space_undup_attach(vm_char c)
{
  const bool is_space=(c==WSTR(' '));
  if (is_space && (empty() || operator [](length()-1)==WSTR(' ')))
    return;
  operator += (c);
}

// Appends s one character at a time through the single-character overload,
// so leading spaces and runs of spaces are collapsed on the way.
void vm_string::space_undup_attach(const vm_string &s)
{
  int pos=0;
  while(pos<s.length())
    {
      space_undup_attach(s[pos]);
      pos++;
    }
}

void vm_string::space_undup()
{
  vm_string s;
  int i;
  for(i=0;i<length();i++)
    s.space_undup_attach(operator [](i));
  operator =(s.trim());
}

// Returns the field number index (counted from 0) of the trimmed string,
// where fields are separated by ':'. An empty string is returned when there
// is no such field or when the scan reaches the end of the string.
vm_string vm_string::substr(int index) const
{
  assert(index >= 0);
  vm_string text=trim();

  // Move past the first "index" separators.
  int pos=0;
  int colons=0;
  while(pos<text.length() && colons<index)
    {
      if (text[pos] == WSTR(':'))
	colons++;
      pos++;
    }

  vm_string field;
  if (pos>=text.length())
    return field;

  // Copy everything up to the next separator or the end of the string.
  while(pos<text.length() && text[pos]!= WSTR(':'))
    {
      field += text[pos];
      pos++;
    }
  return field;
}

