#!/usr/bin/python
import os
import os.path
import tempfile
import shutil
import glob
import subprocess
import sys

# Phone classes used to build the phone-identity questions.
# Each entry is a pair (group name, list of phone symbols).
# The order of the entries matters: the main block addresses entry 0
# ("Vowel") and entry 1 ("Consonant") by index, and every group whose phones
# all appear in entry 0 is additionally used for the "C-Syl_" questions.
# NOTE(review): presumably this is the phone set of the Russian voice named in
# the usage message -- confirm against the voice's phoneset definition.
phone_groups=[
    ('Vowel',['ii','yy','uu','ee','oo','aa','a','e','i','y','u','ae','ay','ur']),
    ('Consonant',['p','pp','b','bb','t','tt','d','dd','k','kk','g','gg','c','ch','f','ff','v','vv','s','ss','z','zz','sh','sch','zh','h','hh','m','mm','n','nn','l','ll','r','rr','j']),
    ('Front_Vowel',['ee','ii','e','i']),
    ('MidFrontnessVowel',['yy','aa','y','a','ae','ay']),
    ('Back_Vowel',['uu','oo','u','ur']),
    ('High_Vowel',['ii','yy','uu','u','i','y','ur']),
    ('MidHeightVowel',['ee','oo','e','ae','ay']),
    ('Low_Vowel',['aa','a']),
    ('Unrounded_Vowel',['ii','yy','ee','aa','a','e','i','y','ae','ay']),
    ('Long_Vowel',['aa','oo','ee','ii','yy','uu']),
    ('Short_Vowel',['a','e','i','y','u']),
    ('Reduced_Vowel',['ae','ay','ur']),
    ('Unstressed_Vowel',['a','e','i','u','y','ae','ay','ur']),
    ('Voiced',['b','bb','d','dd','g','gg','v','vv','z','zz','zh','m','mm','n','nn','l','ll','r','rr','j']),
    ('Unvoiced',['p','pp','t','tt','k','kk','c','ch','f','ff','s','ss','sh','sch','h','hh']),
    ('Soft',['pp','bb','tt','dd','kk','gg','ch','ff','vv','ss','zz','sch','hh','mm','nn','ll','rr','j']),
    ('Hard',['p','b','t','d','k','g','c','f','v','s','z','sh','zh','h','m','n','l','r']),
    ('Stop',['p','pp','b','bb','t','tt','d','dd','k','kk','g','gg']),
    ('Fricative',['f','ff','v','vv','s','ss','z','zz','sh','sch','zh','h','hh','j']),
    ('Affricative',['c','ch']),
    ('Nasal',['n','nn','m','mm']),
    ('LConsonant',['l','ll']),
    ('RConsonant',['r','rr']),
    ('Sonorant',['m','mm','n','nn','l','ll','r','rr','j']),
    ('Labial',['p','pp','b','bb','m','mm','f','ff','v','vv']),
    ('LabioDental',['f','ff','v','vv']),
    ('LabioLabial',['p','pp','b','bb','m','mm']),
    ('Dental',['t','tt','d','dd','c','s','ss','z','zz','n','nn','l','ll']),
    ('Alveolar',['ch','sh','zh','r','rr','sch']),
    ('Palatal',['k','kk','g','gg','h','hh']),
    ('silence',['pau'])]

# Question kinds accepted by write_questions():
qst_type_1=1  # set membership: a single question matching any of the values
qst_type_2=2  # numeric: "==" questions for x and 0..max, then cumulative "<=" questions
qst_type_3=3  # categorical: one "==" question for x and for each value

def write_questions(ofile,qst_name,lab_fmt,str_vals,qst_type,add_1=True):
    """Write 'QS "<name>" {<patterns>}' question lines to ofile.

    Arguments:
      ofile    -- object with a write() method receiving the text.
      qst_name -- base name of the question(s).
      lab_fmt  -- %-format string with a single %s; it is expanded once per
                  value to obtain the label pattern for that value.
      str_vals -- iterable of values as strings.  For qst_type_2 every value
                  must be parseable by int().
      qst_type -- qst_type_1: one question named qst_name whose pattern list
                  covers all values in sorted order.
                  qst_type_2: the values only determine the maximum; a
                  '<name>==<v>' question is written for "x" and for every
                  integer from 0 to the maximum, followed by a '<name><=<v>'
                  question for every integer whose pattern list covers "x"
                  and 0..v.
                  Any other value (qst_type_3 in this file): a
                  '<name>==<v>' question for "x" and for each value in
                  sorted order.
      add_1    -- qst_type_2 only: extend the range one past the largest
                  value found in str_vals.

    The literal "x" is always added as an extra value for the '==' and '<='
    questions (presumably the label placeholder for an undefined context --
    confirm against the label format).

    Raises ValueError if qst_type is qst_type_2 and str_vals is empty or
    contains a value that is not an integer.
    """
    if qst_type==qst_type_2:
        # Position/distance/length questions.
        m=max(map(int,str_vals))
        if add_1:
            m+=1
        # list() keeps the concatenation below working on Python 3,
        # where range() is no longer a list.
        vals=list(range(m+1))
    else:
        vals=sorted(str_vals)
    if qst_type==qst_type_1:
        labs=",".join(lab_fmt % v for v in vals)
        ofile.write('QS "%s" {%s}\n' % (qst_name,labs))
        return
    for v in ["x"]+vals:
        ofile.write('QS "%s==%s" {%s}\n' % (qst_name,str(v),(lab_fmt % str(v))))
    ofile.write("\n")
    if qst_type==qst_type_2:
        # Grow the pattern list one value at a time instead of rebuilding
        # the whole "x",0..v prefix for every question.
        patterns=[lab_fmt % "x"]
        for v in vals:
            patterns.append(lab_fmt % str(v))
            ofile.write('QS "%s<=%d" {%s}\n' % (qst_name,v,",".join(patterns)))
        ofile.write("\n")
                
if __name__=="__main__":
    # Command-line entry point.
    #   1. Run Festival's dumpfeats script over all utterances of the training
    #      directory to dump the features listed in data/scripts/label.feats.
    #   2. Collect, for every feature, the set of values that actually occur.
    #   3. Write the question files under data/questions, using those value
    #      sets to decide which questions to generate.
    if len(sys.argv)<2:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("This script generates questions for training a Russian HTS voice")
        print("Usage: %s <training directory>" % sys.argv[0])
        sys.exit(2)
    train_dir=sys.argv[1]
    scripts_dir=os.path.join(train_dir,"data","scripts")
    label_feats=os.path.join(scripts_dir,"label.feats")
    estdir=os.environ.get("ESTDIR")
    if not estdir:
        sys.exit("ESTDIR variable is not set")
    # dumpfeats is started with cwd=temp_dir, and a relative program path
    # would then be resolved against that directory, so make it absolute.
    dumpfeats=os.path.abspath(os.path.join(estdir,"..","festival","examples","dumpfeats"))
    if not os.path.isfile(dumpfeats):
        sys.exit("Cannot find dumpfeats script")
    utts_list=glob.glob(os.path.join(train_dir,"data","utts","*.utt"))
    if not utts_list:
        sys.exit("Cannot find the utterances")
    temp_dir=tempfile.mkdtemp()
    try:
        # dumpfeats reads the utterance list and writes its output relative
        # to its working directory, which is the temporary directory.
        with open(os.path.join(temp_dir,"utts.list"),"w") as f_utts_list:
            for name in utts_list:
                f_utts_list.write(os.path.abspath(name)+"\n")
        dumpfeats_args=[
            dumpfeats,
            "-relation","Segment",
            "-output","utts.data",
            "-feats",os.path.abspath(label_feats),
            "-eval",os.path.abspath(os.path.join(scripts_dir,"extra_feats.scm")),
            "-from_file","utts.list"]
        # The return code is not inspected; success is judged by whether the
        # output file exists.
        subprocess.call(dumpfeats_args,cwd=temp_dir)
        utts_data=os.path.join(temp_dir,"utts.data")
        if not os.path.isfile(utts_data):
            # Exit status stays 1; the finally clause removes temp_dir.
            sys.exit("dumpfeats did not produce utts.data")
        # Feature names: first token of every line that is neither empty
        # nor a ';' comment; a trailing ';' comment is cut off.
        feats=[]
        with open(label_feats) as f_feats:
            for line in f_feats:
                line=line.strip()
                if line and (not line.startswith(";")):
                    line=line.replace(";"," ")
                    feats.append(line.split()[0])
        data={}
        for name in feats:
            data[name]=set()
        # Segment boundaries are not used for questions.  They stay in feats
        # so that the columns of utts.data still line up with the names.
        data.pop("segment_start",None)
        data.pop("segment_end",None)
        # Plain "r": the "U" mode flag is rejected by Python 3.11 and later,
        # and split() discards any stray line-ending characters anyway.
        with open(utts_data,"r") as f_data:
            for line in f_data:
                for k,v in zip(feats,line.split()):
                    s=data.get(k)
                    if s is not None:
                        s.add(v)
    finally:
        # Remove the temporary directory on success, error and sys.exit alike.
        shutil.rmtree(temp_dir)
    # f_out_1 receives the full question set, f_out_2 only the
    # utterance-level questions at the end.
    with open(os.path.join(train_dir,"data","questions","questions_qst001.hed"),"w") as f_out_1:
        with open(os.path.join(train_dir,"data","questions","questions_utt_qst001.hed"),"w") as f_out_2:
            # Phone identity questions for the five phone positions of the label.
            for qst_name_fmt,lab_fmt in [("LL-%s","%s^*"),("L-%s","*^%s-*"),("C-%s","*-%s+*"),("R-%s","*+%s=*"),("RR-%s","*=%s@*")]:
                for gr,phs in phone_groups:
                    write_questions(f_out_1,(qst_name_fmt % gr),lab_fmt,phs,qst_type_1)
                write_questions(f_out_1,(qst_name_fmt % "name"),lab_fmt,phone_groups[0][1]+phone_groups[1][1],qst_type_3)
            write_questions(f_out_1,"Seg_Fw","*@%s_*",data["pos_in_syl"],qst_type_2)
            write_questions(f_out_1,"Seg_Bw","*_%s/A:*",data["pos_in_syl"],qst_type_2)
            write_questions(f_out_1,"L-Syl_Stress","*/A:%s_*",data["R:SylStructure.parent.R:Syllable.stress"],qst_type_3)
            write_questions(f_out_1,"L-Syl_Num-Segs","*_%s/B:*",data["R:SylStructure.parent.R:Syllable.syl_numphones"],qst_type_2,False)
            write_questions(f_out_1,"C-Syl_Stress","*/B:%s-*",data["R:SylStructure.parent.R:Syllable.stress"],qst_type_3)
            write_questions(f_out_1,"C-Syl_Num-Segs","*-%s@*",data["R:SylStructure.parent.R:Syllable.syl_numphones"],qst_type_2,False)
            write_questions(f_out_1,"Pos_C-Syl_in_C-Word(Fw)","*@%s-*",data["R:SylStructure.parent.R:Syllable.pos_in_word"],qst_type_2)
            write_questions(f_out_1,"Pos_C-Syl_in_C-Word(Bw)","*-%s&*",data["R:SylStructure.parent.R:Syllable.pos_in_word"],qst_type_2)
            write_questions(f_out_1,"Pos_C-Syl_in_C-Phrase(Fw)","*&%s-*",data["R:SylStructure.parent.R:Syllable.syl_out"],qst_type_2)
            write_questions(f_out_1,"Pos_C-Syl_in_C-Phrase(Bw)","*-%s#*",data["R:SylStructure.parent.R:Syllable.syl_out"],qst_type_2)
            write_questions(f_out_1,"Num-StressedSyl_before_C-Syl_in_C-Phrase","*#%s-*",data["R:SylStructure.parent.R:Syllable.ssyl_in"],qst_type_2)
            # NOTE(review): the "after" question takes its value range from
            # ssyl_in, whereas the ContWord pair further down uses
            # content_words_in / content_words_out -- confirm whether a
            # ssyl_out feature was intended here.
            write_questions(f_out_1,"Num-StressedSyl_after_C-Syl_in_C-Phrase","*-%s$*",data["R:SylStructure.parent.R:Syllable.ssyl_in"],qst_type_2)
            write_questions(f_out_1,"Num-Syl_from_prev-StressedSyl","*!%s-*",data["R:SylStructure.parent.R:Syllable.lisp_distance_to_p_stress"],qst_type_2,False)
            write_questions(f_out_1,"Num-Syl_from_next-StressedSyl","*-%s;*",data["R:SylStructure.parent.R:Syllable.lisp_distance_to_n_stress"],qst_type_2,False)
            # Syllable-vowel questions: only groups made up entirely of vowels.
            for gr,phs in phone_groups:
                if set(phs).issubset(phone_groups[0][1]):
                    write_questions(f_out_1,"C-Syl_"+gr,"*|%s/C:*",phs,qst_type_1)
            write_questions(f_out_1,"C-Syl_Vowel","*|%s/C:*",phone_groups[0][1]+["novowel"],qst_type_3)
            write_questions(f_out_1,"R-Syl_Stress","*/C:%s+*",data["R:SylStructure.parent.R:Syllable.stress"],qst_type_3)
            write_questions(f_out_1,"R-Syl_Num-Segs","*+%s/D:*",data["R:SylStructure.parent.R:Syllable.syl_numphones"],qst_type_2,False)
            write_questions(f_out_1,"L-Word_GPOS","*/D:%s_*",data["R:SylStructure.parent.parent.R:Word.gpos"],qst_type_3)
            write_questions(f_out_1,"L-Word_Num-Syls","*_%s/E:*",data["R:SylStructure.parent.parent.R:Word.word_numsyls"],qst_type_2,False)
            write_questions(f_out_1,"C-Word_GPOS","*/E:%s+*",data["R:SylStructure.parent.parent.R:Word.gpos"],qst_type_3)
            write_questions(f_out_1,"C-Word_Num-Syls","*+%s@*",data["R:SylStructure.parent.parent.R:Word.word_numsyls"],qst_type_2,False)
            write_questions(f_out_1,"Pos_C-Word_in_C-Phrase(Fw)","*@%s+*",data["R:SylStructure.parent.parent.R:Word.pos_in_phrase"],qst_type_2)
            write_questions(f_out_1,"Pos_C-Word_in_C-Phrase(Bw)","*+%s&*",data["R:SylStructure.parent.parent.R:Word.pos_in_phrase"],qst_type_2)
            write_questions(f_out_1,"Num-ContWord_before_C-Word_in_C-Phrase","*&%s+*",data["R:SylStructure.parent.parent.R:Word.content_words_in"],qst_type_2)
            write_questions(f_out_1,"Num-ContWord_after_C-Word_in_C-Phrase","*+%s#*",data["R:SylStructure.parent.parent.R:Word.content_words_out"],qst_type_2)
            write_questions(f_out_1,"Num-Words_from_prev-ContWord","*#%s+*",data["R:SylStructure.parent.parent.R:Word.lisp_distance_to_p_content"],qst_type_2,False)
            write_questions(f_out_1,"Num-Words_from_next-ContWord","*+%s/F:*",data["R:SylStructure.parent.parent.R:Word.lisp_distance_to_n_content"],qst_type_2,False)
            write_questions(f_out_1,"R-Word_GPOS","*/F:%s_*",data["R:SylStructure.parent.parent.R:Word.gpos"],qst_type_3)
            write_questions(f_out_1,"R-Word_Num-Syls","*_%s/G:*",data["R:SylStructure.parent.parent.R:Word.word_numsyls"],qst_type_2,False)
            write_questions(f_out_1,"L-Phrase_Num-Syls","*/G:%s_*",data["R:SylStructure.parent.parent.R:Phrase.parent.lisp_num_syls_in_phrase"],qst_type_2,False)
            write_questions(f_out_1,"L-Phrase_Num-Words","*_%s/H:*",data["R:SylStructure.parent.parent.R:Phrase.parent.lisp_num_words_in_phrase"],qst_type_2,False)
            write_questions(f_out_1,"C-Phrase_Num-Syls","*/H:%s=*",data["R:SylStructure.parent.parent.R:Phrase.parent.lisp_num_syls_in_phrase"],qst_type_2,False)
            write_questions(f_out_1,"C-Phrase_Num-Words","*=%s@*",data["R:SylStructure.parent.parent.R:Phrase.parent.lisp_num_words_in_phrase"],qst_type_2,False)
            write_questions(f_out_1,"Pos_C-Phrase_in_Utterance(Fw)","*@%s=*",data["R:SylStructure.parent.R:Syllable.sub_phrases"],qst_type_2)
            write_questions(f_out_1,"Pos_C-Phrase_in_Utterance(Bw)","*=%s|*",data["R:SylStructure.parent.R:Syllable.sub_phrases"],qst_type_2)
            write_questions(f_out_1,"R-Phrase_Num-Syls","*/I:%s=*",data["R:SylStructure.parent.parent.R:Phrase.parent.lisp_num_syls_in_phrase"],qst_type_2,False)
            write_questions(f_out_1,"R-Phrase_Num-Words","*=%s/J:*",data["R:SylStructure.parent.parent.R:Phrase.parent.lisp_num_words_in_phrase"],qst_type_2,False)
            # Utterance-level questions go to both output files.
            write_questions(f_out_1,"Num-Syls_in_Utterance","*/J:%s+*",data["lisp_total_syls"],qst_type_2,False)
            write_questions(f_out_2,"Num-Syls_in_Utterance","*/J:%s+*",data["lisp_total_syls"],qst_type_2,False)
            write_questions(f_out_1,"Num-Words_in_Utterance","*+%s-*",data["lisp_total_words"],qst_type_2,False)
            write_questions(f_out_2,"Num-Words_in_Utterance","*+%s-*",data["lisp_total_words"],qst_type_2,False)
            write_questions(f_out_1,"Num-Phrases_in_Utterance","*-%s",data["lisp_total_phrases"],qst_type_2,False)
            write_questions(f_out_2,"Num-Phrases_in_Utterance","*-%s",data["lisp_total_phrases"],qst_type_2,False)


