% Workshop paper from the O-COCOSDA 2003 proceedings (repository export, cleaned up).
% - Typed as @inproceedings with the proceedings title in `booktitle`, since the
%   venue is a workshop proceedings volume rather than a journal.
% - The abstract lives in `abstract` (ignored by the standard styles) instead of
%   `note`, which the standard styles print in the bibliography.
% - The citation key is the repository's OAI identifier and is kept unchanged so
%   existing \cite commands keep resolving.
% NOTE(review): the exported record gives the month as Jun, while the proceedings
%   title dates the workshop to Oct 1-3 2003 -- confirm which month is intended.
@inproceedings{oai:nagoya.repo.nii.ac.jp:00013181,
  author    = {Ryu, Koichiro and Matsubara, Shigeki and Kawaguchi, Nobuo and Inagaki, Yasuyoshi},
  title     = {Bilingual Speech Dialogue Corpus for Simultaneous Machine Interpretation Research},
  booktitle = {{O-COCOSDA} 2003 : proceedings of the 6th {Oriental} {COCOSDA} workshop in {Singapore} on Oct 1-3 2003},
  pages     = {164--168},
  month     = jun,
  year      = {2003},
  abstract  = {Speech-to-speech translation has been an important research topic with the advance of technologies for speech processing and language processing. This paper describes a bilingual speech dialogue corpus which has been constructed for research on simultaneous machine interpretation at the Center for Integrated Acoustic Information Research (CIAIR), Nagoya University. The corpus has been implemented by collecting simulated cross-lingual conversations between English speech and Japanese speech through simultaneous interpretation, and by transcribing them manually with bilingual sentence alignment. In the year 2002, 216 spoken dialogues have been collected under a real environment, and transcribed into text files consisting of about 300,000 morphemes. In order to utilize the bilingual corpus effectively, every source utterance speech has been segmented into interpreting units according to its word-for-word translation and the word alignment of them. The interpreting unit means a linguistic chunk that could be interpreted separately and simultaneously. This paper has investigated linguistic characters of such the unit, and examined the feasibility of simultaneous machine interpretation.},
}