{"created":"2021-03-01T06:20:04.421232+00:00","id":13072,"links":{},"metadata":{"_buckets":{"deposit":"88b3f446-9fd5-44cd-b4e8-2ac345312d2d"},"_deposit":{"id":"13072","owners":[],"pid":{"revision_id":0,"type":"depid","value":"13072"},"status":"published"},"_oai":{"id":"oai:nagoya.repo.nii.ac.jp:00013072","sets":["312:313:314"]},"author_link":["41135","41136","41137"],"item_10_biblio_info_6":{"attribute_name":"書誌情報","attribute_value_mlt":[{"bibliographicIssueDates":{"bibliographicIssueDate":"2008-03-01","bibliographicIssueDateType":"Issued"},"bibliographicIssueNumber":"3","bibliographicPageEnd":"466","bibliographicPageStart":"457","bibliographicVolumeNumber":"E91-D","bibliographic_titles":[{"bibliographic_title":"IEICE transactions on information and systems","bibliographic_titleLang":"en"}]}]},"item_10_description_4":{"attribute_name":"抄録","attribute_value_mlt":[{"subitem_description":"In a distant-talking environment, the length of channel impulse response is longer than the short-term spectral analysis window. Conventional short-term spectrum based Cepstral Mean Normalization (CMN) is therefore, not effective under these conditions. In this paper, we propose a robust speech recognition method by combining a short-term spectrum based CMN with a long-term one. We assume that a static speech segment (such as a vowel, for example) affected by reverberation, can be modeled by a long-term cepstral analysis. Thus, the effect of long reverberation on a static speech segment may be compensated by the long-term spectrum based CMN. The cepstral distance of neighboring frames is used to discriminate the static speech segment (long-term spectrum) and the non-static speech segment (short-term spectrum). The cepstra of the static and non-static speech segments are normalized by the corresponding cepstral means. In a previous study, we proposed an environmentally robust speech recognition method based on Position-Dependent CMN (PDCMN) to compensate for channel distortion depending on speaker position, and which is more efficient than conventional CMN. In this paper, the concept of combining short-term and long-term spectrum based CMN is extended to PDCMN. We call this Variable Term spectrum based PDCMN (VT-PDCMN). Since PDCMN/VT-PDCMN cannot normalize speaker variations because a position-dependent cepstral mean contains the average speaker characteristics over all speakers, we also combine PDCMN/VT-PDCMN with conventional CMN in this study. We conducted the experiments based on our proposed method using limited vocabulary (100 words) distant-talking isolated word recognition in a real environment. The proposed method achieved a relative error reduction rate of 60.9% over the conventional short-term spectrum based CMN and 30.6% over the short-term spectrum based PDCMN.","subitem_description_language":"en","subitem_description_type":"Abstract"}]},"item_10_identifier_60":{"attribute_name":"URI","attribute_value_mlt":[{"subitem_identifier_type":"URI","subitem_identifier_uri":"http://www.ieice.org/jpn/trans_online/index.html"},{"subitem_identifier_type":"HDL","subitem_identifier_uri":"http://hdl.handle.net/2237/14966"}]},"item_10_publisher_32":{"attribute_name":"出版者","attribute_value_mlt":[{"subitem_publisher":"Institute of Electronics, Information and Communication Engineers","subitem_publisher_language":"en"}]},"item_10_relation_43":{"attribute_name":"関連情報","attribute_value_mlt":[{"subitem_relation_type":"isVersionOf","subitem_relation_type_id":{"subitem_relation_type_id_text":"http://www.ieice.org/jpn/trans_online/index.html","subitem_relation_type_select":"URI"}}]},"item_10_rights_12":{"attribute_name":"権利","attribute_value_mlt":[{"subitem_rights":"Copyright (C) 2008 IEICE","subitem_rights_language":"en"}]},"item_10_select_15":{"attribute_name":"著者版フラグ","attribute_value_mlt":[{"subitem_select_item":"publisher"}]},"item_10_source_id_7":{"attribute_name":"ISSN","attribute_value_mlt":[{"subitem_source_identifier":"0916-8532","subitem_source_identifier_type":"PISSN"}]},"item_1615787544753":{"attribute_name":"出版タイプ","attribute_value_mlt":[{"subitem_version_resource":"http://purl.org/coar/version/c_970fb48d4fbd8a85","subitem_version_type":"VoR"}]},"item_access_right":{"attribute_name":"アクセス権","attribute_value_mlt":[{"subitem_access_right":"open access","subitem_access_right_uri":"http://purl.org/coar/access_right/c_abf2"}]},"item_creator":{"attribute_name":"著者","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"WANG, Longbiao","creatorNameLang":"en"}],"nameIdentifiers":[{"nameIdentifier":"41135","nameIdentifierScheme":"WEKO"}]},{"creatorNames":[{"creatorName":"NAKAGAWA, Seiichi","creatorNameLang":"en"}],"nameIdentifiers":[{"nameIdentifier":"41136","nameIdentifierScheme":"WEKO"}]},{"creatorNames":[{"creatorName":"KITAOKA, Norihide","creatorNameLang":"en"}],"nameIdentifiers":[{"nameIdentifier":"41137","nameIdentifierScheme":"WEKO"}]}]},"item_files":{"attribute_name":"ファイル情報","attribute_type":"file","attribute_value_mlt":[{"accessrole":"open_date","date":[{"dateType":"Available","dateValue":"2018-02-20"}],"displaytype":"detail","filename":"393.pdf","filesize":[{"value":"350.5 kB"}],"format":"application/pdf","licensetype":"license_note","mimetype":"application/pdf","url":{"label":"393.pdf","objectType":"fulltext","url":"https://nagoya.repo.nii.ac.jp/record/13072/files/393.pdf"},"version_id":"05c0c528-97cf-498f-82d7-8f52f772addc"}]},"item_keyword":{"attribute_name":"キーワード","attribute_value_mlt":[{"subitem_subject":"robust speech recognition","subitem_subject_scheme":"Other"},{"subitem_subject":"distant-talking environment","subitem_subject_scheme":"Other"},{"subitem_subject":"CMN","subitem_subject_scheme":"Other"},{"subitem_subject":"long-term spectrum","subitem_subject_scheme":"Other"}]},"item_language":{"attribute_name":"言語","attribute_value_mlt":[{"subitem_language":"eng"}]},"item_resource_type":{"attribute_name":"資源タイプ","attribute_value_mlt":[{"resourcetype":"journal article","resourceuri":"http://purl.org/coar/resource_type/c_6501"}]},"item_title":"Robust Speech Recognition by Combining Short-Term and Long-Term Spectrum Based Position-Dependent CMN with Conventional CMN","item_titles":{"attribute_name":"タイトル","attribute_value_mlt":[{"subitem_title":"Robust Speech Recognition by Combining Short-Term and Long-Term Spectrum Based Position-Dependent CMN with Conventional CMN","subitem_title_language":"en"}]},"item_type_id":"10","owner":"1","path":["314"],"pubdate":{"attribute_name":"PubDate","attribute_value":"2011-06-28"},"publish_date":"2011-06-28","publish_status":"0","recid":"13072","relation_version_is_last":true,"title":["Robust Speech Recognition by Combining Short-Term and Long-Term Spectrum Based Position-Dependent CMN with Conventional CMN"],"weko_creator_id":"1","weko_shared_id":-1},"updated":"2023-01-16T03:59:57.287459+00:00"}