WEKO3
アイテム
{"_buckets": {"deposit": "dabc7bd0-8990-461c-8e27-62b0853cacf8"}, "_deposit": {"id": "13157", "owners": [], "pid": {"revision_id": 0, "type": "depid", "value": "13157"}, "status": "published"}, "_oai": {"id": "oai:nagoya.repo.nii.ac.jp:00013157", "sets": ["314"]}, "author_link": ["41485", "41486", "41487"], "item_10_biblio_info_6": {"attribute_name": "書誌情報", "attribute_value_mlt": [{"bibliographicIssueDates": {"bibliographicIssueDate": "2006-03-01", "bibliographicIssueDateType": "Issued"}, "bibliographicIssueNumber": "3", "bibliographicPageEnd": "1049", "bibliographicPageStart": "1040", "bibliographicVolumeNumber": "E89-D", "bibliographic_titles": [{"bibliographic_title": "IEICE transactions on information and systems", "bibliographic_titleLang": "en"}]}]}, "item_10_description_4": {"attribute_name": "抄録", "attribute_value_mlt": [{"subitem_description": "This study shows the effectiveness of using gamma distribution in the speech power domain as a more general prior distribution for the model-based speech enhancement approaches. This model is a super-set of the conventional Gaussian model of the complex spectrum and provides more accurate prior modeling when the optimal parameters are estimated. We develop a method to adapt the modeled distribution parameters from each actual noisy speech in a frame-by-frame manner. Next, we derive and investigate the minimum mean square error (MMSE) and maximum a posterior probability (MAP) estimations in different domains of speech spectral magnitude, generalized power and its logarithm, using the proposed gamma modeling. Finally, a comparative evaluation of the MAP and MMSE filters is conducted. As the MMSE estimations tend to more complicated using more general prior distributions, the MAP estimations are given in closed-form extractions and therefore are suitable in the implementation. 
The adaptive estimation of the modeled distribution parameters provides more accurate prior modeling and this is the principal merit of the proposed method and the reason for the better performance. From the experiments, the MAP estimation is recommended due to its high efficiency and low complexity. Among the MAP based systems, the estimation in log-magnitude domain is shown to be the best for the speech recognition as the estimation in power domain is superior for the noise reduction.", "subitem_description_language": "en", "subitem_description_type": "Abstract"}]}, "item_10_identifier_60": {"attribute_name": "URI", "attribute_value_mlt": [{"subitem_identifier_type": "URI", "subitem_identifier_uri": "http://www.ieice.org/jpn/trans_online/index.html"}, {"subitem_identifier_type": "HDL", "subitem_identifier_uri": "http://hdl.handle.net/2237/15052"}]}, "item_10_publisher_32": {"attribute_name": "出版者", "attribute_value_mlt": [{"subitem_publisher": "Institute of Electronics, Information and Communication Engineers", "subitem_publisher_language": "en"}]}, "item_10_relation_43": {"attribute_name": "関連情報", "attribute_value_mlt": [{"subitem_relation_type": "isVersionOf", "subitem_relation_type_id": {"subitem_relation_type_id_text": "http://www.ieice.org/jpn/trans_online/index.html", "subitem_relation_type_select": "URI"}}]}, "item_10_select_15": {"attribute_name": "著者版フラグ", "attribute_value_mlt": [{"subitem_select_item": "publisher"}]}, "item_10_source_id_7": {"attribute_name": "ISSN", "attribute_value_mlt": [{"subitem_source_identifier": "0916-8532", "subitem_source_identifier_type": "PISSN"}]}, "item_1615787544753": {"attribute_name": "出版タイプ", "attribute_value_mlt": [{"subitem_version_resource": "http://purl.org/coar/version/c_970fb48d4fbd8a85", "subitem_version_type": "VoR"}]}, "item_access_right": {"attribute_name": "アクセス権", "attribute_value_mlt": [{"subitem_access_right": "open access", "subitem_access_right_uri": "http://purl.org/coar/access_right/c_abf2"}]}, 
"item_creator": {"attribute_name": "著者", "attribute_type": "creator", "attribute_value_mlt": [{"creatorNames": [{"creatorName": "DAT, Tran Huy", "creatorNameLang": "en"}], "nameIdentifiers": [{"nameIdentifier": "41485", "nameIdentifierScheme": "WEKO"}]}, {"creatorNames": [{"creatorName": "TAKEDA, Kazuya", "creatorNameLang": "en"}], "nameIdentifiers": [{"nameIdentifier": "41486", "nameIdentifierScheme": "WEKO"}]}, {"creatorNames": [{"creatorName": "ITAKURA, Fumitada", "creatorNameLang": "en"}], "nameIdentifiers": [{"nameIdentifier": "41487", "nameIdentifierScheme": "WEKO"}]}]}, "item_files": {"attribute_name": "ファイル情報", "attribute_type": "file", "attribute_value_mlt": [{"accessrole": "open_date", "date": [{"dateType": "Available", "dateValue": "2018-02-20"}], "displaytype": "detail", "download_preview_message": "", "file_order": 0, "filename": "431.pdf", "filesize": [{"value": "591.9 kB"}], "format": "application/pdf", "future_date_message": "", "is_thumbnail": false, "licensetype": "license_note", "mimetype": "application/pdf", "size": 591900.0, "url": {"label": "431.pdf", "objectType": "fulltext", "url": "https://nagoya.repo.nii.ac.jp/record/13157/files/431.pdf"}, "version_id": "9ea40467-842a-48fd-aa47-e2b557ffb63f"}]}, "item_keyword": {"attribute_name": "キーワード", "attribute_value_mlt": [{"subitem_subject": "speech enhancement", "subitem_subject_scheme": "Other"}, {"subitem_subject": "speech recognition", "subitem_subject_scheme": "Other"}, {"subitem_subject": "gamma modeling", "subitem_subject_scheme": "Other"}, {"subitem_subject": "fourth-order moment", "subitem_subject_scheme": "Other"}, {"subitem_subject": "MMSE", "subitem_subject_scheme": "Other"}, {"subitem_subject": "MAP", "subitem_subject_scheme": "Other"}, {"subitem_subject": "spectral magnitude", "subitem_subject_scheme": "Other"}, {"subitem_subject": "power", "subitem_subject_scheme": "Other"}, {"subitem_subject": "log-spectral magnitude", "subitem_subject_scheme": "Other"}]}, "item_language": 
{"attribute_name": "言語", "attribute_value_mlt": [{"subitem_language": "eng"}]}, "item_resource_type": {"attribute_name": "資源タイプ", "attribute_value_mlt": [{"resourcetype": "journal article", "resourceuri": "http://purl.org/coar/resource_type/c_6501"}]}, "item_title": "Gamma Modeling of Speech Power and Its On-Line Estimation for Statistical Speech Enhancement", "item_titles": {"attribute_name": "タイトル", "attribute_value_mlt": [{"subitem_title": "Gamma Modeling of Speech Power and Its On-Line Estimation for Statistical Speech Enhancement", "subitem_title_language": "en"}]}, "item_type_id": "10", "owner": "1", "path": ["314"], "permalink_uri": "http://hdl.handle.net/2237/15052", "pubdate": {"attribute_name": "PubDate", "attribute_value": "2011-07-07"}, "publish_date": "2011-07-07", "publish_status": "0", "recid": "13157", "relation": {}, "relation_version_is_last": true, "title": ["Gamma Modeling of Speech Power and Its On-Line Estimation for Statistical Speech Enhancement"], "weko_shared_id": -1}
Gamma Modeling of Speech Power and Its On-Line Estimation for Statistical Speech Enhancement
http://hdl.handle.net/2237/15052
http://hdl.handle.net/2237/15052 2eca6e21-af80-4edb-a785-237cf503d0ad
名前 / ファイル | ライセンス | アクション |
---|---|---|
431.pdf (591.9 kB)
|
|
Item type | 学術雑誌論文 / Journal Article(1) | |||||
---|---|---|---|---|---|---|
公開日 | 2011-07-07 | |||||
タイトル | ||||||
タイトル | Gamma Modeling of Speech Power and Its On-Line Estimation for Statistical Speech Enhancement | |||||
言語 | en | |||||
著者 |
DAT, Tran Huy
× DAT, Tran Huy × TAKEDA, Kazuya × ITAKURA, Fumitada |
|||||
アクセス権 | ||||||
アクセス権 | open access | |||||
アクセス権URI | http://purl.org/coar/access_right/c_abf2 | |||||
キーワード | ||||||
主題Scheme | Other | |||||
主題 | speech enhancement | |||||
キーワード | ||||||
主題Scheme | Other | |||||
主題 | speech recognition | |||||
キーワード | ||||||
主題Scheme | Other | |||||
主題 | gamma modeling | |||||
キーワード | ||||||
主題Scheme | Other | |||||
主題 | fourth-order moment | |||||
キーワード | ||||||
主題Scheme | Other | |||||
主題 | MMSE | |||||
キーワード | ||||||
主題Scheme | Other | |||||
主題 | MAP | |||||
キーワード | ||||||
主題Scheme | Other | |||||
主題 | spectral magnitude | |||||
キーワード | ||||||
主題Scheme | Other | |||||
主題 | power | |||||
キーワード | ||||||
主題Scheme | Other | |||||
主題 | log-spectral magnitude | |||||
抄録 | ||||||
内容記述 | This study shows the effectiveness of using gamma distribution in the speech power domain as a more general prior distribution for the model-based speech enhancement approaches. This model is a super-set of the conventional Gaussian model of the complex spectrum and provides more accurate prior modeling when the optimal parameters are estimated. We develop a method to adapt the modeled distribution parameters from each actual noisy speech in a frame-by-frame manner. Next, we derive and investigate the minimum mean square error (MMSE) and maximum a posteriori probability (MAP) estimations in different domains of speech spectral magnitude, generalized power and its logarithm, using the proposed gamma modeling. Finally, a comparative evaluation of the MAP and MMSE filters is conducted. As the MMSE estimations tend to be more complicated using more general prior distributions, the MAP estimations are given in closed-form extractions and therefore are suitable in the implementation. The adaptive estimation of the modeled distribution parameters provides more accurate prior modeling and this is the principal merit of the proposed method and the reason for the better performance. From the experiments, the MAP estimation is recommended due to its high efficiency and low complexity. Among the MAP based systems, the estimation in log-magnitude domain is shown to be the best for the speech recognition as the estimation in power domain is superior for the noise reduction. | |||||
言語 | en | |||||
内容記述タイプ | Abstract | |||||
出版者 | ||||||
言語 | en | |||||
出版者 | Institute of Electronics, Information and Communication Engineers | |||||
言語 | ||||||
言語 | eng | |||||
資源タイプ | ||||||
資源タイプresource | http://purl.org/coar/resource_type/c_6501 | |||||
タイプ | journal article | |||||
出版タイプ | ||||||
出版タイプ | VoR | |||||
出版タイプResource | http://purl.org/coar/version/c_970fb48d4fbd8a85 | |||||
関連情報 | ||||||
関連タイプ | isVersionOf | |||||
識別子タイプ | URI | |||||
関連識別子 | http://www.ieice.org/jpn/trans_online/index.html | |||||
ISSN | ||||||
収録物識別子タイプ | PISSN | |||||
収録物識別子 | 0916-8532 | |||||
書誌情報 |
en : IEICE transactions on information and systems 巻 E89-D, 号 3, p. 1040-1049, 発行日 2006-03-01 |
|||||
著者版フラグ | ||||||
値 | publisher | |||||
URI | ||||||
識別子 | http://www.ieice.org/jpn/trans_online/index.html | |||||
識別子タイプ | URI | |||||
URI | ||||||
識別子 | http://hdl.handle.net/2237/15052 | |||||
識別子タイプ | HDL |