{"created":"2023-06-20T13:20:47.468316+00:00","id":843,"links":{},"metadata":{"_buckets":{"deposit":"5484420e-5b57-4cea-b394-c8bf5b316d66"},"_deposit":{"created_by":1,"id":"843","owners":[1],"pid":{"revision_id":0,"type":"depid","value":"843"},"status":"published"},"_oai":{"id":"oai:ir.soken.ac.jp:00000843","sets":["2:429:19"]},"author_link":["0","0","0"],"item_1_creator_2":{"attribute_name":"著者名","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"WATTARUJEEKRIT, Tuangthong"}],"nameIdentifiers":[{}]}]},"item_1_creator_3":{"attribute_name":"フリガナ","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"ワッタールジークリット, ツァンチョン"}],"nameIdentifiers":[{}]}]},"item_1_date_granted_11":{"attribute_name":"学位授与年月日","attribute_value_mlt":[{"subitem_dategranted":"2005-09-30"}]},"item_1_degree_grantor_5":{"attribute_name":"学位授与機関","attribute_value_mlt":[{"subitem_degreegrantor":[{"subitem_degreegrantor_name":"総合研究大学院大学"}]}]},"item_1_degree_name_6":{"attribute_name":"学位名","attribute_value_mlt":[{"subitem_degreename":"博士(情報学)"}]},"item_1_description_12":{"attribute_name":"要旨","attribute_value_mlt":[{"subitem_description":"Named entity recognition (NER) in the molecular biology domain, the task of identifying and categorizing molecular entities appearing in text, is one of the most important tasks in a biological text mining engine. In general, this task is taken as the first step towards the more ambitious task of molecular event extraction (relation extraction) and, eventually, pathway discovery. However, NER in this scientific domain, which seems to be the easiest task among others in text mining, still achieves quite low performance. As can be seen from the most recent shared-task evaluations of NER in this domain (JNLPBA-2004), the best performance in terms of F1-score is only 72.6. This result is far below what is achieved by NER systems in the newswire domain (F1-score of about 96%), which is near the human level of performance. 
At present, most NER systems employ term internal features (e.g., lexical and morphological) and co-occurrence information as term external features. Due to the lack of a molecular naming convention, which leads to the difficulty of terminological variations as well as the difficulty of polysemy (i.e., the sharing of names between different entities), such features are insufficient to handle the difficulties for NER in the molecular biology domain. Obtaining a complete set of rules for lexical patterns of molecular names seems impossible; thus, using term external features other than co-occurrence information is of interest.
 In this thesis, the semantic relationships between a predicate and its arguments in terms of semantic roles are proposed to enhance NER systems in the molecular biology domain. The semantic role information is derived from a predicate-argument structure (PAS), which is a higher sentence representation level than the syntactic relation and surface form levels. Thus, the use of semantic roles is more consistent than co-occurrence information derived from the surface level. To employ semantic roles in an NER system, they are realized as various sets of syntactic features, which were used by a machine learning model to explore the most efficient way of allowing this knowledge to provide the highest positive effect on NER.
 As a result, the best feature set is composed of the 6 lexical features (i.e., surface word, lemma form, orthographic feature, part-of-speech, phrase-chunk and head word of NP-chunk) and 4 PAS-related features for representing an argument's semantic role (i.e., predicate's surface form, predicate's lemma, voice and the united feature of subject-object head's lemma and transitive-intransitive sense). Moreover, the use of semantic roles shows positive effects only for the predicates conforming to the following criteria. A predicate must have its arguments as both agent and theme with a higher probability of belonging to a named entity class than non-named entity class; otherwise, a predicate must have its arguments as both agent and theme with a lower probability of belonging to a named entity class than non-named entity class and the number of training examples for this predicate should be large enough (judging from empirical evidence, at least 270 sentences). The improvement in performance obtained from the NER system using PAS-related features, compared to not using these features, affirms that the use of semantic roles can enhance an NER system.","subitem_description_type":"Other"}]},"item_1_description_18":{"attribute_name":"フォーマット","attribute_value_mlt":[{"subitem_description":"application/pdf","subitem_description_type":"Other"}]},"item_1_description_7":{"attribute_name":"学位記番号","attribute_value_mlt":[{"subitem_description":"総研大甲第905号","subitem_description_type":"Other"}]},"item_1_select_14":{"attribute_name":"所蔵","attribute_value_mlt":[{"subitem_select_item":"有"}]},"item_1_select_8":{"attribute_name":"研究科","attribute_value_mlt":[{"subitem_select_item":"複合科学研究科"}]},"item_1_select_9":{"attribute_name":"専攻","attribute_value_mlt":[{"subitem_select_item":"17 
情報学専攻"}]},"item_1_text_10":{"attribute_name":"学位授与年度","attribute_value_mlt":[{"subitem_text_value":"2005"}]},"item_creator":{"attribute_name":"著者","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"WATTARUJEEKRIT, Tuangthong","creatorNameLang":"en"}],"nameIdentifiers":[{}]}]},"item_files":{"attribute_name":"ファイル情報","attribute_type":"file","attribute_value_mlt":[{"accessrole":"open_date","date":[{"dateType":"Available","dateValue":"2016-02-17"}],"displaytype":"simple","filename":"甲905_要旨.pdf","filesize":[{"value":"234.9 kB"}],"format":"application/pdf","licensetype":"license_11","mimetype":"application/pdf","url":{"label":"要旨・審査要旨","url":"https://ir.soken.ac.jp/record/843/files/甲905_要旨.pdf"},"version_id":"e67830c1-d38a-4b89-b159-0c2c2e66dc22"},{"accessrole":"open_date","date":[{"dateType":"Available","dateValue":"2016-02-17"}],"displaytype":"simple","filename":"甲905_本文.pdf","filesize":[{"value":"15.9 MB"}],"format":"application/pdf","licensetype":"license_11","mimetype":"application/pdf","url":{"label":"本文","url":"https://ir.soken.ac.jp/record/843/files/甲905_本文.pdf"},"version_id":"10c03d8a-f15a-4a02-8c15-2800384f3a7d"}]},"item_language":{"attribute_name":"言語","attribute_value_mlt":[{"subitem_language":"eng"}]},"item_resource_type":{"attribute_name":"資源タイプ","attribute_value_mlt":[{"resourcetype":"thesis","resourceuri":"http://purl.org/coar/resource_type/c_46ec"}]},"item_title":"Exploring Semantic roles for Named Entity Recognition in the Molecular biology domain","item_titles":{"attribute_name":"タイトル","attribute_value_mlt":[{"subitem_title":"Exploring Semantic roles for Named Entity Recognition in the Molecular biology domain"},{"subitem_title":"Exploring Semantic roles for Named Entity Recognition in the Molecular biology 
domain","subitem_title_language":"en"}]},"item_type_id":"1","owner":"1","path":["19"],"pubdate":{"attribute_name":"公開日","attribute_value":"2010-02-22"},"publish_date":"2010-02-22","publish_status":"0","recid":"843","relation_version_is_last":true,"title":["Exploring Semantic roles for Named Entity Recognition in the Molecular biology domain"],"weko_creator_id":"1","weko_shared_id":-1},"updated":"2023-06-20T16:10:05.329482+00:00"}