{"created":"2023-06-20T13:23:17.693469+00:00","id":4082,"links":{},"metadata":{"_buckets":{"deposit":"3b342f3b-2717-462f-88d5-d3242f9fbf5a"},"_deposit":{"created_by":21,"id":"4082","owners":[21],"pid":{"revision_id":0,"type":"depid","value":"4082"},"status":"published"},"_oai":{"id":"oai:ir.soken.ac.jp:00004082","sets":["2:429:19"]},"author_link":["2286","2288","2287"],"item_1_creator_2":{"attribute_name":"著者名","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"藤田, 悦郎"}],"nameIdentifiers":[{}]}]},"item_1_creator_3":{"attribute_name":"フリガナ","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"フジタ, エツロウ"}],"nameIdentifiers":[{}]}]},"item_1_date_granted_11":{"attribute_name":"学位授与年月日","attribute_value_mlt":[{"subitem_dategranted":"2013-03-22"}]},"item_1_degree_grantor_5":{"attribute_name":"学位授与機関","attribute_value_mlt":[{"subitem_degreegrantor":[{"subitem_degreegrantor_name":"総合研究大学院大学"}]}]},"item_1_degree_name_6":{"attribute_name":"学位名","attribute_value_mlt":[{"subitem_degreename":"博士(情報学)"}]},"item_1_description_12":{"attribute_name":"要旨","attribute_value_mlt":[{"subitem_description":"With the successful adoption of link analysis techniques such as PageRank\nand web spam filtering, current web search engines support navigational\nsearch well, where a user is looking for a particular web resource that\nthe user has in mind. However, such engines do not necessarily support\ninformational search well, where a user is looking for information about\na certain topic that might be on diverse web resources. This is because\na user often forms an informational query by a few keywords that does\nnot necessarily model the user information need well while such engines\nsearch web documents basically based on conjunctive Boolean searching\nusing the submitted keywords. 
Informational search would be better handled by a web search engine based on an information retrieval (IR) model\ncombined with automatic query expansion. Moreover, the realization of\nsuch an engine requires a method to process the IR model efficiently. So\nin this thesis, we propose new top-k document retrieval algorithms that\nefficiently process long queries generated by automatic query expansion,\nby introducing a simple additional data structure called “query-term-by-document binary matrix,” which indicates which document contains which\nquery term. We show on the basis of theoretical analysis that our algorithms not only find the top-k documents exactly but also have a desirable\nproperty on processing cost as described below. Furthermore, we show\non the basis of empirical evaluation using the TREC GOV2 collection that\nour algorithms achieve considerable performance gains over existing algorithms especially when the number of query terms gets larger, yielding\nspeedup of up to a factor of about 2 over existing algorithms for top-100\ndocument retrieval for 64-term queries. Then, we extend our algorithms\nfor supporting proximity search to take advantage of the structured nature\nof web documents, and show that the extended versions of our algorithms\nare still exact for finding the top-k documents and desirable on processing cost. The proposed algorithms presented in this thesis are applicable\nnot only to web search but also to other areas such as enterprise search.\nThe novel contribution of this thesis is summarized as follows: (a) The\nproposal of new top-k document retrieval algorithms that efficiently process long queries generated by automatic query expansion, by introducing\na simple additional data structure called query-term-by-document binary\nmatrix. (b) The theoretical analysis on the proposed algorithms. 
We show\nthat our algorithms not only find the top-k documents exactly but also have\nthe desirable property on processing cost. (c) The empirical evaluation\nof the proposed algorithms. We demonstrate that our algorithms achieve\nconsiderable performance gains over existing algorithms. And (d) The\nextension of the above algorithms for supporting proximity search. The\nalgorithms proposed in this thesis efficiently process an IR model combined with automatic query expansion and/or proximity search, by introducing a simple additional data structure called query-term-by-document\nbinary matrix. Due to the simplicity of our method using query-term-by-document binary matrix, our method is also applicable to other IR techniques. We believe that our method paves the way for practical use of various IR techniques, including an IR model combined with automatic query\nexpansion and/or proximity search, in large-scale text databases such as\nthe web, which has been considered difficult because of their inefficiency.","subitem_description_type":"Other"}]},"item_1_description_7":{"attribute_name":"学位記番号","attribute_value_mlt":[{"subitem_description":"総研大甲第1602号 ","subitem_description_type":"Other"}]},"item_1_select_14":{"attribute_name":"所蔵","attribute_value_mlt":[{"subitem_select_item":"有"}]},"item_1_select_8":{"attribute_name":"研究科","attribute_value_mlt":[{"subitem_select_item":"複合科学研究科"}]},"item_1_select_9":{"attribute_name":"専攻","attribute_value_mlt":[{"subitem_select_item":"17 情報学専攻"}]},"item_1_text_10":{"attribute_name":"学位授与年度","attribute_value_mlt":[{"subitem_text_value":"2012"}]},"item_creator":{"attribute_name":"著者","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"FUJITA, Etsuro 
","creatorNameLang":"en"}],"nameIdentifiers":[{}]}]},"item_files":{"attribute_name":"ファイル情報","attribute_type":"file","attribute_value_mlt":[{"accessrole":"open_date","date":[{"dateType":"Available","dateValue":"2016-02-26"}],"displaytype":"simple","filename":"甲1602_要旨.pdf","filesize":[{"value":"341.8 kB"}],"format":"application/pdf","licensetype":"license_11","mimetype":"application/pdf","url":{"label":"要旨・審査要旨","url":"https://ir.soken.ac.jp/record/4082/files/甲1602_要旨.pdf"},"version_id":"eec93d67-0f40-438b-b685-6f251e9a75e5"}]},"item_language":{"attribute_name":"言語","attribute_value_mlt":[{"subitem_language":"eng"}]},"item_resource_type":{"attribute_name":"資源タイプ","attribute_value_mlt":[{"resourcetype":"thesis","resourceuri":"http://purl.org/coar/resource_type/c_46ec"}]},"item_title":"Efficient Retrieval of Highly Ranked Documents for Informational Search on Large Scale Text Databases","item_titles":{"attribute_name":"タイトル","attribute_value_mlt":[{"subitem_title":"Efficient Retrieval of Highly Ranked Documents for Informational Search on Large Scale Text Databases"},{"subitem_title":"Efficient Retrieval of Highly Ranked Documents for Informational Search on Large Scale Text Databases","subitem_title_language":"en"}]},"item_type_id":"1","owner":"21","path":["19"],"pubdate":{"attribute_name":"公開日","attribute_value":"2013-11-20"},"publish_date":"2013-11-20","publish_status":"0","recid":"4082","relation_version_is_last":true,"title":["Efficient Retrieval of Highly Ranked Documents for Informational Search on Large Scale Text Databases"],"weko_creator_id":"21","weko_shared_id":21},"updated":"2023-06-20T15:15:48.111763+00:00"}