{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,2]],"date-time":"2026-03-02T22:08:57Z","timestamp":1772489337126,"version":"3.50.1"},"reference-count":11,"publisher":"Oxford University Press (OUP)","issue":"7","license":[{"start":{"date-parts":[[2017,1,8]],"date-time":"2017-01-08T00:00:00Z","timestamp":1483833600000},"content-version":"vor","delay-in-days":4,"URL":"https:\/\/academic.oup.com\/journals\/pages\/about_us\/legal\/notices"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61363021, 61540061, 91331105"],"award-info":[{"award-number":["61363021, 61540061, 91331105"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["31360514"],"award-info":[{"award-number":["31360514"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"CAS 100-Talents (Dr. Niu Beifang), and the transformation project in scientific and technological achievements of Qinghai Province","award":["2016-SF-127"],"award-info":[{"award-number":["2016-SF-127"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2017,4,1]]},"abstract":"<jats:title>Abstract<\/jats:title>\n               <jats:sec>\n                  <jats:title>Summary<\/jats:title>\n                  <jats:p>With the advent of next-generation sequencing, traditional bioinformatics tools are challenged by massive raw metagenomic datasets. One of the bottlenecks of metagenomic studies is lack of large-scale and cloud computing suitable data analysis tools. In this paper, we proposed a Spark-based tool, called MetaSpark, to recruit metagenomic reads to reference genomes. MetaSpark benefits from the distributed data set (RDD) of Spark, which makes it able to cache data set in memory across cluster nodes and scale well with the datasets. Compared with previous metagenomics recruitment tools, MetaSpark recruited significantly more reads than many programs such as SOAP2, BWA and LAST and increased recruited reads by \u223c4% compared with FR-HIT when there were 1 million reads and 0.75\u2009GB references. Different test cases demonstrate MetaSpark\u2019s scalability and overall high performance.<\/jats:p>\n               <\/jats:sec>\n               <jats:sec>\n                  <jats:title>Availability<\/jats:title>\n                  <jats:p>https:\/\/github.com\/zhouweiyg\/metaspark<\/jats:p>\n               <\/jats:sec>\n               <jats:sec>\n                  <jats:title>Supplementary information<\/jats:title>\n                  <jats:p>Supplementary data are available at Bioinformatics online<\/jats:p>\n               <\/jats:sec>","DOI":"10.1093\/bioinformatics\/btw750","type":"journal-article","created":{"date-parts":[[2016,11,25]],"date-time":"2016-11-25T12:05:45Z","timestamp":1480075545000},"page":"1090-1092","source":"Crossref","is-referenced-by-count":30,"title":["MetaSpark: a spark-based distributed processing tool to recruit metagenomic reads to reference genomes"],"prefix":"10.1093","volume":"33","author":[{"given":"Wei","family":"Zhou","sequence":"first","affiliation":[{"name":"School of Software, Yunnan University, Kunming, China"}]},{"given":"Ruilin","family":"Li","sequence":"additional","affiliation":[{"name":"Computer Network Information Center of Chinese Academy of Sciences, Beijing, China"},{"name":"University of Chinese Academy of Sciences, Beijing, China"}]},{"given":"Shuo","family":"Yuan","sequence":"additional","affiliation":[{"name":"School of Software, Yunnan University, Kunming, China"}]},{"given":"ChangChun","family":"Liu","sequence":"additional","affiliation":[{"name":"School of Software, Yunnan University, Kunming, China"}]},{"given":"Shaowen","family":"Yao","sequence":"additional","affiliation":[{"name":"School of Software, Yunnan University, Kunming, China"}]},{"given":"Jing","family":"Luo","sequence":"additional","affiliation":[{"name":"School of Life Sciences and State Key Laboratory for Conservation and Utilization of Bio-Resources in Yunnan, Yunnan University, Kunming, China"}]},{"given":"Beifang","family":"Niu","sequence":"additional","affiliation":[{"name":"Computer Network Information Center of Chinese Academy of Sciences, Beijing, China"},{"name":"University of Chinese Academy of Sciences, Beijing, China"}]}],"member":"286","published-online":{"date-parts":[[2017,1,4]]},"reference":[{"key":"2023020205004433400_btw750-B1","doi-asserted-by":"crossref","first-page":"941","DOI":"10.1038\/nmeth.3041","article-title":"Mapping brain activity at scale with cluster computing","volume":"11","author":"Freeman","year":"2014","journal-title":"Nat. Methods"},{"key":"2023020205004433400_btw750-B2","doi-asserted-by":"crossref","first-page":"487","DOI":"10.1101\/gr.113985.110","article-title":"Adaptive seeds tame genomic sequence comparison","volume":"21","author":"Kielbasa","year":"2011","journal-title":"Genome Res"},{"key":"2023020205004433400_btw750-B3","doi-asserted-by":"crossref","first-page":"1754","DOI":"10.1093\/bioinformatics\/btp324","article-title":"Fast and accurate short read alignment with Burrows\u2013Wheeler transform","volume":"25","author":"Li","year":"2009","journal-title":"Bioinformatics"},{"key":"2023020205004433400_btw750-B4","doi-asserted-by":"crossref","first-page":"713","DOI":"10.1093\/bioinformatics\/btn025","article-title":"SOAP: short oligonucleotide alignment program","volume":"24","author":"Li","year":"2008","journal-title":"Bioinformatics"},{"key":"2023020205004433400_btw750-B5","author":"Marek","year":"2014"},{"key":"2023020205004433400_btw750-B6","first-page":"171.","article-title":"CloudAligner: a fast and full-featured MapReduce based tool for sequence mapping","volume":"4","author":"Nguyen","year":"2011","journal-title":"BMCResNotes"},{"key":"2023020205004433400_btw750-B7","doi-asserted-by":"crossref","first-page":"1704","DOI":"10.1093\/bioinformatics\/btr252","article-title":"FR-HIT, a very fast program to recruit metagenomic reads to homologous reference genomes","volume":"27","author":"Niu","year":"2011","journal-title":"Bioinformatics"},{"key":"2023020205004433400_btw750-B8","doi-asserted-by":"crossref","first-page":"3014","DOI":"10.1093\/bioinformatics\/btt528","article-title":"BioPig: a Hadoop-based analytic toolkit for large-scale sequence data","volume":"29","author":"Nordberg","year":"2013","journal-title":"Bioinformatics"},{"key":"2023020205004433400_btw750-B9","doi-asserted-by":"crossref","first-page":"2444","DOI":"10.1073\/pnas.85.8.2444","article-title":"Improved tools for biological sequence comparison","volume":"85","author":"Pearson","year":"1988","journal-title":"Proc. Natl Acad. Sci. USA"},{"key":"2023020205004433400_btw750-B10","doi-asserted-by":"crossref","first-page":"59","DOI":"10.1038\/nature08821","article-title":"A human gut microbial gene catalogue established by metagenomic sequencing","volume":"464","author":"Qin","year":"2010","journal-title":"Nature"},{"key":"2023020205004433400_btw750-B11","author":"Tsugawa","year":"2008"}],"container-title":["Bioinformatics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article-pdf\/33\/7\/1090\/49038389\/bioinformatics_33_7_1090.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article-pdf\/33\/7\/1090\/49038389\/bioinformatics_33_7_1090.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,2,2]],"date-time":"2023-02-02T05:01:37Z","timestamp":1675314097000},"score":1,"resource":{"primary":{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article\/33\/7\/1090\/2870465"}},"subtitle":[],"editor":[{"given":"Inanc","family":"Birol","sequence":"additional","affiliation":[]}],"short-title":[],"issued":{"date-parts":[[2017,1,4]]},"references-count":11,"journal-issue":{"issue":"7","published-print":{"date-parts":[[2017,4,1]]}},"URL":"https:\/\/doi.org\/10.1093\/bioinformatics\/btw750","relation":{},"ISSN":["1367-4803","1367-4811"],"issn-type":[{"value":"1367-4803","type":"print"},{"value":"1367-4811","type":"electronic"}],"subject":[],"published-other":{"date-parts":[[2017,4,1]]},"published":{"date-parts":[[2017,1,4]]}}}