{"id":"https://openalex.org/W4405766600","doi":"https://doi.org/10.48550/arxiv.2412.16855","title":"GME: Improving Universal Multimodal Retrieval by Multimodal LLMs","display_name":"GME: Improving Universal Multimodal Retrieval by Multimodal LLMs","publication_year":2024,"publication_date":"2024-12-22","ids":{"openalex":"https://openalex.org/W4405766600","doi":"https://doi.org/10.48550/arxiv.2412.16855"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2412.16855","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2412.16855","pdf_url":"https://arxiv.org/pdf/2412.16855","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2412.16855","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100327429","display_name":"Xin Zhang","orcid":"https://orcid.org/0000-0002-2585-1392"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Zhang, Xin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103114439","display_name":"Yanzhao Zhang","orcid":"https://orcid.org/0000-0003-2894-4727"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Yanzhao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5109982109","display_name":"Wen Xie","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xie, Wen","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100674542","display_name":"Mingxin Li","orcid":"https://orcid.org/0000-0002-7595-3717"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Mingxin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5024652567","display_name":"Ziqi Dai","orcid":"https://orcid.org/0009-0002-0425-0149"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dai, Ziqi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5049484480","display_name":"Dingkun Long","orcid":"https://orcid.org/0000-0001-6570-9406"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Long, Dingkun","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5005535444","display_name":"Pengjun Xie","orcid":"https://orcid.org/0009-0004-8412-359X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xie, Pengjun","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5004953265","display_name":"Meishan Zhang","orcid":"https://orcid.org/0000-0001-6335-1340"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Meishan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100408967","display_name":"Wenjie Li","orcid":"https://orcid.org/0000-0001-5285-8939"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Wenjie","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5100402963","display_name":"Min Zhang","orcid":"https://orcid.org/0000-0002-9169-1024"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Min","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":10,"corresponding_author_ids":["https://openalex.org/A5100327429"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10215","display_name":"Semantic Web and Ontologies","score":0.9526000022888184,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9484999775886536,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/multimodal-therapy","display_name":"Multimodal therapy","score":0.5688443779945374},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.43538594245910645},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.38340839743614197},{"id":"https://openalex.org/keywords/medicine","display_name":"Medicine","score":0.22498777508735657},{"id":"https://openalex.org/keywords/internal-medicine","display_name":"Internal medicine","score":0.08754891157150269}],"concepts":[{"id":"https://openalex.org/C4441509","wikidata":"https://www.wikidata.org/wiki/Q6418787","display_name":"Multimodal therapy","level":2,"score":0.5688443779945374},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.43538594245910645},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.38340839743614197},{"id":"https://openalex.org/C71924100","wikidata":"https://www.wikidata.org/wiki/Q11190","display_name":"Medicine","level":0,"score":0.22498777508735657},{"id":"https://openalex.org/C126322002","wikidata":"https://www.wikidata.org/wiki/Q11180","display_name":"Internal medicine","level":1,"score":0.08754891157150269}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2412.16855","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2412.16855","pdf_url":"https://arxiv.org/pdf/2412.16855","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550/arxiv.2412.16855","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2412.16855","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2412.16855","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2412.16855","pdf_url":"https://arxiv.org/pdf/2412.16855","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052"],"abstract_inverted_index":{"Universal":[0],"Multimodal":[1,109],"Retrieval":[2],"(UMR)":[3],"aims":[4],"to":[5,34,41,84,127],"enable":[6],"search":[7],"across":[8],"various":[9],"modalities":[10],"using":[11,44],"a":[12,26,86,93,122],"unified":[13],"model,":[14],"where":[15],"queries":[16],"and":[17,91,155,158,166],"candidates":[18],"can":[19,59],"consist":[20],"of":[21,28,64,79,131,152],"pure":[22],"text,":[23],"images,":[24],"or":[25],"combination":[27],"both.":[29],"Previous":[30],"work":[31],"has":[32],"attempted":[33],"adopt":[35],"multimodal":[36,56,71],"large":[37],"language":[38],"models":[39],"(MLLMs)":[40],"realize":[42],"UMR":[43,124,145],"only":[45],"text":[46],"data.":[47,168],"However,":[48],"our":[49,132,138],"preliminary":[50],"experiments":[51],"demonstrate":[52],"that":[53,137],"more":[54],"diverse":[55],"training":[57,72,87,97,103,156],"data":[58,73,88],"further":[60],"unlock":[61],"the":[62,69,101,107,129,164],"potential":[63],"MLLMs.":[65],"Despite":[66],"its":[67],"effectiveness,":[68],"existing":[70,144],"is":[74],"highly":[75],"imbalanced":[76],"in":[77],"terms":[78],"modality,":[80],"which":[81],"motivates":[82],"us":[83],"develop":[85,106],"synthesis":[89],"pipeline":[90],"construct":[92,121],"large-scale,":[94],"high-quality":[95],"fused-modal":[96],"dataset.":[98],"Based":[99],"on":[100,162],"synthetic":[102,167],"data,":[104],"we":[105,120,148],"General":[108],"Embedder":[110],"(GME),":[111],"an":[112],"MLLM-based":[113],"dense":[114],"retriever":[115],"designed":[116],"for":[117],"UMR.":[118],"Furthermore,":[119],"comprehensive":[123],"Benchmark":[125],"(UMRB)":[126],"evaluate":[128],"effectiveness":[130],"approach.":[133],"Experimental":[134],"results":[135],"show":[136],"method":[139],"achieves":[140],"state-of-the-art":[141],"performance":[142],"among":[143],"methods.":[146],"Last,":[147],"provide":[149],"in-depth":[150],"analyses":[151],"model":[153,165],"scaling":[154],"strategies,":[157],"perform":[159],"ablation":[160],"studies":[161],"both":[163]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-02-09T09:26:11.010843","created_date":"2025-10-10T00:00:00"}
