{"id":"https://openalex.org/W4402684111","doi":"https://doi.org/10.18653/v1/2024.findings-acl.863","title":"Recognizing Everything from All Modalities at Once: Grounded Multimodal Universal Information Extraction","display_name":"Recognizing Everything from All Modalities at Once: Grounded Multimodal Universal Information Extraction","publication_year":2024,"publication_date":"2024-01-01","ids":{"openalex":"https://openalex.org/W4402684111","doi":"https://doi.org/10.18653/v1/2024.findings-acl.863"},"language":"en","primary_location":{"id":"doi:10.18653/v1/2024.findings-acl.863","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2024.findings-acl.863","pdf_url":"https://aclanthology.org/2024.findings-acl.863.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Findings of the Association for Computational Linguistics ACL 2024","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://aclanthology.org/2024.findings-acl.863.pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5004953265","display_name":"Meishan Zhang","orcid":"https://orcid.org/0000-0001-6335-1340"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Meishan Zhang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5055815455","display_name":"Hao Fei","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hao Fei","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5117306112","display_name":"Bin Wang","orcid":"https://orcid.org/0000-0002-7835-7160"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bin Wang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5089517967","display_name":"Shengqiong Wu","orcid":"https://orcid.org/0000-0001-6192-1194"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shengqiong Wu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101488072","display_name":"Yixin Cao","orcid":"https://orcid.org/0000-0002-1632-7812"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yixin Cao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5112479301","display_name":"Fei Li","orcid":"https://orcid.org/0000-0002-6260-1907"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fei Li","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5100402911","display_name":"Min Zhang","orcid":"https://orcid.org/0000-0002-3895-5510"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Min Zhang","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5004953265"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.4176,"has_fulltext":true,"cited_by_count":4,"citation_normalized_percentile":{"value":0.84791567,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"14498","last_page":"14511"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.4771000146865845,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.4771000146865845,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/modalities","display_name":"Modalities","score":0.786770224571228},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6181269288063049},{"id":"https://openalex.org/keywords/grounded-theory","display_name":"Grounded theory","score":0.5082921385765076},{"id":"https://openalex.org/keywords/information-extraction","display_name":"Information extraction","score":0.5076128244400024},{"id":"https://openalex.org/keywords/multimodality","display_name":"Multimodality","score":0.4514141380786896},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.35915255546569824},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.35606545209884644},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.32817041873931885},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.19969195127487183},{"id":"https://openalex.org/keywords/qualitative-research","display_name":"Qualitative research","score":0.1007588803768158},{"id":"https://openalex.org/keywords/sociology","display_name":"Sociology","score":0.09048610925674438}],"concepts":[{"id":"https://openalex.org/C2779903281","wikidata":"https://www.wikidata.org/wiki/Q6888026","display_name":"Modalities","level":2,"score":0.786770224571228},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6181269288063049},{"id":"https://openalex.org/C156325361","wikidata":"https://www.wikidata.org/wiki/Q1152864","display_name":"Grounded theory","level":3,"score":0.5082921385765076},{"id":"https://openalex.org/C195807954","wikidata":"https://www.wikidata.org/wiki/Q1662562","display_name":"Information extraction","level":2,"score":0.5076128244400024},{"id":"https://openalex.org/C2780910867","wikidata":"https://www.wikidata.org/wiki/Q1952416","display_name":"Multimodality","level":2,"score":0.4514141380786896},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.35915255546569824},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.35606545209884644},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.32817041873931885},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.19969195127487183},{"id":"https://openalex.org/C190248442","wikidata":"https://www.wikidata.org/wiki/Q839486","display_name":"Qualitative research","level":2,"score":0.1007588803768158},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.09048610925674438},{"id":"https://openalex.org/C36289849","wikidata":"https://www.wikidata.org/wiki/Q34749","display_name":"Social science","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.18653/v1/2024.findings-acl.863","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2024.findings-acl.863","pdf_url":"https://aclanthology.org/2024.findings-acl.863.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Findings of the Association for Computational Linguistics ACL 2024","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.18653/v1/2024.findings-acl.863","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2024.findings-acl.863","pdf_url":"https://aclanthology.org/2024.findings-acl.863.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Findings of the Association for Computational Linguistics ACL 2024","raw_type":"proceedings-article"},"sustainable_development_goals":[{"display_name":"Decent work and economic growth","id":"https://metadata.un.org/sdg/8","score":0.46000000834465027}],"awards":[],"funders":[{"id":"https://openalex.org/F4320318547","display_name":"Baidu","ror":"https://ror.org/03vs3wt56"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4402684111.pdf","grobid_xml":"https://content.openalex.org/works/W4402684111.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4205899436","https://openalex.org/W2462138927","https://openalex.org/W2248852396","https://openalex.org/W199123384","https://openalex.org/W4239305747","https://openalex.org/W1483892602","https://openalex.org/W127837312","https://openalex.org/W2922283411","https://openalex.org/W4384789578","https://openalex.org/W4387421317"],"abstract_inverted_index":{"In":[0],"the":[1,38,42,116,144,171],"field":[2],"of":[3,12,44,81,118,150],"information":[4,85,109],"extraction":[5],"(IE),":[6],"tasks":[7,60,137],"across":[8,138,162],"a":[9,24,52,73,119,127,167],"wide":[10],"range":[11],"modalities":[13,94],"and":[14,29,83,111,130],"their":[15,66],"combinations":[16,142],"have":[17],"been":[18],"traditionally":[19],"studied":[20],"in":[21,26],"isolation,":[22],"leaving":[23],"gap":[25],"deeply":[27],"recognizing":[28],"analyzing":[30],"cross-modal":[31],"information.To":[32],"address":[33,115],"this,":[34],"this":[35],"work":[36],"for":[37,108,122,170],"first":[39],"time":[40],"introduces":[41],"concept":[43],"grounded":[45,123],"Multimodal":[46],"Universal":[47],"Information":[48],"Extraction":[49],"(MUIE),":[50],"providing":[51],"unified":[53],"task":[54],"framework":[55],"to":[56],"analyze":[57],"any":[58],"IE":[59,136],"over":[61],"various":[62],"modalities,":[63,88],"along":[64],"with":[65,105,143,152],"fine-grained":[67,112],"groundings.To":[68],"tackle":[69],"MUIE,":[70,124],"we":[71,125],"tailor":[72],"multimodal":[74,113,146],"large":[75],"language":[76],"model":[77],"(MLLM),":[78],"REAMO,":[79],"capable":[80],"extracting":[82],"grounding":[84],"from":[86,92],"all":[87,93,163],"i.e.,":[89],"'recognizing":[90],"everything":[91],"at":[95],"once'.REAMO":[96],"is":[97],"updated":[98],"via":[99],"varied":[100],"tuning":[101],"strategies,":[102],"equipping":[103],"it":[104],"powerful":[106],"capabilities":[107],"recognition":[110],"grounding.To":[114],"absence":[117],"suitable":[120],"benchmark":[121,169],"curate":[126],"high-quality,":[128],"diverse,":[129],"challenging":[131],"test":[132],"set,":[133],"which":[134],"encompasses":[135],"9":[139],"common":[140],"modality":[141],"corresponding":[145],"groundings.The":[147],"extensive":[148],"comparison":[149],"REAMO":[151],"existing":[153],"MLLMs":[154],"integrated":[155],"into":[156],"pipeline":[157],"approaches":[158],"demonstrates":[159],"its":[160],"advantages":[161],"evaluation":[164],"dimensions,":[165],"establishing":[166],"strong":[168],"follow-up":[172],"research.":[173]},"counts_by_year":[{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":1}],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2025-10-10T00:00:00"}
