{"id":"https://openalex.org/W7138015827","doi":"https://doi.org/10.48550/arxiv.2603.13366","title":"Thinking in Uncertainty: Mitigating Hallucinations in MLRMs with Latent Entropy-Aware Decoding","display_name":"Thinking in Uncertainty: Mitigating Hallucinations in MLRMs with Latent Entropy-Aware Decoding","publication_year":2026,"publication_date":"2026-03-09","ids":{"openalex":"https://openalex.org/W7138015827","doi":"https://doi.org/10.48550/arxiv.2603.13366"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.13366","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.13366","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.13366","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5122369633","display_name":"Zhongxing Xu","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Xu, Zhongxing","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129648095","display_name":"Zhonghua Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Zhonghua","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129752387","display_name":"Zhe Qian","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Qian, Zhe","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102975859","display_name":"Dachuan Shi","orcid":"https://orcid.org/0000-0002-9296-7213"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shi, Dachuan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129751721","display_name":"Feilong Tang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tang, Feilong","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129706057","display_name":"Ming Hu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hu, Ming","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5037350701","display_name":"Shiyan Su","orcid":"https://orcid.org/0009-0000-5486-8591"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Su, Shiyan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5089465005","display_name":"Xiaocheng Zou","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zou, Xiaocheng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129660255","display_name":"Wei Feng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Feng, Wei","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129750709","display_name":"Dwarikanath Mahapatra","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mahapatra, Dwarikanath","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129693902","display_name":"Yifan Peng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Peng, Yifan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113312569","display_name":"Mingquan Lin","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lin, Mingquan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5129694568","display_name":"Zongyuan Ge","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ge, Zongyuan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":13,"corresponding_author_ids":["https://openalex.org/A5122369633"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9901999831199646,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9901999831199646,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11273","display_name":"Advanced Graph Neural Networks","score":0.00139999995008111,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.0013000000035390258,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/security-token","display_name":"Security token","score":0.7250000238418579},{"id":"https://openalex.org/keywords/decoding-methods","display_name":"Decoding methods","score":0.5875999927520752},{"id":"https://openalex.org/keywords/semantics","display_name":"Semantics (computer science)","score":0.5468999743461609},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.5006999969482422},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.46480000019073486},{"id":"https://openalex.org/keywords/encoding","display_name":"Encoding (memory)","score":0.46239998936653137},{"id":"https://openalex.org/keywords/visual-reasoning","display_name":"Visual reasoning","score":0.45739999413490295},{"id":"https://openalex.org/keywords/entropy","display_name":"Entropy (arrow of time)","score":0.412200003862381},{"id":"https://openalex.org/keywords/probabilistic-latent-semantic-analysis","display_name":"Probabilistic latent semantic analysis","score":0.39089998602867126}],"concepts":[{"id":"https://openalex.org/C48145219","wikidata":"https://www.wikidata.org/wiki/Q1335365","display_name":"Security token","level":2,"score":0.7250000238418579},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6603000164031982},{"id":"https://openalex.org/C57273362","wikidata":"https://www.wikidata.org/wiki/Q576722","display_name":"Decoding methods","level":2,"score":0.5875999927520752},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.5468999743461609},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.5006999969482422},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4884999990463257},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.46480000019073486},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.46239998936653137},{"id":"https://openalex.org/C2777508537","wikidata":"https://www.wikidata.org/wiki/Q7936620","display_name":"Visual reasoning","level":2,"score":0.45739999413490295},{"id":"https://openalex.org/C106301342","wikidata":"https://www.wikidata.org/wiki/Q4117933","display_name":"Entropy (arrow of time)","level":2,"score":0.412200003862381},{"id":"https://openalex.org/C112933361","wikidata":"https://www.wikidata.org/wiki/Q2845258","display_name":"Probabilistic latent semantic analysis","level":2,"score":0.39089998602867126},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.35989999771118164},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.33709999918937683},{"id":"https://openalex.org/C2776502983","wikidata":"https://www.wikidata.org/wiki/Q690182","display_name":"Contrast (vision)","level":2,"score":0.336899995803833},{"id":"https://openalex.org/C180747234","wikidata":"https://www.wikidata.org/wiki/Q23373","display_name":"Cognitive psychology","level":1,"score":0.3070000112056732},{"id":"https://openalex.org/C97364631","wikidata":"https://www.wikidata.org/wiki/Q484284","display_name":"Deductive reasoning","level":2,"score":0.29490000009536743},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.2906000018119812},{"id":"https://openalex.org/C9679016","wikidata":"https://www.wikidata.org/wiki/Q1417473","display_name":"Principle of maximum entropy","level":2,"score":0.2865000069141388},{"id":"https://openalex.org/C19768560","wikidata":"https://www.wikidata.org/wiki/Q320727","display_name":"Dependency (UML)","level":2,"score":0.28600001335144043},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.2849999964237213},{"id":"https://openalex.org/C51167844","wikidata":"https://www.wikidata.org/wiki/Q4422623","display_name":"Latent variable","level":2,"score":0.2847000062465668},{"id":"https://openalex.org/C183322885","wikidata":"https://www.wikidata.org/wiki/Q17007702","display_name":"Context model","level":3,"score":0.28029999136924744},{"id":"https://openalex.org/C197914299","wikidata":"https://www.wikidata.org/wiki/Q18650","display_name":"Semantic memory","level":3,"score":0.2759999930858612},{"id":"https://openalex.org/C115086926","wikidata":"https://www.wikidata.org/wiki/Q17004651","display_name":"Causal reasoning","level":3,"score":0.27140000462532043},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.2705000042915344},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2700999975204468},{"id":"https://openalex.org/C149441793","wikidata":"https://www.wikidata.org/wiki/Q200726","display_name":"Probability distribution","level":2,"score":0.2558000087738037},{"id":"https://openalex.org/C2779458634","wikidata":"https://www.wikidata.org/wiki/Q24963715","display_name":"Debiasing","level":2,"score":0.2522999942302704},{"id":"https://openalex.org/C103057564","wikidata":"https://www.wikidata.org/wiki/Q4751139","display_name":"Analytic reasoning","level":3,"score":0.25110000371932983}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.13366","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.13366","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.13366","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.13366","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Recent":[0],"advancements":[1],"in":[2,12,144],"multimodal":[3],"large":[4],"reasoning":[5,43,64,73,98,146],"models":[6],"(MLRMs)":[7],"have":[8],"significantly":[9],"improved":[10],"performance":[11],"visual":[13,173,184],"question":[14],"answering.":[15],"However,":[16],"we":[17,59,101,119,169],"observe":[18],"that":[19,40,78,130,177,189],"transition":[20],"words":[21],"(e.g.,":[22],"because,":[23],"however,":[24],"and":[25,32,70,158],"wait)":[26],"are":[27],"closely":[28],"associated":[29],"with":[30],"hallucinations":[31,193],"tend":[33],"to":[34,65,112,134,161,181],"exhibit":[35],"high-entropy":[36,97,156],"states.":[37],"We":[38],"argue":[39],"adequate":[41],"contextual":[42,94],"information":[44],"can":[45],"be":[46],"directly":[47],"extracted":[48],"from":[49,107],"the":[50,86,108,179],"token":[51,109,163],"probability":[52,110],"distribution.":[53],"Inspired":[54],"by":[55],"superposed":[56,63],"representation":[57],"theory,":[58],"propose":[60,102,170],"leveraging":[61],"latent":[62,72],"integrate":[66],"multiple":[67,198],"candidate":[68],"semantics":[69],"maintain":[71],"trajectories.":[74],"The":[75,138,149],"hypothesis":[76],"is":[77],"reliance":[79],"on":[80,183,197],"discrete":[81,162],"textual":[82],"inputs":[83],"may":[84],"drive":[85],"model":[87,150,180],"toward":[88],"sequential":[89],"explicit":[90],"reasoning,":[91],"underutilizing":[92],"dense":[93],"cues":[95],"during":[96],"stages.":[99],"Therefore,":[100],"constructing":[103],"rich":[104],"semantic":[105,132],"representations":[106],"distributions":[111],"enhance":[113],"in-context":[114],"reasoning.":[115,137],"With":[116],"this":[117],"goal,":[118],"present":[120],"Latent":[121],"Entropy-Aware":[122],"Decoding":[123],"(LEAD),":[124],"an":[125],"efficient":[126],"plug-and-play":[127],"decoding":[128],"strategy":[129,176],"leverages":[131],"context":[133],"achieve":[135],"reliable":[136],"heart":[139],"of":[140],"our":[141],"method":[142],"lies":[143],"entropy-aware":[145],"mode":[147],"switching.":[148],"employs":[151],"probability-weighted":[152],"continuous":[153],"embeddings":[154,164],"under":[155],"states":[157],"transitions":[159],"back":[160],"as":[165],"entropy":[166],"decreases.":[167],"Moreover,":[168],"a":[171],"prior-guided":[172],"anchor":[174],"injection":[175],"encourages":[178],"focus":[182],"information.":[185],"Extensive":[186],"experiments":[187],"show":[188],"LEAD":[190],"effectively":[191],"mitigates":[192],"across":[194],"various":[195],"MLRMs":[196],"benchmarks.":[199]},"counts_by_year":[],"updated_date":"2026-05-05T08:41:31.759640","created_date":"2026-03-18T00:00:00"}
