{"id":"https://openalex.org/W7127576715","doi":"https://doi.org/10.48550/arxiv.2602.03425","title":"ConsistentRFT: Reducing Visual Hallucinations in Flow-based Reinforcement Fine-Tuning","display_name":"ConsistentRFT: Reducing Visual Hallucinations in Flow-based Reinforcement Fine-Tuning","publication_year":2026,"publication_date":"2026-02-03","ids":{"openalex":"https://openalex.org/W7127576715","doi":"https://doi.org/10.48550/arxiv.2602.03425"},"language":null,"primary_location":{"id":"pmh:doi:10.48550/arxiv.2602.03425","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5125009975","display_name":"Xiaofeng Tan","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Tan, Xiaofeng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124973203","display_name":"Jun Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Jun","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101323076","display_name":"Yuanting Fan","orcid":"https://orcid.org/0009-0008-6507-666X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fan, Yuanting","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5000290451","display_name":"Bin-Bin Gao","orcid":"https://orcid.org/0000-0003-2572-8156"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gao, Bin-Bin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125030227","display_name":"Xi Jiang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jiang, Xi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125037509","display_name":"Xiaochen Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Xiaochen","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101873531","display_name":"Jinlong Peng","orcid":"https://orcid.org/0009-0003-1887-6406"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Peng, Jinlong","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125003868","display_name":"Chengjie Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Chengjie","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5121546315","display_name":"Hongsong Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Hongsong","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5125071098","display_name":"Feng Zheng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zheng, Feng","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":10,"corresponding_author_ids":["https://openalex.org/A5125009975"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.13230000436306,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.13230000436306,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.1136000007390976,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11605","display_name":"Visual Attention and Saliency Detection","score":0.04670000076293945,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.6764000058174133},{"id":"https://openalex.org/keywords/trajectory","display_name":"Trajectory","score":0.47859999537467957},{"id":"https://openalex.org/keywords/granularity","display_name":"Granularity","score":0.4553000032901764},{"id":"https://openalex.org/keywords/perception","display_name":"Perception","score":0.4343000054359436},{"id":"https://openalex.org/keywords/semantics","display_name":"Semantics (computer science)","score":0.4153999984264374},{"id":"https://openalex.org/keywords/visual-hallucination","display_name":"Visual Hallucination","score":0.39959999918937683},{"id":"https://openalex.org/keywords/visual-perception","display_name":"Visual perception","score":0.39739999175071716},{"id":"https://openalex.org/keywords/flexibility","display_name":"Flexibility (engineering)","score":0.3776000142097473}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6782000064849854},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.6764000058174133},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.49720001220703125},{"id":"https://openalex.org/C13662910","wikidata":"https://www.wikidata.org/wiki/Q193139","display_name":"Trajectory","level":2,"score":0.47859999537467957},{"id":"https://openalex.org/C177774035","wikidata":"https://www.wikidata.org/wiki/Q1246948","display_name":"Granularity","level":2,"score":0.4553000032901764},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.4343000054359436},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.4153999984264374},{"id":"https://openalex.org/C2908998935","wikidata":"https://www.wikidata.org/wiki/Q130741","display_name":"Visual Hallucination","level":2,"score":0.39959999918937683},{"id":"https://openalex.org/C178253425","wikidata":"https://www.wikidata.org/wiki/Q162668","display_name":"Visual perception","level":3,"score":0.39739999175071716},{"id":"https://openalex.org/C2780598303","wikidata":"https://www.wikidata.org/wiki/Q65921492","display_name":"Flexibility (engineering)","level":2,"score":0.3776000142097473},{"id":"https://openalex.org/C2776436953","wikidata":"https://www.wikidata.org/wiki/Q5163215","display_name":"Consistency (knowledge bases)","level":2,"score":0.3725000023841858},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.3619000017642975},{"id":"https://openalex.org/C206729178","wikidata":"https://www.wikidata.org/wiki/Q2271896","display_name":"Scheduling (production processes)","level":2,"score":0.32839998602867126},{"id":"https://openalex.org/C2776058522","wikidata":"https://www.wikidata.org/wiki/Q2364768","display_name":"Visual field","level":2,"score":0.3163999915122986},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.3082999885082245},{"id":"https://openalex.org/C180747234","wikidata":"https://www.wikidata.org/wiki/Q23373","display_name":"Cognitive psychology","level":1,"score":0.2989000082015991},{"id":"https://openalex.org/C2778251979","wikidata":"https://www.wikidata.org/wiki/Q7936617","display_name":"Visual processing","level":3,"score":0.2800000011920929},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2750000059604645},{"id":"https://openalex.org/C2164484","wikidata":"https://www.wikidata.org/wiki/Q5170150","display_name":"Core (optical fiber)","level":2,"score":0.26759999990463257},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.2646999955177307},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.26350000500679016},{"id":"https://openalex.org/C189645446","wikidata":"https://www.wikidata.org/wiki/Q350865","display_name":"Mirroring","level":2,"score":0.2597000002861023},{"id":"https://openalex.org/C9652623","wikidata":"https://www.wikidata.org/wiki/Q190109","display_name":"Field (mathematics)","level":2,"score":0.25870001316070557},{"id":"https://openalex.org/C18762648","wikidata":"https://www.wikidata.org/wiki/Q42213","display_name":"Work (physics)","level":2,"score":0.25839999318122864}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:doi:10.48550/arxiv.2602.03425","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"doi:10.48550/arxiv.2602.03425","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.03425","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:doi:10.48550/arxiv.2602.03425","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Reinforcement":[0],"Fine-Tuning":[1],"(RFT)":[2],"on":[3,70,100,188],"flow-based":[4],"models":[5],"is":[6,205],"crucial":[7],"for":[8,174,178],"preference":[9],"alignment.":[10],"However,":[11],"they":[12],"often":[13],"introduce":[14,138],"visual":[15,28,167],"hallucinations":[16,29],"like":[17],"over-optimized":[18],"details":[19,72,129],"and":[20,31,45,55,79,95,127,176],"semantic":[21],"misalignment.":[22],"This":[23,204],"work":[24],"preliminarily":[25],"explores":[26],"why":[27],"arise":[30],"how":[32],"to":[33,67,108,121],"reduce":[34],"them.":[35],"We":[36,136],"first":[37],"investigate":[38],"RFT":[39,186],"methods":[40,187],"from":[41,51],"a":[42,105,115,139,156],"unified":[43],"perspective,":[44],"reveal":[46],"the":[47,74,90,147,152,197],"core":[48],"problems":[49],"stemming":[50],"two":[52],"aspects,":[53],"exploration":[54,59,123],"exploitation:":[56],"(1)":[57],"limited":[58],"during":[60],"stochastic":[61],"differential":[62],"equation":[63],"(SDE)":[64],"rollouts,":[65],"leading":[66],"an":[68,192],"over-emphasis":[69],"local":[71,128],"at":[73],"expense":[75],"of":[76,172,194,200],"global":[77,125],"semantics,":[78],"(2)":[80],"trajectory":[81],"imitation":[82],"process":[83],"inherent":[84],"in":[85],"policy":[86,154],"gradient":[87],"methods,":[88],"distorting":[89],"model's":[91,148],"foundational":[92],"vector":[93],"field":[94],"its":[96],"cross-step":[97],"consistency.":[98],"Building":[99],"this,":[101],"we":[102,113],"propose":[103],"ConsistentRFT,":[104],"general":[106],"framework":[107],"mitigate":[109],"these":[110],"hallucinations.":[111,181],"Specifically,":[112],"design":[114],"Dynamic":[116],"Granularity":[117],"Rollout":[118],"(DGR)":[119],"mechanism":[120],"balance":[122],"between":[124],"semantics":[126],"by":[130,150],"dynamically":[131],"scheduling":[132],"different":[133],"noise":[134],"sources.":[135],"then":[137],"Consistent":[140],"Policy":[141],"Gradient":[142],"Optimization":[143],"(CPGO)":[144],"that":[145,163],"preserves":[146],"consistency":[149],"aligning":[151],"current":[153],"with":[155],"more":[157],"stable":[158],"prior.":[159],"Extensive":[160],"experiments":[161],"demonstrate":[162],"ConsistentRFT":[164,183],"significantly":[165],"mitigates":[166],"hallucinations,":[168],"achieving":[169],"average":[170],"reductions":[171],"49\\%":[173],"low-level":[175],"38\\%":[177],"high-level":[179],"perceptual":[180],"Furthermore,":[182],"outperforms":[184],"other":[185],"out-of-domain":[189],"metrics,":[190],"showing":[191],"improvement":[193],"5.1\\%":[195],"(v.s.":[196],"baseline's":[198],"decrease":[199],"-0.4\\%)":[201],"over":[202],"FLUX1.dev.":[203],"\\href{https://xiaofeng-tan.github.io/projects/ConsistentRFT}{Project":[206],"Page}.":[207]},"counts_by_year":[],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2026-02-06T00:00:00"}
