{"id":"https://openalex.org/W4416368252","doi":"https://doi.org/10.48550/arxiv.2510.02423","title":"RefineShot: Rethinking Cinematography Understanding with Foundational Skill Evaluation","display_name":"RefineShot: Rethinking Cinematography Understanding with Foundational Skill Evaluation","publication_year":2025,"publication_date":"2025-10-02","ids":{"openalex":"https://openalex.org/W4416368252","doi":"https://doi.org/10.48550/arxiv.2510.02423"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2510.02423","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2510.02423","pdf_url":"https://arxiv.org/pdf/2510.02423","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2510.02423","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5113115264","display_name":"Hang Wu","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Wu, Hang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100582800","display_name":"Yujun Cai","orcid":"https://orcid.org/0009-0007-4868-3153"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cai, Yujun","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100712413","display_name":"Hong Ge","orcid":"https://orcid.org/0000-0001-9421-2677"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ge, Haonan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101397369","display_name":"Hongkai Chen","orcid":"https://orcid.org/0000-0001-7206-6584"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Hongkai","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100418319","display_name":"Ming\u2013Hsuan Yang","orcid":"https://orcid.org/0000-0003-4848-2304"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Ming-Hsuan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":null,"display_name":"Wang, Yiwei","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Yiwei","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5113115264"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9035000205039978,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9035000205039978,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.012600000016391277,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.012600000016391277,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/cinematography","display_name":"Cinematography","score":0.9085000157356262},{"id":"https://openalex.org/keywords/consistency","display_name":"Consistency (knowledge bases)","score":0.6527000069618225},{"id":"https://openalex.org/keywords/narrative","display_name":"Narrative","score":0.6241999864578247},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.5669000148773193},{"id":"https://openalex.org/keywords/limiting","display_name":"Limiting","score":0.48190000653266907},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.4514999985694885},{"id":"https://openalex.org/keywords/variety","display_name":"Variety (cybernetics)","score":0.3953999876976013},{"id":"https://openalex.org/keywords/visual-media","display_name":"Visual media","score":0.3513999879360199}],"concepts":[{"id":"https://openalex.org/C100991257","wikidata":"https://www.wikidata.org/wiki/Q590870","display_name":"Cinematography","level":2,"score":0.9085000157356262},{"id":"https://openalex.org/C2776436953","wikidata":"https://www.wikidata.org/wiki/Q5163215","display_name":"Consistency (knowledge bases)","level":2,"score":0.6527000069618225},{"id":"https://openalex.org/C199033989","wikidata":"https://www.wikidata.org/wiki/Q1318295","display_name":"Narrative","level":2,"score":0.6241999864578247},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6031000018119812},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.5669000148773193},{"id":"https://openalex.org/C188198153","wikidata":"https://www.wikidata.org/wiki/Q1613840","display_name":"Limiting","level":2,"score":0.48190000653266907},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.4514999985694885},{"id":"https://openalex.org/C136197465","wikidata":"https://www.wikidata.org/wiki/Q1729295","display_name":"Variety (cybernetics)","level":2,"score":0.3953999876976013},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.36320000886917114},{"id":"https://openalex.org/C2987052865","wikidata":"https://www.wikidata.org/wiki/Q11033","display_name":"Visual media","level":2,"score":0.3513999879360199},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.34850001335144043},{"id":"https://openalex.org/C204323151","wikidata":"https://www.wikidata.org/wiki/Q905424","display_name":"Range (aeronautics)","level":2,"score":0.33480000495910645},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.29260000586509705},{"id":"https://openalex.org/C2780385302","wikidata":"https://www.wikidata.org/wiki/Q367158","display_name":"Protocol (science)","level":3,"score":0.29249998927116394},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.28290000557899475},{"id":"https://openalex.org/C2164484","wikidata":"https://www.wikidata.org/wiki/Q5170150","display_name":"Core (optical fiber)","level":2,"score":0.2824000120162964},{"id":"https://openalex.org/C2780980858","wikidata":"https://www.wikidata.org/wiki/Q110022","display_name":"Dual (grammatical number)","level":2,"score":0.2800000011920929},{"id":"https://openalex.org/C3020234875","wikidata":"https://www.wikidata.org/wiki/Q1260632","display_name":"Media content","level":2,"score":0.2648000121116638},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.26350000500679016},{"id":"https://openalex.org/C138268822","wikidata":"https://www.wikidata.org/wiki/Q1051925","display_name":"Resolution (logic)","level":2,"score":0.2542000114917755},{"id":"https://openalex.org/C98184364","wikidata":"https://www.wikidata.org/wiki/Q1780131","display_name":"Argument (complex analysis)","level":2,"score":0.2535000145435333},{"id":"https://openalex.org/C539667460","wikidata":"https://www.wikidata.org/wiki/Q2414942","display_name":"Management science","level":1,"score":0.2529999911785126},{"id":"https://openalex.org/C49774154","wikidata":"https://www.wikidata.org/wiki/Q131765","display_name":"Multimedia","level":1,"score":0.25049999356269836},{"id":"https://openalex.org/C188147891","wikidata":"https://www.wikidata.org/wiki/Q147638","display_name":"Cognitive science","level":1,"score":0.25029999017715454},{"id":"https://openalex.org/C77618280","wikidata":"https://www.wikidata.org/wiki/Q1155772","display_name":"Scheme (mathematics)","level":2,"score":0.2502000033855438}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2510.02423","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2510.02423","pdf_url":"https://arxiv.org/pdf/2510.02423","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550/arxiv.2510.02423","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2510.02423","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2510.02423","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2510.02423","pdf_url":"https://arxiv.org/pdf/2510.02423","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Cinematography":[0],"understanding":[1,35],"refers":[2],"to":[3,6,142],"the":[4,10,18,49,116],"ability":[5],"recognize":[7],"not":[8],"only":[9],"visual":[11],"content":[12,42],"of":[13,61,120],"a":[14,58,144],"scene":[15],"but":[16],"also":[17],"cinematic":[19,62],"techniques":[20],"that":[21,78,130,149],"shape":[22],"narrative":[23],"meaning.":[24],"This":[25],"capability":[26],"is":[27],"attracting":[28],"increasing":[29],"attention,":[30],"as":[31],"it":[32],"enhances":[33],"multimodal":[34],"in":[36,44,82,87,158],"real-world":[37],"applications":[38],"and":[39,46,64,84,90,99,124,135,146,154],"underpins":[40],"coherent":[41],"creation":[43],"film":[45],"media.":[47],"As":[48],"most":[50],"comprehensive":[51],"benchmark":[52,148],"for":[53],"this":[54],"task,":[55],"ShotBench":[56,83,110],"spans":[57],"wide":[59],"range":[60],"concepts":[63],"VQA-style":[65],"evaluations,":[66],"with":[67],"ShotVL":[68],"achieving":[69],"state-of-the-art":[70],"results":[71],"on":[72],"it.":[73],"However,":[74],"our":[75],"analysis":[76,119],"reveals":[77],"ambiguous":[79],"option":[80,113],"design":[81],"ShotVL's":[85,121],"shortcomings":[86],"reasoning":[88,122],"consistency":[89],"instruction":[91],"adherence":[92],"undermine":[93],"evaluation":[94,128],"reliability,":[95],"limiting":[96],"fair":[97],"comparison":[98],"hindering":[100],"future":[101,156],"progress.":[102],"To":[103],"overcome":[104],"these":[105],"issues,":[106],"we":[107],"systematically":[108],"refine":[109],"through":[111],"consistent":[112],"restructuring,":[114],"conduct":[115],"first":[117],"critical":[118],"behavior,":[123],"introduce":[125],"an":[126],"extended":[127],"protocol":[129],"jointly":[131],"assesses":[132],"task":[133],"accuracy":[134],"core":[136],"model":[137],"competencies.":[138],"These":[139],"efforts":[140],"lead":[141],"RefineShot,":[143],"refined":[145],"expanded":[147],"enables":[150],"more":[151],"reliable":[152],"assessment":[153],"fosters":[155],"advances":[157],"cinematography":[159],"understanding.":[160]},"counts_by_year":[],"updated_date":"2026-04-21T08:09:41.155169","created_date":"2025-10-10T00:00:00"}
