{"id":"https://openalex.org/W4417126435","doi":"https://doi.org/10.48550/arxiv.2506.07016","title":"MAGNET: A Multi-agent Framework for Finding Audio-Visual Needles by Reasoning over Multi-Video Haystacks","display_name":"MAGNET: A Multi-agent Framework for Finding Audio-Visual Needles by Reasoning over Multi-Video Haystacks","publication_year":2025,"publication_date":"2025-06-08","ids":{"openalex":"https://openalex.org/W4417126435","doi":"https://doi.org/10.48550/arxiv.2506.07016"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2506.07016","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2506.07016","pdf_url":"https://arxiv.org/pdf/2506.07016","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2506.07016","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101778650","display_name":"Sanjoy Chowdhury","orcid":"https://orcid.org/0000-0003-4256-4720"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Chowdhury, Sanjoy","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5120754968","display_name":"Mohamed Elmoghany","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Elmoghany, Mohamed","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5120724004","display_name":"Yohan Abeysinghe","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Abeysinghe, Yohan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5086595580","display_name":"Junjie Fei","orcid":"https://orcid.org/0000-0002-8193-3704"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fei, Junjie","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101977185","display_name":"Sayan Nag","orcid":"https://orcid.org/0000-0001-5652-125X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Nag, Sayan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101483979","display_name":"Salman Khan","orcid":"https://orcid.org/0000-0002-2905-1755"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Khan, Salman","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5085089542","display_name":"Mohamed Elhoseiny","orcid":"https://orcid.org/0000-0001-9659-1551"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Elhoseiny, Mohamed","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5004194238","display_name":"Dinesh Manocha","orcid":"https://orcid.org/0000-0001-7047-9801"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Manocha, Dinesh","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5101778650"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9786999821662903,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9786999821662903,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.003100000089034438,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.00279999990016222,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/salient","display_name":"Salient","score":0.659500002861023},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.6531999707221985},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.6258000135421753},{"id":"https://openalex.org/keywords/bridge","display_name":"Bridge (graph theory)","score":0.5734000205993652},{"id":"https://openalex.org/keywords/baseline","display_name":"Baseline (sea)","score":0.484499990940094},{"id":"https://openalex.org/keywords/ground","display_name":"Ground","score":0.4796999990940094},{"id":"https://openalex.org/keywords/ground-truth","display_name":"Ground truth","score":0.47839999198913574}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7680000066757202},{"id":"https://openalex.org/C2780719617","wikidata":"https://www.wikidata.org/wiki/Q1030752","display_name":"Salient","level":2,"score":0.659500002861023},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.6531999707221985},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.6258000135421753},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6176999807357788},{"id":"https://openalex.org/C100776233","wikidata":"https://www.wikidata.org/wiki/Q2532492","display_name":"Bridge (graph theory)","level":2,"score":0.5734000205993652},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4894999861717224},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.484499990940094},{"id":"https://openalex.org/C168993435","wikidata":"https://www.wikidata.org/wiki/Q6501125","display_name":"Ground","level":2,"score":0.4796999990940094},{"id":"https://openalex.org/C146849305","wikidata":"https://www.wikidata.org/wiki/Q370766","display_name":"Ground truth","level":2,"score":0.47839999198913574},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.40450000762939453},{"id":"https://openalex.org/C44291984","wikidata":"https://www.wikidata.org/wiki/Q1074173","display_name":"Question answering","level":2,"score":0.38359999656677246},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.37560001015663147},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.35830000042915344},{"id":"https://openalex.org/C86251818","wikidata":"https://www.wikidata.org/wiki/Q816754","display_name":"Benchmarking","level":2,"score":0.3188999891281128},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.3188000023365021},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3093000054359436},{"id":"https://openalex.org/C2779662365","wikidata":"https://www.wikidata.org/wiki/Q5416694","display_name":"Event (particle physics)","level":2,"score":0.30660000443458557},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.3059000074863434},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3000999987125397},{"id":"https://openalex.org/C20162079","wikidata":"https://www.wikidata.org/wiki/Q1151406","display_name":"Case-based reasoning","level":2,"score":0.2759000062942505}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2506.07016","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2506.07016","pdf_url":"https://arxiv.org/pdf/2506.07016","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550/arxiv.2506.07016","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2506.07016","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2506.07016","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2506.07016","pdf_url":"https://arxiv.org/pdf/2506.07016","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Large":[0],"multimodal":[1],"models":[2],"(LMMs)":[3],"have":[4],"shown":[5],"remarkable":[6],"progress":[7],"in":[8,33,55,80,116,152],"audio-visual":[9,50,102],"understanding,":[10],"yet":[11],"they":[12],"struggle":[13],"with":[14],"real-world":[15],"scenarios":[16],"that":[17],"require":[18],"complex":[19],"reasoning":[20,53],"across":[21,77],"extensive":[22],"video":[23,28],"collections.":[24],"Existing":[25],"benchmarks":[26],"for":[27,169],"question":[29],"answering":[30],"remain":[31],"limited":[32],"scope,":[34],"typically":[35],"involving":[36],"one":[37],"clip":[38],"per":[39],"query,":[40],"which":[41,179],"falls":[42],"short":[43],"of":[44,48,114,163,200],"representing":[45],"the":[46,70,91,112],"challenges":[47],"large-scale,":[49],"retrieval":[51,118,165],"and":[52,85,119,139,148,166,187,192,197],"encountered":[54],"practical":[56],"applications.":[57],"To":[58,95,159],"bridge":[59],"this":[60,96,133],"gap,":[61],"we":[62,98,124,173],"introduce":[63,174],"a":[64,83,126,184,188],"novel":[65],"task":[66,154],"named":[67],"AV-HaystacksQA,":[68],"where":[69],"goal":[71],"is":[72],"to":[73,82,89,110,131,137,194],"identify":[74],"salient":[75],"segments":[76],"different":[78],"videos":[79],"response":[81,171],"query":[84],"link":[86],"them":[87],"together":[88],"generate":[90],"most":[92],"informative":[93],"answer.":[94],"end,":[97],"present":[99],"AVHaystacks,":[100],"an":[101],"benchmark":[103],"comprising":[104],"3100":[105],"annotated":[106],"QA":[107,153],"pairs":[108],"designed":[109],"assess":[111],"capabilities":[113],"LMMs":[115],"multi-video":[117,164],"temporal":[120,167],"grounding":[121,168,202],"task.":[122],"Additionally,":[123],"propose":[125],"model-agnostic,":[127],"multi-agent":[128],"framework":[129],"MAGNET":[130],"address":[132],"challenge,":[134],"achieving":[135],"up":[136],"89%":[138],"65%":[140],"relative":[141],"improvements":[142],"over":[143],"baseline":[144],"methods":[145],"on":[146,155],"BLEU@4":[147],"GPT":[149],"evaluation":[150,162,199],"scores":[151],"our":[156],"proposed":[157],"AVHaystacks.":[158],"enable":[160],"robust":[161],"optimal":[170],"generation,":[172],"two":[175],"new":[176],"metrics,":[177],"STEM,":[178],"captures":[180],"alignment":[181],"errors":[182],"between":[183],"ground":[185],"truth":[186],"predicted":[189],"step":[190],"sequence":[191],"MTGS,":[193],"facilitate":[195],"balanced":[196],"interpretable":[198],"segment-level":[201],"performance.":[203],"Project:":[204],"https://schowdhury671.github.io/magnet_project/":[205]},"counts_by_year":[],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2025-10-10T00:00:00"}
