{"id":"https://openalex.org/W4405717809","doi":"https://doi.org/10.1109/tmm.2024.3521838","title":"Hear Me, See Me, Understand Me: Audio-Visual Autism Behavior Recognition","display_name":"Hear Me, See Me, Understand Me: Audio-Visual Autism Behavior Recognition","publication_year":2024,"publication_date":"2024-12-23","ids":{"openalex":"https://openalex.org/W4405717809","doi":"https://doi.org/10.1109/tmm.2024.3521838"},"language":"en","primary_location":{"id":"doi:10.1109/tmm.2024.3521838","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmm.2024.3521838","pdf_url":null,"source":{"id":"https://openalex.org/S137030581","display_name":"IEEE Transactions on Multimedia","issn_l":"1520-9210","issn":["1520-9210","1941-0077"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Multimedia","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5028337614","display_name":"Shijian Deng","orcid":null},"institutions":[{"id":"https://openalex.org/I162577319","display_name":"The University of Texas at Dallas","ror":"https://ror.org/049emcs32","country_code":"US","type":"education","lineage":["https://openalex.org/I162577319"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Shijian Deng","raw_affiliation_strings":["Department of Computer Science, The University of Texas at Dallas, Richardson, TX, USA"],"raw_orcid":"https://orcid.org/0009-0008-9560-702X","affiliations":[{"raw_affiliation_string":"Department of Computer Science, The University of Texas at Dallas, Richardson, TX, USA","institution_ids":["https://openalex.org/I162577319"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5111222231","display_name":"Erin E. Kosloski","orcid":"https://orcid.org/0009-0007-4239-8044"},"institutions":[{"id":"https://openalex.org/I162577319","display_name":"The University of Texas at Dallas","ror":"https://ror.org/049emcs32","country_code":"US","type":"education","lineage":["https://openalex.org/I162577319"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Erin E. Kosloski","raw_affiliation_strings":["School of Behavioral and Brain Sciences, The University of Texas at Dallas, Dallas, TX, USA"],"raw_orcid":"https://orcid.org/0009-0007-4239-8044","affiliations":[{"raw_affiliation_string":"School of Behavioral and Brain Sciences, The University of Texas at Dallas, Dallas, TX, USA","institution_ids":["https://openalex.org/I162577319"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101373698","display_name":"Siddhi Patel","orcid":"https://orcid.org/0009-0001-3366-6701"},"institutions":[{"id":"https://openalex.org/I162577319","display_name":"The University of Texas at Dallas","ror":"https://ror.org/049emcs32","country_code":"US","type":"education","lineage":["https://openalex.org/I162577319"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Siddhi Patel","raw_affiliation_strings":["School of Behavioral and Brain Sciences, The University of Texas at Dallas, Dallas, TX, USA"],"raw_orcid":"https://orcid.org/0009-0001-3366-6701","affiliations":[{"raw_affiliation_string":"School of Behavioral and Brain Sciences, The University of Texas at Dallas, Dallas, TX, USA","institution_ids":["https://openalex.org/I162577319"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5099045883","display_name":"Zeke A. Barnett","orcid":null},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Zeke A. Barnett","raw_affiliation_strings":["School of Computer Science, Carnegie Mellon University, Pittsburgh, PA, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Computer Science, Carnegie Mellon University, Pittsburgh, PA, USA","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5028680712","display_name":"Yiyang Nan","orcid":"https://orcid.org/0000-0001-7218-6228"},"institutions":[{"id":"https://openalex.org/I27804330","display_name":"Brown University","ror":"https://ror.org/05gq02987","country_code":"US","type":"education","lineage":["https://openalex.org/I27804330"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yiyang Nan","raw_affiliation_strings":["Department of Computer Science, Brown University, Providence, RI, USA"],"raw_orcid":"https://orcid.org/0000-0001-7218-6228","affiliations":[{"raw_affiliation_string":"Department of Computer Science, Brown University, Providence, RI, USA","institution_ids":["https://openalex.org/I27804330"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102113010","display_name":"Alexander Kaplan","orcid":null},"institutions":[{"id":"https://openalex.org/I162577319","display_name":"The University of Texas at Dallas","ror":"https://ror.org/049emcs32","country_code":"US","type":"education","lineage":["https://openalex.org/I162577319"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Alexander Kaplan","raw_affiliation_strings":["Department of Computer Science, The University of Texas at Dallas, Richardson, TX, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Computer Science, The University of Texas at Dallas, Richardson, TX, USA","institution_ids":["https://openalex.org/I162577319"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5099045884","display_name":"Sisira Aarukapalli","orcid":null},"institutions":[{"id":"https://openalex.org/I162577319","display_name":"The University of Texas at Dallas","ror":"https://ror.org/049emcs32","country_code":"US","type":"education","lineage":["https://openalex.org/I162577319"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Sisira Aarukapalli","raw_affiliation_strings":["Department of Computer Science, The University of Texas at Dallas, Richardson, TX, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Computer Science, The University of Texas at Dallas, Richardson, TX, USA","institution_ids":["https://openalex.org/I162577319"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5099045885","display_name":"William T. Doan","orcid":null},"institutions":[{"id":"https://openalex.org/I162577319","display_name":"The University of Texas at Dallas","ror":"https://ror.org/049emcs32","country_code":"US","type":"education","lineage":["https://openalex.org/I162577319"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"William T. Doan","raw_affiliation_strings":["Department of Computer Science, The University of Texas at Dallas, Richardson, TX, USA"],"raw_orcid":"https://orcid.org/0009-0005-3135-6346","affiliations":[{"raw_affiliation_string":"Department of Computer Science, The University of Texas at Dallas, Richardson, TX, USA","institution_ids":["https://openalex.org/I162577319"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5001641297","display_name":"Matthew Wang","orcid":"https://orcid.org/0000-0003-3578-6683"},"institutions":[{"id":"https://openalex.org/I162577319","display_name":"The University of Texas at Dallas","ror":"https://ror.org/049emcs32","country_code":"US","type":"education","lineage":["https://openalex.org/I162577319"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Matthew Wang","raw_affiliation_strings":["Department of Computer Science, The University of Texas at Dallas, Richardson, TX, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Computer Science, The University of Texas at Dallas, Richardson, TX, USA","institution_ids":["https://openalex.org/I162577319"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5104150506","display_name":"Harsh Singh","orcid":null},"institutions":[{"id":"https://openalex.org/I4210113480","display_name":"Mohamed bin Zayed University of Artificial Intelligence","ror":"https://ror.org/0258gkt32","country_code":"AE","type":"education","lineage":["https://openalex.org/I4210113480"]}],"countries":["AE"],"is_corresponding":false,"raw_author_name":"Harsh Singh","raw_affiliation_strings":["Mohamed bin Zayed University of Artificial Intelligence, Abu Dhabi, UAE"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Mohamed bin Zayed University of Artificial Intelligence, Abu Dhabi, UAE","institution_ids":["https://openalex.org/I4210113480"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5030048123","display_name":"Pamela Rosenthal Rollins","orcid":"https://orcid.org/0000-0002-8046-2955"},"institutions":[{"id":"https://openalex.org/I162577319","display_name":"The University of Texas at Dallas","ror":"https://ror.org/049emcs32","country_code":"US","type":"education","lineage":["https://openalex.org/I162577319"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Pamela R. Rollins","raw_affiliation_strings":["School of Behavioral and Brain Sciences, The University of Texas at Dallas, Dallas, TX, USA"],"raw_orcid":"https://orcid.org/0000-0002-8046-2955","affiliations":[{"raw_affiliation_string":"School of Behavioral and Brain Sciences, The University of Texas at Dallas, Dallas, TX, USA","institution_ids":["https://openalex.org/I162577319"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101835756","display_name":"Yapeng Tian","orcid":"https://orcid.org/0000-0003-1423-4513"},"institutions":[{"id":"https://openalex.org/I162577319","display_name":"The University of Texas at Dallas","ror":"https://ror.org/049emcs32","country_code":"US","type":"education","lineage":["https://openalex.org/I162577319"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yapeng Tian","raw_affiliation_strings":["Department of Computer Science, The University of Texas at Dallas, Richardson, TX, USA"],"raw_orcid":"https://orcid.org/0000-0003-1423-4513","affiliations":[{"raw_affiliation_string":"Department of Computer Science, The University of Texas at Dallas, Richardson, TX, USA","institution_ids":["https://openalex.org/I162577319"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":12,"corresponding_author_ids":["https://openalex.org/A5028337614"],"corresponding_institution_ids":["https://openalex.org/I162577319"],"apc_list":null,"apc_paid":null,"fwci":4.586,"has_fulltext":false,"cited_by_count":14,"citation_normalized_percentile":{"value":0.95711863,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":99},"biblio":{"volume":"27","issue":null,"first_page":"2335","last_page":"2346"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.7875000238418579,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.7875000238418579,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12060","display_name":"Child Development and Digital Technology","score":0.7724000215530396,"subfield":{"id":"https://openalex.org/subfields/3304","display_name":"Education"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10106","display_name":"Autism Spectrum Disorder Research","score":0.6938999891281128,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7808378338813782},{"id":"https://openalex.org/keywords/autism","display_name":"Autism","score":0.6655319929122925},{"id":"https://openalex.org/keywords/audio-visual","display_name":"Audio visual","score":0.5592500567436218},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5498747825622559},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.43293625116348267},{"id":"https://openalex.org/keywords/multimedia","display_name":"Multimedia","score":0.39294132590293884},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.15841230750083923}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7808378338813782},{"id":"https://openalex.org/C205778803","wikidata":"https://www.wikidata.org/wiki/Q38404","display_name":"Autism","level":2,"score":0.6655319929122925},{"id":"https://openalex.org/C3017588708","wikidata":"https://www.wikidata.org/wiki/Q758901","display_name":"Audio visual","level":2,"score":0.5592500567436218},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5498747825622559},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.43293625116348267},{"id":"https://openalex.org/C49774154","wikidata":"https://www.wikidata.org/wiki/Q131765","display_name":"Multimedia","level":1,"score":0.39294132590293884},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.15841230750083923},{"id":"https://openalex.org/C138496976","wikidata":"https://www.wikidata.org/wiki/Q175002","display_name":"Developmental psychology","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tmm.2024.3521838","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmm.2024.3521838","pdf_url":null,"source":{"id":"https://openalex.org/S137030581","display_name":"IEEE Transactions on Multimedia","issn_l":"1520-9210","issn":["1520-9210","1941-0077"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Multimedia","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":70,"referenced_works":["https://openalex.org/W1927052826","https://openalex.org/W1993793414","https://openalex.org/W2038821742","https://openalex.org/W2067583440","https://openalex.org/W2089004696","https://openalex.org/W2167462312","https://openalex.org/W2774521151","https://openalex.org/W2795072727","https://openalex.org/W2900541008","https://openalex.org/W2902117623","https://openalex.org/W2916113381","https://openalex.org/W2950605906","https://openalex.org/W2962865004","https://openalex.org/W2964109005","https://openalex.org/W2964213933","https://openalex.org/W2971680695","https://openalex.org/W3007589762","https://openalex.org/W3015591594","https://openalex.org/W3033618928","https://openalex.org/W3034658206","https://openalex.org/W3081098783","https://openalex.org/W3083347663","https://openalex.org/W3099367322","https://openalex.org/W3103205469","https://openalex.org/W3107291510","https://openalex.org/W3118120400","https://openalex.org/W3139253241","https://openalex.org/W3164606851","https://openalex.org/W3175514052","https://openalex.org/W3182657421","https://openalex.org/W3194397797","https://openalex.org/W3214311327","https://openalex.org/W3215086423","https://openalex.org/W4210671175","https://openalex.org/W4211154280","https://openalex.org/W4213038917","https://openalex.org/W4229007573","https://openalex.org/W4309660795","https://openalex.org/W4312380001","https://openalex.org/W4375869297","https://openalex.org/W4379057633","https://openalex.org/W4384517917","https://openalex.org/W4386071707","https://openalex.org/W4386072368","https://openalex.org/W4386076365","https://openalex.org/W4386113246","https://openalex.org/W4386348132","https://openalex.org/W4387421372","https://openalex.org/W4387682108","https://openalex.org/W4387969495","https://openalex.org/W4389518986","https://openalex.org/W4389519587","https://openalex.org/W4390874575","https://openalex.org/W4402727764","https://openalex.org/W4403386295","https://openalex.org/W4405429615","https://openalex.org/W6773226109","https://openalex.org/W6791353385","https://openalex.org/W6796581206","https://openalex.org/W6810738896","https://openalex.org/W6840058269","https://openalex.org/W6847363464","https://openalex.org/W6851397930","https://openalex.org/W6851592950","https://openalex.org/W6853116092","https://openalex.org/W6853320480","https://openalex.org/W6853469104","https://openalex.org/W6857517021","https://openalex.org/W6859559887","https://openalex.org/W6870122244"],"related_works":["https://openalex.org/W4280602684","https://openalex.org/W4310092953","https://openalex.org/W2227393071","https://openalex.org/W4402326599","https://openalex.org/W1995350144","https://openalex.org/W2082102603","https://openalex.org/W2166141227","https://openalex.org/W3133980636","https://openalex.org/W4378085486","https://openalex.org/W4366602711"],"abstract_inverted_index":{"In":[0],"this":[1,61,109],"article,":[2],"we":[3,65,112,155],"introduce":[4],"a":[5,82,160,168],"novel":[6],"problem":[7],"of":[8,90,159],"audio-visual":[9,38,68],"autism":[10,25,39,69,79,151,184],"behavior":[11,16,40,152,185],"recognition,":[12,17,41],"which":[13,42],"includes":[14],"social":[15,97],"an":[18,67,87],"essential":[19],"aspect":[20],"previously":[21],"omitted":[22],"in":[23,52,150,167],"AI-assisted":[24],"screening":[26,80],"research.":[27],"We":[28],"define":[29],"the":[30,53,74,103,129,148,157,179],"task":[31],"at":[32],"hand":[33],"as":[34],"one":[35],"that":[36,133],"is":[37],"uses":[43],"audio":[44,135],"and":[45,99,118,140],"visual":[46],"cues,":[47],"including":[48,93],"any":[49],"speech":[50,141],"present":[51],"audio,":[54],"to":[55,96,164,173,177],"recognize":[56],"autism-related":[57],"behaviors.":[58],"To":[59,101],"facilitate":[60],"new":[62,110],"research":[63,107],"direction,":[64],"collected":[66],"spectrum":[70],"dataset":[71,77,131],"(AV-ASD),":[72],"currently":[73],"largest":[75],"video":[76],"for":[78,105],"using":[81],"behavioral":[83],"approach.":[84],"It":[85],"covers":[86],"extensive":[88],"range":[89],"autism-associated":[91],"behaviors,":[92],"those":[94],"related":[95],"communication":[98],"interaction.":[100],"pave":[102],"way":[104],"further":[106],"on":[108,128],"problem,":[111],"intensively":[113],"explored":[114,156],"leveraging":[115],"foundation":[116],"models":[117,122],"multimodal":[119,169],"large":[120,170],"language":[121,171],"across":[123],"different":[124],"modalities.":[125],"Our":[126],"experiments":[127],"AV-ASD":[130],"demonstrate":[132],"integrating":[134],"(mainly":[136],"ambient":[137],"sound),":[138],"visual,":[139],"(predominately":[142],"spoken":[143],"language)":[144],"modalities":[145],"significantly":[146],"enhances":[147],"performance":[149],"recognition.":[153,186],"Additionally,":[154],"use":[158],"<italic":[161],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[162],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">post-hoc":[163],"ad-hoc</i>":[165],"pipeline":[166],"model":[172],"investigate":[174],"its":[175],"potential":[176],"augment":[178],"model's":[180],"explanatory":[181],"capability":[182],"during":[183]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":11},{"year":2024,"cited_by_count":2}],"updated_date":"2026-04-28T14:05:53.105641","created_date":"2025-10-10T00:00:00"}
