{"id":"https://openalex.org/W7128647875","doi":"https://doi.org/10.48550/arxiv.2602.09637","title":"Towards Training-free Multimodal Hate Localisation with Large Language Models","display_name":"Towards Training-free Multimodal Hate Localisation with Large Language Models","publication_year":2026,"publication_date":"2026-02-10","ids":{"openalex":"https://openalex.org/W7128647875","doi":"https://doi.org/10.48550/arxiv.2602.09637"},"language":null,"primary_location":{"id":"pmh:doi:10.48550/arxiv.2602.09637","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"public-domain","license_id":"https://openalex.org/licenses/public-domain","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5067978245","display_name":"Yueming Sun","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Sun, Yueming","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125630708","display_name":"Long Yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Long","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125637273","display_name":"Jianbo Jiao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jiao, Jianbo","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5125673848","display_name":"Zeyu Fu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fu, Zeyu","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5067978245"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12262","display_name":"Hate Speech and Cyberbullying Detection","score":0.9514999985694885,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12262","display_name":"Hate Speech and Cyberbullying Detection","score":0.9514999985694885,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.007300000172108412,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10667","display_name":"Emotion and Mood Recognition","score":0.00570000009611249,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/closed-captioning","display_name":"Closed captioning","score":0.8486999869346619},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.5981000065803528},{"id":"https://openalex.org/keywords/scheme","display_name":"Scheme (mathematics)","score":0.5947999954223633},{"id":"https://openalex.org/keywords/online-video","display_name":"Online video","score":0.5439000129699707},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.4726000130176544},{"id":"https://openalex.org/keywords/visualization","display_name":"Visualization","score":0.3743000030517578},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.3725000023841858},{"id":"https://openalex.org/keywords/mechanism","display_name":"Mechanism (biology)","score":0.3702000081539154}],"concepts":[{"id":"https://openalex.org/C157657479","wikidata":"https://www.wikidata.org/wiki/Q2367247","display_name":"Closed captioning","level":3,"score":0.8486999869346619},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7511000037193298},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.5981000065803528},{"id":"https://openalex.org/C77618280","wikidata":"https://www.wikidata.org/wiki/Q1155772","display_name":"Scheme (mathematics)","level":2,"score":0.5947999954223633},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5716000199317932},{"id":"https://openalex.org/C2988167200","wikidata":"https://www.wikidata.org/wiki/Q16885149","display_name":"Online video","level":2,"score":0.5439000129699707},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.4726000130176544},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3846000134944916},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.3743000030517578},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.3725000023841858},{"id":"https://openalex.org/C89611455","wikidata":"https://www.wikidata.org/wiki/Q6804646","display_name":"Mechanism (biology)","level":2,"score":0.3702000081539154},{"id":"https://openalex.org/C2778152352","wikidata":"https://www.wikidata.org/wiki/Q5165061","display_name":"Content (measure theory)","level":2,"score":0.32260000705718994},{"id":"https://openalex.org/C518677369","wikidata":"https://www.wikidata.org/wiki/Q202833","display_name":"Social media","level":2,"score":0.30809998512268066},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.2953000068664551},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.2881999909877777},{"id":"https://openalex.org/C168167062","wikidata":"https://www.wikidata.org/wiki/Q1117970","display_name":"Component (thermodynamics)","level":2,"score":0.2721000015735626},{"id":"https://openalex.org/C2779304628","wikidata":"https://www.wikidata.org/wiki/Q3503480","display_name":"Face (sociological concept)","level":2,"score":0.26350000500679016},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.2572000026702881},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.2522999942302704}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:doi:10.48550/arxiv.2602.09637","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"public-domain","license_id":"https://openalex.org/licenses/public-domain","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"doi:10.48550/arxiv.2602.09637","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.09637","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"public-domain","license_id":"https://openalex.org/licenses/public-domain","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:doi:10.48550/arxiv.2602.09637","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"public-domain","license_id":"https://openalex.org/licenses/public-domain","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"sustainable_development_goals":[{"score":0.6719011664390564,"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"The":[0],"proliferation":[1],"of":[2],"hateful":[3,75,106],"content":[4,76],"in":[5,77],"online":[6],"videos":[7],"poses":[8],"severe":[9],"threats":[10],"to":[11,70,103,118],"individual":[12],"well-being":[13],"and":[14,67,72,94,97,128,147,158],"societal":[15],"harmony.":[16],"However,":[17],"existing":[18,135],"solutions":[19],"for":[20,51,108,156],"video":[21,53,85,95,161],"hate":[22,52,160],"detection":[23],"either":[24],"rely":[25],"heavily":[26],"on":[27,61,123],"large-scale":[28],"human":[29],"annotations":[30],"or":[31],"lack":[32],"fine-grained":[33,105],"temporal":[34],"precision.":[35],"In":[36],"this":[37],"work,":[38],"we":[39],"propose":[40],"LELA,":[41],"the":[42],"first":[43],"training-free":[44,79,136],"Large":[45],"Language":[46],"Model":[47],"(LLM)":[48],"based":[49],"framework":[50],"localization.":[54,162],"Distinct":[55],"from":[56],"state-of-the-art":[57],"models":[58],"that":[59,131],"depend":[60],"supervised":[62],"pipelines,":[63],"LELA":[64,132,151],"leverages":[65],"LLMs":[66],"modality-specific":[68],"captioning":[69],"detect":[71],"temporally":[73],"localize":[74],"a":[78,84,99,114,139,153],"manner.":[80],"Our":[81],"method":[82],"decomposes":[83],"into":[86],"five":[87],"modalities,":[88],"including":[89],"image,":[90],"speech,":[91],"OCR,":[92],"music,":[93],"context,":[96],"uses":[98],"multi-stage":[100],"prompting":[101],"scheme":[102],"compute":[104],"scores":[107],"each":[109],"frame.":[110],"We":[111,142],"further":[112],"introduce":[113],"composition":[115],"matching":[116],"mechanism":[117],"enhance":[119],"cross-modal":[120],"reasoning.":[121],"Experiments":[122],"two":[124],"challenging":[125],"benchmarks,":[126],"HateMM":[127],"MultiHateClip,":[129],"demonstrate":[130],"outperforms":[133],"all":[134],"baselines":[137],"by":[138],"large":[140],"margin.":[141],"also":[143],"provide":[144],"extensive":[145],"ablations":[146],"qualitative":[148],"visualizations,":[149],"establishing":[150],"as":[152],"strong":[154],"foundation":[155],"scalable":[157],"interpretable":[159]},"counts_by_year":[],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2026-02-12T00:00:00"}
