{"id":"https://openalex.org/W4389518938","doi":"https://doi.org/10.18653/v1/2023.findings-emnlp.726","title":"Debias NLU Datasets via Training-free Perturbations","display_name":"Debias NLU Datasets via Training-free Perturbations","publication_year":2023,"publication_date":"2023-01-01","ids":{"openalex":"https://openalex.org/W4389518938","doi":"https://doi.org/10.18653/v1/2023.findings-emnlp.726"},"language":"en","primary_location":{"id":"doi:10.18653/v1/2023.findings-emnlp.726","is_oa":true,"landing_page_url":"http://dx.doi.org/10.18653/v1/2023.findings-emnlp.726","pdf_url":"https://aclanthology.org/2023.findings-emnlp.726.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Findings of the Association for Computational Linguistics: EMNLP 2023","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://aclanthology.org/2023.findings-emnlp.726.pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5032152448","display_name":"Qi Guo","orcid":"https://orcid.org/0000-0001-9198-2270"},"institutions":[{"id":"https://openalex.org/I881766915","display_name":"Nanjing University","ror":"https://ror.org/01rxvg760","country_code":"CN","type":"education","lineage":["https://openalex.org/I881766915"]},{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Qi Guo","raw_affiliation_strings":["National Key Laboratory for Novel Software Technology, Nanjing University \u2666 Institute for AI Industry Research (AIR), Tsinghua University"],"affiliations":[{"raw_affiliation_string":"National Key Laboratory for Novel Software Technology, Nanjing University \u2666 Institute for AI Industry Research (AIR), Tsinghua University","institution_ids":["https://openalex.org/I99065089","https://openalex.org/I881766915"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5068383593","display_name":"Yuanhang Tang","orcid":null},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]},{"id":"https://openalex.org/I881766915","display_name":"Nanjing University","ror":"https://ror.org/01rxvg760","country_code":"CN","type":"education","lineage":["https://openalex.org/I881766915"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuanhang Tang","raw_affiliation_strings":["National Key Laboratory for Novel Software Technology, Nanjing University \u2666 Institute for AI Industry Research (AIR), Tsinghua University"],"affiliations":[{"raw_affiliation_string":"National Key Laboratory for Novel Software Technology, Nanjing University \u2666 Institute for AI Industry Research (AIR), Tsinghua University","institution_ids":["https://openalex.org/I99065089","https://openalex.org/I881766915"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5008231744","display_name":"Yawen Ouyang","orcid":null},"institutions":[{"id":"https://openalex.org/I881766915","display_name":"Nanjing University","ror":"https://ror.org/01rxvg760","country_code":"CN","type":"education","lineage":["https://openalex.org/I881766915"]},{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yawen Ouyang","raw_affiliation_strings":["National Key Laboratory for Novel Software Technology, Nanjing University \u2666 Institute for AI Industry Research (AIR), Tsinghua University"],"affiliations":[{"raw_affiliation_string":"National Key Laboratory for Novel Software Technology, Nanjing University \u2666 Institute for AI Industry Research (AIR), Tsinghua University","institution_ids":["https://openalex.org/I99065089","https://openalex.org/I881766915"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101777324","display_name":"Zhen Wu","orcid":"https://orcid.org/0009-0003-1246-9076"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]},{"id":"https://openalex.org/I881766915","display_name":"Nanjing University","ror":"https://ror.org/01rxvg760","country_code":"CN","type":"education","lineage":["https://openalex.org/I881766915"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhen Wu","raw_affiliation_strings":["National Key Laboratory for Novel Software Technology, Nanjing University \u2666 Institute for AI Industry Research (AIR), Tsinghua University"],"affiliations":[{"raw_affiliation_string":"National Key Laboratory for Novel Software Technology, Nanjing University \u2666 Institute for AI Industry Research (AIR), Tsinghua University","institution_ids":["https://openalex.org/I99065089","https://openalex.org/I881766915"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5102994315","display_name":"Xinyu Dai","orcid":"https://orcid.org/0000-0002-4139-7337"},"institutions":[{"id":"https://openalex.org/I881766915","display_name":"Nanjing University","ror":"https://ror.org/01rxvg760","country_code":"CN","type":"education","lineage":["https://openalex.org/I881766915"]},{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xinyu Dai","raw_affiliation_strings":["National Key Laboratory for Novel Software Technology, Nanjing University \u2666 Institute for AI Industry Research (AIR), Tsinghua University"],"affiliations":[{"raw_affiliation_string":"National Key Laboratory for Novel Software Technology, Nanjing University \u2666 Institute for AI Industry Research (AIR), Tsinghua University","institution_ids":["https://openalex.org/I99065089","https://openalex.org/I881766915"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5032152448"],"corresponding_institution_ids":["https://openalex.org/I881766915","https://openalex.org/I99065089"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.16883842,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"10886","last_page":"10901"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9966999888420105,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8365158438682556},{"id":"https://openalex.org/keywords/debiasing","display_name":"Debiasing","score":0.8170652985572815},{"id":"https://openalex.org/keywords/natural-language-understanding","display_name":"Natural language understanding","score":0.6866176128387451},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4699419140815735},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.45885732769966125},{"id":"https://openalex.org/keywords/consistency","display_name":"Consistency (knowledge bases)","score":0.42556142807006836},{"id":"https://openalex.org/keywords/concept-drift","display_name":"Concept drift","score":0.41518110036849976},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.4087463915348053},{"id":"https://openalex.org/keywords/natural-language","display_name":"Natural language","score":0.25593146681785583},{"id":"https://openalex.org/keywords/data-stream-mining","display_name":"Data stream mining","score":0.14490416646003723}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8365158438682556},{"id":"https://openalex.org/C2779458634","wikidata":"https://www.wikidata.org/wiki/Q24963715","display_name":"Debiasing","level":2,"score":0.8170652985572815},{"id":"https://openalex.org/C2779439875","wikidata":"https://www.wikidata.org/wiki/Q1078276","display_name":"Natural language understanding","level":3,"score":0.6866176128387451},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4699419140815735},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.45885732769966125},{"id":"https://openalex.org/C2776436953","wikidata":"https://www.wikidata.org/wiki/Q5163215","display_name":"Consistency (knowledge bases)","level":2,"score":0.42556142807006836},{"id":"https://openalex.org/C60777511","wikidata":"https://www.wikidata.org/wiki/Q3045002","display_name":"Concept drift","level":3,"score":0.41518110036849976},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.4087463915348053},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.25593146681785583},{"id":"https://openalex.org/C89198739","wikidata":"https://www.wikidata.org/wiki/Q3079880","display_name":"Data stream mining","level":2,"score":0.14490416646003723},{"id":"https://openalex.org/C188147891","wikidata":"https://www.wikidata.org/wiki/Q147638","display_name":"Cognitive science","level":1,"score":0.0},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.18653/v1/2023.findings-emnlp.726","is_oa":true,"landing_page_url":"http://dx.doi.org/10.18653/v1/2023.findings-emnlp.726","pdf_url":"https://aclanthology.org/2023.findings-emnlp.726.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Findings of the Association for Computational Linguistics: EMNLP 2023","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.18653/v1/2023.findings-emnlp.726","is_oa":true,"landing_page_url":"http://dx.doi.org/10.18653/v1/2023.findings-emnlp.726","pdf_url":"https://aclanthology.org/2023.findings-emnlp.726.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Findings of the Association for Computational Linguistics: EMNLP 2023","raw_type":"proceedings-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/13","score":0.7099999785423279,"display_name":"Climate action"}],"awards":[{"id":"https://openalex.org/G3734224992","display_name":null,"funder_award_id":"61976114","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G655826466","display_name":null,"funder_award_id":"61936012","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4389518938.pdf"},"referenced_works_count":40,"referenced_works":["https://openalex.org/W1840435438","https://openalex.org/W2118707092","https://openalex.org/W2250790822","https://openalex.org/W2788496822","https://openalex.org/W2923014074","https://openalex.org/W2932893307","https://openalex.org/W2951286828","https://openalex.org/W2962736243","https://openalex.org/W2962843521","https://openalex.org/W2963096121","https://openalex.org/W2963216553","https://openalex.org/W2963341956","https://openalex.org/W2963351832","https://openalex.org/W2963846996","https://openalex.org/W2964044490","https://openalex.org/W2970019270","https://openalex.org/W2970379526","https://openalex.org/W2971296908","https://openalex.org/W2996428491","https://openalex.org/W3034831508","https://openalex.org/W3035032873","https://openalex.org/W3035139434","https://openalex.org/W3035390927","https://openalex.org/W3098341425","https://openalex.org/W3101004475","https://openalex.org/W3103291112","https://openalex.org/W3110511794","https://openalex.org/W3152911627","https://openalex.org/W3155431862","https://openalex.org/W3179534853","https://openalex.org/W3192478068","https://openalex.org/W3194325860","https://openalex.org/W3212606841","https://openalex.org/W3214559542","https://openalex.org/W4225468040","https://openalex.org/W4229907684","https://openalex.org/W4295253143","https://openalex.org/W4385574060","https://openalex.org/W4385574074","https://openalex.org/W4387427059"],"related_works":["https://openalex.org/W4362554880","https://openalex.org/W4281684980","https://openalex.org/W3034831508","https://openalex.org/W3023038345","https://openalex.org/W4287800015","https://openalex.org/W3175352680","https://openalex.org/W4283796213","https://openalex.org/W4286981822","https://openalex.org/W3211881375","https://openalex.org/W3198192082"],"abstract_inverted_index":{"Several":[0],"recent":[1],"studies":[2],"have":[3],"shown":[4],"that":[5,19,114,167],"advanced":[6],"models":[7,31,137],"for":[8],"natural":[9],"language":[10,136],"understanding":[11],"(NLU)":[12],"are":[13,20],"prone":[14],"to":[15,28,41,43,95,123],"capture":[16],"biased":[17,121],"features":[18,122],"independent":[21],"of":[22,69,77,91,102,143],"the":[23,67,88,100,141,155,180],"task":[24],"but":[25,39],"spuriously":[26],"correlated":[27],"labels.":[29],"Such":[30],"often":[32],"perform":[33],"well":[34],"on":[35,118],"in-distribution":[36],"(ID)":[37],"datasets":[38],"fail":[40],"generalize":[42],"out-of-distribution":[44],"(OOD)":[45],"datasets.":[46],"Existing":[47],"solutions":[48],"can":[49],"be":[50],"separated":[51],"into":[52],"two":[53],"orthogonal":[54],"approaches:":[55],"model-centric":[56,181],"methods":[57,62],"and":[58,152],"data-centric":[59],"methods.":[60],"Model-centric":[61],"improve":[63],"OOD":[64],"performance":[65,171],"at":[66],"expense":[68],"ID":[70],"performance.":[71],"Data-centric":[72],"strategies":[73],"usually":[74],"boost":[75],"both":[76],"them":[78],"via":[79,133],"data-level":[80],"manipulations":[81],"such":[82,103],"as":[83],"generative":[84],"data":[85],"augmentation.":[86],"However,":[87],"high":[89],"cost":[90,145],"fine-tuning":[92],"a":[93,112,148,186],"generator":[94],"produce":[96],"valid":[97],"samples":[98,119],"limits":[99],"potential":[101],"approaches.":[104],"To":[105],"address":[106],"this":[107],"issue,":[108],"we":[109],"propose":[110],"PDD,":[111],"framework":[113],"conducts":[115],"training-free":[116,149],"Perturbations":[117],"containing":[120],"Debias":[124],"NLU":[125],"Datasets.":[126],"PDD":[127,139,168,184],"works":[128],"by":[129,146,158],"iteratively":[130],"conducting":[131],"perturbations":[132],"pre-trained":[134],"mask":[135],"(MLM).":[138],"exhibits":[140],"advantage":[142],"low":[144],"adopting":[147],"perturbation":[150],"strategy":[151],"further":[153],"improves":[154],"label":[156,160],"consistency":[157],"utilizing":[159],"information":[161],"during":[162],"perturbations.":[163],"Extensive":[164],"experiments":[165],"demonstrate":[166],"shows":[169],"competitive":[170],"with":[172,179],"previous":[173],"state-of-the-art":[174],"debiasing":[175,182],"strategies.":[176],"When":[177],"combined":[178],"methods,":[183],"establishes":[185],"new":[187],"state-of-the-art.":[188]},"counts_by_year":[],"updated_date":"2026-04-21T08:09:41.155169","created_date":"2025-10-10T00:00:00"}
