{"id":"https://openalex.org/W4399252447","doi":"https://doi.org/10.48550/arxiv.2405.19856","title":"DevEval: A Manually-Annotated Code Generation Benchmark Aligned with Real-World Code Repositories","display_name":"DevEval: A Manually-Annotated Code Generation Benchmark Aligned with Real-World Code Repositories","publication_year":2024,"publication_date":"2024-05-30","ids":{"openalex":"https://openalex.org/W4399252447","doi":"https://doi.org/10.48550/arxiv.2405.19856"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2405.19856","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2405.19856","pdf_url":"https://arxiv.org/pdf/2405.19856","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2405.19856","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102859755","display_name":"Jia Li","orcid":"https://orcid.org/0000-0003-4411-6614"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Li, Jia","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100447691","display_name":"Ge Li","orcid":"https://orcid.org/0000-0003-0140-0949"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Ge","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5018828419","display_name":"Yunfei Zhao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhao, Yunfei","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5005447276","display_name":"Yongmin Li","orcid":"https://orcid.org/0000-0003-1668-2440"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Yongmin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101535573","display_name":"Huanyu Liu","orcid":"https://orcid.org/0000-0002-5618-8036"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Huanyu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101733222","display_name":"Hao Zhu","orcid":"https://orcid.org/0009-0007-7640-1889"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhu, Hao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5059916738","display_name":"Lecheng Wang","orcid":"https://orcid.org/0000-0003-2931-9528"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Lecheng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5025012113","display_name":"Kaibo Liu","orcid":"https://orcid.org/0000-0003-2863-5748"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Kaibo","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5019878788","display_name":"Zheng Fang","orcid":"https://orcid.org/0000-0003-2601-8148"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fang, Zheng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5010125442","display_name":"Lanshen Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Lanshen","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5084930608","display_name":"Jiazheng Ding","orcid":"https://orcid.org/0000-0003-1615-7427"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ding, Jiazheng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101829922","display_name":"Xuanming Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Xuanming","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5112960917","display_name":"Yuqi Zhu","orcid":"https://orcid.org/0009-0007-9910-3980"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhu, Yuqi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5069159185","display_name":"Yihong Dong","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dong, Yihong","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5049100391","display_name":"Zhi Jin","orcid":"https://orcid.org/0000-0003-1087-226X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jin, Zhi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5063590181","display_name":"Binhua Li","orcid":"https://orcid.org/0000-0002-4179-6979"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Binhua","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100620486","display_name":"Fei Huang","orcid":"https://orcid.org/0000-0001-9665-6642"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Huang, Fei","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5100644428","display_name":"Yongbin Li","orcid":"https://orcid.org/0009-0008-4504-2163"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Yongbin","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":18,"corresponding_author_ids":["https://openalex.org/A5102859755"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11450","display_name":"Model-Driven Software Engineering Techniques","score":0.9871000051498413,"subfield":{"id":"https://openalex.org/subfields/1712","display_name":"Software"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11450","display_name":"Model-Driven Software Engineering Techniques","score":0.9871000051498413,"subfield":{"id":"https://openalex.org/subfields/1712","display_name":"Software"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10743","display_name":"Software Testing and Debugging Techniques","score":0.9602000117301941,"subfield":{"id":"https://openalex.org/subfields/1712","display_name":"Software"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9524000287055969,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.7995396852493286},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.7450579404830933},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7153059244155884},{"id":"https://openalex.org/keywords/code-generation","display_name":"Code generation","score":0.5188487768173218},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.480551540851593},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.3640477657318115},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.23287254571914673},{"id":"https://openalex.org/keywords/cartography","display_name":"Cartography","score":0.10050886869430542},{"id":"https://openalex.org/keywords/geography","display_name":"Geography","score":0.09968680143356323},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.049196213483810425}],"concepts":[{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.7995396852493286},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.7450579404830933},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7153059244155884},{"id":"https://openalex.org/C133162039","wikidata":"https://www.wikidata.org/wiki/Q1061077","display_name":"Code generation","level":3,"score":0.5188487768173218},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.480551540851593},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.3640477657318115},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.23287254571914673},{"id":"https://openalex.org/C58640448","wikidata":"https://www.wikidata.org/wiki/Q42515","display_name":"Cartography","level":1,"score":0.10050886869430542},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.09968680143356323},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.049196213483810425},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2405.19856","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2405.19856","pdf_url":"https://arxiv.org/pdf/2405.19856","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550/arxiv.2405.19856","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2405.19856","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2405.19856","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2405.19856","pdf_url":"https://arxiv.org/pdf/2405.19856","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4399252447.pdf","grobid_xml":"https://content.openalex.org/works/W4399252447.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2378211422","https://openalex.org/W2745001401","https://openalex.org/W4321353415","https://openalex.org/W2130974462","https://openalex.org/W2028665553","https://openalex.org/W2086519370","https://openalex.org/W2163672025","https://openalex.org/W2258184894","https://openalex.org/W2048831961","https://openalex.org/W1606349578"],"abstract_inverted_index":{"How":[0],"to":[1,30],"evaluate":[2,31,113],"the":[3,32,39,143,166],"coding":[4,33,132],"abilities":[5,34,133],"of":[6,35,146,168],"Large":[7],"Language":[8],"Models":[9],"(LLMs)":[10],"remains":[11],"an":[12],"open":[13],"question.":[14],"We":[15,151,161],"find":[16],"that":[17],"existing":[18],"benchmarks":[19],"are":[20,28],"poorly":[21],"aligned":[22],"with":[23,56],"real-world":[24,57,135],"code":[25,63,110,136,172],"repositories":[26,58],"and":[27,65,75,85,112,157,176],"insufficient":[29],"LLMs.":[36],"To":[37],"address":[38],"knowledge":[40],"gap,":[41],"we":[42,107],"propose":[43,108],"a":[44],"new":[45],"benchmark":[46],"named":[47],"DevEval,":[48,106,174],"which":[49],"has":[50],"three":[51],"advances.":[52],"(1)":[53],"DevEval":[54,69,89,118,163],"aligns":[55],"in":[59,134,140,170],"multiple":[60],"dimensions,":[61],"e.g.,":[62],"distributions":[64],"dependency":[66],"distributions.":[67],"(2)":[68],"is":[70,148],"annotated":[71],"by":[72],"13":[73],"developers":[74],"contains":[76],"comprehensive":[77],"annotations":[78],"(e.g.,":[79,101,119],"requirements,":[80],"original":[81],"repositories,":[82,96],"reference":[83,86],"code,":[84],"dependencies).":[87],"(3)":[88],"comprises":[90],"1,874":[91],"testing":[92],"samples":[93],"from":[94],"117":[95],"covering":[97],"10":[98],"popular":[99,115],"domains":[100],"Internet,":[102],"Database).":[103],"Based":[104],"on":[105,117],"repository-level":[109],"generation":[111],"8":[114],"LLMs":[116,169],"gpt-4,":[120],"gpt-3.5,":[121],"StarCoder":[122],"2,":[123],"DeepSeek":[124],"Coder,":[125],"CodeLLaMa).":[126],"Our":[127],"experiments":[128],"reveal":[129],"these":[130],"LLMs'":[131,154,177],"repositories.":[137,173],"For":[138],"example,":[139],"our":[141],"experiments,":[142],"highest":[144],"Pass@1":[145],"gpt-4-turbo":[147],"only":[149],"53.04%.":[150],"also":[152],"analyze":[153],"failed":[155],"cases":[156],"summarize":[158],"their":[159],"shortcomings.":[160],"hope":[162],"can":[164],"facilitate":[165],"development":[167],"real":[171],"prompts,":[175],"predictions":[178],"have":[179],"been":[180],"released.":[181]},"counts_by_year":[],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2025-10-10T00:00:00"}
