{"id":"https://openalex.org/W4392866428","doi":"https://doi.org/10.48550/arxiv.2403.07969","title":"KnowCoder: Coding Structured Knowledge into LLMs for Universal Information Extraction","display_name":"KnowCoder: Coding Structured Knowledge into LLMs for Universal Information Extraction","publication_year":2024,"publication_date":"2024-03-12","ids":{"openalex":"https://openalex.org/W4392866428","doi":"https://doi.org/10.48550/arxiv.2403.07969"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2403.07969","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2403.07969","pdf_url":"https://arxiv.org/pdf/2403.07969","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":null},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2403.07969","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100674838","display_name":"Zixuan Li","orcid":"https://orcid.org/0009-0005-4713-3032"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Li, Zixuan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5003892271","display_name":"Yutao Zeng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zeng, Yutao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113144147","display_name":"Yuxin Zuo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zuo, Yuxin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108830527","display_name":"Weicheng Ren","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ren, Weicheng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101564554","display_name":"Wenxuan Liu","orcid":"https://orcid.org/0000-0002-1528-4224"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Wenxuan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5037235882","display_name":"Miao Su","orcid":"https://orcid.org/0000-0002-5646-8484"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Su, Miao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5111268903","display_name":"Yucan Guo","orcid":"https://orcid.org/0009-0007-0125-4490"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Guo, Yucan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101477342","display_name":"Yantao Liu","orcid":"https://orcid.org/0000-0003-1796-3113"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Yantao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100331094","display_name":"Xiang Li","orcid":"https://orcid.org/0000-0002-9851-6376"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Xiang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5111148219","display_name":"Zhilei Hu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hu, Zhilei","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101434323","display_name":"Long Bai","orcid":"https://orcid.org/0000-0003-2671-3298"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bai, Long","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100318133","display_name":"Wei Li","orcid":"https://orcid.org/0000-0002-2163-7903"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Wei","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101888938","display_name":"Yidan Liu","orcid":"https://orcid.org/0009-0003-8444-488X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Yidan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100570967","display_name":"Pan Yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Pan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102843658","display_name":"Xiaolong Jin","orcid":"https://orcid.org/0000-0003-3244-3941"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jin, Xiaolong","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5109736354","display_name":"Jiafeng Guo","orcid":"https://orcid.org/0000-0002-2793-3893"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Guo, Jiafeng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5029998682","display_name":"Xueqi Cheng","orcid":"https://orcid.org/0000-0002-5201-8195"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cheng, Xueqi","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":17,"corresponding_author_ids":["https://openalex.org/A5100674838"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":true,"cited_by_count":5,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.907800018787384,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.907800018787384,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/coding","display_name":"Coding (social sciences)","score":0.6515220403671265},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.3519655466079712},{"id":"https://openalex.org/keywords/sociology","display_name":"Sociology","score":0.1253213882446289},{"id":"https://openalex.org/keywords/social-science","display_name":"Social science","score":0.09733575582504272}],"concepts":[{"id":"https://openalex.org/C179518139","wikidata":"https://www.wikidata.org/wiki/Q5140297","display_name":"Coding (social sciences)","level":2,"score":0.6515220403671265},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.3519655466079712},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.1253213882446289},{"id":"https://openalex.org/C36289849","wikidata":"https://www.wikidata.org/wiki/Q34749","display_name":"Social science","level":1,"score":0.09733575582504272}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2403.07969","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2403.07969","pdf_url":"https://arxiv.org/pdf/2403.07969","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":null},{"id":"doi:10.48550/arxiv.2403.07969","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2403.07969","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2403.07969","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2403.07969","pdf_url":"https://arxiv.org/pdf/2403.07969","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":null},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4392866428.pdf","grobid_xml":"https://content.openalex.org/works/W4392866428.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W2358668433","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W2382290278","https://openalex.org/W2478288626","https://openalex.org/W4391913857","https://openalex.org/W2350741829","https://openalex.org/W2530322880"],"abstract_inverted_index":{"In":[0],"this":[1],"paper,":[2],"we":[3],"propose":[4],"KnowCoder,":[5,222],"a":[6,24,56,91,123],"Large":[7],"Language":[8],"Model":[9],"(LLM)":[10],"to":[11,22,43,61,108,167,188,193,220,228],"conduct":[12],"Universal":[13],"Information":[14],"Extraction":[15],"(UIE)":[16],"via":[17,133,141],"code":[18,134,145],"generation.":[19],"KnowCoder":[20,54,121,153,176],"aims":[21],"develop":[23],"kind":[25],"of":[26,99,111,119],"unified":[27,210],"schema":[28,58,72,93,130,138,211],"representation":[29,59],"that":[30,40,127],"LLMs":[31,42],"can":[32,81,216],"easily":[33],"understand":[34],"and":[35,46,136,159,185,190,200],"an":[36,85],"effective":[37],"learning":[38,117,125],"framework":[39,126],"encourages":[41],"follow":[44],"schemas":[45,65,184],"extract":[47],"structured":[48],"knowledge":[49],"accurately.":[50],"To":[51,114],"achieve":[52],"these,":[53],"introduces":[55],"code-style":[57,92],"method":[60],"uniformly":[62],"transform":[63],"different":[64],"into":[66],"Python":[67],"classes,":[68],"with":[69],"which":[70,101,223],"complex":[71],"information,":[73],"such":[74],"as":[75],"constraints":[76],"among":[77],"tasks":[78],"in":[79,84],"UIE,":[80,107],"be":[82,218],"captured":[83],"LLM-friendly":[86],"manner.":[87],"We":[88],"further":[89,177],"construct":[90],"library":[94],"covering":[95],"over":[96],"$\\textbf{30,000}$":[97],"types":[98],"knowledge,":[100],"is":[102],"the":[103,109,116,170,197,201,231],"largest":[104],"one":[105],"for":[106],"best":[110],"our":[112,209],"knowledge.":[113],"ease":[115],"process":[118],"LLMs,":[120],"contains":[122],"two-phase":[124],"enhances":[128],"its":[129,137],"understanding":[131],"ability":[132,140,158,181],"pretraining":[135,146],"following":[139],"instruction":[142,174],"tuning.":[143],"After":[144,173],"on":[147,182,208],"around":[148],"$1.5$B":[149],"automatically":[150],"constructed":[151],"data,":[152],"already":[154],"attains":[155],"remarkable":[156],"generalization":[157,180],"achieves":[160,186,224],"relative":[161],"improvements":[162,226],"by":[163],"$\\textbf{49.8%}$":[164],"F1,":[165],"compared":[166,192],"LLaMA2,":[168],"under":[169,196,230],"few-shot":[171],"setting.":[172,233],"tuning,":[175],"exhibits":[178],"strong":[179],"unseen":[183],"up":[187,227],"$\\textbf{12.5%}$":[189],"$\\textbf{21.9%}$,":[191],"sota":[194],"baselines,":[195],"zero-shot":[198],"setting":[199],"low":[202],"resource":[203],"setting,":[204],"respectively.":[205],"Additionally,":[206],"based":[207],"representations,":[212],"various":[213],"human-annotated":[214],"datasets":[215],"simultaneously":[217],"utilized":[219],"refine":[221],"significant":[225],"$\\textbf{7.5%}$":[229],"supervised":[232]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":2}],"updated_date":"2026-03-10T16:38:18.471706","created_date":"2025-10-10T00:00:00"}
