{"id":"https://openalex.org/W4412377016","doi":"https://doi.org/10.1145/3726302.3730064","title":"ProtChatGPT: Towards Understanding Proteins with Hybrid Representation and Large Language Models","display_name":"ProtChatGPT: Towards Understanding Proteins with Hybrid Representation and Large Language Models","publication_year":2025,"publication_date":"2025-07-13","ids":{"openalex":"https://openalex.org/W4412377016","doi":"https://doi.org/10.1145/3726302.3730064"},"language":"en","primary_location":{"id":"doi:10.1145/3726302.3730064","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3726302.3730064","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3726302.3730064","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 48th International ACM SIGIR Conference on Research and Development in Information Retrieval","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3726302.3730064","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5115605144","display_name":"Chao Wang","orcid":"https://orcid.org/0000-0003-1297-768X"},"institutions":[{"id":"https://openalex.org/I114017466","display_name":"University of Technology Sydney","ror":"https://ror.org/03f0f6041","country_code":"AU","type":"education","lineage":["https://openalex.org/I114017466"]},{"id":"https://openalex.org/I42894916","display_name":"Data61","ror":"https://ror.org/03q397159","country_code":"AU","type":"other","lineage":["https://openalex.org/I1292875679","https://openalex.org/I2801453606","https://openalex.org/I42894916","https://openalex.org/I4387156119"]}],"countries":["AU"],"is_corresponding":true,"raw_author_name":"Chao Wang","raw_affiliation_strings":["CSIRO's Data 61, Sydney, NSW, Australia and The University of Technology Sydney, Sydney, NSW, Australia"],"raw_orcid":"https://orcid.org/0000-0003-1297-768X","affiliations":[{"raw_affiliation_string":"CSIRO's Data 61, Sydney, NSW, Australia and The University of Technology Sydney, Sydney, NSW, Australia","institution_ids":["https://openalex.org/I114017466","https://openalex.org/I42894916"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5002207978","display_name":"Hehe Fan","orcid":"https://orcid.org/0000-0001-9572-2345"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hehe Fan","raw_affiliation_strings":["Zhejiang University, Hangzhou, Zhejiang, China"],"raw_orcid":"https://orcid.org/0000-0001-9572-2345","affiliations":[{"raw_affiliation_string":"Zhejiang University, Hangzhou, Zhejiang, China","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5090036167","display_name":"Ruijie Quan","orcid":"https://orcid.org/0000-0003-4077-1398"},"institutions":[{"id":"https://openalex.org/I172675005","display_name":"Nanyang Technological University","ror":"https://ror.org/02e7b5302","country_code":"SG","type":"education","lineage":["https://openalex.org/I172675005"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Ruijie Quan","raw_affiliation_strings":["Nanyang Technological University, Singapore, Singapore"],"raw_orcid":"https://orcid.org/0000-0003-4077-1398","affiliations":[{"raw_affiliation_string":"Nanyang Technological University, Singapore, Singapore","institution_ids":["https://openalex.org/I172675005"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5052731721","display_name":"Lina Yao","orcid":"https://orcid.org/0000-0002-4149-839X"},"institutions":[{"id":"https://openalex.org/I31746571","display_name":"UNSW Sydney","ror":"https://ror.org/03r8z3t63","country_code":"AU","type":"education","lineage":["https://openalex.org/I31746571"]},{"id":"https://openalex.org/I42894916","display_name":"Data61","ror":"https://ror.org/03q397159","country_code":"AU","type":"other","lineage":["https://openalex.org/I1292875679","https://openalex.org/I2801453606","https://openalex.org/I42894916","https://openalex.org/I4387156119"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Lina Yao","raw_affiliation_strings":["CSIRO's Data 61, Sydney, NSW, Australia and The University of New South Wales, Sydney, NSW, Australia"],"raw_orcid":"https://orcid.org/0000-0002-4149-839X","affiliations":[{"raw_affiliation_string":"CSIRO's Data 61, Sydney, NSW, Australia and The University of New South Wales, Sydney, NSW, Australia","institution_ids":["https://openalex.org/I31746571","https://openalex.org/I42894916"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5005421447","display_name":"Yi Yang","orcid":"https://orcid.org/0000-0002-0512-880X"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yi Yang","raw_affiliation_strings":["Zhejiang University, Hangzhou, Zhejiang, China"],"raw_orcid":"https://orcid.org/0000-0002-0512-880X","affiliations":[{"raw_affiliation_string":"Zhejiang University, Hangzhou, Zhejiang, China","institution_ids":["https://openalex.org/I76130692"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5115605144"],"corresponding_institution_ids":["https://openalex.org/I114017466","https://openalex.org/I42894916"],"apc_list":null,"apc_paid":null,"fwci":2.1341,"has_fulltext":true,"cited_by_count":3,"citation_normalized_percentile":{"value":0.87503547,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1076","last_page":"1086"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12254","display_name":"Machine Learning in Bioinformatics","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T12254","display_name":"Machine Learning in Bioinformatics","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10887","display_name":"Bioinformatics and Genomic Networks","score":0.9973000288009644,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10044","display_name":"Protein Structure and Dynamics","score":0.9966999888420105,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.718623697757721},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.609420895576477},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4135185480117798},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3705952763557434},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.3696363568305969},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.3341997265815735}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.718623697757721},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.609420895576477},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4135185480117798},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3705952763557434},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.3696363568305969},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.3341997265815735},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3726302.3730064","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3726302.3730064","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3726302.3730064","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 48th International ACM SIGIR Conference on Research and Development in Information Retrieval","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3726302.3730064","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3726302.3730064","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3726302.3730064","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 48th International ACM SIGIR Conference on Research and Development in Information Retrieval","raw_type":"proceedings-article"},"sustainable_development_goals":[{"score":0.41999998688697815,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[{"id":"https://openalex.org/G4572344450","display_name":null,"funder_award_id":"2023ZD0120801","funder_id":"https://openalex.org/F4320323817","funder_display_name":"Universitas Brawijaya"},{"id":"https://openalex.org/G5347371045","display_name":null,"funder_award_id":"62472381","funder_id":"https://openalex.org/F4320323817","funder_display_name":"Universitas Brawijaya"}],"funders":[{"id":"https://openalex.org/F4320320386","display_name":"Commonwealth Scientific and Industrial Research Organisation","ror":"https://ror.org/03qn8fb07"},{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320322927","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884"},{"id":"https://openalex.org/F4320323817","display_name":"Universitas Brawijaya","ror":"https://ror.org/01wk3d929"},{"id":"https://openalex.org/F4320329860","display_name":"National Science and Technology Major Project","ror":null}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4412377016.pdf","grobid_xml":"https://content.openalex.org/works/W4412377016.grobid-xml"},"referenced_works_count":14,"referenced_works":["https://openalex.org/W569478347","https://openalex.org/W2950374603","https://openalex.org/W3046375318","https://openalex.org/W3111174583","https://openalex.org/W3146944767","https://openalex.org/W3177828909","https://openalex.org/W3179485843","https://openalex.org/W3211795435","https://openalex.org/W4223581484","https://openalex.org/W4225323055","https://openalex.org/W4327550249","https://openalex.org/W4386066073","https://openalex.org/W4402774722","https://openalex.org/W4404587505"],"related_works":["https://openalex.org/W2062195135","https://openalex.org/W2795079307","https://openalex.org/W2793058541","https://openalex.org/W1983629434","https://openalex.org/W2055929693","https://openalex.org/W4324271173","https://openalex.org/W2352227742","https://openalex.org/W4390679071","https://openalex.org/W1967645776","https://openalex.org/W3204019825"],"abstract_inverted_index":{"Protein":[0,113],"research":[1,40],"is":[2],"crucial":[3],"in":[4,18,38,72,173],"various":[5],"scientific":[6],"disciplines,":[7],"but":[8],"understanding":[9],"their":[10],"intricate":[11],"structure-function":[12],"relationships":[13],"remains":[14],"challenging.":[15],"Recent":[16],"advancements":[17],"Large":[19],"Language":[20],"Models":[21],"(LLMs)":[22],"have":[23],"significantly":[24],"improved":[25],"the":[26,32,129,156,166],"comprehension":[27],"of":[28,90],"task-specific":[29],"knowledge,":[30],"suggesting":[31],"potential":[33],"for":[34,168],"specialized":[35],"ChatGPT-like":[36],"systems":[37],"protein":[39,57,83,93,97,105,139,174],"to":[41,53,65,75,101,126,141,153],"aid":[42],"fundamental":[43],"investigations.":[44],"In":[45],"this":[46],"work,":[47],"we":[48],"introduce":[49],"ProtChatGPT,":[50],"which":[51,107],"aims":[52],"learn":[54],"and":[55,70,87,99,121,155,171,177],"understand":[56],"structures":[58],"using":[59],"natural":[60],"language.":[61],"ProtChatGPT":[62,148,163],"enables":[63],"users":[64],"upload":[66],"proteins,":[67],"ask":[68],"questions,":[69],"engage":[71],"interactive":[73],"conversations":[74],"produce":[76,102,150],"comprehensive":[77],"answers.":[78,144],"The":[79,131],"system":[80],"comprises":[81],"multi-level":[82,103],"encoding,":[84],"protein-language":[85],"alignment,":[86],"instruction":[88],"tuning":[89],"LLMs.":[91],"A":[92],"first":[94],"undergoes":[95],"multiple":[96],"encoders":[98],"PLP-former":[100],"hybrid":[104],"embeddings,":[106],"are":[108],"then":[109],"aligned":[110],"through":[111],"a":[112],"Context":[114],"Gating":[115],"(PCG)":[116],"module":[117],"with":[118,128,137],"contrastive":[119],"learning,":[120],"projected":[122,138],"by":[123],"an":[124],"adapter":[125],"conform":[127],"LLM.":[130],"LLM":[132],"finally":[133],"combines":[134],"user":[135,158],"questions":[136],"embeddings":[140],"generate":[142],"informative":[143],"Experiments":[145],"show":[146],"that":[147,162],"can":[149],"promising":[151],"responses":[152],"proteins":[154],"corresponding":[157],"questions.":[159],"We":[160],"hope":[161],"could":[164],"form":[165],"basis":[167],"further":[169],"exploration":[170],"application":[172],"research.":[175],"Code":[176],"our":[178],"pre-trained":[179],"model":[180],"will":[181],"be":[182],"publicly":[183],"available.":[184]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":1}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
