{"id":"https://openalex.org/W6967077126","doi":"https://doi.org/10.48550/arxiv.2502.20897","title":"Beyond Demographics: Fine-tuning Large Language Models to Predict Individuals' Subjective Text Perceptions","display_name":"Beyond Demographics: Fine-tuning Large Language Models to Predict Individuals' Subjective Text Perceptions","publication_year":2025,"publication_date":"2025-02-28","ids":{"openalex":"https://openalex.org/W6967077126","doi":"https://doi.org/10.48550/arxiv.2502.20897"},"language":"en","primary_location":{"id":"doi:10.48550/arxiv.2502.20897","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2502.20897","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2502.20897","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Orlikowski, Matthias","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Orlikowski, Matthias","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Pei, Jiaxin","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Pei, Jiaxin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"R\u00f6ttger, Paul","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"R\u00f6ttger, Paul","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Cimiano, Philipp","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cimiano, Philipp","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Jurgens, David","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jurgens, David","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":null,"display_name":"Hovy, Dirk","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hovy, Dirk","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":6,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":true,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.2921999990940094,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.2921999990940094,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12380","display_name":"Authorship Attribution and Profiling","score":0.1273999959230423,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13910","display_name":"Computational and Text Analysis Methods","score":0.11810000240802765,"subfield":{"id":"https://openalex.org/subfields/3300","display_name":"General Social Sciences"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/nucleofection","display_name":"Nucleofection","score":0.421099990606308},{"id":"https://openalex.org/keywords/gestational-period","display_name":"Gestational period","score":0.34220001101493835},{"id":"https://openalex.org/keywords/tsg101","display_name":"TSG101","score":0.3246000111103058},{"id":"https://openalex.org/keywords/dysgeusia","display_name":"Dysgeusia","score":0.30219998955726624},{"id":"https://openalex.org/keywords/hyporeflexia","display_name":"Hyporeflexia","score":0.30219998955726624},{"id":"https://openalex.org/keywords/diafiltration","display_name":"Diafiltration","score":0.2897999882698059},{"id":"https://openalex.org/keywords/hemopericardium","display_name":"Hemopericardium","score":0.2815000116825104},{"id":"https://openalex.org/keywords/fusible-alloy","display_name":"Fusible alloy","score":0.2793999910354614}],"concepts":[{"id":"https://openalex.org/C144251240","wikidata":"https://www.wikidata.org/wiki/Q7068229","display_name":"Nucleofection","level":4,"score":0.421099990606308},{"id":"https://openalex.org/C2992336715","wikidata":"https://www.wikidata.org/wiki/Q63431143","display_name":"Gestational period","level":4,"score":0.34220001101493835},{"id":"https://openalex.org/C2778283623","wikidata":"https://www.wikidata.org/wiki/Q18032200","display_name":"TSG101","level":5,"score":0.3246000111103058},{"id":"https://openalex.org/C2777054765","wikidata":"https://www.wikidata.org/wiki/Q6402731","display_name":"Dysgeusia","level":3,"score":0.30219998955726624},{"id":"https://openalex.org/C2777158700","wikidata":"https://www.wikidata.org/wiki/Q1419356","display_name":"Hyporeflexia","level":3,"score":0.30219998955726624},{"id":"https://openalex.org/C18743360","wikidata":"https://www.wikidata.org/wiki/Q1208096","display_name":"Diafiltration","level":4,"score":0.2897999882698059},{"id":"https://openalex.org/C2777935831","wikidata":"https://www.wikidata.org/wiki/Q3144949","display_name":"Hemopericardium","level":3,"score":0.2815000116825104},{"id":"https://openalex.org/C133074676","wikidata":"https://www.wikidata.org/wiki/Q428729","display_name":"Fusible alloy","level":2,"score":0.2793999910354614},{"id":"https://openalex.org/C2781032047","wikidata":"https://www.wikidata.org/wiki/Q938793","display_name":"Articular cartilage damage","level":5,"score":0.2784000039100647},{"id":"https://openalex.org/C145741570","wikidata":"https://www.wikidata.org/wiki/Q7251534","display_name":"Proteogenomics","level":5,"score":0.2782999873161316},{"id":"https://openalex.org/C180938184","wikidata":"https://www.wikidata.org/wiki/Q2142270","display_name":"Liquation","level":3,"score":0.2761000096797943},{"id":"https://openalex.org/C104545631","wikidata":"https://www.wikidata.org/wiki/Q464858","display_name":"Demotion","level":3,"score":0.27090001106262207},{"id":"https://openalex.org/C135979968","wikidata":"https://www.wikidata.org/wiki/Q609809","display_name":"Protein isoform","level":5,"score":0.2689000070095062},{"id":"https://openalex.org/C2909186138","wikidata":"https://www.wikidata.org/wiki/Q1500373","display_name":"Hyperlactatemia","level":2,"score":0.2678999900817871},{"id":"https://openalex.org/C2779627259","wikidata":"https://www.wikidata.org/wiki/Q779763","display_name":"Pretext","level":3,"score":0.26510000228881836},{"id":"https://openalex.org/C2776781215","wikidata":"https://www.wikidata.org/wiki/Q83253","display_name":"Triacetin","level":2,"score":0.2574999928474426},{"id":"https://openalex.org/C2777742743","wikidata":"https://www.wikidata.org/wiki/Q19904005","display_name":"Durvalumab","level":5,"score":0.2524999976158142},{"id":"https://openalex.org/C2776356786","wikidata":"https://www.wikidata.org/wiki/Q1048573","display_name":"Tubulopathy","level":3,"score":0.250900000333786}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2502.20897","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2502.20897","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2502.20897","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2502.20897","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.791987419128418,"display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"People":[0],"naturally":[1],"vary":[2],"in":[3,83],"their":[4],"annotations":[5],"for":[6,129],"subjective":[7],"questions":[8],"and":[9,119,133],"some":[10],"of":[11,64,71,127],"this":[12,90],"variation":[13,132],"is":[14,93],"thought":[15],"to":[16,19,29,59,96],"be":[17,57,60],"due":[18,95],"the":[20,124],"person's":[21],"sociodemographic":[22,44,49,62,84,103,131],"characteristics.":[23],"LLMs":[24,55,128],"have":[25],"also":[26],"been":[27],"used":[28],"label":[30],"data,":[31],"but":[32,88],"recent":[33],"work":[34],"has":[35],"shown":[36],"that":[37,79,89,111],"models":[38,63,80,97,112],"perform":[39],"poorly":[40],"when":[41,86],"prompted":[42],"with":[43,74],"attributes,":[45],"suggesting":[46],"limited":[47],"inherent":[48],"knowledge.":[50],"Here,":[51],"we":[52,77],"ask":[53],"whether":[54],"can":[56],"trained":[58,87],"accurate":[61],"annotator":[65],"variation.":[66],"Using":[67],"a":[68],"curated":[69],"dataset":[70],"five":[72],"tasks":[73],"standardized":[75],"sociodemographics,":[76],"show":[78],"do":[81],"improve":[82],"prompting":[85],"performance":[91],"gain":[92],"largely":[94],"learning":[98],"annotator-specific":[99],"behaviour":[100],"rather":[101],"than":[102],"patterns.":[104],"Across":[105],"all":[106],"tasks,":[107],"our":[108],"results":[109],"suggest":[110],"learn":[113],"little":[114],"meaningful":[115],"connection":[116],"between":[117],"sociodemographics":[118],"annotation,":[120],"raising":[121],"doubts":[122],"about":[123],"current":[125],"use":[126],"simulating":[130],"behaviour.":[134]},"counts_by_year":[],"updated_date":"2025-11-06T06:51:31.235846","created_date":"2025-10-10T00:00:00"}
