{"id":"https://openalex.org/W4372266964","doi":"https://doi.org/10.1109/icassp49357.2023.10095464","title":"Lego-Features: Exporting Modular Encoder Features for Streaming and Deliberation ASR","display_name":"Lego-Features: Exporting Modular Encoder Features for Streaming and Deliberation ASR","publication_year":2023,"publication_date":"2023-05-05","ids":{"openalex":"https://openalex.org/W4372266964","doi":"https://doi.org/10.1109/icassp49357.2023.10095464"},"language":"en","primary_location":{"id":"doi:10.1109/icassp49357.2023.10095464","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/icassp49357.2023.10095464","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5008863473","display_name":"Rami Botros","orcid":null},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Rami Botros","raw_affiliation_strings":["Google LLC,USA","Google LLC, USA"],"affiliations":[{"raw_affiliation_string":"Google LLC,USA","institution_ids":["https://openalex.org/I1291425158"]},{"raw_affiliation_string":"Google LLC, USA","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5032640894","display_name":"Rohit Prabhavalkar","orcid":"https://orcid.org/0000-0001-5331-6058"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Rohit Prabhavalkar","raw_affiliation_strings":["Google LLC,USA","Google LLC, USA"],"affiliations":[{"raw_affiliation_string":"Google LLC,USA","institution_ids":["https://openalex.org/I1291425158"]},{"raw_affiliation_string":"Google LLC, USA","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5081836495","display_name":"Johan Schalkwyk","orcid":null},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Johan Schalkwyk","raw_affiliation_strings":["Google LLC,USA","Google LLC, USA"],"affiliations":[{"raw_affiliation_string":"Google LLC,USA","institution_ids":["https://openalex.org/I1291425158"]},{"raw_affiliation_string":"Google LLC, USA","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5068010225","display_name":"Ciprian Chelba","orcid":null},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ciprian Chelba","raw_affiliation_strings":["Google LLC,USA","Google LLC, USA"],"affiliations":[{"raw_affiliation_string":"Google LLC,USA","institution_ids":["https://openalex.org/I1291425158"]},{"raw_affiliation_string":"Google LLC, USA","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5070513394","display_name":"Tara N. Sainath","orcid":"https://orcid.org/0000-0002-4126-6556"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Tara N. Sainath","raw_affiliation_strings":["Google LLC,USA","Google LLC, USA"],"affiliations":[{"raw_affiliation_string":"Google LLC,USA","institution_ids":["https://openalex.org/I1291425158"]},{"raw_affiliation_string":"Google LLC, USA","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5110711540","display_name":"Fran\u00e7oise Beaufays","orcid":null},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Fran\u00e7oise Beaufays","raw_affiliation_strings":["Google LLC,USA","Google LLC, USA"],"affiliations":[{"raw_affiliation_string":"Google LLC,USA","institution_ids":["https://openalex.org/I1291425158"]},{"raw_affiliation_string":"Google LLC, USA","institution_ids":["https://openalex.org/I1291425158"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5008863473"],"corresponding_institution_ids":["https://openalex.org/I1291425158"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.04041467,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"abs 1211 3711","issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8340007662773132},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.7636359930038452},{"id":"https://openalex.org/keywords/modular-design","display_name":"Modular design","score":0.6977927684783936},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.5768078565597534},{"id":"https://openalex.org/keywords/codebook","display_name":"Codebook","score":0.5040918588638306},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.45777106285095215},{"id":"https://openalex.org/keywords/decoding-methods","display_name":"Decoding methods","score":0.4157561659812927},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.3760852515697479},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.2157098352909088},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.10247436165809631}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8340007662773132},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.7636359930038452},{"id":"https://openalex.org/C101468663","wikidata":"https://www.wikidata.org/wiki/Q1620158","display_name":"Modular design","level":2,"score":0.6977927684783936},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.5768078565597534},{"id":"https://openalex.org/C127759330","wikidata":"https://www.wikidata.org/wiki/Q637416","display_name":"Codebook","level":2,"score":0.5040918588638306},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.45777106285095215},{"id":"https://openalex.org/C57273362","wikidata":"https://www.wikidata.org/wiki/Q576722","display_name":"Decoding methods","level":2,"score":0.4157561659812927},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.3760852515697479},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.2157098352909088},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.10247436165809631},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp49357.2023.10095464","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/icassp49357.2023.10095464","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.550000011920929,"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":42,"referenced_works":["https://openalex.org/W1828163288","https://openalex.org/W1855892484","https://openalex.org/W2033256038","https://openalex.org/W2121879602","https://openalex.org/W2127141656","https://openalex.org/W2515439472","https://openalex.org/W2526425061","https://openalex.org/W2617258110","https://openalex.org/W2750499125","https://openalex.org/W2936774411","https://openalex.org/W2962760690","https://openalex.org/W2962824709","https://openalex.org/W2964172053","https://openalex.org/W2989224798","https://openalex.org/W3007227084","https://openalex.org/W3008181812","https://openalex.org/W3011339933","https://openalex.org/W3015194534","https://openalex.org/W3015489952","https://openalex.org/W3028545098","https://openalex.org/W3097777922","https://openalex.org/W3160766462","https://openalex.org/W3169714379","https://openalex.org/W3176891424","https://openalex.org/W3197976839","https://openalex.org/W3198442913","https://openalex.org/W4223950106","https://openalex.org/W4224919446","https://openalex.org/W4225307083","https://openalex.org/W4225529283","https://openalex.org/W4300980406","https://openalex.org/W4384519051","https://openalex.org/W4386566728","https://openalex.org/W6638749077","https://openalex.org/W6639156005","https://openalex.org/W6754400763","https://openalex.org/W6770250107","https://openalex.org/W6839049670","https://openalex.org/W6841262374","https://openalex.org/W6844697645","https://openalex.org/W6844715639","https://openalex.org/W6846002384"],"related_works":["https://openalex.org/W2293149949","https://openalex.org/W2026099691","https://openalex.org/W4284672201","https://openalex.org/W2377486419","https://openalex.org/W2943202426","https://openalex.org/W2163679795","https://openalex.org/W2736714427","https://openalex.org/W2137816434","https://openalex.org/W2017956276","https://openalex.org/W2048606991"],"abstract_inverted_index":{"In":[0,137],"end-to-end":[1],"(E2E)":[2],"speech":[3,58],"recognition":[4],"models,":[5,59],"a":[6,46,65,176],"representational":[7],"tight-coupling":[8],"inevitably":[9],"emerges":[10],"between":[11],"the":[12,15,62,89,97,109,131,142,158,163,183,190],"encoder":[13,185],"and":[14,36,179],"decoder.":[16],"We":[17],"build":[18],"upon":[19],"recent":[20],"work":[21],"that":[22,34,108],"has":[23],"begun":[24],"to":[25,81,129,150,156,182],"explore":[26,61],"building":[27],"encoders":[28,35],"with":[29,101,115,153],"modular":[30,82],"encoded":[31,77],"representations,":[32,78],"such":[33],"decoders":[37],"from":[38],"different":[39],"models":[40],"can":[41],"be":[42,151],"stitched":[43],"together":[44],"in":[45,64],"zero-shot":[47],"manner":[48],"without":[49,87],"further":[50],"fine-tuning.":[51],"While":[52],"previous":[53],"research":[54],"only":[55],"addresses":[56],"full-context":[57],"we":[60,106],"problem":[63],"streaming":[66],"setting":[67],"as":[68,85,187,189],"well.":[69],"Our":[70],"framework":[71],"builds":[72],"on":[73],"top":[74],"of":[75],"existing":[76],"converting":[79],"them":[80],"features,":[83],"dubbed":[84],"Lego-Features,":[86],"modifying":[88],"pre-trained":[90],"model.":[91],"The":[92],"features":[93,155],"remain":[94],"interchangeable":[95],"when":[96,113],"model":[98],"is":[99],"retrained":[100],"distinct":[102],"initializations.":[103],"Though":[104],"sparse,":[105],"show":[107],"Lego-Features":[110,164],"are":[111,125],"powerful":[112,178],"tested":[114],"RNN-T":[116],"or":[117,170],"LAS":[118],"decoders,":[119],"maintaining":[120],"high-quality":[121],"downstream":[122],"performance.":[123],"They":[124],"also":[126],"rich":[127],"enough":[128],"represent":[130],"first-pass":[132],"prediction":[133],"during":[134],"two-pass":[135],"deliberation.":[136],"this":[138],"scenario,":[139],"they":[140,146,174],"outperform":[141],"N-best":[143,191],"hypotheses,":[144],"since":[145],"do":[147],"not":[148,166],"need":[149],"supplemented":[152],"acoustic":[154],"deliver":[157],"best":[159],"results.":[160],"Moreover,":[161],"generating":[162],"does":[165],"require":[167],"beam":[168],"search":[169],"auto-regressive":[171],"computation.":[172],"Overall,":[173],"present":[175],"modular,":[177],"cheap":[180],"alternative":[181],"standard":[184],"output,":[186],"well":[188],"hypotheses.":[192]},"counts_by_year":[],"updated_date":"2025-12-24T23:09:58.560324","created_date":"2025-10-10T00:00:00"}
