{"id":"https://openalex.org/W4315629708","doi":"https://doi.org/10.1109/globecom48099.2022.10001043","title":"SemAudio: Semantic-Aware Streaming Communications for Real-Time Audio Transmission","display_name":"SemAudio: Semantic-Aware Streaming Communications for Real-Time Audio Transmission","publication_year":2022,"publication_date":"2022-12-04","ids":{"openalex":"https://openalex.org/W4315629708","doi":"https://doi.org/10.1109/globecom48099.2022.10001043"},"language":"en","primary_location":{"id":"doi:10.1109/globecom48099.2022.10001043","is_oa":false,"landing_page_url":"https://doi.org/10.1109/globecom48099.2022.10001043","pdf_url":null,"source":{"id":"https://openalex.org/S4363607705","display_name":"GLOBECOM 2022 - 2022 IEEE Global Communications Conference","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"GLOBECOM 2022 - 2022 IEEE Global Communications Conference","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100448242","display_name":"Hao Wei","orcid":"https://orcid.org/0000-0002-0037-4155"},"institutions":[{"id":"https://openalex.org/I139759216","display_name":"Beijing University of Posts and Telecommunications","ror":"https://ror.org/04w9fbh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I139759216"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Hao Wei","raw_affiliation_strings":["Beijing Univ. of Posts and Telecommun.,Key Lab. of Universal Wireless Commun.,Beijing,China,100876"],"affiliations":[{"raw_affiliation_string":"Beijing Univ. of Posts and Telecommun.,Key Lab. of Universal Wireless Commun.,Beijing,China,100876","institution_ids":["https://openalex.org/I139759216"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5059494893","display_name":"Wenjun Xu","orcid":"https://orcid.org/0000-0001-8767-4742"},"institutions":[{"id":"https://openalex.org/I139759216","display_name":"Beijing University of Posts and Telecommunications","ror":"https://ror.org/04w9fbh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I139759216"]},{"id":"https://openalex.org/I4210136793","display_name":"Peng Cheng Laboratory","ror":"https://ror.org/03qdqbt06","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210136793"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wenjun Xu","raw_affiliation_strings":["Beijing Univ. of Posts and Telecommun.,Key Lab. of Universal Wireless Commun.,Beijing,China,100876","Department of Mathematics and Theories, Peng Cheng Lab., Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Beijing Univ. of Posts and Telecommun.,Key Lab. of Universal Wireless Commun.,Beijing,China,100876","institution_ids":["https://openalex.org/I139759216"]},{"raw_affiliation_string":"Department of Mathematics and Theories, Peng Cheng Lab., Shenzhen, China","institution_ids":["https://openalex.org/I4210136793"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5080094775","display_name":"Fengyu Wang","orcid":"https://orcid.org/0000-0002-5485-9414"},"institutions":[{"id":"https://openalex.org/I139759216","display_name":"Beijing University of Posts and Telecommunications","ror":"https://ror.org/04w9fbh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I139759216"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Fengyu Wang","raw_affiliation_strings":["School of Artificial Intelligence, Beijing Univ. of Posts and Telecommun.,Beijing,China,100876"],"affiliations":[{"raw_affiliation_string":"School of Artificial Intelligence, Beijing Univ. of Posts and Telecommun.,Beijing,China,100876","institution_ids":["https://openalex.org/I139759216"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5018243964","display_name":"Xin Du","orcid":"https://orcid.org/0000-0003-0356-623X"},"institutions":[{"id":"https://openalex.org/I43439940","display_name":"University of Southampton","ror":"https://ror.org/01ryk1543","country_code":"GB","type":"education","lineage":["https://openalex.org/I43439940"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Xin Du","raw_affiliation_strings":["School of Electronics and Computer Science, University of Southampton,Southampton,UK,SO17 3AS"],"affiliations":[{"raw_affiliation_string":"School of Electronics and Computer Science, University of Southampton,Southampton,UK,SO17 3AS","institution_ids":["https://openalex.org/I43439940"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5032943024","display_name":"Tiankui Zhang","orcid":"https://orcid.org/0000-0002-6953-847X"},"institutions":[{"id":"https://openalex.org/I139759216","display_name":"Beijing University of Posts and Telecommunications","ror":"https://ror.org/04w9fbh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I139759216"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Tiankui Zhang","raw_affiliation_strings":["Beijing University of Posts and Telecommunications,Beijing,China,100876"],"affiliations":[{"raw_affiliation_string":"Beijing University of Posts and Telecommunications,Beijing,China,100876","institution_ids":["https://openalex.org/I139759216"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100405787","display_name":"Ping Zhang","orcid":"https://orcid.org/0000-0002-0269-104X"},"institutions":[{"id":"https://openalex.org/I4210136793","display_name":"Peng Cheng Laboratory","ror":"https://ror.org/03qdqbt06","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210136793"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ping Zhang","raw_affiliation_strings":["Peng Cheng Lab.,Department of Mathematics and Theories,Shenzhen,China,518066"],"affiliations":[{"raw_affiliation_string":"Peng Cheng Lab.,Department of Mathematics and Theories,Shenzhen,China,518066","institution_ids":["https://openalex.org/I4210136793"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5100448242"],"corresponding_institution_ids":["https://openalex.org/I139759216"],"apc_list":null,"apc_paid":null,"fwci":2.0864,"has_fulltext":false,"cited_by_count":17,"citation_normalized_percentile":{"value":0.9014218,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"3965","last_page":"3970"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12357","display_name":"Digital Media Forensic Detection","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8819645643234253},{"id":"https://openalex.org/keywords/latency","display_name":"Latency (audio)","score":0.645379900932312},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.5653645992279053},{"id":"https://openalex.org/keywords/real-time-computing","display_name":"Real-time computing","score":0.5565621852874756},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.4424445331096649},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.43127936124801636},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3309051990509033},{"id":"https://openalex.org/keywords/telecommunications","display_name":"Telecommunications","score":0.10998952388763428}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8819645643234253},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.645379900932312},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.5653645992279053},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.5565621852874756},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.4424445331096649},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.43127936124801636},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3309051990509033},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.10998952388763428},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/globecom48099.2022.10001043","is_oa":false,"landing_page_url":"https://doi.org/10.1109/globecom48099.2022.10001043","pdf_url":null,"source":{"id":"https://openalex.org/S4363607705","display_name":"GLOBECOM 2022 - 2022 IEEE Global Communications Conference","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"GLOBECOM 2022 - 2022 IEEE Global Communications Conference","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":16,"referenced_works":["https://openalex.org/W2127851351","https://openalex.org/W2141998673","https://openalex.org/W2943554574","https://openalex.org/W2963045393","https://openalex.org/W2964110616","https://openalex.org/W3015671919","https://openalex.org/W3015974384","https://openalex.org/W3036851434","https://openalex.org/W3113050802","https://openalex.org/W3166791908","https://openalex.org/W3213872105","https://openalex.org/W4210624746","https://openalex.org/W4226150551","https://openalex.org/W4285047747","https://openalex.org/W4385245566","https://openalex.org/W6810966997"],"related_works":["https://openalex.org/W4390516098","https://openalex.org/W2181948922","https://openalex.org/W2384362569","https://openalex.org/W2142795561","https://openalex.org/W4205302943","https://openalex.org/W2561132942","https://openalex.org/W3155418658","https://openalex.org/W4243199227","https://openalex.org/W2954284861","https://openalex.org/W3036465205"],"abstract_inverted_index":{"Deep":[0],"learning":[1],"(DL)":[2],"enabled":[3],"semantic":[4,20,40,51,98,113],"communications":[5],"have":[6],"been":[7],"developed":[8],"to":[9,62,65,80,105,159],"improve":[10],"the":[11,19,36,50,54,59,70,86,107,112,122,133,147,151,155],"offline":[12],"communication":[13,41],"efficiently":[14],"and":[15,89,96,110,114,128,157,171],"intelligently":[16],"by":[17,120],"exploring":[18],"information,":[21],"while":[22],"constraining":[23],"their":[24],"applications":[25],"in":[26,125],"real-time":[27,44,82,101,161,179],"online":[28],"scenarios.":[29],"In":[30],"this":[31],"work,":[32],"we":[33],"propose":[34],"SemAudio,":[35],"first":[37],"DL-based":[38],"streaming":[39],"system":[42,71],"for":[43,178],"audio":[45,55,102,180],"processing.":[46],"To":[47],"better":[48],"extract":[49,97],"features":[52,99],"of":[53],"signal,":[56],"SemAudio":[57,92,145,153],"employs":[58],"Transformer-XL":[60,88],"due":[61],"its":[63],"potential":[64],"capture":[66],"long-distance":[67],"dependency.":[68],"Moreover,":[69],"works":[72],"based":[73],"on":[74],"a":[75],"chunk-based":[76],"mask":[77],"attention":[78],"strategy":[79],"enable":[81],"streaming.":[83],"By":[84],"incorporating":[85],"novel":[87],"chunk-wise":[90],"approach,":[91],"can":[93],"effectively":[94],"learn":[95],"from":[100],"data.":[103],"Furthermore,":[104],"alleviate":[106],"channel":[108,115,176],"distortion":[109],"attenuation,":[111],"encoder/decoder":[116],"are":[117],"jointly":[118],"designed":[119],"minimizing":[121],"mean":[123],"error":[124],"both":[126],"time":[127,135],"frequency":[129],"domains":[130],"rather":[131],"than":[132],"merely":[134],"domain.":[136],"The":[137],"extensive":[138],"experimental":[139],"results":[140],"suggest":[141],"that":[142],"our":[143],"proposed":[144,152],"outperforms":[146],"traditional":[148],"communications.":[149],"Besides,":[150],"compromises":[154],"quality":[156],"latency":[158,173],"meet":[160],"requirements,":[162],"which":[163],"obtains":[164],"satisfactory":[165],"performance":[166],"with":[167],"significantly":[168],"higher":[169],"accuracy":[170],"lower":[172],"under":[174],"multiple":[175],"conditions":[177],"communication.":[181]},"counts_by_year":[{"year":2025,"cited_by_count":10},{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":4}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
