{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,19]],"date-time":"2026-03-19T08:01:40Z","timestamp":1773907300709,"version":"3.50.1"},"reference-count":43,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"1","license":[{"start":{"date-parts":[[2013,2,1]],"date-time":"2013-02-01T00:00:00Z","timestamp":1359676800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE J. Sel. Top. Signal Process."],"published-print":{"date-parts":[[2013,2]]},"DOI":"10.1109\/jstsp.2012.2237379","type":"journal-article","created":{"date-parts":[[2013,1,1]],"date-time":"2013-01-01T19:02:48Z","timestamp":1357066968000},"page":"147-156","source":"Crossref","is-referenced-by-count":21,"title":["Audiovisual Voice Activity Detection Based on Microphone Arrays and Color Information"],"prefix":"10.1109","volume":"7","author":[{"given":"Vicente P.","family":"Minotto","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Carlos B. O.","family":"Lopes","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jacob","family":"Scharcanski","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Claudio R.","family":"Jung","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Bowon","family":"Lee","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref39","first-page":"185","author":"platt","year":"1999","journal-title":"Advances in Kernel Methods"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1145\/279943.279985"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1145\/1656274.1656278"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1007\/s00530-010-0182-0"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/5.18626"},{"key":"ref30","first-page":"2005","article-title":"Illuminant and device invariant colour using histogram equalisation","volume":"38","author":"hordley","year":"2005","journal-title":"Pattern Recognit"},{"key":"ref37","author":"russell","year":"2003","journal-title":"Artificial Intelligence A Modern Approach"},{"key":"ref36","first-page":"124","article-title":"The alternating decision tree learning algorithm","author":"freund","year":"1999","journal-title":"Proc 16th Int Conf Mach Learn"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1023\/B:MACH.0000027782.67192.13"},{"key":"ref34","author":"quinlan","year":"1993","journal-title":"C4 5 Programs for Machine Learning"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/34.990138"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1145\/1273496.1273598"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1002\/0470854774"},{"key":"ref12","doi-asserted-by":"crossref","first-page":"62","DOI":"10.1109\/TSMC.1979.4310076","article-title":"A threshold selection method from gray-level histograms","volume":"9","author":"otsu","year":"1979","journal-title":"IEEE Trans Syst Man Cybern"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2006.1660092"},{"key":"ref14","first-page":"2409","article-title":"Two novel visual voice activity detectors based on appearance models and retinal filtering","volume":"1","author":"aubrey","year":"2007","journal-title":"Proc Eur Signal Process Conf"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1145\/1291233.1291371"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/97.736233"},{"key":"ref17","doi-asserted-by":"crossref","first-page":"3106","DOI":"10.21437\/Interspeech.2010-773","article-title":"Spectral entropy-based voice activity detector for videoconferencing systems","author":"lee","year":"2010","journal-title":"Proc INTERSPEECH"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/TSA.2003.815518"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1007\/978-0-387-79582-9_18"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/ISM.2009.33"},{"key":"ref4","doi-asserted-by":"crossref","first-page":"151","DOI":"10.1016\/S0165-1684(98)00230-8","article-title":"Improved noise suppression filter using self-adaptive estimator of probability of speech absence","volume":"75","author":"soon","year":"1999","journal-title":"Signal Process"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/ICDSP.2009.5201171"},{"key":"ref3","first-page":"365","article-title":"A voice activity detector employing soft decision based noise spectrum adaptation","author":"sohn","year":"1998","journal-title":"Proc Int Conf Acoust Speech Signal Process"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1016\/j.patrec.2011.09.002"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/ICIP.2011.6115606"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/FCST.2010.99"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1016\/S0031-3203(00)00090-X"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/INNOVATIONS.2008.4781689"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/89.848229"},{"key":"ref9","first-page":"458","article-title":"Gaussian mixture model for human skin color and its application in image and video databases","author":"yang","year":"1999","journal-title":"Proc SPIE '99 Its Application in Image and Video Databases"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1121\/1.3050257"},{"key":"ref20","author":"dibiase","year":"2000","journal-title":"A high-accuracy low-latency technique for talker localization in reverberant environments"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/ICME.2000.871072"},{"key":"ref21","first-page":"3737","article-title":"A hybrid real-time face tracking system","author":"wang","year":"1997","journal-title":"Proc Int Conf Acoust Speech Signal Process"},{"key":"ref42","first-page":"318","article-title":"Combining naive Bayes and decision tables","author":"hall","year":"2008","journal-title":"Proc 21st Florida Artif Intell Soc Conf (FLAIRS)"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/TSMCB.2008.2009558"},{"key":"ref41","first-page":"174","article-title":"The power of decision tables","author":"kohavi","year":"1995","journal-title":"Proc Eur Conf Mach Learn"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2006.881678"},{"key":"ref26","article-title":"Voice activity detection based on fusion of audio and visual information","author":"takeuchi","year":"2009","journal-title":"Proc Int Conf Auditory-Visual Speech Process"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1023\/A:1022631118932"},{"key":"ref25","article-title":"Using audio-visual features for robust voice activity detection in clean and noisy speech","author":"almajai","year":"2008","journal-title":"Proc 16th Eur Signal Process Conf (EUSIPCO '08)"}],"container-title":["IEEE Journal of Selected Topics in Signal Processing"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx5\/4200690\/6416927\/06400215.pdf?arnumber=6400215","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,2,4]],"date-time":"2022-02-04T23:32:37Z","timestamp":1644017557000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/6400215\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2013,2]]},"references-count":43,"journal-issue":{"issue":"1"},"URL":"https:\/\/doi.org\/10.1109\/jstsp.2012.2237379","relation":{},"ISSN":["1932-4553","1941-0484"],"issn-type":[{"value":"1932-4553","type":"print"},{"value":"1941-0484","type":"electronic"}],"subject":[],"published":{"date-parts":[[2013,2]]}}}