Heting Gao, Xulin Fan, & Mark Hasegawa-Johnson. SyncDiff: Diffusion-based Talking Head Synthesis with Bottlenecked Temporal Visual Prior for Improved Synchronization. WACV Winter Conference on Applications of Computer Vision, Mar, 2025
@inproceedings{gao2025syncdiff,
author = {Heting Gao and Xulin Fan and Mark Hasegawa-Johnson},
booktitle = {WACV Winter Conference on Applications of Computer Vision},
month = {3},
title = {SyncDiff: Diffusion-based Talking Head Synthesis with Bottlenecked Temporal Visual Prior for Improved Synchronization},
url = {https://wacv2025.thecvf.com/},
year = {2025}
}
Eunseop Yoon, Heesuk Yoon, Mark Hasegawa-Johnson, & Changdong Yoo. Can Video LLMs Refuse to Answer? Alignment for Answerability in Video Large Language Models. Proc. International Conference on Learning Representations (ICLR), 2025
@inproceedings{yoon2025can,
author = {Eunseop Yoon and Heesuk Yoon and Mark Hasegawa-Johnson and Changdong Yoo},
booktitle = {Proc. International Conference on Learning Representations (ICLR)},
title = {Can Video LLMs Refuse to Answer? Alignment for Answerability in Video Large Language Models},
year = {2025}
}
Jonghwan Na, Mark Hasegawa-Johnson, & Bowon Lee. Cohort-Sensitive Labeling: An Effective Approach for Enhancing ASR Performance. Proc. ICASSP, 2025
@inproceedings{na2025cohort,
author = {Jonghwan Na and Mark Hasegawa-Johnson and Bowon Lee},
booktitle = {Proc. ICASSP},
title = {Cohort-Sensitive Labeling: An Effective Approach for Enhancing ASR Performance},
year = {2025}
}
Jonghwan Na, Xiuwen Zheng, Mark Hasegawa-Johnson, & Bowon Lee. Improved Recognition of the Speech of People with Parkinson’s who Stutter. Proc. ICASSP, 2025
@inproceedings{na2025improved,
author = {Jonghwan Na and Xiuwen Zheng and Mark Hasegawa-Johnson and Bowon Lee},
booktitle = {Proc. ICASSP},
title = {Improved Recognition of the Speech of People with Parkinson’s who Stutter},
year = {2025}
}
Satwinder Singh, Qianli Wang, Zihan Zhong, Clarion Mendes, Mark Hasegawa-Johnson, Waleed Abdulla, & Seyed Reza Shahamiri. Robust Cross-Etiology and Speaker-Independent Dysarthric Speech Recognition. Proc. ICASSP, 2025
@inproceedings{singh2025robust,
author = {Satwinder Singh and Qianli Wang and Zihan Zhong and Clarion Mendes and Mark Hasegawa-Johnson and Waleed Abdulla and Seyed Reza Shahamiri},
booktitle = {Proc. ICASSP},
title = {Robust Cross-Etiology and Speaker-Independent Dysarthric Speech Recognition},
year = {2025}
}
Qianli Wang, Zihan Zhong, Satwinder Singh, Clarion Mendes, Mark Hasegawa-Johnson, Waleed Abdulla, & Seyed Reza Shahamiri. Dysarthric Speech Conformer: Adaptation for Sequence-to-Sequence Dysarthric Speech Recognition. Proc. ICASSP, 2025
@inproceedings{wang2025robust,
author = {Qianli Wang and Zihan Zhong and Satwinder Singh and Clarion Mendes and Mark Hasegawa-Johnson and Waleed Abdulla and Seyed Reza Shahamiri},
booktitle = {Proc. ICASSP},
title = {Dysarthric Speech Conformer: Adaptation for Sequence-to-Sequence Dysarthric Speech Recognition},
year = {2025}
}
Maliha Jahan, Priyam Mazumdar, Thomas Thebaud, Mark Hasegawa-Johnson, Jesus Villalba, Najim Dehak, & Laureano Moro-Velazquez. Unveiling Performance Bias in ASR Systems: A Study on Gender, Age, Accent, and More. Proc. ICASSP, 2025
@inproceedings{jahan2025unveiling,
author = {Maliha Jahan and Priyam Mazumdar and Thomas Thebaud and Mark Hasegawa-Johnson and Jesus Villalba and Najim Dehak and Laureano Moro-Velazquez},
booktitle = {Proc. ICASSP},
title = {Unveiling Performance Bias in ASR Systems: A Study on Gender, Age, Accent, and More},
year = {2025}
}
Satwinder Singh, Zihan Zhong, Qianli Wang, Clarion Mendes, Mark Hasegawa-Johnson, Waleed Abdulla, & Seyed Reza Shahamiri. A Comprehensive Performance Evaluation of Whisper Models in Dysarthric Speech Recognition. International Conference on Neural Information Processing (ICONIP2024), Dec, 2024
@inproceedings{singh2024comprehensive,
author = {Satwinder Singh and Zihan Zhong and Qianli Wang and Clarion Mendes and Mark Hasegawa-Johnson and Waleed Abdulla and Seyed Reza Shahamiri},
booktitle = {International Conference on Neural Information Processing (ICONIP2024)},
month = {12},
title = {A Comprehensive Performance Evaluation of Whisper Models in Dysarthric Speech Recognition},
url = {https://iconip2024.org/},
year = {2024}
}
Heejin Kim, Clarion Mendes, Mark Hasegawa-Johnson, Meg Dickinson, & Erik Hege. An Overview of Speech Data in the Speech Accessibility Project: Speech Variations for Inclusive Technology. American Speech Language Hearing Association (ASHA) Annual Convention, unpublished presentation, Dec, 2024
@inproceedings{kim2024overview,
author = {Heejin Kim and Clarion Mendes and Mark Hasegawa-Johnson and Meg Dickinson and Erik Hege},
booktitle = {American Speech Language Hearing Association (ASHA) Annual Convention},
month = {12},
note = {unpublished presentation},
title = {An Overview of Speech Data in the Speech Accessibility Project: Speech Variations for Inclusive Technology},
url = {https://convention.asha.org/},
year = {2024}
}
Junkai Wu, Xulin Fan, Bo-Ru Lu, Xilin Jiang, Nima Mesgarani, Mark Hasegawa-Johnson, & Mari Ostendorf. Just ASR + LLM? A Study on Speech Large Language Models’ Ability to Identify and Understand Speaker in Spoken Dialogue. IEEE Spoken Language Technology (SLT), Dec, 2024
@inproceedings{wu2024just,
author = {Junkai Wu and Xulin Fan and Bo-Ru Lu and Xilin Jiang and Nima Mesgarani and Mark Hasegawa-Johnson and Mari Ostendorf},
booktitle = {IEEE Spoken Language Technology (SLT)},
month = {12},
title = {Just ASR + LLM? A Study on Speech Large Language Models’ Ability to Identify and Understand Speaker in Spoken Dialogue},
url = {https://arxiv.org/abs/2409.04927},
year = {2024}
}
Mahir Morshed, & Mark Hasegawa-Johnson. Using Articulatory Feature Detectors in Progressive Networks for Multilingual Low-Resource Phone Recognition. J. Acoust. Soc. Am., vol. 156, no. 5, pp. 3411-3421, Nov, 2024
@article{morshed2024using,
author = {Mahir Morshed and Mark Hasegawa-Johnson},
doi = {10.1121/10.0034415},
journal = {J. Acoust. Soc. Am.},
month = {11},
number = {5},
pages = {3411--3421},
title = {Using Articulatory Feature Detectors in Progressive Networks for Multilingual Low-Resource Phone Recognition},
url = {https://pubs.aip.org/asa/jasa/article/156/5/3411/3321345/Using-articulatory-feature-detectors-in},
volume = {156},
year = {2024}
}
Giang Le, Yinglun Sun, Maliha Jahan, Helin Wang, Thomas Thebaud, Zsuzsanna Fagyal, Mark Hasegawa-Johnson, Laureano Moro-Velazquez, & Najim Dehak. “You are what, Chinese?” Modeling identifications of race, ethnicity, and national origin as stancetaking in a large podcast corpus of American English. presented at NWAV New Ways of Analyzing Variation, Nov, 2024
@unpublished{le2024you,
author = {Giang Le and Yinglun Sun and Maliha Jahan and Helin Wang and Thomas Thebaud and Zsuzsanna Fagyal and Mark Hasegawa-Johnson and Laureano Moro-Velazquez and Najim Dehak},
month = {11},
note = {presented at NWAV New Ways of Analyzing Variation},
title = {"You are what, Chinese?" Modeling identifications of race, ethnicity, and national origin as stancetaking in a large podcast corpus of American English},
year = {2024}
}
Mohammad Nur Hossain Khan, Nancy McElwain, Mark Hasegawa-Johnson, & Bashima Islam. InfantMotion2Vec: Unlabeled Data-Driven Infant Pose Estimation Using a Single Chest IMU. IEEE International Conference on Wearable and Implantable Body Sensor Networks (BSN 2024), Oct, 2024
@inproceedings{khan2024infantmotion2vec,
author = {Mohammad Nur Hossain Khan and Nancy McElwain and Mark Hasegawa-Johnson and Bashima Islam},
booktitle = {IEEE International Conference on Wearable and Implantable Body Sensor Networks (BSN 2024)},
month = {10},
title = {InfantMotion2Vec: Unlabeled Data-Driven Infant Pose Estimation Using a Single Chest IMU},
url = {https://bsn.embs.org/2024/},
year = {2024}
}
Jialu Li, Mark Hasegawa-Johnson, & Karrie Karahalios. Enhancing Child Vocalization Classification with Phonetically-Tuned Embeddings for Assisting Autism Diagnosis. Proc. Interspeech, Sep, 2024
@inproceedings{li2024enhancing,
author = {Jialu Li and Mark Hasegawa-Johnson and Karrie Karahalios},
booktitle = {Proc. Interspeech},
doi = {10.21437/Interspeech.2024-540},
month = {9},
title = {Enhancing Child Vocalization Classification with Phonetically-Tuned Embeddings for Assisting Autism Diagnosis},
year = {2024}
}
Eunseop Yoon, Hee Suk Yoon, John Harvill, Mark Hasegawa-Johnson, & Chang D. Yoo. LI-TTA: Language Informed Test-Time Adaptation for Automatic Speech Recognition. Proc. Interspeech, Sep, 2024
@inproceedings{yoon2024litta,
author = {Eunseop Yoon and Hee Suk Yoon and John Harvill and Mark Hasegawa-Johnson and Chang D. Yoo},
booktitle = {Proc. Interspeech},
doi = {10.21437/Interspeech.2024-1829},
month = {9},
title = {LI-TTA: Language Informed Test-Time Adaptation for Automatic Speech Recognition},
year = {2024}
}
Xiuwen Zheng, Bornali Phukon, & Mark Hasegawa-Johnson. Fine-Tuning Automatic Speech Recognition for People with Parkinson’s: An Effective Strategy for Enhancing Speech Technology Accessibility. Proc. Interspeech, Sep, 2024
@inproceedings{zheng2024fine,
author = {Xiuwen Zheng and Bornali Phukon and Mark Hasegawa-Johnson},
booktitle = {Proc. Interspeech},
doi = {10.21437/Interspeech.2024-1969},
month = {9},
title = {Fine-Tuning Automatic Speech Recognition for People with Parkinson's: An Effective Strategy for Enhancing Speech Technology Accessibility},
year = {2024}
}
Heting Gao, Kaizhi Qian, Junrui Ni, Chuang Gan, Mark A. Hasegawa-Johnson, Shiyu Chang, & Yang Zhang. Speech Self-Supervised Learning Using Diffusion Model Synthetic Data. Proc. International Conference on Machine Learning (ICML), Jul, 2024
@inproceedings{gao2024speech,
author = {Heting Gao and Kaizhi Qian and Junrui Ni and Chuang Gan and Mark A. Hasegawa-Johnson and Shiyu Chang and Yang Zhang},
booktitle = {Proc. International Conference on Machine Learning (ICML)},
month = {7},
title = {Speech Self-Supervised Learning Using Diffusion Model Synthetic Data},
url = {https://openreview.net/forum?id=ecnpYYHjt9},
year = {2024}
}
Yannan Hu, Nancy L. McElwain, & Mark Hasegawa-Johnson. Real-Time Relations Between Prosodic Features of Infant-Directed Speech and Infant Attention at 3 Months. Speech Prosody, Jul, 2024
@inproceedings{hu2024real2,
author = {Yannan Hu and Nancy L. McElwain and Mark Hasegawa-Johnson},
booktitle = {Speech Prosody},
doi = {10.21437/SpeechProsody.2024-45},
month = {7},
title = {Real-Time Relations Between Prosodic Features of Infant-Directed Speech and Infant Attention at 3 Months},
year = {2024}
}
Yannan Hu, Nancy L. McElwain, & Mark Hasegawa-Johnson. Real-time relations between fundamental frequency of infant-directed speech and infant attention at 3 months. 24th International Congress of Infant Studies (ICIS), Poster presentation, Glasgow, Scotland, Jul, 2024
@inproceedings{hu2024real,
address = {Glasgow, Scotland},
author = {Yannan Hu and Nancy L. McElwain and Mark Hasegawa-Johnson},
booktitle = {24th International Congress of Infant Studies (ICIS)},
month = {July},
note = {Poster presentation},
title = {Real-time relations between fundamental frequency of infant-directed speech and infant attention at 3 months},
year = {2024}
}
Maliha Jahan, Helin Wang, Thomas Thebaud, Yinglun Sun, Giang Le, Zsuzsanna Fagyal, Odette Scharenborg, Mark Hasegawa-Johnson, Laureano Moro Velazquez, & Najim Dehak. Finding Spoken Identifications: Using GPT-4 Annotation For An Efficient And Fast Dataset Creation Pipeline. LREC-COLING, pp. 7296-7306, May, 2024
@inproceedings{jahan2024finding,
author = {Maliha Jahan and Helin Wang and Thomas Thebaud and Yinglun Sun and Giang Le and Zsuzsanna Fagyal and Odette Scharenborg and Mark Hasegawa-Johnson and Laureano Moro Velazquez and Najim Dehak},
booktitle = {LREC-COLING},
month = {5},
pages = {7296-7306},
title = {Finding Spoken Identifications: Using GPT-4 Annotation For An Efficient And Fast Dataset Creation Pipeline},
url = {https://aclanthology.org/2024.lrec-main.641/},
year = {2024}
}
Hee Suk Yoon, Eunseop Yoon, Joshua Tian Jin Tee, Mark A. Hasegawa-Johnson, Yingzhen Li, & Chang D. Yoo. C-TPT: Calibrated Test-Time Prompt Tuning for Vision-Language Models via Text Feature Dispersion. Proc. International Conference on Learning Representations (ICLR), May, 2024
@inproceedings{yoon2024calibrated,
author = {Hee Suk Yoon and Eunseop Yoon and Joshua Tian Jin Tee and Mark A. Hasegawa-Johnson and Yingzhen Li and Chang D. Yoo},
booktitle = {Proc. International Conference on Learning Representations (ICLR)},
month = {5},
title = {C-TPT: Calibrated Test-Time Prompt Tuning for Vision-Language Models via Text Feature Dispersion},
url = {https://iclr.cc/virtual/2024/poster/17996},
year = {2024}
}
John Harvill. Speech classification and lexical semantic modeling via self-supervision and knowledge transfer. Ph.D. Thesis, University of Illinois, Apr, 2024
@phdthesis{harvill2024speech,
author = {John Harvill},
keywords = {recognition},
month = {April},
school = {University of Illinois},
title = {Speech classification and lexical semantic modeling via self-supervision and knowledge transfer},
url = {https://www.ideals.illinois.edu/items/131435},
year = {2024}
}
Abhayjeet Singh, Amala Nagireddi, Anjali Jayakumar, Deekshitha G, Jesuraja Bandekar, Roopa R, Sandhya Badiger, Sathvik Udupa, Saurabh Kumar, Prasanta Kumar Ghosh, Hema A Murthy, Heiga Zen, Pranaw Kumar, Kamal Kant, Amol Bole, Bira Chandra Singh, Keiichi Tokuda, Mark Hasegawa-Johnson, & Philipp Olbrich. Lightweight, Multi-speaker, Multi-lingual Indic Text-To-Speech. IEEE Open Journal of Signal Processing, vol. 5, pp. 790-798, Mar, 2024
@article{singh2024lightweight,
author = {Abhayjeet Singh and Amala Nagireddi and Anjali Jayakumar and Deekshitha G and Jesuraja Bandekar and Roopa R and Sandhya Badiger and Sathvik Udupa and Saurabh Kumar and Prasanta Kumar Ghosh and Hema A Murthy and Heiga Zen and Pranaw Kumar and Kamal Kant and Amol Bole and Bira Chandra Singh and Keiichi Tokuda and Mark Hasegawa-Johnson and Philipp Olbrich},
doi = {10.1109/OJSP.2024.3379092},
issn = {2644-1322},
journal = {IEEE Open Journal of Signal Processing},
month = {3},
pages = {790--798},
title = {Lightweight, Multi-speaker, Multi-lingual Indic Text-To-Speech},
url = {https://ieeexplore.ieee.org/document/10479171?source=authoralert},
volume = {5},
year = {2024}
}
Heting Gao. Unsupervised speech technology for low-resource languages. Ph.D. Thesis, University of Illinois, Mar, 2024
@phdthesis{gao2024unsupervised,
author = {Heting Gao},
keywords = {recognition},
month = {March},
school = {University of Illinois},
title = {Unsupervised speech technology for low-resource languages},
url = {https://www.ideals.illinois.edu/items/131276},
year = {2024}
}
Jialu Li. Breaking down barriers: advancing interdisciplinary speech applications in early children’s development. Ph.D. Thesis, University of Illinois, Mar, 2024
Zhonghao Wang. Reasoning, scaling, generating with vision-language models. Ph.D. Thesis, University of Illinois, Mar, 2024
@phdthesis{wang2024reasoning,
author = {Zhonghao Wang},
keywords = {vision},
month = {March},
school = {University of Illinois},
title = {Reasoning, scaling, generating with vision-language models},
url = {https://www.ideals.illinois.edu/items/131362},
year = {2024}
}
Bashima Islam, Nancy L McElwain, Jialu Li, Maria Davila, Yannan Hu, Kexin Hu, Jordan M Bodway, Ashutosh M Dhekne, Romit Roy Choudhury, & Mark Hasegawa-Johnson. Preliminary Technical Validation of LittleBeats™: A Multimodal Sensing Platform to Capture Cardiac Physiology, Motion, and Vocalizations. Sensors, vol. 24, no. 3, Jan, 2024
@article{islam2024preliminary,
author = {Bashima Islam and Nancy L McElwain and Jialu Li and Maria Davila and Yannan Hu and Kexin Hu and Jordan M Bodway and Ashutosh M Dhekne and Romit Roy Choudhury and Mark Hasegawa-Johnson},
url = {https://www.mdpi.com/1424-8220/24/3/901},
journal = {Sensors},
month = {1},
number = {3},
title = {Preliminary Technical Validation of LittleBeats™: A Multimodal Sensing Platform to Capture Cardiac Physiology, Motion, and Vocalizations},
volume = {24},
year = {2024}
}
Mark Hasegawa-Johnson, Xiuwen Zheng, Heejin Kim, Clarion Mendes, Meg Dickinson, Erik Hege, Chris Zwilling, Marie Moore Channell, Laura Mattie, Heather Hodges, Lorraine Ramig, Mary Bellard, Mike Shebanek, Leda Sarı, Kaustubh Kalgaonkar, David Frerichs, Jeffrey P. Bigham, Leah Findlater, Colin Lea, Sarah Herrlinger, Peter Korn, Shadi Abou-Zahra, Rus Heywood, Katrin Tomanek, & Bob MacDonald. Community-supported shared infrastructure in support of speech accessibility. Journal of Speech Language and Hearing Research, vol. 67, no. 10, pp. 4162-4175, 2024
@article{hasegawajohnson2024community,
author = {Mark Hasegawa-Johnson and Xiuwen Zheng and Heejin Kim and Clarion Mendes and Meg Dickinson and Erik Hege and Chris Zwilling and Marie Moore Channell and Laura Mattie and Heather Hodges and Lorraine Ramig and Mary Bellard and Mike Shebanek and Leda Sarı and Kaustubh Kalgaonkar and David Frerichs and Jeffrey P. Bigham and Leah Findlater and Colin Lea and Sarah Herrlinger and Peter Korn and Shadi Abou-Zahra and Rus Heywood and Katrin Tomanek and Bob MacDonald},
doi = {10.1044/2024_JSLHR-24-00122},
journal = {Journal of Speech Language and Hearing Research},
number = {10},
pages = {4162-4175},
title = {Community-supported shared infrastructure in support of speech accessibility},
url = {https://pubs.asha.org/doi/10.1044/2024_JSLHR-24-00122},
volume = {67},
year = {2024}
}
John Harvill, Moitreya Chatterjee, Shaveta Khosla, Mustafa Alam, Narendra Ahuja, Mark Hasegawa-Johnson, David Chestek, & David Beiser. Multimodal Respiratory Rate Estimation from Audio and Video in Emergency Department Patients. IEEE Journal of Translational Engineering in Health and Medicine, 2024
@article{harvill2024multimodal,
author = {John Harvill and Moitreya Chatterjee and Shaveta Khosla and Mustafa Alam and Narendra Ahuja and Mark Hasegawa-Johnson and David Chestek and David Beiser},
doi = {10.1109/JTEHM.2024.3418345},
issn = {2168-2372},
journal = {IEEE Journal of Translational Engineering in Health and Medicine},
title = {Multimodal Respiratory Rate Estimation from Audio and Video in Emergency Department Patients},
url = {https://ieeexplore.ieee.org/document/10570324},
year = {2024}
}
Liming Wang, Mark Hasegawa-Johnson, & Chang Yoo. Unsupervised Speech Recognition with N-Skipgram and Positional Unigram Matching. Proc. ICASSP, no. 4604, 2024
@inproceedings{wang2024unsupervised,
author = {Liming Wang and Mark Hasegawa-Johnson and Chang Yoo},
booktitle = {Proc. ICASSP},
doi = {10.1109/ICASSP48485.2024.10446327},
number = {4604},
title = {Unsupervised Speech Recognition with N-Skipgram and Positional Unigram Matching},
year = {2024}
}
Heting Gao, Mark Hasegawa-Johnson, & Chang D. Yoo. G2PU: Grapheme-to-Phoneme Transducer with Speech Units. Proc. ICASSP, no. 1746, 2024
@inproceedings{gao2024g2pu,
author = {Heting Gao and Mark Hasegawa-Johnson and Chang D. Yoo},
booktitle = {Proc. ICASSP},
doi = {10.1109/ICASSP48485.2024.10448105},
number = {1746},
title = {G2PU: Grapheme-to-Phoneme Transducer with Speech Units},
year = {2024}
}
Nancy McElwain, Bashima Islam, Meghan Fisher, Camille Nebeker, Jordan Marie Bodway, & Mark Hasegawa-Johnson. Evaluating Users’ Experiences of a Child Multimodal Wearable Device: A Mixed Methods Approach. JMIR Human Factors, vol. 11, JMIR Publications Toronto, Canada, pp. e49316, 2024
@article{mcelwain2024evaluating,
author = {Nancy McElwain and Bashima Islam and Meghan Fisher and Camille Nebeker and Jordan Marie Bodway and Mark Hasegawa-Johnson},
doi = {10.2196/49316},
journal = {JMIR Human Factors},
pages = {e49316},
publisher = {JMIR Publications Toronto, Canada},
title = {Evaluating Users’ Experiences of a Child Multimodal Wearable Device: A Mixed Methods Approach},
volume = {11},
year = {2024}
}
Renato Azevedo, Rocio Garcia-Retamero, Mark Hasegawa-Johnson, Kuangxiao Gu, & Daniel G. Morrow. The Influence of Memory for and Affective Response to Health Messages on Self-care Behavioral Intentions. Human Factors in Healthcare, vol. 4, Elsevier, pp. 100058, Dec, 2023
@article{azevedo2024influence,
author = {Renato Azevedo and Rocio Garcia-Retamero and Mark Hasegawa-Johnson and Kuangxiao Gu and Daniel G. Morrow},
journal = {Human Factors in Healthcare},
month = {12},
pages = {100058},
publisher = {Elsevier},
title = {The Influence of Memory for and Affective Response to Health Messages on Self-care Behavioral Intentions},
volume = {4},
year = {2023}
}
Feiyu Zhang. End-to-end modeling for code-switching automatic speech recognition. Master’s Thesis, University of Illinois, 2024
@mastersthesis{zhang2024end,
author = {Feiyu Zhang},
school = {University of Illinois},
title = {End-to-end modeling for code-switching automatic speech recognition},
url = {https://www.ideals.illinois.edu/items/131716},
year = {2024}
}
Kai Chieh Chang. Fusing multimodal neural networks: a study on sleep classification and sound event localization and detection. Master’s Thesis, University of Illinois, 2024
@mastersthesis{chang2024fusing,
author = {Kai Chieh Chang},
school = {University of Illinois},
title = {Fusing multimodal neural networks: a study on sleep classification and sound event localization and detection},
url = {https://www.ideals.illinois.edu/items/131693},
year = {2024}
}
Kai Chieh Chang, Mark Hasegawa-Johnson, Nancy L. McElwain, & Bashima Islam. Classification of Infant Sleep/Wake States: Cross-Attention Among Large Scale Pretrained Transformer Networks Using Audio, ECG, and IMU Data. APSIPA ASC, pp. 2370-2377, Nov, 2023
@inproceedings{chang2023classification,
author = {Kai Chieh Chang and Mark Hasegawa-Johnson and Nancy L. McElwain and Bashima Islam},
booktitle = {APSIPA ASC},
doi = {10.1109/APSIPAASC58517.2023.10317201},
keywords = {multimodal signal understanding},
month = {11},
pages = {2370--2377},
title = {Classification of Infant Sleep/Wake States: Cross-Attention Among Large Scale Pretrained Transformer Networks Using Audio, ECG, and IMU Data},
url = {https://arxiv.org/abs/2306.15808},
year = {2023}
Liming Wang, Mark Hasegawa-Johnson, & Chang D. Yoo. A Theory of Unsupervised Speech Recognition. ACL, Jul, 2023
@inproceedings{wang2023a,
author = {Liming Wang and Mark Hasegawa-Johnson and Chang D. Yoo},
booktitle = {ACL},
keywords = {recognition},
month = {7},
title = {A Theory of Unsupervised Speech Recognition},
url = {https://arxiv.org/abs/2306.07926},
year = {2023}
}
Liming Wang, Junrui Ni, Heting Gao, Jialu Li, Kai Chieh Chang, Xulin Fan, Junkai Wu, Mark Hasegawa-Johnson, & Chang D. Yoo. Speak and Decipher and Sign: Toward Unsupervised Speech-to-Sign Language Recognition. Findings of ACL, Jul, 2023
@inproceedings{wang2023speak,
author = {Liming Wang and Junrui Ni and Heting Gao and Jialu Li and Kai Chieh Chang and Xulin Fan and Junkai Wu and Mark Hasegawa-Johnson and Chang D. Yoo},
booktitle = {Findings of ACL},
keywords = {recognition},
month = {7},
title = {Speak and Decipher and Sign: Toward Unsupervised Speech-to-Sign Language Recognition},
url = {https://aclanthology.org/2023.findings-acl.424/},
year = {2023}
}
Oshane Odane Thomas. Evolution of primate cuboid form and the application of learning methods to the analysis of morphological and behavioral phenotypes. Ph.D. Thesis, University of Illinois, Jun, 2023
@phdthesis{thomas2023evolution,
author = {Oshane Odane Thomas},
keywords = {biomedical},
month = {June},
school = {University of Illinois},
title = {Evolution of primate cuboid form and the application of learning methods to the analysis of morphological and behavioral phenotypes},
url = {https://www.ideals.illinois.edu/items/129109},
year = {2023}
}
Liming Wang. Multimodal spoken unit discovery with paired and unpaired modalities. Ph.D. Thesis, University of Illinois, May, 2023
@phdthesis{wang2023multimodal,
author = {Liming Wang},
keywords = {recognition},
month = {May},
school = {University of Illinois},
title = {Multimodal spoken unit discovery with paired and unpaired modalities},
url = {https://www.ideals.illinois.edu/items/128694},
year = {2023}
}
Eugene Cox, Mark Hasegawa-Johnson, Suma Bhat, Mukhil Umashankar, H Chad Lane, & Daniel Morrow. The Importance of Diverse User Goals When Designing an Automated COVID Risk Counselor. Proceedings of the International Symposium on Human Factors and Ergonomics in Health Care, vol. 12, no. 1, SAGE Publications, pp. 35-39, Mar, 2023
@article{cox2023importance,
author = {Eugene Cox and Mark Hasegawa-Johnson and Suma Bhat and Mukhil Umashankar and H Chad Lane and Daniel Morrow},
journal = {Proceedings of the International Symposium on Human Factors and Ergonomics in Health Care},
month = {3},
number = {1},
pages = {35--39},
publisher = {SAGE Publications},
title = {The Importance of Diverse User Goals When Designing an Automated COVID Risk Counselor},
url = {https://journals.sagepub.com/doi/pdf/10.1177/2327857923121009},
volume = {12},
year = {2023}
}
Mark Hasegawa-Johnson, Heejin Kim, Xiuwen Zheng, Meg Dickinson, Erik Hege, Clarion Mendes, Chris Zwilling, Shadi Abou-Zahra, Leah Findlater, Aaron Gustafson, Bob MacDonald, Mike Shebanek, Adina Bradshaw, Shawnise Carter, Blair Casey, Marie Moore Channell, Kayla Ferguson, Heather Hodges, Laura Mattie, & Lorraine Ramig. Community-supported shared infrastructure in support of speech accessibility. vol. 67, no. 11, Unpublished presentation at the ASHA 2023 Research Symposium, 2023
@unpublished{hasegawajohnson2023community,
author = {Mark Hasegawa-Johnson and Heejin Kim and Xiuwen Zheng and Meg Dickinson and Erik Hege and Clarion Mendes and Chris Zwilling and Shadi Abou-Zahra and Leah Findlater and Aaron Gustafson and Bob MacDonald and Mike Shebanek and Adina Bradshaw and Shawnise Carter and Blair Casey and Marie Moore Channell and Kayla Ferguson and Heather Hodges and Laura Mattie and Lorraine Ramig},
note = {Unpublished presentation at the ASHA 2023 Research Symposium},
number = {11},
title = {Community-supported shared infrastructure in support of speech accessibility},
url = {https://vimeo.com/911627521},
volume = {67},
year = {2023}
}
Jialu Li, Mark Hasegawa-Johnson, & Nancy McElwain. Towards Robust Family-Infant Audio Analysis Based on Unsupervised Pretraining of Wav2vec 2.0 on Large-Scale Unlabeled Family Audio. Proc. Interspeech, 2023
@inproceedings{li2023towards,
author = {Jialu Li and Mark Hasegawa-Johnson and Nancy McElwain},
booktitle = {Proc. Interspeech},
doi = {10.21437/Interspeech.2023-460},
keywords = {analysis},
title = {Towards Robust Family-Infant Audio Analysis Based on Unsupervised Pretraining of Wav2vec 2.0 on Large-Scale Unlabeled Family Audio},
url = {https://arxiv.org/abs/2305.12530},
year = {2023}
}
Wanyue Zhai, & Mark Hasegawa-Johnson. Wav2ToBI: a new approach to automatic ToBI transcription. Proc. Interspeech, 2023
@inproceedings{zhai2023wav2tobi,
author = {Wanyue Zhai and Mark Hasegawa-Johnson},
booktitle = {Proc. Interspeech},
doi = {10.21437/Interspeech.2023-477},
keywords = {analysis},
title = {Wav2ToBI: a new approach to automatic ToBI transcription},
year = {2023}
}
Hee Suk Yoon, Eunseop Yoon, John Harvill, Sunjae Yoon, Mark Hasegawa-Johnson, & Chang D. Yoo. SMSMix: Sense Maintained Sentence Mixup for Word Sense Disambiguation. Findings of EMNLP, pp. 1493–1502, Dec, 2022
@inproceedings{yoon2022smsmix,
author = {Hee Suk Yoon and Eunseop Yoon and John Harvill and Sunjae Yoon and Mark Hasegawa-Johnson and Chang D. Yoo},
booktitle = {Findings of EMNLP},
keywords = {intelligence},
month = {12},
pages = {1493–1502},
title = {SMSMix: Sense Maintained Sentence Mixup for Word Sense Disambiguation},
url = {https://aclanthology.org/2022.findings-emnlp.107.pdf},
year = {2022}
}
Eunseop Yoon, Hee Suk Yoon, Dhananjaya Gowda, SooHwan Eom, Daehyeok Kim, John Harvill, Heting Gao, Mark Hasegawa-Johnson, Chanwoo Kim, & Chang D. Yoo. Mitigating the Exposure Bias in Sentence-Level Grapheme-to-Phoneme (G2P) Transduction. Proc. Interspeech, 2023
@inproceedings{yoon2023mitigating,
author = {Eunseop Yoon and Hee Suk Yoon and Dhananjaya Gowda and SooHwan Eom and Daehyeok Kim and John Harvill and Heting Gao and Mark Hasegawa-Johnson and Chanwoo Kim and Chang D. Yoo},
booktitle = {Proc. Interspeech},
keywords = {recognition},
title = {Mitigating the Exposure Bias in Sentence-Level Grapheme-to-Phoneme (G2P) Transduction},
year = {2023}
}
Zhongweiyang Xu, Xulin Fan, & Mark Hasegawa-Johnson. Dual-Path Cross-Modal Attention for better Audio-Visual Speech Extraction. Proceedings of ICASSP, Recognized as one of the top 3% of papers at the conference, 2023
@inproceedings{xu2023dual,
author = {Zhongweiyang Xu and Xulin Fan and Mark Hasegawa-Johnson},
booktitle = {Proceedings of ICASSP},
doi = {10.1109/ICASSP49357.2023.10096732},
keywords = {recognition},
note = {Recognized as one of the top 3% of papers at the conference},
title = {Dual-Path Cross-Modal Attention for better Audio-Visual Speech Extraction},
url = {https://drive.google.com/file/d/1IHK4EWuOiBX11fA7mdzUHIhCWK5ahOwf/view},
year = {2023}
}
Wonjune Kang, Mark Hasegawa-Johnson, & Deb Roy. End-to-End Zero-Shot Voice Conversion with Location-Variable Convolutions. Proc. Interspeech, 2023
@inproceedings{kang2023end,
author = {Wonjune Kang and Mark Hasegawa-Johnson and Deb Roy},
booktitle = {Proc. Interspeech},
keywords = {synthesis},
title = {End-to-End Zero-Shot Voice Conversion with Location-Variable Convolutions},
year = {2023}
}
Seunghyun Lee. Explainable artificial intelligence for inclusive automatic speech recognition. Master’s Thesis, University of Illinois, 2023
Piotr Zelasko, Siyuan Feng, Laureano Moro-Velazquez, Ali Abavisani, Saurabchand Bhati, Odette Scharenborg, Mark Hasegawa-Johnson, & Najim Dehak. Discovering Phonetic Inventories with Crosslingual Automatic Speech Recognition. Computer Speech and Language, vol. 74, pp. 101358:1-54, Jul, 2022
@article{zelasko2021discovering,
author = {Piotr Zelasko and Siyuan Feng and Laureano Moro-Velazquez and Ali Abavisani and Saurabchand Bhati and Odette Scharenborg and Mark Hasegawa-Johnson and Najim Dehak},
doi = {10.1016/j.csl.2022.101358},
journal = {Computer Speech and Language},
keywords = {recognition},
month = {7},
pages = {101358:1-54},
title = {Discovering Phonetic Inventories with Crosslingual Automatic Speech Recognition},
volume = {74},
year = {2022}
}
Haeyong Kang, Rusty John Lloyd Mina, Sultan Rizky Hikmawan Madjid, Jaehong Yoon, Mark Hasegawa-Johnson, Sung Ju Hwang, & Chang D Yoo. Forget-free continual learning with winning subnetworks. Proc. International Conference on Machine Learning (ICML), vol. 162, pp. 10734-10750, Jun, 2022
@inproceedings{kang2022forget,
author = {Haeyong Kang and Rusty John Lloyd Mina and Sultan Rizky Hikmawan Madjid and Jaehong Yoon and Mark Hasegawa-Johnson and Sung Ju Hwang and Chang D Yoo},
booktitle = {Proc. International Conference on Machine Learning (ICML)},
month = {6},
pages = {10734-10750},
title = {Forget-free continual learning with winning subnetworks},
url = {https://proceedings.mlr.press/v162/kang22b.html},
volume = {162},
year = {2022}
}
Jialu Li, & Mark Hasegawa-Johnson. Autosegmental Neural Nets 2.0: An Extensive Study of Training Synchronous and Asynchronous Phones and Tones for Under-Resourced Tonal Languages. IEEE Transactions on Audio, Speech and Language, vol. 30, pp. 1918-1926, May, 2022
@article{li2022autosegmental,
author = {Jialu Li and Mark Hasegawa-Johnson},
doi = {10.1109/TASLP.2022.3178238},
journal = {IEEE Transactions on Audio, Speech and Language},
keywords = {recognition},
month = {5},
pages = {1918-1926},
title = {Autosegmental Neural Nets 2.0: An Extensive Study of Training Synchronous and Asynchronous Phones and Tones for Under-Resourced Tonal Languages},
url = {https://ieeexplore.ieee.org/abstract/document/9783062},
volume = {30},
year = {2022}
}
Liming Wang, Siyuan Feng, Mark A. Hasegawa-Johnson, & Chang D. Yoo. Self-supervised Semantic-driven Phoneme Discovery for Zero-resource Speech Recognition. ACL, pp. 8027–8047, May, 2022
@inproceedings{wang2022self,
author = {Liming Wang and Siyuan Feng and Mark A. Hasegawa-Johnson and Chang D. Yoo},
booktitle = {ACL},
doi = {10.18653/v1/2022.acl-long.553},
keywords = {recognition},
month = {5},
pages = {8027–8047},
title = {Self-supervised Semantic-driven Phoneme Discovery for Zero-resource Speech Recognition},
year = {2022}
}
Ali Abavisani. The role of talker change, vowel change and speech perceptual measure in hearing impaired phone recognition. Ph.D. Thesis, University of Illinois, May, 2022
@phdthesis{abavisani2022role,
author = {Ali Abavisani},
keywords = {recognition},
month = {May},
school = {University of Illinois},
title = {The role of talker change, vowel change and speech perceptual measure in hearing impaired phone recognition},
url = {https://www.ideals.illinois.edu/items/125380},
year = {2022}
}
Jialu Li, Mark Hasegawa-Johnson, & Nancy L. McElwain. Visualizations of Complex Sequences of Family-Infant Vocalizations Using Bag-of-Audio-Words Approach Based on Wav2vec 2.0 Features. arXiv preprint arXiv:2203.15183, 2022
@article{li2022visualizations,
author = {Li, Jialu and Hasegawa-Johnson, Mark and McElwain, Nancy L},
journal = {arXiv preprint arXiv:2203.15183},
title = {Visualizations of Complex Sequences of Family-Infant Vocalizations Using Bag-of-Audio-Words Approach Based on Wav2vec 2.0 Features},
url = {https://arxiv.org/pdf/2203.15183},
year = {2022}
}
Kaizhi Qian, Yang Zhang, Heting Gao, Junrui Ni, C.-I. Lai, David Cox, Mark Hasegawa-Johnson, & Shiyu Chang. ContentVec: An improved self-supervised speech representation by disentangling speakers. Proc. International Conference on Machine Learning (ICML), pp. 18003-18017, 2022
@inproceedings{qian2022contentvec,
author = {Kaizhi Qian and Yang Zhang and Heting Gao and Junrui Ni and C.-I. Lai and David Cox and Mark Hasegawa-Johnson and Shiyu Chang},
booktitle = {Proc. International Conference on Machine Learning (ICML)},
pages = {18003--18017},
title = {ContentVec: An improved self-supervised speech representation by disentangling speakers},
url = {https://proceedings.mlr.press/v162/qian22b/qian22b.pdf},
year = {2022}
}
John Harvill, Mark Hasegawa-Johnson, & Chang D. Yoo. Frame-Level Stutter Detection. Proc. Interspeech 2022, pp. 2843-2847, 2022
@inproceedings{harvill22_interspeech,
author = {John Harvill and Mark Hasegawa-Johnson and Chang D. Yoo},
booktitle = {Proc. Interspeech 2022},
doi = {10.21437/Interspeech.2022-204},
pages = {2843--2847},
title = {Frame-Level Stutter Detection},
year = {2022}
}
John Harvill, Yash Wani, Narendra Ahuja, Mark Hasegawa-Johnson, David Chestek, Mustafa Alam, & David Beiser. Estimation of Respiratory Rate from Breathing Audio. 44th Annual International Conference of the IEEE Engineering in Medicine and Biology Society, 2022
@inproceedings{harvill2022estimation,
author = {John Harvill and Yash Wani and Narendra Ahuja and Mark Hasegawa-Johnson and David Chestek and Mustafa Alam and David Beiser},
booktitle = {44th Annual International Conference of the IEEE Engineering in Medicine and Biology Society},
keywords = {analysis},
title = {Estimation of Respiratory Rate from Breathing Audio},
url = {https://ieeexplore.ieee.org/document/9871897},
year = {2022}
}
Heting Gao, Junrui Ni, Yang Zhang, Kaizhi Qian, Shiyu Chang, & Mark Hasegawa-Johnson. Domain Generalization for Language-Independent Automatic Speech Recognition. Frontiers in Artificial Intelligence, vol. 5, Frontiers Media SA, pp. 806274, 2022
@article{gao2022domain,
author = {Gao, Heting and Ni, Junrui and Zhang, Yang and Qian, Kaizhi and Chang, Shiyu and Hasegawa-Johnson, Mark},
doi = {10.3389/frai.2022.806274},
journal = {Frontiers in Artificial Intelligence},
pages = {806274},
publisher = {Frontiers Media SA},
title = {Domain Generalization for Language-Independent Automatic Speech Recognition},
volume = {5},
year = {2022}
}
Raymond Yeh, Mark Hasegawa-Johnson, & Alexander Schwing. Equivariance Discovery by Learned Parameter-Sharing. AISTATS, 2022
@inproceedings{yeh2022equivariance,
author = {Raymond Yeh and Mark Hasegawa-Johnson and Alexander Schwing},
booktitle = {AISTATS},
keywords = {intelligence},
title = {Equivariance Discovery by Learned Parameter-Sharing},
url = {https://proceedings.mlr.press/v151/yeh22b/yeh22b.pdf},
year = {2022}
}
John Harvill, Roxana Girju, & Mark Hasegawa-Johnson. Syn2Vec: Synset Colexification Graphs for Lexical Semantic Similarity. Proc. NAACL, pp. 5259–5270, 2022
Heting Gao, Xiaoxuan Wang, Sunghun Kang, Rusty Mina, Dias Issa, John Harvill, Leda Sarı, Mark Hasegawa-Johnson, & Chang D. Yoo. Seamless Equal Accuracy Ratio for Inclusive CTC Speech Recognition. Speech Communication, vol. 136, pp. 76-83, 2022
@article{gao2022seamless,
author = {Heting Gao and Xiaoxuan Wang and Sunghun Kang and Rusty Mina and Dias Issa and John Harvill and Leda Sarı and Mark Hasegawa-Johnson and Chang D. Yoo},
doi = {10.1016/j.specom.2021.11.004},
journal = {Speech Communication},
keywords = {recognition},
pages = {76-83},
title = {Seamless Equal Accuracy Ratio for Inclusive CTC Speech Recognition},
volume = {136},
year = {2022}
}
Leda Sarı, Mark Hasegawa-Johnson, & Samuel Thomas. Auxiliary Networks for Joint Speaker Adaptation and Speaker Change Detection. IEEE Transactions on Audio, Speech, and Language, vol. 29, pp. 324-333, 2022
@article{sari2022auxiliary,
author = {Leda Sarı and Mark Hasegawa-Johnson and Samuel Thomas},
journal = {IEEE Transactions on Audio, Speech, and Language},
doi = {10.1109/TASLP.2020.3040626},
keywords = {recognition},
pages = {324-333},
title = {Auxiliary Networks for Joint Speaker Adaptation and Speaker Change Detection},
volume = {29},
year = {2022}
}
Mahir Morshed, & Mark Hasegawa-Johnson. Cross-lingual articulatory feature information transfer for speech recognition using recurrent progressive neural networks. Proc. Interspeech 2022, pp. 2298-2302, 2022
@inproceedings{morshed22_interspeech,
author = {Mahir Morshed and Mark Hasegawa-Johnson},
booktitle = {Proc. Interspeech 2022},
doi = {10.21437/Interspeech.2022-11202},
pages = {2298--2302},
title = {Cross-lingual articulatory feature information transfer for speech recognition using recurrent progressive neural networks},
year = {2022}
}
Heting Gao, Junrui Ni, Kaizhi Qian, Yang Zhang, Shiyu Chang, & Mark Hasegawa-Johnson. WavPrompt: Towards Few-Shot Spoken Language Understanding with Frozen Language Models. Proc. Interspeech 2022, pp. 2738-2742, 2022
@inproceedings{gao22e_interspeech,
author = {Heting Gao and Junrui Ni and Kaizhi Qian and Yang Zhang and Shiyu Chang and Mark Hasegawa-Johnson},
booktitle = {Proc. Interspeech 2022},
doi = {10.21437/Interspeech.2022-11031},
pages = {2738--2742},
title = {WavPrompt: Towards Few-Shot Spoken Language Understanding with Frozen Language Models},
year = {2022}
}
Junrui Ni, Liming Wang, Heting Gao, Kaizhi Qian, Yang Zhang, Shiyu Chang, & Mark Hasegawa-Johnson. Unsupervised Text-to-Speech Synthesis by Unsupervised Automatic Speech Recognition. Proc. Interspeech 2022, pp. 461-465, 2022
@inproceedings{ni22_interspeech,
author = {Junrui Ni and Liming Wang and Heting Gao and Kaizhi Qian and Yang Zhang and Shiyu Chang and Mark Hasegawa-Johnson},
booktitle = {Proc. Interspeech 2022},
doi = {10.21437/Interspeech.2022-816},
pages = {461--465},
title = {Unsupervised Text-to-Speech Synthesis by Unsupervised Automatic Speech Recognition},
year = {2022}
}
Chak Ho Chan, Kaizhi Qian, Yang Zhang, & Mark Hasegawa-Johnson. SpeechSplit2.0: Unsupervised Speech Disentanglement for Voice Conversion without Tuning Autoencoder Bottlenecks. ICASSP, pp. 6332-6336, 2022
@inproceedings{chan2022speech,
author = {Chak Ho Chan and Kaizhi Qian and Yang Zhang and Mark Hasegawa-Johnson},
booktitle = {ICASSP},
doi = {10.1109/ICASSP43922.2022.9747763},
pages = {6332-6336},
title = {SpeechSplit2.0: Unsupervised Speech Disentanglement for Voice Conversion without Tuning Autoencoder Bottlenecks},
url = {https://doi.org/10.1109/ICASSP43922.2022.9747763},
year = {2022}
}
John Harvill. Regularization for dysarthric speech recognition and telemedicine applications. Master’s Thesis, University of Illinois, 2022
@mastersthesis{harvill2022dysarthric,
author = {John Harvill},
keywords = {recognition},
school = {University of Illinois},
title = {Regularization for dysarthric speech recognition and telemedicine applications},
url = {https://www.ideals.illinois.edu/items/124579},
year = {2022}
}
Haozhong Guan. Study on speech emotion recognition based on deep learning. Master’s Thesis, University of Illinois, 2022
@mastersthesis{guan2022study,
author = {Haozhong Guan},
school = {University of Illinois},
title = {Study on speech emotion recognition based on deep learning},
url = {https://www.ideals.illinois.edu/items/126694},
year = {2022}
}
Mahir Morshed. Cross-lingual articulation information transfer with progressive networks for speech recognition. Master’s Thesis, University of Illinois, 2022
@mastersthesis{morshed2022cross,
author = {Mahir Morshed},
keywords = {recognition},
school = {University of Illinois},
title = {Cross-lingual articulation information transfer with progressive networks for speech recognition},
url = {https://www.ideals.illinois.edu/items/124791},
year = {2022}
}
Chak Ho Chan. SpeechSplit2: Disentangling Speech Information Streams without Exhaustive Bottleneck Fine-tuning. Master’s Thesis, University of Illinois, 2022
@mastersthesis{chan2022speechsplit2,
author = {Chak Ho Chan},
keywords = {analysis},
school = {University of Illinois},
title = {SpeechSplit2: Disentangling Speech Information Streams without Exhaustive Bottleneck Fine-tuning},
url = {https://www.ideals.illinois.edu/items/118126},
year = {2022}
}
Yuchen Fan. Sparse representation in deep vision models. Master’s Thesis, University of Illinois, Nov, 2021
Odette Scharenborg, & Mark Hasegawa-Johnson. Position Paper: Brain Signal-based Dialogue Systems. Lecture Notes in Computer Science, vol. 714, Mar, 2021
@article{scharenborg2021international,
author = {Odette Scharenborg and Mark Hasegawa-Johnson},
doi = {10.1007/978-981-15-9323-9_36},
editor = {Marchi, E. and Siniscalchi, S.M. and Cumani, S. and Salerno, V.M. and Li, H.},
journal = {Lecture Notes in Computer Science},
keywords = {recognition},
month = {3},
title = {Position Paper: Brain Signal-based Dialogue Systems},
volume = {714},
year = {2021}
}
Jialu Li, Mark Hasegawa-Johnson, & Nancy McElwain. Analysis of Acoustic and Voice Quality Features for the Classification of Infant and Mother Vocalizations. Speech Communication, vol. 133, pp. 41-61, 2021
@article{li2021analysis,
author = {Jialu Li and Mark Hasegawa-Johnson and Nancy McElwain},
doi = {10.1016/j.specom.2021.07.010},
journal = {Speech Communication},
keywords = {analysis},
pages = {41-61},
title = {Analysis of Acoustic and Voice Quality Features for the Classification of Infant and Mother Vocalizations},
volume = {133},
year = {2021}
}
Andrew Rosenberg, & Mark Hasegawa-Johnson. Automatic Prosody Labeling and Assessment. Oxford Handbook of Language Prosody, Carlos Gussenhoven and Aoju Chen, eds., Oxford University Press, pp. 646-656, 2021
@incollection{rosenberg2021oxford,
author = {Andrew Rosenberg and Mark Hasegawa-Johnson},
booktitle = {Oxford Handbook of Language Prosody},
doi = {10.1093/oxfordhb/9780198832232.013.43},
editor = {Carlos Gussenhoven and Aoju Chen},
keywords = {analysis},
pages = {646--656},
publisher = {Oxford University Press},
title = {Automatic Prosody Labeling and Assessment},
year = {2021}
}
Junzhe Zhu, Mark Hasegawa-Johnson, & Nancy McElwain. A Comparison Study on Infant-Parent Voice Diarization. Proc. ICASSP, pp. 7178-7182, 2021
@inproceedings{zhu2021a,
author = {Junzhe Zhu and Mark Hasegawa-Johnson and Nancy McElwain},
booktitle = {Proc. ICASSP},
doi = {10.1109/ICASSP39728.2021.9413538},
keywords = {analysis},
pages = {7178-7182},
title = {A Comparison Study on Infant-Parent Voice Diarization},
year = {2021}
}
John Harvill, Yash R. Wani, Mark Hasegawa-Johnson, Narendra Ahuja, David Beiser, & David Chestek. Classification of COVID-19 from Cough Using Autoregressive Predictive Coding Pretraining and Spectral Data Augmentation. Proc. Interspeech, pp. 926-930, 2021
@inproceedings{harvill2021classification,
author = {John Harvill and Yash R. Wani and Mark Hasegawa-Johnson and Narendra Ahuja and David Beiser and David Chestek},
booktitle = {Proc. Interspeech},
doi = {10.21437/Interspeech.2021-799},
keywords = {analysis},
pages = {926--930},
title = {Classification of COVID-19 from Cough Using Autoregressive Predictive Coding Pretraining and Spectral Data Augmentation},
year = {2021}
}
Hui Shi, Yang Zhang, Hao Wu, Shiyu Chang, Kaizhi Qian, Mark Hasegawa-Johnson, & Jishen Zhao. Continuous CNN for Nonuniform Time Series. Proc. ICASSP, 2021
@inproceedings{shi2021continuous,
author = {Hui Shi and Yang Zhang and Hao Wu and Shiyu Chang and Kaizhi Qian and Mark Hasegawa-Johnson and Jishen Zhao},
booktitle = {Proc. ICASSP},
doi = {10.1109/ICASSP39728.2021.9414318},
keywords = {intelligence},
title = {Continuous CNN for Nonuniform Time Series},
year = {2021}
}
Kiran Ramnath, Leda Sarı, Mark Hasegawa-Johnson, & Chang Yoo. Worldly Wise (WoW) – Cross-Lingual Knowledge Fusion for Fact-based Visual Spoken-Question Answering. Proc. NAACL, pp. 1908–1919, 2021
@inproceedings{ramnath2021worldly,
author = {Kiran Ramnath and Leda Sarı and Mark Hasegawa-Johnson and Chang Yoo},
booktitle = {Proc. NAACL},
doi = {10.18653/v1/2021.naacl-main.153},
keywords = {intelligence},
pages = {1908–1919},
title = {Worldly Wise (WoW) - Cross-Lingual Knowledge Fusion for Fact-based Visual Spoken-Question Answering},
year = {2021}
}
Zhonghao Wang, Mo Yu, Kai Wang, Jinjun Xiong, Wen-mei Hwu, Mark Hasegawa-Johnson, & Humphrey Shi. Interpretable Visual Reasoning via Induced Symbolic Space. ICCV, pp. 1878-1887, 2021
@inproceedings{wang2021interpretable,
author = {Zhonghao Wang and Mo Yu and Kai Wang and Jinjun Xiong and Wen-mei Hwu and Mark Hasegawa-Johnson and Humphrey Shi},
booktitle = {ICCV},
keywords = {intelligence},
pages = {1878-1887},
title = {Interpretable Visual Reasoning via Induced Symbolic Space},
url = {https://openaccess.thecvf.com/content/ICCV2021/html/Wang_Interpretable_Visual_Reasoning_via_Induced_Symbolic_Space_ICCV_2021_paper.html},
year = {2021}
}
Leda Sarı, Mark Hasegawa-Johnson, & Chang D. Yoo. Counterfactually Fair Automatic Speech Recognition. IEEE Transactions on Audio, Speech, and Language, vol. 29, pp. 3515-3525, 2021
@article{sari2021counterfactually,
author = {Leda Sarı and Mark Hasegawa-Johnson and Chang D. Yoo},
doi = {10.1109/TASLP.2021.3126949},
journal = {IEEE Transactions on Audio, Speech, and Language},
keywords = {recognition},
pages = {3515-3525},
title = {Counterfactually Fair Automatic Speech Recognition},
volume = {29},
year = {2021}
}
Heting Gao, Junrui Ni, Yang Zhang, Kaizhi Qian, Shiyu Chang, & Mark Hasegawa-Johnson. Zero-shot Cross-Lingual Phonetic Recognition with External Language Embedding. Proc. Interspeech, pp. 1304-1308, 2021
@inproceedings{gao2021zero-shot,
author = {Heting Gao and Junrui Ni and Yang Zhang and Kaizhi Qian and Shiyu Chang and Mark Hasegawa-Johnson},
booktitle = {Proc. Interspeech},
doi = {10.21437/Interspeech.2021-1843},
keywords = {recognition},
pages = {1304--1308},
title = {Zero-shot Cross-Lingual Phonetic Recognition with External Language Embedding},
year = {2021}
}
Siyuan Feng, Piotr Żelasko, Laureano Moro-Velázquez, Ali Abavisani, Mark Hasegawa-Johnson, Odette Scharenborg, & Najim Dehak. How Phonotactics Affect Multilingual and Zero-shot ASR Performance. Proc. ICASSP, pp. 7238-7242, 2021
@inproceedings{feng2021how,
author = {Siyuan Feng and Piotr Żelasko and Laureano Moro-Velázquez and Ali Abavisani and Mark Hasegawa-Johnson and Odette Scharenborg and Najim Dehak},
booktitle = {Proc. ICASSP},
doi = {10.1109/ICASSP39728.2021.9414478},
keywords = {recognition},
pages = {7238-7242},
title = {How Phonotactics Affect Multilingual and Zero-shot ASR Performance},
year = {2021}
}
Liming Wang, Xinsheng Wang, Mark Hasegawa-Johnson, Odette Scharenborg, & Najim Dehak. Align or Attend? Toward More Efficient and Accurate Spoken Word Discovery Using Speech-to-Image Retrieval. Proc. ICASSP, 2021
@inproceedings{wang2021align,
author = {Liming Wang and Xinsheng Wang and Mark Hasegawa-Johnson and Odette Scharenborg and Najim Dehak},
booktitle = {Proc. ICASSP},
doi = {10.1109/ICASSP39728.2021.9414418},
keywords = {recognition},
title = {Align or Attend? Toward More Efficient and Accurate Spoken Word Discovery Using Speech-to-Image Retrieval},
year = {2021}
}
John Harvill, Dias Issa, Mark Hasegawa-Johnson, & Changdong Yoo. Synthesis of New Words for Improved Dysarthric Speech Recognition on an Expanded Vocabulary. Proc. ICASSP, pp. 6428-6432, 2021
@inproceedings{harvill2021synthesis,
author = {John Harvill and Dias Issa and Mark Hasegawa-Johnson and Changdong Yoo},
booktitle = {Proc. ICASSP},
doi = {10.1109/ICASSP39728.2021.9414869},
keywords = {recognition},
pages = {6428-6432},
title = {Synthesis of New Words for Improved Dysarthric Speech Recognition on an Expanded Vocabulary},
year = {2021}
}
Kaizhi Qian, Yang Zhang, Shiyu Chang, Chuang Gan, David D. Cox, Mark Hasegawa-Johnson, & Jinjun Xiong. Global Rhythm Style Transfer Without Text Transcriptions. ICML, 2021
@inproceedings{qian2021global,
author = {Kaizhi Qian and Yang Zhang and Shiyu Chang and Chuang Gan and David D. Cox and Mark Hasegawa-Johnson and Jinjun Xiong},
booktitle = {ICML},
keywords = {synthesis},
title = {Global Rhythm Style Transfer Without Text Transcriptions},
url = {http://proceedings.mlr.press/v139/qian21b/qian21b.pdf},
year = {2021}
}
Xinsheng Wang, Siyuan Feng, Jihua Zhu, Mark Hasegawa-Johnson, & Odette Scharenborg. Show and Speak: Directly Synthesize Spoken Description of Images. Proc. ICASSP, 2021
@inproceedings{wang2021show,
author = {Xinsheng Wang and Siyuan Feng and Jihua Zhu and Mark Hasegawa-Johnson and Odette Scharenborg},
booktitle = {Proc. ICASSP},
doi = {10.1109/ICASSP39728.2021.9414021},
keywords = {synthesis},
title = {Show and Speak: Directly Synthesize Spoken Description of Images},
year = {2021}
}
Junzhe Zhu, Raymond Yeh, & Mark Hasegawa-Johnson. Multi-Decoder DPRNN: Source Separation for Variable Number of Speakers. Proc. ICASSP, pp. 3420-3424, 2021
@inproceedings{zhu2021multi-decoder,
author = {Junzhe Zhu and Raymond Yeh and Mark Hasegawa-Johnson},
booktitle = {Proc. ICASSP},
doi = {10.1109/ICASSP39728.2021.9414205},
keywords = {synthesis},
pages = {3420-3424},
title = {Multi-Decoder DPRNN: Source Separation for Variable Number of Speakers},
year = {2021}
}
Heting Gao. Improving multilingual speech recognition systems. Master’s Thesis, University of Illinois, 2021
Junrui Ni. Enforcing constraints for multi-lingual and cross-lingual speech-to-text systems. Master’s Thesis, University of Illinois, 2021
@mastersthesis{ni2021enforcing,
author = {Junrui Ni},
school = {University of Illinois},
title = {Enforcing constraints for multi-lingual and cross-lingual speech-to-text systems},
url = {https://www.ideals.illinois.edu/items/123289/bitstreams/406101/data.pdf},
year = {2021}
}
Kaizhi Qian. Deep generative models for speech editing. Ph.D. Thesis, University of Illinois, Nov, 2020
@phdthesis{qian2020deep,
author = {Kaizhi Qian},
keywords = {synthesis},
month = {November},
school = {University of Illinois},
title = {Deep generative models for speech editing},
url = {https://www.ideals.illinois.edu/items/117215},
year = {2020}
}
Junzhe Zhu, Mark Hasegawa-Johnson, & Leda Sari. Identify Speakers in Cocktail Parties with End-to-End Attention. Proc. Interspeech, pp. 3092-3096, 2020
@inproceedings{zhu2020identify,
author = {Junzhe Zhu and Mark Hasegawa-Johnson and Leda Sari},
booktitle = {Proc. Interspeech},
doi = {10.21437/Interspeech.2020-2430},
keywords = {analysis},
pages = {3092--3096},
title = {Identify Speakers in Cocktail Parties with End-to-End Attention},
url = {https://arxiv.org/pdf/2005.11408.pdf},
year = {2020}
}
Ali Abavisani, & Mark Hasegawa-Johnson. Automatic Estimation of Intelligibility Measure for Consonants in Speech. Proc. Interspeech, pp. 1161-1165, 2020
@inproceedings{abavisani2020automatic,
author = {Ali Abavisani and Mark Hasegawa-Johnson},
booktitle = {Proc. Interspeech},
doi = {10.21437/Interspeech.2020-2121},
keywords = {analysis},
pages = {1161--1165},
title = {Automatic Estimation of Intelligibility Measure for Consonants in Speech},
url = {https://arxiv.org/abs/2005.06065},
year = {2020}
}
Mark Hasegawa-Johnson, Leanne Rolston, Camille Goudeseune, Gina-Anne Levow, & Katrin Kirchhoff. Grapheme-to-Phoneme Transduction for Cross-Language ASR. Lecture Notes in Computer Science, vol. 12379, pp. 3-19, 2020
@article{hasegawajohnson2020grapheme-to-phoneme,
author = {Mark Hasegawa-Johnson and Leanne Rolston and Camille Goudeseune and Gina-Anne Levow and Katrin Kirchhoff},
doi = {10.1007/978-3-030-59430-5_1},
journal = {Lecture Notes in Computer Science},
keywords = {recognition},
pages = {3-19},
title = {Grapheme-to-Phoneme Transduction for Cross-Language ASR},
volume = {12379},
year = {2020}
}
Jialu Li, & Mark Hasegawa-Johnson. Autosegmental Neural Nets: Should Phones and Tones be Synchronous or Asynchronous?. Proc. Interspeech, pp. 1027-1031, 2020
@inproceedings{li2020autosegmental,
author = {Jialu Li and Mark Hasegawa-Johnson},
booktitle = {Proc. Interspeech},
doi = {10.21437/Interspeech.2020-1834},
keywords = {recognition},
pages = {1027--1031},
title = {Autosegmental Neural Nets: Should Phones and Tones be Synchronous or Asynchronous?},
url = {http://arxiv.org/abs/2007.14351},
year = {2020}
}
Piotr Żelasko, Laureano Moro-Velázquez, Mark Hasegawa-Johnson, Odette Scharenborg, & Najim Dehak. That Sounds Familiar: An Analysis of Phonetic Representations Transfer Across Languages. Proc. Interspeech 2020, pp. 3705-3709, 2020
@inproceedings{zelasko2020that,
author = {Piotr Żelasko and Laureano Moro-Velázquez and Mark Hasegawa-Johnson and Odette Scharenborg and Najim Dehak},
booktitle = {Proc. Interspeech 2020},
doi = {10.21437/Interspeech.2020-2513},
keywords = {recognition},
pages = {3705--3709},
title = {That Sounds Familiar: An Analysis of Phonetic Representations Transfer Across Languages},
year = {2020}
}
Justin van der Hout, Mark Hasegawa-Johnson, & Odette Scharenborg. Evaluating Automatically Generated Phoneme Captions for Images. Proc. Interspeech, pp. 2317-2321, 2020
@inproceedings{van2020evaluating,
author = {Justin van der Hout and Mark Hasegawa-Johnson and Odette Scharenborg},
booktitle = {Proc. Interspeech},
doi = {10.21437/Interspeech.2020-2870},
keywords = {recognition},
pages = {2317--2321},
title = {Evaluating Automatically Generated Phoneme Captions for Images},
url = {https://arxiv.org/abs/2007.15916},
year = {2020}
}
Liming Wang, & Mark Hasegawa-Johnson. A DNN-HMM-DNN Hybrid Model for Discovering Word-Like Units from Spoken Captions and Image Regions. Proc. Interspeech, pp. 1456-1460, 2020
@inproceedings{wang2020a,
author = {Liming Wang and Mark Hasegawa-Johnson},
booktitle = {Proc. Interspeech},
doi = {10.21437/Interspeech.2020-1148},
keywords = {recognition},
pages = {1456--1460},
title = {A DNN-HMM-DNN Hybrid Model for Discovering Word-Like Units from Spoken Captions and Image Regions},
year = {2020}
}
Liming Wang, & Mark Hasegawa-Johnson. Multimodal word discovery and retrieval with spoken descriptions and visual concepts. IEEE Transactions on Audio, Speech and Language, vol. 28, pp. 1560-1573, 2020
@article{wang2020multimodal,
author = {Liming Wang and Mark Hasegawa-Johnson},
doi = {10.1109/TASLP.2020.2996082},
journal = {IEEE Transactions on Audio, Speech and Language},
keywords = {recognition},
pages = {1560-1573},
title = {Multimodal word discovery and retrieval with spoken descriptions and visual concepts},
volume = {28},
year = {2020}
}
Mark Hasegawa-Johnson. Multimodal Distant Supervision. NeurIPS Workshop on Self-Supervised Learning for Speech and Audio, 2020
@inproceedings{hasegawajohnson2020multimodal,
author = {Mark Hasegawa-Johnson},
booktitle = {NeurIPS Workshop on Self-Supervised Learning for Speech and Audio},
title = {Multimodal Distant Supervision},
url = {https://slideslive.com/38938462/multimodal-distant-supervision},
year = {2020}
}
Leda Sarı, Samuel Thomas, & Mark Hasegawa-Johnson. Training Spoken Language Understanding Systems with Non-Parallel Speech and Text. Proc. ICASSP, pp. 8109-8113, 2020
@inproceedings{sari2020training,
author = {Leda Sarı and Samuel Thomas and Mark Hasegawa-Johnson},
booktitle = {Proc. ICASSP},
doi = {10.1109/ICASSP40776.2020.9054664},
keywords = {recognition},
pages = {8109-8113},
title = {Training Spoken Language Understanding Systems with Non-Parallel Speech and Text},
year = {2020}
}
Leda Sarı, & Mark Hasegawa-Johnson. Deep F-Measure Maximization for End-to-End Speech Understanding. Proc. Interspeech, pp. 1580-1584, 2020
@inproceedings{sari2020deep,
author = {Leda Sarı and Mark Hasegawa-Johnson},
booktitle = {Proc. Interspeech},
doi = {10.21437/Interspeech.2020-1949},
keywords = {recognition},
pages = {1580--1584},
title = {Deep F-Measure Maximization for End-to-End Speech Understanding},
year = {2020}
}
Kaizhi Qian, Yang Zhang, Shiyu Chang, Mark Hasegawa-Johnson, & David Cox. Unsupervised Speech Decomposition via Triple Information Bottleneck. Proc. International Conference on Machine Learning (ICML), vol. 119, pp. 7836-7846, 2020
@inproceedings{qian2020unsupervised,
author = {Kaizhi Qian and Yang Zhang and Shiyu Chang and Mark Hasegawa-Johnson and David Cox},
booktitle = {Proc. International Conference on Machine Learning (ICML)},
demo = {https://auspicious3000.github.io/SpeechSplit-Demo/},
keywords = {synthesis},
pages = {7836--7846},
title = {Unsupervised Speech Decomposition via Triple Information Bottleneck},
url = {http://proceedings.mlr.press/v119/qian20a.html},
volume = {119},
year = {2020}
}
Kaizhi Qian, Zeyu Jin, Mark Hasegawa-Johnson, & Gautham Mysore. F0-Consistent Many-to-Many Non-Parallel Voice Conversion via Conditional Autoencoder. Proc. ICASSP, pp. 6284-6288, 2020
@inproceedings{qian2020f0-consistent,
author = {Kaizhi Qian and Zeyu Jin and Mark Hasegawa-Johnson and Gautham Mysore},
booktitle = {Proc. ICASSP},
doi = {10.1109/ICASSP40776.2020.9054734},
keywords = {synthesis},
pages = {6284-6288},
title = {F0-Consistent Many-to-Many Non-Parallel Voice Conversion via Conditional Autoencoder},
year = {2020}
}
Tarek Sakakini, Jong Yoon Lee, Aditya Srinivasa, Renato Azevedo, Victor Sadauskas, Kuangxiao Gu, Suma Bhat, Dan Morrow, James Graumlich, Saqib Walayat, Mark Hasegawa-Johnson, Donald Wilpern, & Ann Willemsen-Dunlap. Automatic Text Simplification of Health Materials in Low-Resource Domains. LOUHI: 11th International Workshop on Health Text Mining and Information Analysis, 2020
@inproceedings{sakakini2020automatic,
author = {Tarek Sakakini and Jong Yoon Lee and Aditya Srinivasa and Renato Azevedo and Victor Sadauskas and Kuangxiao Gu and Suma Bhat and Dan Morrow and James Graumlich and Saqib Walayat and Mark Hasegawa-Johnson and Donald Wilpern and Ann Willemsen-Dunlap},
booktitle = {LOUHI: 11th International Workshop on Health Text Mining and Information Analysis},
keywords = {synthesis},
title = {Automatic Text Simplification of Health Materials in Low-Resource Domains},
url = {https://louhi2020.fbk.eu/},
year = {2020}
}
Daniel Morrow, Renato F.L. Azevedo, Leda Sari, Kuangxiao Gu, Tarek Sakakini, Mark Hasegawa-Johnson, Suma Bhat, James Graumlich, Thomas Huang, Andrew Hariharan, Yunxin Shao, & Elizabeth Cox. Closing the Loop in Computer Agent/Patient Communication. Proceedings of the 2020 Human Factors and Ergonomics Society Annual Meeting, Chicago, IL, 2020
@inproceedings{morrow2020closing,
address = {Chicago, IL},
author = {Daniel Morrow and Renato F.L. Azevedo and Leda Sari and Kuangxiao Gu and Tarek Sakakini and Mark Hasegawa-Johnson and Suma Bhat and James Graumlich and Thomas Huang and Andrew Hariharan and Yunxin Shao and Elizabeth Cox},
booktitle = {Proceedings of the 2020 Human Factors and Ergonomics Society Annual Meeting},
keywords = {synthesis},
title = {Closing the Loop in Computer Agent/Patient Communication},
year = {2020}
}
Heejin Kim, & Mark Hasegawa-Johnson. Communication Improves When Human or Computer Listeners Adapt to Dysarthria. Acoustic Analysis of Pathologies: From Infancy to Young Adulthood, Amy Neustein and Hemant Patil, eds., Walter De Gruyter, Inc., Boston/Berlin, pp. 181-197, 2020
@incollection{kim2020communication,
address = {Boston/Berlin},
author = {Heejin Kim and Mark Hasegawa-Johnson},
booktitle = {Acoustic Analysis of Pathologies: From Infancy to Young Adulthood},
doi = {10.1515/9781501513138-005},
editor = {Amy Neustein and Hemant Patil},
pages = {181--197},
publisher = {Walter De Gruyter, Inc.},
title = {Communication Improves When Human or Computer Listeners Adapt to Dysarthria},
year = {2020}
}
Junzhe Zhu. Multi-decoder DPRNN high accuracy source counting and separation. Master’s Thesis, University of Illinois, 2020
@mastersthesis{zhu2020multi,
author = {Junzhe Zhu},
keywords = {enhancement},
school = {University of Illinois},
title = {Multi-decoder DPRNN high accuracy source counting and separation},
url = {https://www.ideals.illinois.edu/items/116889},
year = {2020}
}
Ningkai Wu. Semi-supervised cycle-consistency training for end-to-end ASR using unpaired speech. Master’s Thesis, University of Illinois, 2020
@mastersthesis{wu2020semi,
author = {Ningkai Wu},
keywords = {recognition},
school = {University of Illinois},
title = {Semi-supervised cycle-consistency training for end-to-end ASR using unpaired speech},
url = {https://www.ideals.illinois.edu/items/115809},
year = {2020}
}
Liming Wang. A translation framework for discovering word-like units from visual scenes and spoken descriptions. Master’s Thesis, University of Illinois, 2020
@mastersthesis{wang2020translation,
author = {Liming Wang},
keywords = {analysis},
school = {University of Illinois},
title = {A translation framework for discovering word-like units from visual scenes and spoken descriptions},
url = {https://www.ideals.illinois.edu/items/115664},
year = {2020}
}
Junrui Ni, Mark Hasegawa-Johnson, & Odette Scharenborg. The Time-Course of Phoneme Category Adaptation in Deep Neural Networks. Lecture Notes in Artificial Intelligence, vol. 11816, pp. 3-18, Oct, 2019
@article{ni2019the,
author = {Junrui Ni and Mark Hasegawa-Johnson and Odette Scharenborg},
doi = {10.1007/978-3-030-31372-2_1},
journal = {Lecture Notes in Artificial Intelligence},
keywords = {analysis},
month = {October},
pages = {3-18},
title = {The Time-Course of Phoneme Category Adaptation in Deep Neural Networks},
url = {https://www.researchgate.net/profile/Odette_Scharenborg/publication/336096664_The_Time-Course_of_Phoneme_Category_Adaptation_in_Deep_Neural_Networks/links/5db0627e4585155e27f81326/The-Time-Course-of-Phoneme-Category-Adaptation-in-Deep-Neural-Networks.pdf},
volume = {11816},
year = {2019}
}
Wenda Chen. Modeling phones, keywords, topics and intents in spoken languages. Master’s Thesis, University of Illinois, Jul, 2019
Xuesong Yang. Dealing with linguistic mismatches for automatic speech recognition. Ph.D. Thesis, University of Illinois, May, 2019
@phdthesis{yang2019dealing,
author = {Xuesong Yang},
keywords = {recognition},
month = {May},
school = {University of Illinois},
title = {Dealing with linguistic mismatches for automatic speech recognition},
url = {https://www.ideals.illinois.edu/items/112306},
year = {2019}
}
Leda Sari, Samuel Thomas, Mark Hasegawa-Johnson, & Michael Picheny. Pre-Training of Speaker Embeddings for Low-Latency Speaker Change Detection in Broadcast News. Proc. ICASSP, pp. 3093:1-5, 2019
@inproceedings{sari2019pre-training,
author = {Leda Sari and Samuel Thomas and Mark Hasegawa-Johnson and Michael Picheny},
booktitle = {Proc. ICASSP},
doi = {10.1109/ICASSP.2019.8683612},
keywords = {analysis},
pages = {3093:1-5},
title = {Pre-Training of Speaker Embeddings for Low-Latency Speaker Change Detection in Broadcast News},
year = {2019}
}
Odette Scharenborg, Jiska Koemans, Cybelle Smith, Mark A. Hasegawa-Johnson, & Kara D. Federmeier. The Neural Correlates Underlying Lexically-Guided Perceptual Learning. Proc. Interspeech, pp. 1223-1227, 2019
@inproceedings{scharenborg2019the,
author = {Odette Scharenborg and Jiska Koemans and Cybelle Smith and Mark A. Hasegawa-Johnson and Kara D. Federmeier},
booktitle = {Proc. Interspeech},
doi = {10.21437/Interspeech.2019-2328},
keywords = {analysis},
pages = {1223--1227},
title = {The Neural Correlates Underlying Lexically-Guided Perceptual Learning},
year = {2019}
}
Mary Pietrowicz, Carla Agurto, Jonah Casebeer, Mark Hasegawa-Johnson, Karrie Karahalios, & Guillermo Cecchi. Dimensional Analysis of Laughter in Female Conversational Speech. Proc. ICASSP, pp. 6600-6604, 2019
@inproceedings{pietrowicz2019dimensional,
author = {Mary Pietrowicz and Carla Agurto and Jonah Casebeer and Mark Hasegawa-Johnson and Karrie Karahalios and Guillermo Cecchi},
booktitle = {Proc. ICASSP},
doi = {10.1109/ICASSP.2019.8683566},
keywords = {analysis},
pages = {6600-6604},
title = {Dimensional Analysis of Laughter in Female Conversational Speech},
url = {https://ieeexplore.ieee.org/abstract/document/8683566},
year = {2019}
}
Leda Sarı, Samuel Thomas, & Mark A. Hasegawa-Johnson. Learning Speaker Aware Offsets for Speaker Adaptation of Neural Networks. Proc. Interspeech 2019, pp. 769-773, 2019
@inproceedings{sar19_interspeech,
author = {Leda Sarı and Samuel Thomas and Mark A. Hasegawa-Johnson},
booktitle = {Proc. Interspeech 2019},
doi = {10.21437/Interspeech.2019-1788},
pages = {769--773},
title = {Learning Speaker Aware Offsets for Speaker Adaptation of Neural Networks},
year = {2019}
}
Di He, Xuesong Yang, Boon Pang Lim, Yi Liang, Mark Hasegawa-Johnson, & Deming Chen. When CTC Training Meets Acoustic Landmarks. ICASSP, pp. 5996-6000, 2019
@inproceedings{he2150when,
author = {Di He and Xuesong Yang and Boon Pang Lim and Yi Liang and Mark Hasegawa-Johnson and Deming Chen},
booktitle = {ICASSP},
doi = {10.1109/ICASSP.2019.8683607},
keywords = {recognition},
pages = {5996-6000},
title = {When CTC Training Meets Acoustic Landmarks},
year = {2019}
}
Liming Wang, & Mark A. Hasegawa-Johnson. Multimodal Word Discovery and Retrieval with Phone Sequence and Image Concepts. Proc. Interspeech, pp. 2683-2687, 2019
@inproceedings{wang2019multimodal,
author = {Liming Wang and Mark A. Hasegawa-Johnson},
booktitle = {Proc. Interspeech},
doi = {10.21437/Interspeech.2019-1487},
keywords = {recognition},
pages = {2683--2687},
title = {Multimodal Word Discovery and Retrieval with Phone Sequence and Image Concepts},
year = {2019}
}
Mark Hasegawa-Johnson, Najim Dehak, & Odette Scharenborg. Position Paper: Indirect Supervision for Dialog Systems in Unwritten Languages. International Workshop on Spoken Dialog Systems, 2019
@inproceedings{hasegawajohnson2019position,
author = {Mark Hasegawa-Johnson and Najim Dehak and Odette Scharenborg},
booktitle = {International Workshop on Spoken Dialog Systems},
keywords = {recognition},
title = {Position Paper: Indirect Supervision for Dialog Systems in Unwritten Languages},
url = {https://research.tudelft.nl/files/83277474/IWSDS_2019_Mark.pdf},
year = {2019}
}
Laureano Moro-Velazquez, JaeJin Cho, Shinji Watanabe, Mark A. Hasegawa-Johnson, Odette Scharenborg, Heejin Kim, & Najim Dehak. Study of the Performance of Automatic Speech Recognition Systems in Speakers with Parkinson’s Disease. Proc. Interspeech 2019, pp. 3875-3879, 2019
@inproceedings{morovelazquez2019study,
author = {Laureano Moro-Velazquez and JaeJin Cho and Shinji Watanabe and Mark A. Hasegawa-Johnson and Odette Scharenborg and Heejin Kim and Najim Dehak},
booktitle = {Proc. Interspeech 2019},
doi = {10.21437/Interspeech.2019-2993},
keywords = {recognition},
pages = {3875--3879},
title = {Study of the Performance of Automatic Speech Recognition Systems in Speakers with Parkinson’s Disease},
year = {2019}
}
Kaizhi Qian, Yang Zhang, Shiyu Chang, Xuesong Yang, & Mark Hasegawa-Johnson. AutoVC: Zero-Shot Voice Style Transfer with Only Autoencoder Loss. Proceedings of Machine Learning Research, vol. 97, pp. 5210-5219, 2019
@article{qian2019autovc:,
author = {Kaizhi Qian and Yang Zhang and Shiyu Chang and Xuesong Yang and Mark Hasegawa-Johnson},
demo = {https://auspicious3000.github.io/autovc-demo/},
journal = {Proceedings of Machine Learning Research},
keywords = {synthesis},
pages = {5210-5219},
title = {AutoVC: Zero-Shot Voice Style Transfer with Only Autoencoder Loss},
url = {http://proceedings.mlr.press/v97/qian19c/qian19c.pdf},
volume = {97},
year = {2019}
}
Daniel Morrow, Renato F. L. Azevedo, Rocio Garcia-Retamero, Mark Hasegawa-Johnson, Thomas Huang, William Schuh, Kuangxiao Gu, & Yang Zhang. Contextualizing numeric clinical test results for gist comprehension: Implications for EHR patient portals. Journal of Experimental Psychology: Applied, vol. 25, no. 1, pp. 41-61, 2019
@article{morrow2019contextualizing,
author = {Daniel Morrow and Renato F. L. Azevedo and Rocio Garcia-Retamero and Mark Hasegawa-Johnson and Thomas Huang and William Schuh and Kuangxiao Gu and Yang Zhang},
doi = {10.1037/xap0000203},
journal = {Journal of Experimental Psychology: Applied},
keywords = {synthesis},
number = {1},
pages = {41-61},
title = {Contextualizing numeric clinical test results for gist comprehension: Implications for EHR patient portals},
volume = {25},
year = {2019}
}
Renato F.L. Azevedo, Dan Morrow, Kuangxiao Gu, Thomas Huang, Mark Hasegawa-Johnson, P. Soni, S. Tang, Tarek Sakakini, Suma Bhat, Ann Willemsen-Dunlap, & James Graumlich. The Influence of Computer Agent Characteristics on User Preferences in Health Contexts. Proceedings of the 2019 Human Factors and Ergonomics Society Health Care Symposium, 2019
@inproceedings{azevedo2019the,
author = {Renato F.L. Azevedo and Dan Morrow and Kuangxiao Gu and Thomas Huang and Mark Hasegawa-Johnson and P. Soni and S. Tang and Tarek Sakakini and Suma Bhat and Ann Willemsen-Dunlap and James Graumlich},
booktitle = {Proceedings of the 2019 Human Factors and Ergonomics Society Health Care Symposium},
keywords = {synthesis},
title = {The Influence of Computer Agent Characteristics on User Preferences in Health Contexts},
year = {2019}
}
Yijia Xu. Acoustic Event, Spoken Keyword and Emotional Outburst Detection. Master’s Thesis, University of Illinois, 2019
@mastersthesis{xu2019acoustic,
author = {Yijia Xu},
keywords = {analysis},
school = {University of Illinois},
title = {Acoustic Event, Spoken Keyword and Emotional Outburst Detection},
url = {https://www.ideals.illinois.edu/items/112277},
year = {2019}
}
Van Hai Do, Nancy F. Chen, Boon Pang Lim, & Mark Hasegawa-Johnson. Multitask Learning for Phone Recognition of Underresourced Languages Using Mismatched Transcription. IEEE/ACM Transactions on Audio, Speech and Language Processing (TASLP), vol. 26, no. 3, pp. 501-514, Mar, 2018
@article{hai2018multitask,
author = {Van Hai Do and Nancy F. Chen and Boon Pang Lim and Mark Hasegawa-Johnson},
doi = {10.1109/TASLP.2017.2782360},
journal = {IEEE/ACM Transactions on Audio, Speech and Language Processing (TASLP)},
keywords = {recognition},
month = {March},
number = {3},
pages = {501-514},
title = {Multitask Learning for Phone Recognition of Underresourced Languages Using Mismatched Transcription},
url = {https://dl.acm.org/citation.cfm?id=3180760},
volume = {26},
year = {2018}
}
Yijia Xu, Mark Hasegawa-Johnson, & Nancy L. McElwain. Infant emotional outbursts detection in infant-parent spoken interactions. Proc. Interspeech, pp. 242-246, 2018
@inproceedings{xu2018infant,
author = {Yijia Xu and Mark Hasegawa-Johnson and Nancy L. McElwain},
booktitle = {Proc. Interspeech},
doi = {10.21437/Interspeech.2018-2429},
keywords = {analysis},
pages = {242--246},
title = {Infant emotional outbursts detection in infant-parent spoken interactions},
year = {2018}
}
Mark Hasegawa-Johnson. Unwritten Languages as a Test Case for the Theory of Phonetic Universals. Plenary talk delivered at the International Symposium on Chinese Spoken Language Processing, 2018
@unpublished{hasegawajohnson2018unwritten,
author = {Mark Hasegawa-Johnson},
keywords = {analysis},
note = {Plenary talk delivered at the International Symposium on Chinese Spoken Language Processing},
title = {Unwritten Languages as a Test Case for the Theory of Phonetic Universals},
year = {2018}
}
Raymond A. Yeh, Teck Yian Lim, Chen Chen, Alexander G. Schwing, Mark Hasegawa-Johnson, & Minh N. Do. Image Restoration with Deep Generative Models. Proc. IEEE ICASSP, pp. 6772-6772, 2018
@inproceedings{yeh2018image,
author = {Raymond A. Yeh and Teck Yian Lim and Chen Chen and Alexander G. Schwing and Mark Hasegawa-Johnson and Minh N. Do},
booktitle = {Proc. IEEE ICASSP},
doi = {10.1109/ICASSP.2018.8462317},
keywords = {intelligence},
pages = {6772-6772},
title = {Image Restoration with Deep Generative Models},
url = {https://ieeexplore.ieee.org/abstract/document/8462317},
year = {2018}
}
Jialu Li, & Mark Hasegawa-Johnson. A Comparable Phone Set for the TIMIT Dataset Discovered in Clustering of Listen, Attend and Spell. NeurIPS Workshop on Interpretability and Robustness in Audio, Speech, and Language, 2018
@inproceedings{li2018a,
author = {Jialu Li and Mark Hasegawa-Johnson},
booktitle = {NeurIPS Workshop on Interpretability and Robustness in Audio, Speech, and Language},
keywords = {recognition},
title = {A Comparable Phone Set for the TIMIT Dataset Discovered in Clustering of Listen, Attend and Spell},
year = {2018}
}
Odette Scharenborg, Sebastian Tiesmeyer, Mark Hasegawa-Johnson, & Najim Dehak. Visualizing Phoneme Category Adaptation in Deep Neural Networks. Proc. Interspeech, pp. 1482-1486, 2018
@inproceedings{scharenborg2018visualizing,
author = {Odette Scharenborg and Sebastian Tiesmeyer and Mark Hasegawa-Johnson and Najim Dehak},
booktitle = {Proc. Interspeech},
doi = {10.21437/Interspeech.2018-1707},
keywords = {recognition},
pages = {1482--1486},
title = {Visualizing Phoneme Category Adaptation in Deep Neural Networks},
year = {2018}
}
Leda Sari, & Mark Hasegawa-Johnson. Speaker Adaptation with an Auxiliary Network. MLSLP (ISCA Workshop on Machine Learning for Speech and Language Processing), 2018
@inproceedings{sari2018mlslp,
author = {Leda Sari and Mark Hasegawa-Johnson},
booktitle = {MLSLP (ISCA Workshop on Machine Learning for Speech and Language Processing)},
keywords = {recognition},
title = {Speaker Adaptation with an Auxiliary Network},
year = {2018}
}
Di He, Boon Pang Lim, Xuesong Yang, Mark Hasegawa-Johnson, & Deming Chen. Improved ASR for under-resourced languages through Multi-task Learning with Acoustic Landmarks. Proc. Interspeech, pp. 2618-2622, 2018
@inproceedings{he2018improved,
author = {Di He and Boon Pang Lim and Xuesong Yang and Mark Hasegawa-Johnson and Deming Chen},
booktitle = {Proc. Interspeech},
doi = {10.21437/Interspeech.2018-1124},
keywords = {recognition},
pages = {2618--2622},
title = {Improved ASR for under-resourced languages through Multi-task Learning with Acoustic Landmarks},
year = {2018}
}
Amit Das. Speech Recognition with Probabilistic Transcriptions and End-to-End Systems Using Deep Learning. Ph.D. Thesis, University of Illinois, 2018
@phdthesis{das2018speech,
author = {Amit Das},
keywords = {recognition},
school = {University of Illinois},
title = {Speech Recognition with Probabilistic Transcriptions and End-to-End Systems Using Deep Learning},
url = {https://www.ideals.illinois.edu/items/109828},
year = {2018}
}
Amit Das, & Mark Hasegawa-Johnson. Improving DNNs Trained With Non-Native Transcriptions Using Knowledge Distillation and Target Interpolation. Proc. Interspeech, pp. 2434-2438, 2018
@inproceedings{das2018improving,
author = {Amit Das and Mark Hasegawa-Johnson},
booktitle = {Proc. Interspeech},
doi = {10.21437/Interspeech.2018-1450},
keywords = {recognition},
pages = {2434--2438},
title = {Improving DNNs Trained With Non-Native Transcriptions Using Knowledge Distillation and Target Interpolation},
year = {2018}
}
Lucas Ondel, Pierre Godard, Laurent Besacier, Elin Larsen, Mark Hasegawa-Johnson, Odette Scharenborg, Emmanuel Dupoux, Lukas Burget, François Yvon, & Sanjeev Khudanpur. Bayesian Models for Unit Discovery on a Very Low Resource Language. Proc. ICASSP, pp. 5939-5943, 2018
@inproceedings{ondel2018bayesian,
author = {Lucas Ondel and Pierre Godard and Laurent Besacier and Elin Larsen and Mark Hasegawa-Johnson and Odette Scharenborg and Emmanuel Dupoux and Lukas Burget and François Yvon and Sanjeev Khudanpur},
booktitle = {Proc. ICASSP},
doi = {10.1109/ICASSP.2018.8461545},
keywords = {recognition},
pages = {5939-5943},
title = {Bayesian Models for Unit Discovery on a Very Low Resource Language},
year = {2018}
}
Wenda Chen, Mark Hasegawa-Johnson, & Nancy Chen. Recognizing Zero-resourced Languages based on Mismatched Machine Transcriptions. Proc. ICASSP, pp. 5979-5983, 2018
@inproceedings{chen2018recognizing,
author = {Wenda Chen and Mark Hasegawa-Johnson and Nancy Chen},
booktitle = {Proc. ICASSP},
doi = {10.1109/ICASSP.2018.8462481},
keywords = {recognition},
pages = {5979-5983},
title = {Recognizing Zero-resourced Languages based on Mismatched Machine Transcriptions},
url = {https://ieeexplore.ieee.org/abstract/document/8462481},
year = {2018}
}
Xuesong Yang, Kartik Audhkhasi, Andrew Rosenberg, Samuel Thomas, Bhuvana Ramabhadran, & Mark Hasegawa-Johnson. Joint Modeling of Accents and Acoustics for Multi-Accent Speech Recognition. Proc. ICASSP, pp. 5989-5993, 2018
@inproceedings{yang2018joint,
author = {Xuesong Yang and Kartik Audhkhasi and Andrew Rosenberg and Samuel Thomas and Bhuvana Ramabhadran and Mark Hasegawa-Johnson},
booktitle = {Proc. ICASSP},
doi = {10.1109/ICASSP.2018.8462557},
keywords = {recognition},
pages = {5989-5993},
title = {Joint Modeling of Accents and Acoustics for Multi-Accent Speech Recognition},
url = {https://ieeexplore.ieee.org/abstract/document/8462557},
year = {2018}
}
Di He, Boon Pang Lim, Xuesong Yang, Mark Hasegawa-Johnson, & Deming Chen. Acoustic landmarks contain more information about the phone string than other frames for automatic speech recognition with deep neural network acoustic model. Journal of the Acoustical Society of America, vol. 143, no. 6, pp. 3207-3219, 2018
@article{he2018acoustic,
author = {Di He and Boon Pang Lim and Xuesong Yang and Mark Hasegawa-Johnson and Deming Chen},
doi = {10.1121/1.5039837},
journal = {Journal of the Acoustical Society of America},
keywords = {recognition},
number = {6},
pages = {3207-3219},
title = {Acoustic landmarks contain more information about the phone string than other frames for automatic speech recognition with deep neural network acoustic model},
url = {https://asa.scitation.org/doi/full/10.1121/1.5039837},
volume = {143},
year = {2018}
}
Leda Sari, Mark Hasegawa-Johnson, S. Kumaran, Georg Stemmer, & N. Nair Krishnakumar. Speaker Adaptive Audio-Visual Fusion for the Open-Vocabulary Section of AVICAR. Proc. Interspeech, pp. 3524-3528, 2018
@inproceedings{sari2018interspeech,
author = {Leda Sari and Mark Hasegawa-Johnson and S. Kumaran and Georg Stemmer and N. Nair Krishnakumar},
booktitle = {Proc. Interspeech},
doi = {10.21437/Interspeech.2018-2359},
keywords = {recognition},
pages = {3524--3528},
title = {Speaker Adaptive Audio-Visual Fusion for the Open-Vocabulary Section of AVICAR},
year = {2018}
}
Odette Scharenborg, Patrick Ebel, Francesco Ciannella, Mark Hasegawa-Johnson, & Najim Dehak. Building an ASR System for Mboshi Using a Cross-language Definition of Acoustic Units Approach. Proc. SLTU (Speech and Language Technology for Under-resourced languages), pp. 167-171, 2018
@inproceedings{scharenborg2018building,
author = {Odette Scharenborg and Patrick Ebel and Francesco Ciannella and Mark Hasegawa-Johnson and Najim Dehak},
booktitle = {Proc. SLTU (Speech and Language Technology for Under-resourced languages)},
doi = {10.21437/SLTU.2018-35},
keywords = {recognition},
pages = {167--171},
title = {Building an ASR System for Mboshi Using a Cross-language Definition of Acoustic Units Approach},
year = {2018}
}
Odette Scharenborg, Laurent Besacier, Alan Black, Mark Hasegawa-Johnson, Florian Metze, Graham Neubig, Sebastian Stüker, Pierre Godard, Markus Müller, Lucas Ondel, Shruti Palaskar, Philip Arthur, Francesco Ciannella, Mingxing Du, Elin Larsen, Danny Merkx, Rachid Riad, Liming Wang, & Emmanuel Dupoux. Linguistic Unit Discovery from Multi-Modal Inputs in Unwritten Languages: Summary of the Speaking Rosetta JSALT 2017 Workshop. Proc. ICASSP, 2018
@inproceedings{scharenborg2018linguistic,
author = {Odette Scharenborg and Laurent Besacier and Alan Black and Mark Hasegawa-Johnson and Florian Metze and Graham Neubig and Sebastian Stüker and Pierre Godard and Markus Müller and Lucas Ondel and Shruti Palaskar and Philip Arthur and Francesco Ciannella and Mingxing Du and Elin Larsen and Danny Merkx and Rachid Riad and Liming Wang and Emmanuel Dupoux},
booktitle = {Proc. ICASSP},
doi = {10.1109/ICASSP.2018.8461761},
keywords = {recognition},
title = {Linguistic Unit Discovery from Multi-Modal Inputs in Unwritten Languages: Summary of the Speaking Rosetta JSALT 2017 Workshop},
year = {2018}
}
Wenda Chen, Mark Hasegawa-Johnson, & Nancy F.Y. Chen. Topic and Keyword Identification for Low-resourced Speech Using Cross-Language Transfer Learning. Proc. Interspeech, pp. 2047-2051, 2018
@inproceedings{chen2018topic,
author = {Wenda Chen and Mark Hasegawa-Johnson and Nancy F.Y. Chen},
booktitle = {Proc. Interspeech},
doi = {10.21437/Interspeech.2018-1283},
keywords = {recognition},
pages = {2047--2051},
title = {Topic and Keyword Identification for Low-resourced Speech Using Cross-Language Transfer Learning},
year = {2018}
}
Mark Hasegawa-Johnson, Alan Black, Lucas Ondel, Odette Scharenborg, & Francesco Ciannella. Image2speech: Automatically generating audio descriptions of images. Journal of the International Science and General Applications (ISGA), vol. 1, no. 1, 2018
@article{hasegawajohnson2018image2speech:,
author = {Mark Hasegawa-Johnson and Alan Black and Lucas Ondel and Odette Scharenborg and Francesco Ciannella},
journal = {Journal of the International Science and General Applications (ISGA)},
keywords = {synthesis},
number = {1},
title = {Image2speech: Automatically generating audio descriptions of images},
url = {https://www.researchgate.net/profile/Kamel-Smaili-2/publication/354700462_INTERNATIONAL_CONFERENCE_ON_NATURAL_LANGUAGE_SIGNAL_AND_SPEECH_PROCESSING_Casablanca_2017_Morocco_Sponsor_ICNLSSP_International_Conference_on_Natural_Language_Signal_and_Speech_Processing/links/6148a03ea595d06017dd226b/INTERNATIONAL-CONFERENCE-ON-NATURAL-LANGUAGE-SIGNAL-AND-SPEECH-PROCESSING-Casablanca-2017-Morocco-Sponsor-ICNLSSP-International-Conference-on-Natural-Language-Signal-and-Speech-Processing.pdf#page=66},
volume = {1},
year = {2018}
}
Teck Yian Lim, Raymond Yeh, Yijia Xu, Minh Do, & Mark Hasegawa-Johnson. Time-Frequency Networks for Audio Super-Resolution. Proc. ICASSP, 2018
@inproceedings{yian2018time-frequency,
author = {Teck Yian Lim and Raymond Yeh and Yijia Xu and Minh Do and Mark Hasegawa-Johnson},
booktitle = {Proc. ICASSP},
doi = {10.1109/ICASSP.2018.8462049},
keywords = {synthesis},
title = {Time-Frequency Networks for Audio Super-Resolution},
year = {2018}
}
Kaizhi Qian, Yang Zhang, Shiyu Chang, Xuesong Yang, Dinei Florencio, & Mark Hasegawa-Johnson. Deep Learning Based Speech Beamforming. Proc. ICASSP, pp. 5389-5393, 2018
@inproceedings{qian2018deep,
author = {Kaizhi Qian and Yang Zhang and Shiyu Chang and Xuesong Yang and Dinei Florencio and Mark Hasegawa-Johnson},
booktitle = {Proc. ICASSP},
doi = {10.1109/ICASSP.2018.8462430},
keywords = {synthesis},
pages = {5389-5393},
title = {Deep Learning Based Speech Beamforming},
year = {2018}
}
Renato F. L. Azevedo, Dan Morrow, James Graumlich, Ann Willemsen-Dunlap, Mark Hasegawa-Johnson, Thomas S. Huang, Kuangxiao Gu, Suma Bhat, Tarek Sakakini, Victor Sadauskas, & Donald J. Halpin. Using conversational agents to explain medication instructions to older adults. AMIA Annu Symp Proc., pp. 185–194, 2018
@inproceedings{azevedo2018using,
author = {Renato F. L. Azevedo and Dan Morrow and James Graumlich and Ann Willemsen-Dunlap and Mark Hasegawa-Johnson and Thomas S. Huang and Kuangxiao Gu and Suma Bhat and Tarek Sakakini and Victor Sadauskas and Donald J. Halpin},
booktitle = {AMIA Annu Symp Proc.},
keywords = {synthesis},
pages = {185–194},
pmid = {30815056},
title = {Using conversational agents to explain medication instructions to older adults},
url = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC6371340/},
year = {2018}
}
Renato Azevedo, Daniel G. Morrow, Kuangxiao Gu, Thomas Huang, Mark Allan Hasegawa-Johnson, James Graumlich, Victor Sadauskas, Tarek J. Sakakini, Suma Pallathadka Bhat, Ann M. Willemsen-Dunlap, & Donald J. Halpin. Computer Agents and Patient Memory for Medication Information. APA Annual Meeting, 2018
@inproceedings{azevedo2018computer,
author = {Renato Azevedo and Daniel G. Morrow and Kuangxiao Gu and Thomas Huang and Mark Allan Hasegawa-Johnson and James Graumlich and Victor Sadauskas and Tarek J. Sakakini and Suma Pallathadka Bhat and Ann M. Willemsen-Dunlap and Donald J. Halpin},
booktitle = {APA Annual Meeting},
keywords = {synthesis},
title = {Computer Agents and Patient Memory for Medication Information},
year = {2018}
}
Liming Wang. Multimodal semantic learning with context-correlated speeches and images. Unpublished B.S. thesis, University of Illinois, 2018
Kaizhi Qian. Speech enhancement using deep dilated CNN. Master’s Thesis, University of Illinois, 2018
@mastersthesis{qian2018speech,
author = {Kaizhi Qian},
keywords = {enhancement},
school = {University of Illinois},
title = {Speech enhancement using deep dilated CNN},
url = {https://www.ideals.illinois.edu/items/107741},
year = {2018}
}
Xiang Kong, Xuesong Yang, Jeung-Yoon Choi, Mark Hasegawa-Johnson, & Stefanie Shattuck-Hufnagel. Landmark-based consonant voicing detection on multilingual corpora. Acoustics 17, Boston, Jun, 2017
@inproceedings{kong2017landmark-based,
address = {Boston},
author = {Xiang Kong and Xuesong Yang and Jeung-Yoon Choi and Mark Hasegawa-Johnson and Stefanie Shattuck-Hufnagel},
booktitle = {Acoustics 17},
doi = {10.1121/1.4987203},
keywords = {analysis},
month = {June},
title = {Landmark-based consonant voicing detection on multilingual corpora},
url = {https://arxiv.org/pdf/1611.03533.pdf},
year = {2017}
}
Di He, Boon Pang Lim, Xuesong Yang, Mark Hasegawa-Johnson, & Deming Chen. Selecting frames for automatic speech recognition based on acoustic landmarks. Acoustics 17, Boston, Jun, 2017
@inproceedings{he2017selecting,
address = {Boston},
author = {Di He and Boon Pang Lim and Xuesong Yang and Mark Hasegawa-Johnson and Deming Chen},
booktitle = {Acoustics 17},
doi = {10.1121/1.4987204},
keywords = {analysis},
month = {June},
title = {Selecting frames for automatic speech recognition based on acoustic landmarks},
url = {https://asa.scitation.org/doi/abs/10.1121/1.4987204},
year = {2017}
}
Daniel Morrow, Mark Hasegawa-Johnson, Thomas Huang, William Schuh, Renato Azevedo, Kuangxiao Gu, Yang Zhang, Bidisha Roy, & Rocio Garcia-Retamero. A Multidisciplinary Approach to Designing and Evaluating Electronic Medical Record Portal Messages that Support Patient Self-Care. Journal of Biomedical Informatics, vol. 69, pp. 63-74, May, 2017
@article{morrow2018a,
author = {Daniel Morrow and Mark Hasegawa-Johnson and Thomas Huang and William Schuh and Renato Azevedo and Kuangxiao Gu and Yang Zhang and Bidisha Roy and Rocio Garcia-Retamero},
doi = {10.1016/j.jbi.2017.03.015},
journal = {Journal of Biomedical Informatics},
keywords = {synthesis},
month = {5},
pages = {63-74},
title = {A Multidisciplinary Approach to Designing and Evaluating Electronic Medical Record Portal Messages that Support Patient Self-Care},
volume = {69},
year = {2017}
}
Di He, Zuofu Cheng, Mark Hasegawa-Johnson, & Deming Chen. Using Approximated Auditory Roughness as a Pre-filtering Feature for Human Screaming and Affective Speech AED. Proc. Interspeech, pp. 1914-1918, 2017
@inproceedings{he2017using,
author = {Di He and Zuofu Cheng and Mark Hasegawa-Johnson and Deming Chen},
booktitle = {Proc. Interspeech},
doi = {10.21437/Interspeech.2017-593},
keywords = {analysis},
pages = {1914--1918},
title = {Using Approximated Auditory Roughness as a Pre-filtering Feature for Human Screaming and Affective Speech AED},
year = {2017}
}
Mary Pietrowicz. Exposing the Hidden Vocal Channel: Analysis of Vocal Expression. Ph.D. Thesis, University of Illinois, 2017
@phdthesis{pietrowicz2017exposing,
author = {Mary Pietrowicz},
keywords = {analysis},
school = {University of Illinois},
title = {Exposing the Hidden Vocal Channel: Analysis of Vocal Expression},
url = {https://www.ideals.illinois.edu/items/105321},
year = {2017}
}
Mary Pietrowicz, Mark Hasegawa-Johnson, & Karrie Karahalios. Discovering Dimensions of Perceived Vocal Expression in Semi-Structured, Unscripted Oral History Accounts. Proc. ICASSP, pp. 2901:1-4, 2017
@inproceedings{pietrowicz2017discovering,
author = {Mary Pietrowicz and Mark Hasegawa-Johnson and Karrie Karahalios},
booktitle = {Proc. ICASSP},
doi = {10.1109/ICASSP.2017.7953247},
keywords = {analysis},
pages = {2901:1-4},
title = {Discovering Dimensions of Perceived Vocal Expression in Semi-Structured, Unscripted Oral History Accounts},
year = {2017}
}
Roger Serwy. Hilbert Phase Methods for Glottal Activity Detection. Ph.D. Thesis, University of Illinois, 2017
@phdthesis{serwy2017hilbert,
author = {Roger Serwy},
keywords = {analysis},
school = {University of Illinois},
title = {Hilbert Phase Methods for Glottal Activity Detection},
url = {https://www.ideals.illinois.edu/items/102357},
year = {2017}
}
Mark Hasegawa-Johnson, Preethi Jyothi, Wenda Chen, & Van Hai Do. Mismatched Crowdsourcing: Mining Latent Skills to Acquire Speech Transcriptions. Proceedings of Asilomar, 2017
@inproceedings{hasegawajohnson000187mismatched,
author = {Mark Hasegawa-Johnson and Preethi Jyothi and Wenda Chen and Van Hai Do},
booktitle = {Proceedings of Asilomar},
doi = {10.1109/ACSSC.2017.8335558},
grant = {DARPA LORELEI},
keywords = {analysis},
title = {Mismatched Crowdsourcing: Mining Latent Skills to Acquire Speech Transcriptions},
year = {2017}
}
Shiyu Chang, Yang Zhang, Wei Han, Mo Yu, Xiaoxiao Guo, Wei Tan, Xiaodong Cui, Michael Witbrock, Mark Hasegawa-Johnson, & Thomas Huang. Dilated Recurrent Neural Networks. NIPS, 2017
@inproceedings{chang2017dilated,
author = {Shiyu Chang and Yang Zhang and Wei Han and Mo Yu and Xiaoxiao Guo and Wei Tan and Xiaodong Cui and Michael Witbrock and Mark Hasegawa-Johnson and Thomas Huang},
booktitle = {NIPS},
keywords = {intelligence},
title = {Dilated Recurrent Neural Networks},
url = {https://papers.nips.cc/paper/6613-dilated-recurrent-neural-networks.pdf},
year = {2017}
}
Shiyu Chang, Yang Zhang, Jiliang Tang, Dawei Yin, Yi Chang, Mark Hasegawa-Johnson, & Thomas Huang. Streaming Recommender Systems. WWW 2017, pp. 381-389, 2017
@inproceedings{chang2017streaming,
author = {Shiyu Chang and Yang Zhang and Jiliang Tang and Dawei Yin and Yi Chang and Mark Hasegawa-Johnson and Thomas Huang},
booktitle = {WWW 2017},
doi = {10.1145/3038912.3052627},
keywords = {intelligence},
pages = {381--389},
title = {Streaming Recommender Systems},
year = {2017}
}
Raymond Yeh, Chen Chen, Teck Yian Lim, Alexander G. Schwing, Mark Hasegawa-Johnson, & Minh N. Do. Semantic Image Inpainting with Deep Generative Models. CVPR, pp. 5485-5493, 2017
@inproceedings{yeh2017semantic,
author = {Raymond Yeh and Chen Chen and Teck Yian Lim and Alexander G. Schwing and Mark Hasegawa-Johnson and Minh N. Do},
booktitle = {CVPR},
keywords = {intelligence},
pages = {5485-5493},
title = {Semantic Image Inpainting with Deep Generative Models},
url = {https://openaccess.thecvf.com/content_cvpr_2017/html/Yeh_Semantic_Image_Inpainting_CVPR_2017_paper},
year = {2017}
}
Van Hai Do, Nancy F. Chen, Boon Pang Lim, & Mark Hasegawa-Johnson. Multi-Task Learning Using Mismatched Transcription for Under-Resourced Speech Recognition. Proc. Interspeech 2017, pp. 734-738, 2017
@inproceedings{do17_interspeech,
author = {Van Hai Do and Nancy F. Chen and Boon Pang Lim and Mark Hasegawa-Johnson},
booktitle = {Proc. Interspeech 2017},
doi = {10.21437/Interspeech.2017-788},
pages = {734--738},
title = {Multi-Task Learning Using Mismatched Transcription for Under-Resourced Speech Recognition},
year = {2017}
}
Odette Scharenborg, Francesco Ciannella, Shruti Palaskar, Alan Black, Florian Metze, Lucas Ondel, & Mark Hasegawa-Johnson. Building an ASR System for a Low-Resource Language Through the Adaptation of a High-Resource Language ASR System: Preliminary Results. Proc. Internat. Conference on Natural Language, Signal and Speech Processing (ICNLSSP), Casablanca, Morocco, 2017
@inproceedings{scharenborg2017building,
address = {Casablanca, Morocco},
author = {Odette Scharenborg and Francesco Ciannella and Shruti Palaskar and Alan Black and Florian Metze and Lucas Ondel and Mark Hasegawa-Johnson},
booktitle = {Proc. Internat. Conference on Natural Language, Signal and Speech Processing (ICNLSSP)},
keywords = {recognition},
title = {Building an ASR System for a Low-Resource Language Through the Adaptation of a High-Resource Language ASR System: Preliminary Results},
url = {https://www.researchgate.net/profile/Kamel-Smaili-2/publication/354824671_Proceedings_of_the_International_Conference_on_Natural_Language_Processing_Signal_and_Speech_Processing/links/63503ccc12cbac6a3eda8cac/Proceedings-of-the-International-Conference-on-Natural-Language-Processing-Signal-and-Speech-Processing.pdf#page=28},
year = {2017}
}
Wenda Chen, Mark Hasegawa-Johnson, Nancy F. Chen, & Boon Pang Lim. Mismatched Crowdsourcing from Multiple Annotator Languages For Recognizing Zero-resourced Languages: A Nullspace Clustering Approach. Proc. Interspeech, pp. 2789-2793, 2017
@inproceedings{chen2017mismatched,
author = {Wenda Chen and Mark Hasegawa-Johnson and Nancy F. Chen and Boon Pang Lim},
booktitle = {Proc. Interspeech},
doi = {10.21437/Interspeech.2017-1567},
keywords = {recognition},
pages = {2789--2793},
title = {Mismatched Crowdsourcing from Multiple Annotator Languages For Recognizing Zero-resourced Languages: A Nullspace Clustering Approach},
year = {2017}
}
Pavlos Papadopoulos, Ruchir Travadi, Colin Vaz, Nikolaos Malandrakis, Ulf Hermjakob, Nima Pourdamghani, Michael Pust, Boliang Zhang, Xiaoman Pan, Di Lu, Ying Lin, Ondrej Glembek, Murali Karthick B, Martin Karafiat, Lukas Burget, Mark Hasegawa-Johnson, Heng Ji, Jonathan May, Kevin Knight, & Shrikanth Narayanan. Team ELISA System for DARPA LORELEI Speech Evaluation 2016. Proc. Interspeech, pp. 2053-2057, 2017
@inproceedings{papadopoulos2017team,
author = {Pavlos Papadopoulos and Ruchir Travadi and Colin Vaz and Nikolaos Malandrakis and Ulf Hermjakob and Nima Pourdamghani and Michael Pust and Boliang Zhang and Xiaoman Pan and Di Lu and Ying Lin and Ondrej Glembek and Murali Karthick B and Martin Karafiat and Lukas Burget and Mark Hasegawa-Johnson and Heng Ji and Jonathan May and Kevin Knight and Shrikanth Narayanan},
booktitle = {Proc. Interspeech},
doi = {10.21437/Interspeech.2017-180},
keywords = {recognition},
pages = {2053--2057},
title = {Team ELISA System for DARPA LORELEI Speech Evaluation 2016},
year = {2017}
}
Amit Das, Mark Hasegawa-Johnson, & Karel Vesely. Deep Autoencoder Based Multi-task Learning Using Probabilistic Transcription. Proc. Interspeech, pp. 2073-2077, 2017
@inproceedings{das2017deep,
author = {Amit Das and Mark Hasegawa-Johnson and Karel Vesely},
booktitle = {Proc. Interspeech},
doi = {10.21437/Interspeech.2017-582},
keywords = {recognition},
pages = {2073--2077},
title = {Deep Autoencoder Based Multi-task Learning Using Probabilistic Transcription},
year = {2017}
}
Yang Zhang. Application of Generative Models in Speech Processing Tasks. Ph.D. Thesis, University of Illinois, 2017
@phdthesis{zhang2017generative,
author = {Yang Zhang},
keywords = {recognition},
school = {University of Illinois},
title = {Application of Generative Models in Speech Processing Tasks},
url = {https://www.ideals.illinois.edu/items/103415},
year = {2017}
}
Mark Hasegawa-Johnson, Alan Black, Lucas Ondel, Odette Scharenborg, & Francesco Ciannella. Image2speech: Automatically generating audio descriptions of images. Proc. Internat. Conference on Natural Language, Signal and Speech Processing (ICNLSSP), Casablanca, Morocco, 2017
@inproceedings{hasegawajohnson2017image2speech:,
address = {Casablanca, Morocco},
author = {Mark Hasegawa-Johnson and Alan Black and Lucas Ondel and Odette Scharenborg and Francesco Ciannella},
booktitle = {Proc. Internat. Conference on Natural Language, Signal and Speech Processing (ICNLSSP)},
keywords = {recognition},
title = {Image2speech: Automatically generating audio descriptions of images},
url = {https://www.researchgate.net/profile/Kamel-Smaili-2/publication/354700462_INTERNATIONAL_CONFERENCE_ON_NATURAL_LANGUAGE_SIGNAL_AND_SPEECH_PROCESSING_Casablanca_2017_Morocco_Sponsor_ICNLSSP_International_Conference_on_Natural_Language_Signal_and_Speech_Processing/links/6148a03ea595d06017dd226b/INTERNATIONAL-CONFERENCE-ON-NATURAL-LANGUAGE-SIGNAL-AND-SPEECH-PROCESSING-Casablanca-2017-Morocco-Sponsor-ICNLSSP-International-Conference-on-Natural-Language-Signal-and-Speech-Processing.pdf#page=66},
year = {2017}
}
Mark Hasegawa-Johnson, Preethi Jyothi, Daniel McCloy, Majid Mirbagheri, Giovanni di Liberto, Amit Das, Bradley Ekin, Chunxi Liu, Vimal Manohar, Hao Tang, Edmund C. Lalor, Nancy Chen, Paul Hager, Tyler Kekona, Rose Sloan, & Adrian KC Lee. ASR for Under-Resourced Languages from Probabilistic Transcription. IEEE/ACM Trans. Audio, Speech and Language, vol. 25, no. 1, pp. 46-59, 2017
@article{hasegawajohnson2017asr,
author = {Mark Hasegawa-Johnson and Preethi Jyothi and Daniel McCloy and Majid Mirbagheri and Giovanni di Liberto and Amit Das and Bradley Ekin and Chunxi Liu and Vimal Manohar and Hao Tang and Edmund C. Lalor and Nancy Chen and Paul Hager and Tyler Kekona and Rose Sloan and Adrian KC Lee},
doi = {10.1109/TASLP.2016.2621659},
issn = {2329-9290},
journal = {IEEE/ACM Trans. Audio, Speech and Language},
keywords = {recognition},
number = {1},
pages = {46-59},
title = {ASR for Under-Resourced Languages from Probabilistic Transcription},
volume = {25},
year = {2017}
}
Yang Zhang, Dinei Florêncio, & Mark Hasegawa-Johnson. Glottal Model Based Speech Beamforming for ad-hoc Microphone Arrays. Proc. Interspeech 2017, pp. 2675-2679, 2017
@inproceedings{zhang17j_interspeech,
author = {Yang Zhang and Dinei Florêncio and Mark Hasegawa-Johnson},
booktitle = {Proc. Interspeech 2017},
doi = {10.21437/Interspeech.2017-1659},
pages = {2675--2679},
title = {Glottal Model Based Speech Beamforming for ad-hoc Microphone Arrays},
year = {2017}
}
Kaizhi Qian, Yang Zhang, Shiyu Chang, Xuesong Yang, Dinei Florencio, & Mark Hasegawa-Johnson. Speech Enhancement Using Bayesian Wavenet. Proc. Interspeech, pp. 2013-2017, 2017
@inproceedings{qian2017speech,
author = {Kaizhi Qian and Yang Zhang and Shiyu Chang and Xuesong Yang and Dinei Florencio and Mark Hasegawa-Johnson},
booktitle = {Proc. Interspeech},
doi = {10.21437/Interspeech.2017-1672},
keywords = {synthesis},
pages = {2013--2017},
title = {Speech Enhancement Using Bayesian Wavenet},
year = {2017}
}
Emmanuel Dupoux, Odette Scharenborg, Graham Neubig, Laurent Besacier, Mark Hasegawa-Johnson, Alan Black, Florian Metze, & Sebastian Stüker. The Speaking Rosetta Stone – Discovering Grounded Linguistic Units for Languages without Orthography. Final report of the WS17 Frederick Jelinek Memorial Summer Workshop team, 2017
@unpublished{dupoux2017speaking,
author = {Emmanuel Dupoux and Odette Scharenborg and Graham Neubig and Laurent Besacier and Mark Hasegawa-Johnson and Alan Black and Florian Metze and Sebastian Stüker},
note = {Final report of the WS17 Frederick Jelinek Memorial Summer Workshop team},
title = {The Speaking Rosetta Stone - Discovering Grounded Linguistic Units for Languages without Orthography},
url = {https://www.lti.cs.cmu.edu/2017-jelinek-workshop},
year = {2017}
}
Tianyilin Zhu. Lipreading with convolutional and recurrent neural network models. Master’s Thesis, University of Illinois, 2017
@mastersthesis{zhu2017lipreading,
author = {Tianyilin Zhu},
keywords = {multimodal},
school = {University of Illinois},
title = {Lipreading with convolutional and recurrent neural network models},
url = {https://www.ideals.illinois.edu/items/102816},
year = {2017}
}
Van Hai Do, Nancy F. Chen, Boon Pang Lim, & Mark Hasegawa-Johnson. Speech recognition of under-resourced languages using mismatched transcriptions. International Conference on Asian Language Processing IALP, Tainan, Taiwan, Nov, 2016
@inproceedings{hai2016speech,
address = {Tainan, Taiwan},
author = {Van Hai Do and Nancy F. Chen and Boon Pang Lim and Mark Hasegawa-Johnson},
booktitle = {International Conference on Asian Language Processing IALP},
doi = {10.1109/IALP.2016.7875947},
keywords = {recognition},
month = {11},
title = {Speech recognition of under-resourced languages using mismatched transcriptions},
year = {2016}
}
Van Hai Do, Nancy F. Chen, Boon Pang Lim, & Mark Hasegawa-Johnson. A many-to-one phone mapping approach for cross-lingual speech recognition. 12th IEEE-RIVF International Conference on Computing and Communication Technologies, Hanoi, Vietnam, pp. 120-124, Nov, 2016
@inproceedings{hai2016a,
address = {Hanoi, Vietnam},
author = {Van Hai Do and Nancy F. Chen and Boon Pang Lim and Mark Hasegawa-Johnson},
booktitle = {12th IEEE-RIVF International Conference on Computing and Communication Technologies},
doi = {10.1109/RIVF.2016.7800280},
keywords = {recognition},
month = {11},
pages = {120-124},
title = {A many-to-one phone mapping approach for cross-lingual speech recognition},
year = {2016}
}
Daniel Morrow, Mark Hasegawa-Johnson, Thomas Huang, William Schuh, Rocio Garcia-Retamero, Renato Azevedo, Kuangxiao Gu, Yang Zhang, & Bidisha Roy. Multimedia formats can improve older adult comprehension of clinical test results: Implications for Designing Patient Portals. 28th APS Annual Convention (Association for Psychological Science), May, 2016
@inproceedings{morrow2016multimedia,
author = {Daniel Morrow and Mark Hasegawa-Johnson and Thomas Huang and William Schuh and Rocio Garcia-Retamero and Renato Azevedo and Kuangxiao Gu and Yang Zhang and Bidisha Roy},
booktitle = {28th APS Annual Convention (Association for Psychological Science)},
grant = {AHRQ R21HS022948},
keywords = {synthesis},
month = {May},
title = {Multimedia formats can improve older adult comprehension of clinical test results: Implications for Designing Patient Portals},
year = {2016}
}
Mark Hasegawa-Johnson. Speech Production, Speech Perception, and Phonology. Lecture given at the Winter School on Speech and Audio Processing, Chennai, India, Jan, 2016
@unpublished{hasegawajohnson2016speech,
author = {Mark Hasegawa-Johnson},
keywords = {analysis},
month = {January},
note = {Lecture given at the Winter School on Speech and Audio Processing, Chennai, India},
title = {Speech Production, Speech Perception, and Phonology},
year = {2016}
}
Mark Hasegawa-Johnson. Prosody. Lecture given at the Winter School on Speech and Audio Processing, Chennai, India, Jan, 2016
@unpublished{hasegawajohnson2016prosody,
author = {Mark Hasegawa-Johnson},
keywords = {analysis},
month = {January},
note = {Lecture given at the Winter School on Speech and Audio Processing, Chennai, India},
title = {Prosody},
year = {2016}
}
Mark Hasegawa-Johnson. Multivariate-State Models for Speech Recognition. Lecture given at the Winter School on Speech and Audio Processing, Chennai, India, Jan, 2016
@unpublished{hasegawajohnson2016multivariate,
author = {Mark Hasegawa-Johnson},
keywords = {analysis},
month = {January},
note = {Lecture given at the Winter School on Speech and Audio Processing, Chennai, India},
title = {Multivariate-State Models for Speech Recognition},
year = {2016}
}
Mark Hasegawa-Johnson. Limited Data Settings. Lecture given at the Winter School on Speech and Audio Processing, Chennai, India, Jan, 2016
@unpublished{hasegawajohnson2016limited,
author = {Mark Hasegawa-Johnson},
keywords = {analysis},
month = {January},
note = {Lecture given at the Winter School on Speech and Audio Processing, Chennai, India},
title = {Limited Data Settings},
year = {2016}
}
Xuesong Yang, Xiang Kong, Mark Hasegawa-Johnson, & Yanlu Xie. Landmark-based Pronunciation Error Identification on L2 Mandarin Chinese. Speech Prosody, pp. 247-251, 2016
@inproceedings{yang2016landmarkbased,
author = {Xuesong Yang and Xiang Kong and Mark Hasegawa-Johnson and Yanlu Xie},
booktitle = {Speech Prosody},
doi = {10.21437/SpeechProsody.2016-51},
keywords = {analysis},
pages = {247--251},
title = {Landmark-based Pronunciation Error Identification on L2 Mandarin Chinese},
year = {2016}
}
Karen Livescu, Frank Rudzicz, Eric Fosler-Lussier, Mark Hasegawa-Johnson, & Jeff Bilmes. Speech Production in Speech Technologies: Introduction to the CSL Special Issue. Computer Speech and Language, vol. 36, pp. 165-172, 2016
@article{livescu2016speech,
author = {Karen Livescu and Frank Rudzicz and Eric Fosler-Lussier and Mark Hasegawa-Johnson and Jeff Bilmes},
doi = {10.1016/j.csl.2015.11.002},
journal = {Computer Speech and Language},
keywords = {analysis},
pages = {165-172},
title = {Speech Production in Speech Technologies: Introduction to the CSL Special Issue},
volume = {36},
year = {2016}
}
Wenda Chen, Mark Hasegawa-Johnson, & Nancy F. Chen. Mismatched Crowdsourcing based Language Perception for Under-resourced Languages. Procedia Computer Science, vol. 81, pp. 23-29, 2016
@article{chen2016mismatched,
author = {Wenda Chen and Mark Hasegawa-Johnson and Nancy F. Chen},
doi = {10.1016/j.procs.2016.04.025},
journal = {Procedia Computer Science},
keywords = {analysis},
pages = {23--29},
title = {Mismatched Crowdsourcing based Language Perception for Under-resourced Languages},
volume = {81},
year = {2016}
}
Yanlu Xie, Mark Hasegawa-Johnson, Leyuan Qu, & Jinsong Zhang. Landmark of Mandarin Nasal Codas and its Application in Pronunciation Error Detection. Proc. ICASSP, 2016
@inproceedings{xie2016landmark,
author = {Yanlu Xie and Mark Hasegawa-Johnson and Leyuan Qu and Jinsong Zhang},
booktitle = {Proc. ICASSP},
doi = {10.1109/ICASSP.2016.7472703},
keywords = {analysis},
title = {Landmark of Mandarin Nasal Codas and its Application in Pronunciation Error Detection},
year = {2016}
}
Yang Zhang, Gautham Mysore, Florian Berthouzoz, & Mark Hasegawa-Johnson. Analysis of Prosody Increment Induced by Pitch Accents for Automatic Emphasis Correction. Speech Prosody, pp. 79-83, 2016
@inproceedings{zhang2016analysis,
author = {Yang Zhang and Gautham Mysore and Florian Berthouzoz and Mark Hasegawa-Johnson},
booktitle = {Speech Prosody},
doi = {10.21437/SpeechProsody.2016-17},
keywords = {analysis},
pages = {79--83},
title = {Analysis of Prosody Increment Induced by Pitch Accents for Automatic Emphasis Correction},
year = {2016}
}
Van Hai Do, Nancy F. Chen, Boon Pang Lim, & Mark Hasegawa-Johnson. Analysis of Mismatched Transcriptions Generated by Humans and Machines for Under-Resourced Languages. Proc. Interspeech, pp. 3863-3867, 2016
@inproceedings{hai2016analysis,
author = {Van Hai Do and Nancy F. Chen and Boon Pang Lim and Mark Hasegawa-Johnson},
booktitle = {Proc. Interspeech},
doi = {10.21437/Interspeech.2016-736},
keywords = {analysis},
pages = {3863--3867},
title = {Analysis of Mismatched Transcriptions Generated by Humans and Machines for Under-Resourced Languages},
year = {2016}
}
Xiang Kong, Preethi Jyothi, & Mark Hasegawa-Johnson. Performance Improvement of Probabilistic Transcriptions with Language-specific Constraints. Procedia Computer Science, vol. 81, pp. 30-36, 2016
@article{kong2016performance,
author = {Xiang Kong and Preethi Jyothi and Mark Hasegawa-Johnson},
doi = {10.1016/j.procs.2016.04.026},
grant = {DARPA LORELEI},
journal = {Procedia Computer Science},
keywords = {analysis},
pages = {30-36},
title = {Performance Improvement of Probabilistic Transcriptions with Language-specific Constraints},
url = {http://www.sciencedirect.com/science/article/pii/S1877050916300400},
volume = {81},
year = {2016}
}
Lav Varshney, Preethi Jyothi, & Mark Hasegawa-Johnson. Language Coverage for Mismatched Crowdsourcing. Workshop on Information Theory and Applications, 2016
@inproceedings{varshney2016language,
author = {Lav Varshney and Preethi Jyothi and Mark Hasegawa-Johnson},
booktitle = {Workshop on Information Theory and Applications},
doi = {10.1109/ITA.2016.7888198},
grant = {NSF 1550145},
keywords = {analysis},
title = {Language Coverage for Mismatched Crowdsourcing},
year = {2016}
}
Shiyu Chang, Yang Zhang, Jiliang Tang, Dawei Yin, Yi Chang, Mark Hasegawa-Johnson, & Thomas Huang. Positive-Unlabeled Learning in Streaming Networks. KDD, pp. 755-764, 2016
@inproceedings{chang2016positive-unlabeled,
author = {Shiyu Chang and Yang Zhang and Jiliang Tang and Dawei Yin and Yi Chang and Mark Hasegawa-Johnson and Thomas Huang},
booktitle = {KDD},
keywords = {intelligence},
pages = {755-764},
title = {Positive-Unlabeled Learning in Streaming Networks},
url = {http://www.kdd.org/kdd2016/subtopic/view/positive-unlabeled-learning-in-streaming-networks},
year = {2016}
}
Raymond Yeh, Mark Hasegawa-Johnson, & Minh Do. Stable and Symmetric Filter Convolutional Neural Network. Proc. ICASSP, pp. 2652-2656, 2016
@inproceedings{yeh2016stable,
author = {Raymond Yeh and Mark Hasegawa-Johnson and Minh Do},
booktitle = {Proc. ICASSP},
doi = {10.1109/ICASSP.2016.7472158},
keywords = {intelligence},
pages = {2652-2656},
title = {Stable and Symmetric Filter Convolutional Neural Network},
year = {2016}
}
Amit Das, & Mark Hasegawa-Johnson. An investigation on training deep neural networks using probabilistic transcription. Proc. Interspeech, pp. 3858-3862, 2016
@inproceedings{das2016an,
author = {Amit Das and Mark Hasegawa-Johnson},
booktitle = {Proc. Interspeech},
doi = {10.21437/Interspeech.2016-655},
keywords = {recognition},
pages = {3858--3862},
software = {https://github.com/irrawaddy28/interspeech16-mtl},
title = {An investigation on training deep neural networks using probabilistic transcription},
year = {2016}
}
Amit Das, Preethi Jyothi, & Mark Hasegawa-Johnson. Automatic speech recognition using probabilistic transcriptions in Swahili, Amharic and Dinka. Proc. Interspeech, pp. 3524-3528, 2016
@inproceedings{das2016automatic,
author = {Amit Das and Preethi Jyothi and Mark Hasegawa-Johnson},
booktitle = {Proc. Interspeech},
doi = {10.21437/Interspeech.2016-657},
keywords = {recognition},
pages = {3524--3528},
software = {https://github.com/irrawaddy28/africanpaper},
title = {Automatic speech recognition using probabilistic transcriptions in Swahili, Amharic and Dinka},
year = {2016}
}
Chunxi Liu, Preethi Jyothi, Hao Tang, Vimal Manohar, Rose Sloan, Tyler Kekona, Mark Hasegawa-Johnson, & Sanjeev Khudanpur. Adapting ASR for Under-Resourced Languages Using Mismatched Transcriptions. Proc. ICASSP, pp. 5840-5844, 2016
@inproceedings{liu2016adapting,
author = {Chunxi Liu and Preethi Jyothi and Hao Tang and Vimal Manohar and Rose Sloan and Tyler Kekona and Mark Hasegawa-Johnson and Sanjeev Khudanpur},
booktitle = {Proc. ICASSP},
doi = {10.1109/ICASSP.2016.7472797},
keywords = {recognition},
pages = {5840-5844},
title = {Adapting ASR for Under-Resourced Languages Using Mismatched Transcriptions},
year = {2016}
}
Kaizhi Qian, Yang Zhang, & Mark Hasegawa-Johnson. Application of Local Binary Patterns for SVM based Stop Consonant Detection. Speech Prosody, pp. 1114-1118, 2016
@inproceedings{qian2016application,
author = {Kaizhi Qian and Yang Zhang and Mark Hasegawa-Johnson},
booktitle = {Speech Prosody},
doi = {10.21437/SpeechProsody.2016-229},
keywords = {recognition},
pages = {1114--1118},
title = {Application of Local Binary Patterns for SVM based Stop Consonant Detection},
year = {2016}
}
Wenda Chen, Mark Hasegawa-Johnson, Nancy Chen, Preethi Jyothi, & Lav Varshney. Clustering-based Phonetic Projection in Mismatched Crowdsourcing Channels for Low-resourced ASR. WSSANLP (Workshop on South and Southeast Asian Natural Language Processing), pp. 133-141, 2016
@inproceedings{chen2016clustering-based,
author = {Wenda Chen and Mark Hasegawa-Johnson and Nancy Chen and Preethi Jyothi and Lav Varshney},
booktitle = {WSSANLP (Workshop on South and Southeast Asian Natural Language Processing)},
keywords = {recognition},
pages = {133-141},
title = {Clustering-based Phonetic Projection in Mismatched Crowdsourcing Channels for Low-resourced ASR},
url = {https://www.aclweb.org/anthology/W16-3714/},
year = {2016}
}
Ruobai Wang, Yang Zhang, Zhijian Ou, & Mark Hasegawa-Johnson. Use of Particle Filtering and MCMC for Inference in Probabilistic Acoustic Tube Model. IEEE Workshop on Statistical Signal Processing, 2016
@inproceedings{wang2016use,
author = {Ruobai Wang and Yang Zhang and Zhijian Ou and Mark Hasegawa-Johnson},
booktitle = {IEEE Workshop on Statistical Signal Processing},
doi = {10.1109/SSP.2016.7551748},
keywords = {synthesis},
title = {Use of Particle Filtering and MCMC for Inference in Probabilistic Acoustic Tube Model},
year = {2016}
}
Tom Le Paine, Pooya Khorrami, Shiyu Chang, Yang Zhang, Prajit Ramachandran, Mark A. Hasegawa-Johnson, & Thomas S. Huang. Fast wavenet generation algorithm. arXiv preprint arXiv:1611.09482, 2016
@article{paine2016fast,
author = {Tom Le Paine and Pooya Khorrami and Shiyu Chang and Yang Zhang and Prajit Ramachandran and Mark A. Hasegawa-Johnson and Thomas S. Huang},
journal = {arXiv preprint arXiv:1611.09482},
keywords = {synthesis},
title = {Fast wavenet generation algorithm},
url = {https://arxiv.org/abs/1611.09482},
year = {2016}
}
Sujeeth Bharadwaj. A theory of (almost) zero resource speech recognition. Ph.D. Thesis, University of Illinois, Jun, 2015
@phdthesis{bharadwaj2015theory,
author = {Sujeeth Bharadwaj},
keywords = {recognition},
month = {June},
school = {University of Illinois},
title = {A theory of (almost) zero resource speech recognition},
url = {https://www.ideals.illinois.edu/items/79584},
year = {2015}
}
Po-Sen Huang. Shallow and deep learning for audio and natural language processing. Ph.D. Thesis, University of Illinois, Jan, 2015
@phdthesis{huang2015shallow,
author = {Po-Sen Huang},
keywords = {audio},
month = {January},
school = {University of Illinois},
title = {Shallow and deep learning for audio and natural language processing},
url = {https://www.ideals.illinois.edu/items/79707},
year = {2015}
}
Mary Pietrowicz, Mark Hasegawa-Johnson, & Karrie Karahalios. Acoustic Correlates for Perceived Effort Levels in Expressive Speech. Proc. Interspeech, pp. 3720-3724, 2015
@inproceedings{pietrowicz2015acoustic,
author = {Mary Pietrowicz and Mark Hasegawa-Johnson and Karrie Karahalios},
booktitle = {Proc. Interspeech},
doi = {10.21437/Interspeech.2015-738},
keywords = {analysis},
pages = {3720--3724},
title = {Acoustic Correlates for Perceived Effort Levels in Expressive Speech},
year = {2015}
}
Yang Zhang, Nasser Nasrabadi, & Mark Hasegawa-Johnson. Multichannel Transient Acoustic Signal Classification Using Task-Driven Dictionary with Joint Sparsity and Beamforming. Proc. ICASSP, pp. 2591:1-5, 2015
@inproceedings{zhang2015multichannel,
author = {Yang Zhang and Nasser Nasrabadi and Mark Hasegawa-Johnson},
booktitle = {Proc. ICASSP},
doi = {10.1109/ICASSP.2015.7178294},
grant = {ARO W911NF-09-1-0383},
keywords = {analysis},
pages = {2591:1--5},
title = {Multichannel Transient Acoustic Signal Classification Using Task-Driven Dictionary with Joint Sparsity and Beamforming},
year = {2015}
}
Mahmoud Abunasser. Computational Measures of Linguistic Variation: A Study of Arabic Varieties. Ph.D. Thesis, University of Illinois, 2015
@phdthesis{abunasser2015computational,
author = {Mahmoud Abunasser},
keywords = {analysis},
school = {University of Illinois},
title = {Computational Measures of Linguistic Variation: A Study of Arabic Varieties},
url = {https://www.ideals.illinois.edu/items/79587},
year = {2015}
}
Amit Das, & Mark Hasegawa-Johnson. Cross-lingual transfer learning during supervised training in low resource scenarios. Proc. Interspeech, pp. 3531-3535, 2015
@inproceedings{das2015cross-lingual,
author = {Amit Das and Mark Hasegawa-Johnson},
booktitle = {Proc. Interspeech},
doi = {10.21437/Interspeech.2015-700},
keywords = {recognition},
pages = {3531--3535},
title = {Cross-lingual transfer learning during supervised training in low resource scenarios},
year = {2015}
}
Preethi Jyothi, & Mark Hasegawa-Johnson. Improving Hindi Broadcast ASR by Adapting the Language Model and Pronunciation Model Using A Priori Syntactic and Morphophonemic Knowledge. Proc. Interspeech, pp. 3164-3168, 2015
@inproceedings{jyothi2015improving,
author = {Preethi Jyothi and Mark Hasegawa-Johnson},
booktitle = {Proc. Interspeech},
doi = {10.21437/Interspeech.2015-637},
keywords = {recognition},
pages = {3164--3168},
title = {Improving Hindi Broadcast ASR by Adapting the Language Model and Pronunciation Model Using A Priori Syntactic and Morphophonemic Knowledge},
year = {2015}
}
Mark Hasegawa-Johnson, Ed Lalor, KC Lee, Preethi Jyothi, Majid Mirbagheri, Amit Das, Giovanni Di Liberto, Brad Ekin, Chunxi Liu, Vimal Manohar, Hao Tang, Paul Hager, Tyler Kekona, & Rose Sloan. Probabilistic Transcription. WS15 Group Final Presentation, 2015
@unpublished{hasegawajohnson2015probabilistic,
author = {Mark Hasegawa-Johnson and Ed Lalor and KC Lee and Preethi Jyothi and Majid Mirbagheri and Amit Das and Giovanni Di Liberto and Brad Ekin and Chunxi Liu and Vimal Manohar and Hao Tang and Paul Hager and Tyler Kekona and Rose Sloan},
keywords = {recognition},
note = {WS15 Group Final Presentation},
title = {Probabilistic Transcription},
year = {2015}
}
Preethi Jyothi, & Mark Hasegawa-Johnson. Transcribing Continuous Speech Using Mismatched Crowdsourcing. Proc. Interspeech, pp. 2774-2778, 2015
@inproceedings{jyothi2015transcribing,
author = {Preethi Jyothi and Mark Hasegawa-Johnson},
booktitle = {Proc. Interspeech},
doi = {10.21437/Interspeech.2015-584},
keywords = {recognition},
pages = {2774-2778},
title = {Transcribing Continuous Speech Using Mismatched Crowdsourcing},
year = {2015}
}
Mark Hasegawa-Johnson, Jennifer Cole, Preethi Jyothi, & Lav Varshney. Models of Dataset Size, Question Design, and Cross-Language Speech Perception for Speech Crowdsourcing Applications. Journal of Laboratory Phonology, vol. 6, no. 3-4, pp. 381-431, 2015
@article{hasegawajohnson2015models,
author = {Mark Hasegawa-Johnson and Jennifer Cole and Preethi Jyothi and Lav Varshney},
doi = {10.1515/lp-2015-0012},
journal = {Journal of Laboratory Phonology},
keywords = {recognition},
number = {3-4},
pages = {381-431},
title = {Models of Dataset Size, Question Design, and Cross-Language Speech Perception for Speech Crowdsourcing Applications},
volume = {6},
year = {2015}
}
Preethi Jyothi, & Mark Hasegawa-Johnson. Acquiring Speech Transcriptions Using Mismatched Crowdsourcing. Proc. AAAI, pp. 1263-1269, 2015
@inproceedings{jyothi2015acquiring,
author = {Preethi Jyothi and Mark Hasegawa-Johnson},
booktitle = {Proc. AAAI},
doi = {10.1609/aaai.v29i1.9343},
keywords = {recognition},
pages = {1263-1269},
title = {Acquiring Speech Transcriptions Using Mismatched Crowdsourcing},
year = {2015}
}
Jia-Chen Ren, Lawrence Angrave, & Mark Hasegawa-Johnson. ClassTranscribe: A New Tool with New Educational Opportunities for Student Crowdsourced College Lecture Transcriptions. SLaTE (the Workshop on Speech and Language Technology in Education), 2015
@inproceedings{ren2015classtranscribe,
author = {Jia-Chen Ren and Lawrence Angrave and Mark Hasegawa-Johnson},
booktitle = {SLaTE (the Workshop on Speech and Language Technology in Education)},
keywords = {recognition},
title = {ClassTranscribe: A New Tool with New Educational Opportunities for Student Crowdsourced College Lecture Transcriptions},
url = {https://www.isca-speech.org/archive/slate_2015/ren15_slate.html},
year = {2015}
}
Jia-Chen Ren, Mark Hasegawa-Johnson, & Lawrence Angrave. ClassTranscribe. ICER Conference, 2015
@inproceedings{chen2015classtranscribe,
author = {Jia-Chen Ren and Mark Hasegawa-Johnson and Lawrence Angrave},
booktitle = {ICER Conference},
keywords = {recognition},
title = {ClassTranscribe},
year = {2015}
}
Yang Zhang, Zhijian Ou, & Mark Hasegawa-Johnson. Incorporating AM-FM effect in voiced speech for probabilistic acoustic tube model. Proc. WASPAA, 2015
@inproceedings{zhang2015incorporating,
author = {Yang Zhang and Zhijian Ou and Mark Hasegawa-Johnson},
booktitle = {Proc. WASPAA},
doi = {10.1109/WASPAA.2015.7336905},
keywords = {synthesis},
title = {Incorporating AM-FM effect in voiced speech for probabilistic acoustic tube model},
year = {2015}
}
Po-Sen Huang, Minje Kim, Mark Hasegawa-Johnson, & Paris Smaragdis. Joint Optimization of Masks and Deep Recurrent Neural Networks for Monaural Source Separation. IEEE Trans. Audio, Speech and Language Processing, vol. 23, no. 12, pp. 2136-2147, 2015
@article{huang2015joint,
author = {Po-Sen Huang and Minje Kim and Mark Hasegawa-Johnson and Paris Smaragdis},
doi = {10.1109/TASLP.2015.2468583},
journal = {IEEE Trans. Audio, Speech and Language Processing},
keywords = {synthesis},
number = {12},
pages = {2136-2147},
title = {Joint Optimization of Masks and Deep Recurrent Neural Networks for Monaural Source Separation},
url = {https://arxiv.org/abs/1502.04149},
volume = {23},
year = {2015}
}
Renato F. L. Azevedo, Daniel Morrow, Mark Hasegawa-Johnson, Kuangxiao Gu, Dan Soberal, Thomas Huang, William Schuh, & Rocio Garcia-Retamero. Improving Patient Comprehension of Numeric Health Information. Human Factors Conference, 2015
@inproceedings{azevedo2015improving,
author = {Renato F. L. Azevedo and Daniel Morrow and Mark Hasegawa-Johnson and Kuangxiao Gu and Dan Soberal and Thomas Huang and William Schuh and Rocio Garcia-Retamero},
booktitle = {Human Factors Conference},
grant = {AHRQ R21HS022948},
keywords = {synthesis},
title = {Improving Patient Comprehension of Numeric Health Information},
year = {2015}
}
Xuesong Yang. Machine learning approaches to improving mispronunciation detection on an imbalanced corpus. Master’s Thesis, University of Illinois, 2015
@mastersthesis{yang2015machine,
author = {Xuesong Yang},
keywords = {recognition},
school = {University of Illinois},
title = {Machine learning approaches to improving mispronunciation detection on an imbalanced corpus},
url = {https://www.ideals.illinois.edu/items/91251},
year = {2015}
}
Yang Zhang. Probabilistic Generative Modeling of Speech. Master’s Thesis, University of Illinois, 2015
@mastersthesis{zhang2015probabilistic,
author = {Yang Zhang},
school = {University of Illinois},
title = {Probabilistic Generative Modeling of Speech},
year = {2015}
}
Austin Chen, & Mark Hasegawa-Johnson. Mixed Stereo Audio Classification Using a Stereo-Input Mixed-to-Panned Level Feature. IEEE Trans. Speech and Audio Processing, vol. 22, no. 12, pp. 2025-2033, 2014
@article{chen2013mixed,
author = {Austin Chen and Mark Hasegawa-Johnson},
doi = {10.1109/TASLP.2014.2359628},
grant = {QNRF NPRP 09-410-1-069},
journal = {IEEE Trans. Speech and Audio Processing},
keywords = {analysis},
number = {12},
pages = {2025-2033},
title = {Mixed Stereo Audio Classification Using a Stereo-Input Mixed-to-Panned Level Feature},
volume = {22},
year = {2014}
}
Preethi Jyothi, Jennifer Cole, Mark Hasegawa-Johnson, & Vandana Puri. An Investigation of Prosody in Hindi Narrative Speech. Proceedings of Speech Prosody, pp. 623-627, 2014
@inproceedings{jyothi2014an,
author = {Preethi Jyothi and Jennifer Cole and Mark Hasegawa-Johnson and Vandana Puri},
booktitle = {Proceedings of Speech Prosody},
doi = {10.21437/SpeechProsody.2014-113},
grant = {QNRF 09-410-1-069},
keywords = {analysis},
pages = {623-627},
title = {An Investigation of Prosody in Hindi Narrative Speech},
year = {2014}
}
Sujeeth Bharadwaj, & Mark Hasegawa-Johnson. A PAC-Bayesian Approach to Minimum Perplexity Language Modeling. Proceedings of CoLing, pp. 130-140, 2014
@inproceedings{bharadwaj2014a,
author = {Sujeeth Bharadwaj and Mark Hasegawa-Johnson},
booktitle = {Proceedings of CoLing},
grant = {NSF 0941268},
keywords = {intelligence},
pages = {130-140},
title = {A PAC-Bayesian Approach to Minimum Perplexity Language Modeling},
url = {https://aclanthology.org/C14-1014.pdf},
year = {2014}
}
Kai-Hsiang Lin, Pooya Khorrami, Jiangping Wang, Mark Hasegawa-Johnson, & Thomas S. Huang. Foreground Object Detection in Highly Dynamic Scenes Using Saliency. Proceedings of ICIP, pp. 1125-1129, 2014
@inproceedings{lin2014foreground,
author = {Kai-Hsiang Lin and Pooya Khorrami and Jiangping Wang and Mark Hasegawa-Johnson and Thomas S. Huang},
booktitle = {Proceedings of ICIP},
doi = {10.1109/ICIP.2014.7025224},
keywords = {intelligence},
pages = {1125-1129},
title = {Foreground Object Detection in Highly Dynamic Scenes Using Saliency},
year = {2014}
}
Zhaowen Wang, Zhangyang Wang, Mark Moll, Po-Sen Huang, Devin Grady, Nasser Nasrabadi, Thomas Huang, Lydia Kavraki, & Mark Hasegawa-Johnson. Active Planning, Sensing and Recognition Using a Resource-Constrained Discriminant POMDP. CVPR Multi-Sensor Fusion Workshop, pp. 740-747, 2014
@inproceedings{wang2014active,
author = {Zhaowen Wang and Zhangyang Wang and Mark Moll and Po-Sen Huang and Devin Grady and Nasser Nasrabadi and Thomas Huang and Lydia Kavraki and Mark Hasegawa-Johnson},
booktitle = {CVPR Multi-Sensor Fusion Workshop},
grant = {ARO W911NF-09-1-0383},
keywords = {intelligence},
pages = {740-747},
title = {Active Planning, Sensing and Recognition Using a Resource-Constrained Discriminant POMDP},
url = {https://www.cv-foundation.org/openaccess/content_cvpr_workshops_2014/W19/html/Wang_Active_Planning_Sensing_2014_CVPR_paper.html},
year = {2014}
}
Xiayu Chen, Yang Zhang, & Mark Hasegawa-Johnson. An iterative approach to decision tree training for context dependent speech synthesis. Proc. Interspeech, pp. 2327-2331, 2014
@inproceedings{chen2014an,
author = {Xiayu Chen and Yang Zhang and Mark Hasegawa-Johnson},
booktitle = {Proc. Interspeech},
doi = {10.21437/Interspeech.2014-191},
keywords = {recognition},
pages = {2327--2331},
title = {An iterative approach to decision tree training for context dependent speech synthesis},
year = {2014}
}
Alina Khasanova, Jennifer Cole, & Mark Hasegawa-Johnson. Detecting articulatory compensation in acoustic data through linear regression modeling. Proc. Interspeech 2014, pp. 925-929, 2014
@inproceedings{khasanova14_interspeech,
author = {Alina Khasanova and Jennifer Cole and Mark Hasegawa-Johnson},
booktitle = {Proc. Interspeech 2014},
doi = {10.21437/Interspeech.2014-241},
pages = {925--929},
title = {Detecting articulatory compensation in acoustic data through linear regression modeling},
year = {2014}
}
Mohamed Elmahdy, Mark Hasegawa-Johnson, & Eiman Mustafawi. Automatic Long Audio Alignment and Confidence Scoring for Conversational Arabic Speech. The 9th edition of the Language Resources and Evaluation Conference (LREC 2014), Reykjavik, Iceland, 2014
@inproceedings{elmahdy2014automatic,
address = {Reykjavik, Iceland},
author = {Mohamed Elmahdy and Mark Hasegawa-Johnson and Eiman Mustafawi},
booktitle = {The 9th edition of the Language Resources and Evaluation Conference (LREC 2014)},
grant = {QNRF NPRP 09-410-1-069},
isbn = {9782951740884},
keywords = {recognition},
title = {Automatic Long Audio Alignment and Confidence Scoring for Conversational Arabic Speech},
url = {http://www.lrec-conf.org/proceedings/lrec2014/pdf/434_Paper.pdf},
year = {2014}
}
Mohamed Elmahdy, Mark Hasegawa-Johnson, & Eiman Mustafawi. Development of a TV Broadcasts Speech Recognition System for Qatari Arabic. The 9th edition of the Language Resources and Evaluation Conference (LREC 2014), Reykjavik, Iceland, pp. 3057-3061, 2014
@inproceedings{elmahdy000038development,
address = {Reykjavik, Iceland},
author = {Mohamed Elmahdy and Mark Hasegawa-Johnson and Eiman Mustafawi},
booktitle = {The 9th edition of the Language Resources and Evaluation Conference (LREC 2014)},
grant = {QNRF NPRP 09-410-1-069},
isbn = {9782951740884},
keywords = {recognition},
pages = {3057-3061},
title = {Development of a TV Broadcasts Speech Recognition System for Qatari Arabic},
url = {http://www.lrec-conf.org/proceedings/lrec2014/pdf/430_Paper.pdf},
year = {2014}
}
Raymond Yeh. Divergence Guided Two Beams Viterbi Algorithm on Factorial HMMs. B.S. Thesis, University of Illinois, 2014
@unpublished{yeh2014divergence,
author = {Raymond Yeh},
note = {B.S. Thesis},
school = {University of Illinois},
title = {Divergence Guided Two Beams Viterbi Algorithm on Factorial HMMs},
year = {2014}
}
Yang Zhang, Zhijian Ou, & Mark Hasegawa-Johnson. Improvement of Probabilistic Acoustic Tube Model for Speech Decomposition. ICASSP, 2014
@inproceedings{zhang2014improvement,
author = {Yang Zhang and Zhijian Ou and Mark Hasegawa-Johnson},
booktitle = {ICASSP},
doi = {10.1109/ICASSP.2014.6855144},
keywords = {synthesis},
title = {Improvement of Probabilistic Acoustic Tube Model for Speech Decomposition},
year = {2014}
}
Po-Sen Huang, Minje Kim, Paris Smaragdis, & Mark Hasegawa-Johnson. Deep Learning for Monaural Speech Separation. ICASSP, 2014
@inproceedings{huang2014deep,
author = {Po-Sen Huang and Minje Kim and Paris Smaragdis and Mark Hasegawa-Johnson},
booktitle = {ICASSP},
doi = {10.1109/ICASSP.2014.6853860},
grant = {ARO W911NF-09-1-0383},
keywords = {synthesis},
title = {Deep Learning for Monaural Speech Separation},
year = {2014}
}
Po-Sen Huang, Minje Kim, Mark Hasegawa-Johnson, & Paris Smaragdis. Singing-Voice Separation From Monaural Recordings Using Deep Recurrent Neural Networks. Proceedings of ISMIR, 2014
@inproceedings{huang2014singing-voice,
author = {Po-Sen Huang and Minje Kim and Mark Hasegawa-Johnson and Paris Smaragdis},
booktitle = {Proceedings of ISMIR},
keywords = {synthesis},
title = {Singing-Voice Separation From Monaural Recordings Using Deep Recurrent Neural Networks},
url = {https://archives.ismir.net/ismir2014/paper/000154.pdf},
year = {2014}
}
Daniel Soberal. Face recognition using hidden Markov model supervectors. Master’s Thesis, University of Illinois, 2014
@mastersthesis{soberal2014face,
author = {Daniel Soberal},
keywords = {multimodal},
school = {University of Illinois},
title = {Face recognition using hidden Markov model supervectors},
url = {https://www.ideals.illinois.edu/items/73028},
year = {2014}
}
Kaizhi Qian. Regularized Estimation of Gaussian Mixture Models for SVM Based Speaker Recognition. Unpublished B.S. Thesis, University of Illinois, 2014
@unpublished{qian2014regularized,
author = {Kaizhi Qian},
note = {Unpublished B.S. Thesis},
school = {University of Illinois},
title = {Regularized Estimation of Gaussian Mixture Models for SVM Based Speaker Recognition},
url = {https://www.ideals.illinois.edu/items/55611},
year = {2014}
}
Jonathan Jones. English-Spanish Language Transfer for Automatic Speech Recognition. Unpublished B.S. Thesis, University of Illinois, 2014
@unpublished{jones2014english,
author = {Jonathan Jones},
note = {Unpublished B.S. Thesis},
school = {University of Illinois},
title = {English-Spanish Language Transfer for Automatic Speech Recognition},
url = {https://www.ideals.illinois.edu/items/55442},
year = {2014}
}
Austin Chen. Automatic Classification of Electronic Music and Speech/Music Audio Content. Master’s Thesis, University of Illinois, 2014
@mastersthesis{chen2014automatic,
author = {Austin Chen},
keywords = {analysis},
school = {University of Illinois},
title = {Automatic Classification of Electronic Music and Speech/Music Audio Content},
url = {https://www.ideals.illinois.edu/items/49620},
year = {2014}
}
Harsh Vardhan Sharma, & Mark Hasegawa-Johnson. Acoustic Model Adaptation using in-domain Background Models for Dysarthric Speech Recognition. Computer Speech and Language, vol. 27, no. 6, pp. 1147–1162, Sep, 2013
@article{sharma2012csl,
author = {Harsh Vardhan Sharma and Mark Hasegawa-Johnson},
doi = {10.1016/j.csl.2012.10.002},
journal = {Computer Speech and Language},
keywords = {recognition},
month = {September},
number = {6},
pages = {1147–1162},
title = {Acoustic Model Adaptation using in-domain Background Models for Dysarthric Speech Recognition},
volume = {27},
year = {2013}
}
Elabbas Benmamoun, & Mark Hasegawa-Johnson. How Different are Arabic Dialects from Each Other and from Classical Arabic. 6th Annual Arabic Linguistics Symposium, Ifrane, Morocco, Jun, 2013
@inproceedings{benmamoun2013how,
address = {Ifrane, Morocco},
author = {Elabbas Benmamoun and Mark Hasegawa-Johnson},
booktitle = {6th Annual Arabic Linguistics Symposium},
isbn = {9789027236180},
keywords = {analysis},
month = {June},
title = {How Different are Arabic Dialects from Each Other and from Classical Arabic},
year = {2013}
}
Robert Mertens, Po-Sen Huang, Luke Gottlieb, Gerald Friedland, Ajay Divakaran, & Mark Hasegawa-Johnson. On the Application of Speaker Diarization to Audio Indexing of Non-Speech and Mixed Non-Speech/Speech Video Soundtracks. International Journal of Multimedia Data Engineering and Management (IJDEM), vol. 3, no. 3, pp. 1-19, Apr, 2013
@article{mertens2013on,
author = {Robert Mertens and Po-Sen Huang and Luke Gottlieb and Gerald Friedland and Ajay Divakaran and Mark Hasegawa-Johnson},
doi = {10.4018/jmdem.2012070101},
journal = {International Journal of Multimedia Data Engineering and Management (IJDEM)},
keywords = {analysis},
month = {April},
number = {3},
pages = {1--19},
title = {On the Application of Speaker Diarization to Audio Indexing of Non-Speech and Mixed Non-Speech/Speech Video Soundtracks},
volume = {3},
year = {2013}
}
Kai-Hsiang Lin, Xiaodan Zhuang, Camille Goudeseune, Sarah King, Mark Hasegawa-Johnson, & Thomas S. Huang. Saliency-Maximized Audio Visualization and Efficient Audio-Visual Browsing for Faster-than-Real-Time Human Acoustic Event Detection. ACM Transactions on Applied Perception, 2013
@article{lin2013saliency-maximized,
author = {Kai-Hsiang Lin and Xiaodan Zhuang and Camille Goudeseune and Sarah King and Mark Hasegawa-Johnson and Thomas S. Huang},
doi = {10.1145/2536764.2536773},
grant = {NSF 0807329},
journal = {ACM Transactions on Applied Perception},
keywords = {analysis},
title = {Saliency-Maximized Audio Visualization and Efficient Audio-Visual Browsing for Faster-than-Real-Time Human Acoustic Event Detection},
year = {2013}
}
Galen Andrew, Raman Arora, Sujeeth Bharadwaj, Jeff Bilmes, Mark Hasegawa-Johnson, & Karen Livescu. Using articulatory measurements to learn better acoustic features. Proc. Workshop on Speech Production in Automatic Speech Recognition, Lyon, France, 2013
@inproceedings{andrew2013using,
address = {Lyon, France},
author = {Galen Andrew and Raman Arora and Sujeeth Bharadwaj and Jeff Bilmes and Mark Hasegawa-Johnson and Karen Livescu},
booktitle = {Proc. Workshop on Speech Production in Automatic Speech Recognition},
keywords = {analysis},
title = {Using articulatory measurements to learn better acoustic features},
url = {https://www.isca-speech.org/archive/spasr_2013/andrew13_spasr.html},
year = {2013}
}
Amit Juneja, & Mark Hasegawa-Johnson. Experiments on context-awareness and phone error propagation in human and machine speech recognition. Proc. Workshop on Speech Production in Automatic Speech Recognition, Lyon, France, 2013
@inproceedings{juneja2013experiments,
address = {Lyon, France},
author = {Amit Juneja and Mark Hasegawa-Johnson},
booktitle = {Proc. Workshop on Speech Production in Automatic Speech Recognition},
keywords = {analysis},
title = {Experiments on context-awareness and phone error propagation in human and machine speech recognition},
url = {https://www.isca-speech.org/archive/spasr_2013/juneja13_spasr.html},
year = {2013}
}
Kyungtae Kim, Kai-Hsiang Lin, Dirk B Walther, Mark A Hasegawa-Johnson, & Thomas S Huang. Automatic Detection of Auditory Salience with Optimized Linear Filters Derived from Human Annotation. Pattern Recognition Letters, vol. 38, no. 1, pp. 78-85, 2013
@article{kim2013automatic,
author = {Kyungtae Kim and Kai-Hsiang Lin and Dirk B Walther and Mark A Hasegawa-Johnson and Thomas S Huang},
doi = {10.1016/j.patrec.2013.11.010},
grant = {NSF 0803219},
journal = {Pattern Recognition Letters},
keywords = {analysis},
number = {1},
pages = {78-85},
title = {Automatic Detection of Auditory Salience with Optimized Linear Filters Derived from Human Annotation},
volume = {38},
year = {2013}
}
Mohamed Elmahdy, Mark Hasegawa-Johnson, & Eiman Mustafawi. A Transfer Learning Approach for Under-Resourced Arabic Dialects Speech Recognition. Workshop on Less Resourced Languages, new technologies, new challenges and opportunities (LTC 2013), pp. 60-64, 2013
@inproceedings{elmahdy2013transfer,
author = {Mohamed Elmahdy and Mark Hasegawa-Johnson and Eiman Mustafawi},
booktitle = {Workshop on Less Resourced Languages, new technologies, new challenges and opportunities (LTC 2013)},
grant = {QNRF NPRP 09-410-1-069},
keywords = {recognition},
pages = {60-64},
title = {A Transfer Learning Approach for Under-Resourced Arabic Dialects Speech Recognition},
url = {https://citeseerx.ist.psu.edu/document?repid=rep1&type=pdf&doi=a7e6a76e0c1f39171c2603657a94c8ba4418a788},
year = {2013}
}
Mohamed Elmahdy, Mark Hasegawa-Johnson, & Eiman Mustafawi. Automatic Long Audio Alignment for Conversational Arabic Speech. Qatar Foundation Annual Research Conference, 2013
@inproceedings{elmahdy2013automatic,
author = {Mohamed Elmahdy and Mark Hasegawa-Johnson and Eiman Mustafawi},
booktitle = {Qatar Foundation Annual Research Conference},
doi = {10.5339/qfarf.2013.ICTP-03},
keywords = {recognition},
title = {Automatic Long Audio Alignment for Conversational Arabic Speech},
year = {2013}
}
Mohamed Elmahdy, Mark Hasegawa-Johnson, & Eiman Mustafawi. Development of a Spontaneous Large Vocabulary Speech Recognition System for Qatari Arabic. Qatar Foundation Annual Research Conference, 2013
@inproceedings{elmahdy2013development,
author = {Mohamed Elmahdy and Mark Hasegawa-Johnson and Eiman Mustafawi},
booktitle = {Qatar Foundation Annual Research Conference},
doi = {10.5339/qfarf.2013.ICTP-053},
keywords = {recognition},
title = {Development of a Spontaneous Large Vocabulary Speech Recognition System for Qatari Arabic},
year = {2013}
}
Mohamed Elmahdy, Mark Hasegawa-Johnson, & Eiman Mustafawi. A Framework for Conversational Arabic Speech Long Audio Alignment. Proc. 6th Language and Technology Conference (LTC 2013), pp. 290-293, 2013
@inproceedings{elmahdy2013framework,
author = {Mohamed Elmahdy and Mark Hasegawa-Johnson and Eiman Mustafawi},
booktitle = {Proc. 6th Language and Technology Conference (LTC 2013)},
grant = {QNRF NPRP 09-410-1-069},
keywords = {recognition},
pages = {290-293},
title = {A Framework for Conversational Arabic Speech Long Audio Alignment},
url = {https://www.academia.edu/download/33424750/ltc-066-mohamed.pdf},
year = {2013}
}
Sujeeth Bharadwaj, Mark Hasegawa-Johnson, Jitendra Ajmera, Om Deshmukh, & Ashish Verma. Sparse Hidden Markov Models for Purer Clusters. Proc. ICASSP, 2013
@inproceedings{bharadwaj2013sparse,
author = {Sujeeth Bharadwaj and Mark Hasegawa-Johnson and Jitendra Ajmera and Om Deshmukh and Ashish Verma},
booktitle = {Proc. ICASSP},
doi = {10.1109/ICASSP.2013.6638228},
keywords = {recognition},
title = {Sparse Hidden Markov Models for Purer Clusters},
year = {2013}
}
Sarah King, & Mark Hasegawa-Johnson. Accurate Speech Segmentation by Mimicking Human Auditory Processing. Proc. ICASSP, 2013
@inproceedings{king2013accurate,
author = {Sarah King and Mark Hasegawa-Johnson},
booktitle = {Proc. ICASSP},
doi = {10.1109/ICASSP.2013.6639242},
grant = {NSF 0807329},
keywords = {recognition},
title = {Accurate Speech Segmentation by Mimicking Human Auditory Processing},
year = {2013}
}
Po-Sen Huang, Li Deng, Mark Hasegawa-Johnson, & Xiaodong He. Random Features for Kernel Deep Convex Network. Proc. ICASSP, pp. 8096-8900, 2013
@inproceedings{huang2013random,
author = {Po-Sen Huang and Li Deng and Mark Hasegawa-Johnson and Xiaodong He},
booktitle = {Proc. ICASSP},
doi = {10.1109/ICASSP.2013.6638237},
keywords = {recognition},
pages = {8096--8900},
title = {Random Features for Kernel Deep Convex Network},
year = {2013}
}
Mahmoud Abunasser, Abbas Benmamoun, & Mark Hasegawa-Johnson. Pronunciation Variation Metric for Four Dialects of Arabic. AIDA 10 (Association Internationale de Dialectologie Arabe), Qatar University, 2013
@inproceedings{abunasser2013pronunciation,
address = {Qatar University},
author = {Mahmoud Abunasser and Abbas Benmamoun and Mark Hasegawa-Johnson},
booktitle = {AIDA 10 (Association Internationale de Dialectologie Arabe)},
keywords = {recognition},
title = {Pronunciation Variation Metric for Four Dialects of Arabic},
year = {2013}
}
Panying Rong, Torrey Loucks, Heejin Kim, & Mark Hasegawa-Johnson. Relationship between kinematics, F2 slope and speech intelligibility in dysarthria due to cerebral palsy. Clinical Linguistics and Phonetics, vol. 26, no. 9, pp. 806-822, Sep, 2012
@article{rong2012relationship,
author = {Panying Rong and Torrey Loucks and Heejin Kim and Mark Hasegawa-Johnson},
doi = {10.3109/02699206.2012.706686},
journal = {Clinical Linguistics and Phonetics},
keywords = {analysis},
month = {September},
number = {9},
pages = {806-822},
title = {Relationship between kinematics, F2 slope and speech intelligibility in dysarthria due to cerebral palsy},
volume = {26},
year = {2012}
}
Mark Hasegawa-Johnson, Xiaodan Zhuang, Xi Zhou, Camille Goudeseune, Hao Tang, Kai-Hsiang Lin, Mohamed Omar, & Thomas Huang. Toward Better Real-world Acoustic Event Detection. Unpublished presentation given at Seoul National University, May, 2012
@unpublished{hasegawa-johnson2012toward,
author = {Mark Hasegawa-Johnson and Xiaodan Zhuang and Xi Zhou and Camille Goudeseune and Hao Tang and Kai-Hsiang Lin and Mohamed Omar and Thomas Huang},
keywords = {analysis},
month = {May},
note = {Unpublished presentation given at Seoul National University},
title = {Toward Better Real-world Acoustic Event Detection},
year = {2012}
}
Shobhit Mathur, Marshall Scott Poole, Feniosky Pena-Mora, Mark Hasegawa-Johnson, & Noshir Contractor. Detecting interaction links in a collaborating group using manually annotated data. Social Networks, 2012
@article{mathur2012detecting,
author = {Shobhit Mathur and Marshall Scott Poole and Feniosky Pena-Mora and Mark Hasegawa-Johnson and Noshir Contractor},
doi = {10.1016/j.socnet.2012.04.002},
grant = {NSF 0941268},
journal = {Social Networks},
keywords = {analysis},
title = {Detecting interaction links in a collaborating group using manually annotated data},
year = {2012}
}
Hao Tang, Stephen Chu, Mark Hasegawa-Johnson, & Thomas Huang. Partially Supervised Speaker Clustering. IEEE Transactions on Pattern Analysis and Machine Intelligence, vol. 34, no. 5, pp. 959-971, 2012
@article{tang2012partially,
author = {Hao Tang and Stephen Chu and Mark Hasegawa-Johnson and Thomas Huang},
doi = {10.1109/TPAMI.2011.174},
journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
keywords = {analysis},
number = {5},
pages = {959-971},
title = {Partially Supervised Speaker Clustering},
volume = {34},
year = {2012}
}
Ali Sakr, & Mark Hasegawa-Johnson. Topic Modeling of Phonetic Latin-Spelled Arabic for the Relative Analysis of Genre-Dependent and Dialect-Dependent Variation. CITALA, pp. 153-158, 2012
@inproceedings{sakr2012topic,
author = {Ali Sakr and Mark Hasegawa-Johnson},
booktitle = {CITALA},
grant = {QNRF NPRP 410-1-069},
isbn = {978-9954-9135-0-5},
keywords = {analysis},
pages = {153-158},
title = {Topic Modeling of Phonetic Latin-Spelled Arabic for the Relative Analysis of Genre-Dependent and Dialect-Dependent Variation},
url = {https://www.academia.edu/download/86879040/06fr-rist20-2.pdf},
year = {2012}
}
Po-Sen Huang, Mark Hasegawa-Johnson, Wotao Yin, & Tom Huang. Opportunistic Sensing: Unattended Acoustic Sensor Selection Using Crowdsourcing Models. IEEE Workshop on Machine Learning in Signal Processing, 2012
@inproceedings{huang2012opportunistic,
author = {Po-Sen Huang and Mark Hasegawa-Johnson and Wotao Yin and Tom Huang},
booktitle = {IEEE Workshop on Machine Learning in Signal Processing},
doi = {10.1109/MLSP.2012.6349815},
keywords = {analysis},
title = {Opportunistic Sensing: Unattended Acoustic Sensor Selection Using Crowdsourcing Models},
year = {2012}
}
Po-Sen Huang, Jianchao Yang, Mark Hasegawa-Johnson, Feng Liang, & Thomas S. Huang. Pooling Robust Shift-Invariant Sparse Representations of Acoustic Signals. Proc. Interspeech, pp. 2518-2521, 2012
@inproceedings{huang2012pooling,
author = {Po-Sen Huang and Jianchao Yang and Mark Hasegawa-Johnson and Feng Liang and Thomas S. Huang},
booktitle = {Proc. Interspeech},
doi = {10.21437/Interspeech.2012-652},
keywords = {analysis},
pages = {2518--2521},
title = {Pooling Robust Shift-Invariant Sparse Representations of Acoustic Signals},
year = {2012}
}
Po-Sen Huang, Robert Mertens, Ajay Divakaran, Gerald Friedland, & Mark Hasegawa-Johnson. How to Put it into Words—Using Random Forests to Extract Symbol Level Descriptions from Audio Content for Concept Detection. ICASSP, 2012
@inproceedings{huang2012how,
author = {Po-Sen Huang and Robert Mertens and Ajay Divakaran and Gerald Friedland and Mark Hasegawa-Johnson},
booktitle = {ICASSP},
doi = {10.1109/ICASSP.2012.6287927},
grant = {ARO W911NF-09-1-0383},
keywords = {analysis},
title = {How to Put it into Words---Using Random Forests to Extract Symbol Level Descriptions from Audio Content for Concept Detection},
year = {2012}
}
Camille Goudeseune. Effective browsing of long audio recordings. ACM International Workshop on Interactive Multimedia on Mobile and Portable Devices, 2012
@inproceedings{goudeseune2012effective,
author = {Camille Goudeseune},
booktitle = {ACM International Workshop on Interactive Multimedia on Mobile and Portable Devices},
doi = {10.1145/2390821.2390831},
grant = {NSF 0807329},
keywords = {analysis},
software = {https://github.com/camilleg/timeliner/blob/master/README.md},
title = {Effective browsing of long audio recordings},
year = {2012}
}
Kai-Hsiang Lin, Xiaodan Zhuang, Camille Goudeseune, Sarah King, Mark Hasegawa-Johnson, & Thomas Huang. Improving Faster-than-Real-Time Human Acoustic Event Detection by Saliency-Maximized Audio Visualization. ICASSP, pp. 2277-2280, 2012
@inproceedings{lin2012improving,
author = {Kai-Hsiang Lin and Xiaodan Zhuang and Camille Goudeseune and Sarah King and Mark Hasegawa-Johnson and Thomas Huang},
booktitle = {ICASSP},
doi = {10.1109/ICASSP.2012.6288368},
grant = {NSF 0807329},
keywords = {analysis},
pages = {2277-2280},
title = {Improving Faster-than-Real-Time Human Acoustic Event Detection by Saliency-Maximized Audio Visualization},
year = {2012}
}
Hosung Nam, Vikramjit Mitra, Mark Tiede, Mark Hasegawa-Johnson, Carol Espy-Wilson, Elliot Saltzman, & Louis Goldstein. A procedure for estimating gestural scores from speech acoustics. J. Acoustical Society of America, vol. 132, no. 6, pp. 3980-3989, 2012
@article{nam2015a,
author = {Hosung Nam and Vikramjit Mitra and Mark Tiede and Mark Hasegawa-Johnson and Carol Espy-Wilson and Elliot Saltzman and Louis Goldstein},
doi = {10.1121/1.4763545},
journal = {J. Acoustical Society of America},
keywords = {analysis},
number = {6},
pages = {3980-3989},
title = {A procedure for estimating gestural scores from speech acoustics},
volume = {132},
year = {2012}
}
Tim Mahrt, Jennifer Cole, Margaret Fleck, & Mark Hasegawa-Johnson. Accounting for Speaker Variation in the Production of Prominence using the Bayesian Information Criterion. Speech Prosody, 2012
@inproceedings{mahrt2012accounting,
author = {Tim Mahrt and Jennifer Cole and Margaret Fleck and Mark Hasegawa-Johnson},
booktitle = {Speech Prosody},
grant = {NSF 0703624},
keywords = {analysis},
title = {Accounting for Speaker Variation in the Production of Prominence using the Bayesian Information Criterion},
url = {https://www.isca-speech.org/archive/speechprosody_2012/mahrt12_speechprosody.html},
year = {2012}
}
Tim Mahrt, Jennifer Cole, Margaret Fleck, & Mark Hasegawa-Johnson. F0 and the perception of prominence. Proc. Interspeech 2012, pp. 2422-2425, 2012
@inproceedings{mahrt12_interspeech,
author = {Tim Mahrt and Jennifer Cole and Margaret Fleck and Mark Hasegawa-Johnson},
booktitle = {Proc. Interspeech 2012},
doi = {10.21437/Interspeech.2012-634},
pages = {2422--2425},
title = {F0 and the perception of prominence},
year = {2012}
}
Mohamed Elmahdy, Mark Hasegawa-Johnson, & Eiman Mustafawi. A Baseline Speech Recognition System for Levantine Colloquial Arabic. Proceedings of ESOLEC, 2012
@inproceedings{elmahdy2012a,
author = {Mohamed Elmahdy and Mark Hasegawa-Johnson and Eiman Mustafawi},
booktitle = {Proceedings of ESOLEC},
grant = {QNRF NPRP 410-1-069},
keywords = {recognition},
title = {A Baseline Speech Recognition System for Levantine Colloquial Arabic},
url = {https://citeseerx.ist.psu.edu/document?repid=rep1&type=pdf&doi=0dae1d58dddf914a9aed367e4a8045213e4202d5},
year = {2012}
}
Po-Sen Huang, & Mark Hasegawa-Johnson. Cross-Dialectal Data Transferring for Gaussian Mixture Model Training in Arabic Speech Recognition. International Conference on Arabic Language Processing CITALA, pp. 119-122, 2012
@inproceedings{huang2012cross-dialectal,
author = {Po-Sen Huang and Mark Hasegawa-Johnson},
booktitle = {International Conference on Arabic Language Processing CITALA},
grant = {QNRF NPRP 410-1-069},
isbn = {978-9954-9135-0-5},
keywords = {recognition},
pages = {119-122},
title = {Cross-Dialectal Data Transferring for Gaussian Mixture Model Training in Arabic Speech Recognition},
url = {https://citeseerx.ist.psu.edu/document?repid=rep1&type=pdf&doi=5d995e455f1f08c0032b71fd7f0bcd1dcde5786b},
year = {2012}
}
Jui-Ting Huang. Semi-Supervised Learning for Acoustic and Prosodic Modeling in Speech Applications. Ph.D. Thesis, University of Illinois, 2012
@phdthesis{huang2012semi-supervised,
author = {Jui-Ting Huang},
keywords = {recognition},
school = {University of Illinois},
title = {Semi-Supervised Learning for Acoustic and Prosodic Modeling in Speech Applications},
url = {https://www.ideals.illinois.edu/items/32271},
year = {2012}
}
Sarah King, & Mark Hasegawa-Johnson. Detection of Acoustic-Phonetic Landmarks in Mismatched Conditions Using a Biomimetic Model of Human Auditory Processing. CoLing, pp. 589-598, 2012
@inproceedings{king2012detection,
author = {Sarah King and Mark Hasegawa-Johnson},
booktitle = {CoLing},
grant = {QNRF NPRP 09-410-1-069},
keywords = {recognition},
pages = {589--598},
title = {Detection of Acoustic-Phonetic Landmarks in Mismatched Conditions Using a Biomimetic Model of Human Auditory Processing},
url = {https://aclanthology.org/C12-2058.pdf},
year = {2012}
}
Mark Hasegawa-Johnson, Elabbas Benmamoun, Eiman Mustafawi, Mohamed Elmahdy, & Rehab Duwairi. On The Definition of the Word `Segmental’. Speech Prosody, pp. 159-162, 2012
@inproceedings{hasegawajohnson2012on,
author = {Mark Hasegawa-Johnson and Elabbas Benmamoun and Eiman Mustafawi and Mohamed Elmahdy and Rehab Duwairi},
booktitle = {Speech Prosody},
isbn = {978-7-5608-486-3},
keywords = {recognition},
pages = {159-162},
title = {On The Definition of the Word `Segmental'},
url = {https://www.isca-speech.org/archive/speechprosody_2012/hasegawajohnson12_speechprosody.html},
year = {2012}
}
Mohamed Elmahdy, Mark Hasegawa-Johnson, & Eiman Mustafawi. Hybrid Phonemic and Graphemic Modeling for Arabic Speech Recognition. International Journal of Computational Linguistics, vol. 3, no. 1, pp. 88-96, 2012
@article{elmahdy2012ijcl,
author = {Mohamed Elmahdy and Mark Hasegawa-Johnson and Eiman Mustafawi},
grant = {QNRF NPRP 09-410-1-069},
issn = {2180-1266},
journal = {International Journal of Computational Linguistics},
keywords = {recognition},
number = {1},
pages = {88-96},
title = {Hybrid Phonemic and Graphemic Modeling for Arabic Speech Recognition},
url = {https://citeseerx.ist.psu.edu/document?repid=rep1&type=pdf&doi=78b19225125b6be20ed942ea380ed7d8657c71dd},
volume = {3},
year = {2012}
}
Mohamed Elmahdy, Mark Hasegawa-Johnson, & Eiman Mustafawi. Hybrid Pronunciation Modeling for Arabic Large Vocabulary Speech Recognition. Qatar Foundation Annual Research Forum, 2012
@inproceedings{elmahdy2012qnrf,
author = {Mohamed Elmahdy and Mark Hasegawa-Johnson and Eiman Mustafawi},
booktitle = {Qatar Foundation Annual Research Forum},
doi = {10.5339/qfarf.2012.CSO3},
grant = {QNRF 09-410-1-069},
keywords = {recognition},
title = {Hybrid Pronunciation Modeling for Arabic Large Vocabulary Speech Recognition},
year = {2012}
}
Sujeeth Bharadwaj, Raman Arora, Karen Livescu, & Mark Hasegawa-Johnson. Multi-View Acoustic Feature Learning Using Articulatory Measurements. IWSML (Internat. Worksh. on Statistical Machine Learning for Sign. Process.), 2012
@inproceedings{bharadwaj2012multi-view,
author = {Sujeeth Bharadwaj and Raman Arora and Karen Livescu and Mark Hasegawa-Johnson},
booktitle = {IWSML (Internat. Worksh. on Statistical Machine Learning for Sign. Process.)},
grant = {NSF 0905633},
keywords = {recognition},
title = {Multi-View Acoustic Feature Learning Using Articulatory Measurements},
url = {https://citeseerx.ist.psu.edu/document?repid=rep1&type=pdf&doi=2106a2dce0f7c3ea611499ed93187dcee841fc16},
year = {2012}
}
Mark Hasegawa-Johnson, David Harwath, Harsh Vardhan Sharma, & Po-Sen Huang. Transfer Learning for Multi-Person and Multi-Dialect Spoken Language Interface. Presentation given at the 2012 Urbana Neuroengineering Conference, 2012
@unpublished{hasegawajohnson2012transfer,
author = {Mark Hasegawa-Johnson and David Harwath and Harsh Vardhan Sharma and Po-Sen Huang},
keywords = {recognition},
note = {Presentation given at the 2012 Urbana Neuroengineering Conference},
title = {Transfer Learning for Multi-Person and Multi-Dialect Spoken Language Interface},
year = {2012}
}
Harsh Vardhan Sharma. Acoustic Model Adaptation for Recognition of Dysarthric Speech. Ph.D. Thesis, University of Illinois, 2012
@phdthesis{sharma2012thesis,
author = {Harsh Vardhan Sharma},
keywords = {recognition},
school = {University of Illinois},
title = {Acoustic Model Adaptation for Recognition of Dysarthric Speech},
url = {https://www.ideals.illinois.edu/items/32231},
year = {2012}
}
Po-Sen Huang, Scott Deeann Chen, Paris Smaragdis, & Mark Hasegawa-Johnson. Singing-Voice Separation from Monaural Recordings using Robust Principal Component Analysis. ICASSP, 2012
@inproceedings{huang2012singing-voice,
author = {Po-Sen Huang and Scott Deeann Chen and Paris Smaragdis and Mark Hasegawa-Johnson},
booktitle = {ICASSP},
doi = {10.1109/ICASSP.2012.6287816},
grant = {ARO W911NF-09-1-0383},
keywords = {synthesis},
title = {Singing-Voice Separation from Monaural Recordings using Robust Principal Component Analysis},
year = {2012}
}
Xiaodan Zhuang. Modeling Audio and Visual Cues for Real-world Event Detection. Ph.D. Thesis, University of Illinois, Apr, 2011
@phdthesis{zhuang2011modeling,
author = {Xiaodan Zhuang},
keywords = {analysis},
month = {April},
school = {University of Illinois},
title = {Modeling Audio and Visual Cues for Real-world Event Detection},
url = {https://www.ideals.illinois.edu/items/24703},
year = {2011}
}
Tim Mahrt, Jui-Ting Huang, Yoonsook Mo, Jennifer Cole, Mark Hasegawa-Johnson, & Margaret Fleck. Feature Sets for the Automatic Detection of Prosodic Prominence. New Tools and Methods for Very Large Scale Phonetics Research, University of Pennsylvania, Jan, 2011
@inproceedings{mahrt2011feature,
author = {Tim Mahrt and Jui-Ting Huang and Yoonsook Mo and Jennifer Cole and Mark Hasegawa-Johnson and Margaret Fleck},
booktitle = {New Tools and Methods for Very Large Scale Phonetics Research},
keywords = {analysis},
month = {Jan.},
publisher = {University of Pennsylvania},
title = {Feature Sets for the Automatic Detection of Prosodic Prominence},
year = {2011}
}
Hosung Nam, Vikramjit Mitra, Mark Tiede, Mark Hasegawa-Johnson, Carol Espy-Wilson, Elliot Saltzman, & Louis Goldstein. Automatic gestural annotation of the U. Wisconsin X-ray Microbeam corpus. Workshop on New Tools and Methods for Very Large Scale Phonetics Research, University of Pennsylvania, Jan, 2011
@inproceedings{nam2011automatic,
author = {Hosung Nam and Vikramjit Mitra and Mark Tiede and Mark Hasegawa-Johnson and Carol Espy-Wilson and Elliot Saltzman and Louis Goldstein},
booktitle = {Workshop on New Tools and Methods for Very Large Scale Phonetics Research},
keywords = {analysis},
month = {Jan.},
publisher = {University of Pennsylvania},
title = {Automatic gestural annotation of the U. Wisconsin X-ray Microbeam corpus},
year = {2011}
}
Jui-Ting Huang, Mark Hasegawa-Johnson, & Jennifer Cole. How Unlabeled Data Change the Acoustic Models For Phonetic Classification. Workshop on New Tools and Methods for Very Large Scale Phonetics Research, University of Pennsylvania, Jan, 2011
@inproceedings{huang2011how,
address = {University of Pennsylvania},
author = {Jui-Ting Huang and Mark Hasegawa-Johnson and Jennifer Cole},
booktitle = {Workshop on New Tools and Methods for Very Large Scale Phonetics Research},
keywords = {recognition},
month = {Jan.},
title = {How Unlabeled Data Change the Acoustic Models For Phonetic Classification},
url = {https://citeseerx.ist.psu.edu/document?repid=rep1&type=pdf&doi=a6231219eed53704d7afb38176db1aa04705a1f6},
year = {2011}
}
Arthur Kantor, & Mark Hasegawa-Johnson. HMM-based Pronunciation Dictionary Generation. Workshop on New Tools and Methods for Very Large Scale Phonetics Research, University of Pennsylvania, Jan, 2011
@inproceedings{kantor2011hmm-based,
address = {University of Pennsylvania},
author = {Arthur Kantor and Mark Hasegawa-Johnson},
booktitle = {Workshop on New Tools and Methods for Very Large Scale Phonetics Research},
grant = {NSF 0703624},
keywords = {recognition},
month = {Jan.},
software = {http://mickey.ifp.illinois.edu/speechWiki/index.php/Phonetic_Transcription_Tool},
title = {HMM-based Pronunciation Dictionary Generation},
url = {https://citeseerx.ist.psu.edu/document?repid=rep1&type=pdf&doi=66a415c3c988807ed959fc159fd95786ee8f0029},
year = {2011}
}
Rania Al-Sabbagh, Roxana Girju, Mark Hasegawa-Johnson, Elabbas Benmamoun, Rehab Duwairi, & Eiman Mustafawi. Using Web Mining Techniques to Build a Multi-Dialect Lexicon of Arabic. Talk delivered at the Linguistics in the Gulf Conference, 2011
@unpublished{al-sabbagh2011using,
author = {Rania Al-Sabbagh and Roxana Girju and Mark Hasegawa-Johnson and Elabbas Benmamoun and Rehab Duwairi and Eiman Mustafawi},
keywords = {analysis},
note = {Talk delivered at the Linguistics in the Gulf Conference},
title = {Using Web Mining Techniques to Build a Multi-Dialect Lexicon of Arabic},
year = {2011}
}
Robert Mertens, Po-Sen Huang, Luke Gottlieb, Gerald Friedland, & Ajay Divakaran. On the Application of Speaker Diarization to Audio Concept Detection for Multimedia Retrieval. IEEE International Symposium on Multimedia, pp. 446-451, 2011
@inproceedings{mertens2011on,
author = {Robert Mertens and Po-Sen Huang and Luke Gottlieb and Gerald Friedland and Ajay Divakaran},
booktitle = {IEEE International Symposium on Multimedia},
doi = {10.1109/ISM.2011.79},
keywords = {analysis},
pages = {446-451},
title = {On the Application of Speaker Diarization to Audio Concept Detection for Multimedia Retrieval},
year = {2011}
}
Po-Sen Huang, Mark Hasegawa-Johnson, & Thyagaraju Damarla. Exemplar Selection Methods to Distinguish Human from Animal Footsteps. Second Annual Human and Light Vehicle Detection Workshop, Maryland, pp. 14:1-10, 2011
@inproceedings{huang2011exemplar,
address = {Maryland},
author = {Po-Sen Huang and Mark Hasegawa-Johnson and Thyagaraju Damarla},
booktitle = {Second Annual Human and Light Vehicle Detection Workshop},
grant = {ARO W911NF-09-1-0383},
keywords = {analysis},
pages = {14:1-10},
title = {Exemplar Selection Methods to Distinguish Human from Animal Footsteps},
url = {https://posenhuang.github.io/papers/Exemplar_Selection_HLVD2011.pdf},
year = {2011}
}
Po-Sen Huang, Thyagaraju Damarla, & Mark Hasegawa-Johnson. Multi-sensory features for Personnel Detection at Border Crossings. Fusion, 2011
@inproceedings{huang2011multi-sensory,
author = {Po-Sen Huang and Thyagaraju Damarla and Mark Hasegawa-Johnson},
booktitle = {Fusion},
grant = {ARO W911NF-09-1-0383},
keywords = {analysis},
title = {Multi-sensory features for Personnel Detection at Border Crossings},
url = {https://ieeexplore.ieee.org/abstract/document/5977673?casa_token=APIqmZlnHDcAAAAA:66fMhlnUcKNXV6jKVFSz0hCDsoABgGyblOhRExzAtqjMs4XMBCJ8zPfVAnhT-yJg2Bz4rOpkfw},
year = {2011}
}
Po-Sen Huang, Xiaodan Zhuang, & Mark Hasegawa-Johnson. Improving Acoustic Event Detection using Generalizable Visual Features and Multi-modality Modeling. ICASSP, pp. 349-352, 2011
@inproceedings{huang2011improving,
author = {Po-Sen Huang and Xiaodan Zhuang and Mark Hasegawa-Johnson},
booktitle = {ICASSP},
doi = {10.1109/ICASSP.2011.5946412},
grant = {ARO W911NF-09-1-0383},
keywords = {analysis},
pages = {349-352},
title = {Improving Acoustic Event Detection using Generalizable Visual Features and Multi-modality Modeling},
year = {2011}
}
Heejin Kim, Mark Hasegawa-Johnson, & Adrienne Perlman. Temporal and spectral characteristics of fricatives in dysarthria. Journal of the Acoustical Society of America, vol. 130, pp. 2446, 2011
@article{kim2011temporal,
author = {Heejin Kim and Mark Hasegawa-Johnson and Adrienne Perlman},
doi = {10.1121/1.3654821},
journal = {Journal of the Acoustical Society of America},
keywords = {analysis},
pages = {2446},
title = {Temporal and spectral characteristics of fricatives in dysarthria},
volume = {130},
year = {2011}
}
Heejin Kim, Mark Hasegawa-Johnson, & Adrienne Perlman. Vowel Contrast and Speech Intelligibility in Dysarthria. Folia Phoniatrica et Logopaedica, vol. 63, no. 4, pp. 187-194, 2011
@article{kim2011vowel,
author = {Heejin Kim and Mark Hasegawa-Johnson and Adrienne Perlman},
doi = {10.1159/000318881},
grant = {NIH DC0032301},
journal = {Folia Phoniatrica et Logopaedica},
keywords = {analysis},
number = {4},
pages = {187-194},
title = {Vowel Contrast and Speech Intelligibility in Dysarthria},
volume = {63},
year = {2011}
}
Tim Mahrt, Jui-Ting Huang, Yoonsook Mo, Margaret Fleck, Mark Hasegawa-Johnson, & Jennifer Cole. Optimal models of prosodic prominence using the Bayesian information criterion. Proc. Interspeech, pp. 2037-2040, 2011
@inproceedings{mahrt2011optimal,
author = {Tim Mahrt and Jui-Ting Huang and Yoonsook Mo and Margaret Fleck and Mark Hasegawa-Johnson and Jennifer Cole},
booktitle = {Proc. Interspeech},
doi = {10.21437/Interspeech.2011-535},
keywords = {analysis},
pages = {2037--2040},
title = {Optimal models of prosodic prominence using the Bayesian information criterion},
year = {2011}
}
İ. Yücel Ozbek, Mark Hasegawa-Johnson, & Mübeccel Demirekler. On Improving Dynamic State Space Approaches to Articulatory Inversion with MAP based Parameter Estimation. IEEE Transactions on Audio, Speech, and Language Processing, vol. 20, no. 1, pp. 67-81, 2011
@article{ozbek2011on,
author = {İ. Yücel Ozbek and Mark Hasegawa-Johnson and Mübeccel Demirekler},
doi = {10.1109/TASL.2011.2157496},
journal = {IEEE Transactions on Audio, Speech, and Language Processing},
keywords = {analysis},
number = {1},
pages = {67--81},
title = {On Improving Dynamic State Space Approaches to Articulatory Inversion with MAP based Parameter Estimation},
volume = {20},
year = {2011}
}
İ. Yücel Ozbek, Mark Hasegawa-Johnson, & Mübeccel Demirekler. Estimation of Articulatory Trajectories Based on Gaussian Mixture Model (GMM) with Audio-Visual Information Fusion and Dynamic Kalman Smoothing. IEEE Transactions on Audio, Speech, and Language Processing, vol. 19, no. 5, pp. 1180-1195, 2011
@article{ozbek2011estimation,
author = {İ. Yücel Ozbek and Mark Hasegawa-Johnson and Mübeccel Demirekler},
doi = {10.1109/TASL.2010.2087751},
journal = {IEEE Transactions on Audio, Speech, and Language Processing},
keywords = {analysis},
number = {5},
pages = {1180-1195},
title = {Estimation of Articulatory Trajectories Based on Gaussian Mixture Model (GMM) with Audio-Visual Information Fusion and Dynamic Kalman Smoothing},
volume = {19},
year = {2011}
}
Mohamed Elmahdy, Mark Hasegawa-Johnson, Eiman Mustafawi, Rehab Duwairi, & Wolfgang Minker. Challenges and Techniques for Dialectal Arabic Speech Recognition and Machine Translation. Qatar Foundation Annual Research Forum, pp. 244, 2011
@inproceedings{elmahdy2011challenges,
author = {Mohamed Elmahdy and Mark Hasegawa-Johnson and Eiman Mustafawi and Rehab Duwairi and Wolfgang Minker},
booktitle = {Qatar Foundation Annual Research Forum},
doi = {10.5339/qfarf.2011.CSO5},
grant = {QNRF NPRP 410-1-069},
keywords = {recognition},
pages = {244},
title = {Challenges and Techniques for Dialectal Arabic Speech Recognition and Machine Translation},
year = {2011}
}
Mark Hasegawa-Johnson, Jui-Ting Huang, Roxana Girju, Rehab Mustafa Mohamma Duwairi, Eiman Mohd Tayyeb H B Mustafawi, & Elabbas Benmamoun. Learning to Recognize Speech from a Small Number of Labeled Examples. Qatar Foundation Annual Research Forum, pp. 269, 2011
@unpublished{hasegawajohnson2011learning,
author = {Mark Hasegawa-Johnson and Jui-Ting Huang and Roxana Girju and Rehab Mustafa Mohamma Duwairi and Eiman Mohd Tayyeb H B Mustafawi and Elabbas Benmamoun},
doi = {10.5339/qfarf.2011.CSP15},
grant = {QNRF NPRP 410-1-069},
keywords = {recognition},
note = {Qatar Foundation Annual Research Forum},
pages = {269},
title = {Learning to Recognize Speech from a Small Number of Labeled Examples},
year = {2011}
}
Mark Hasegawa-Johnson, Jui-Ting Huang, Sarah King, & Xi Zhou. Normalized recognition of speech and audio events. Journal of the Acoustical Society of America, vol. 130, pp. 2524, 2011
@article{hasegawajohnson2011normalized,
author = {Mark Hasegawa-Johnson and Jui-Ting Huang and Sarah King and Xi Zhou},
doi = {10.1121/1.3655075},
grant = {NSF 0807329},
journal = {Journal of the Acoustical Society of America},
keywords = {recognition},
pages = {2524},
title = {Normalized recognition of speech and audio events},
volume = {130},
year = {2011}
}
Mark Hasegawa-Johnson, Jui-Ting Huang, & Xiaodan Zhuang. Semi-supervised learning for speech and audio processing. Journal of the Acoustical Society of America, vol. 130, pp. 2408, 2011
@article{hasegawajohnson2011semi-supervised,
author = {Mark Hasegawa-Johnson and Jui-Ting Huang and Xiaodan Zhuang},
doi = {10.1121/1.3654654},
grant = {NSF 0703624},
journal = {Journal of the Acoustical Society of America},
keywords = {recognition},
pages = {2408},
title = {Semi-supervised learning for speech and audio processing},
volume = {130},
year = {2011}
}
Boon Pang Lim. Computational Differences between Whispered and Non-whispered Speech. Ph.D. Thesis, University of Illinois, 2011
@phdthesis{pang2011computational,
author = {Boon Pang Lim},
keywords = {recognition},
school = {University of Illinois},
title = {Computational Differences between Whispered and Non-whispered Speech},
url = {https://www.ideals.illinois.edu/items/24896},
year = {2011}
}
Mark Hasegawa-Johnson, Camille Goudeseune, Jennifer Cole, Hank Kaczmarski, Heejin Kim, Sarah King, Timothy Mahrt, Jui-Ting Huang, Xiaodan Zhuang, Kai-Hsiang Lin, Harsh Vardhan Sharma, Zhen Li, & Thomas S. Huang. Multimodal Speech and Audio User Interfaces for K-12 Outreach. APSIPA, pp. 256:1-8, 2011
@inproceedings{hasegawajohnson2011multimodal,
author = {Mark Hasegawa-Johnson and Camille Goudeseune and Jennifer Cole and Hank Kaczmarski and Heejin Kim and Sarah King and Timothy Mahrt and Jui-Ting Huang and Xiaodan Zhuang and Kai-Hsiang Lin and Harsh Vardhan Sharma and Zhen Li and Thomas S. Huang},
booktitle = {APSIPA},
grant = {NSF 0807329},
keywords = {recognition},
pages = {256:1-8},
title = {Multimodal Speech and Audio User Interfaces for K-12 Outreach},
url = {http://www.apsipa.org/proceedings_2011/pdf/APSIPA256.pdf},
year = {2011}
}
Sujeeth Bharadwaj. Multiview feature learning for speech recognition. Master’s Thesis, University of Illinois, 2011
@mastersthesis{bharadwaj2011multiview,
author = {Sujeeth Bharadwaj},
keywords = {recognition},
school = {University of Illinois},
title = {Multiview feature learning for speech recognition},
url = {https://www.ideals.illinois.edu/items/30027},
year = {2011}
}
Jeremy Tidemann. Characterization of the Head-Related Transfer Function using Chirp and Maximum Length Sequence Excitation Signals. Master’s Thesis, University of Illinois, 2011
@mastersthesis{tidemann2011characterization,
author = {Jeremy Tidemann},
keywords = {analysis},
school = {University of Illinois},
title = {Characterization of the Head-Related Transfer Function using Chirp and Maximum Length Sequence Excitation Signals},
url = {https://www.ideals.illinois.edu/items/25190},
year = {2011}
}
Po-Sen Huang. Non-speech Acoustic Event Detection Using Multimodal Information. Master’s Thesis, University of Illinois, 2011
@mastersthesis{huang2011non,
author = {Po-Sen Huang},
keywords = {multimodal},
school = {University of Illinois},
title = {Non-speech Acoustic Event Detection Using Multimodal Information},
url = {https://www.ideals.illinois.edu/items/30083},
year = {2011}
}
Xiaodan Zhuang, Xi Zhou, Mark A. Hasegawa-Johnson, & Thomas S. Huang. Real-world Acoustic Event Detection. Pattern Recognition Letters, vol. 31, no. 2, pp. 1543-1551, Sep, 2010
@article{zhuang2010real-world,
author = {Xiaodan Zhuang and Xi Zhou and Mark A. Hasegawa-Johnson and Thomas S. Huang},
doi = {10.1016/j.patrec.2010.02.005},
grant = {NSF 0807329},
journal = {Pattern Recognition Letters},
keywords = {analysis},
month = {Sep.},
number = {2},
pages = {1543-1551},
title = {Real-world Acoustic Event Detection},
volume = {31},
year = {2010}
}
Lae-Hoon Kim. Statistical Model Based Multi-Microphone Speech Processing: Toward Overcoming Mismatch Problem. Ph.D. Thesis, University of Illinois, Aug, 2010
@phdthesis{kim2010statistical,
author = {Lae-Hoon Kim},
keywords = {synthesis},
month = {August},
school = {University of Illinois},
title = {Statistical Model Based Multi-Microphone Speech Processing: Toward Overcoming Mismatch Problem},
url = {http://hdl.handle.net/2142/16839},
year = {2010}
}
Xi Zhou, Xiaodan Zhuang, Hao Tang, Mark A. Hasegawa-Johnson, & Thomas S. Huang. Novel Gaussianized Vector Representation for Improved Natural Scene Categorization. Pattern Recognition Letters, vol. 31, no. 8, pp. 702-708, Jun, 2010
@article{zhou2010novel,
author = {Xi Zhou and Xiaodan Zhuang and Hao Tang and Mark A. Hasegawa-Johnson and Thomas S. Huang},
doi = {10.1016/j.patrec.2009.12.010},
grant = {NSF 0807329},
journal = {Pattern Recognition Letters},
keywords = {intelligence},
month = {Jun.},
number = {8},
pages = {702-708},
title = {Novel Gaussianized Vector Representation for Improved Natural Scene Categorization},
volume = {31},
year = {2010}
}
David Harwath, & Mark Hasegawa-Johnson. Phonetic Landmark Detection for Automatic Language Identification. Speech Prosody, pp. 100231:1-4, 2010
@inproceedings{harwath2010phonetic,
author = {David Harwath and Mark Hasegawa-Johnson},
booktitle = {Speech Prosody},
grant = {NSF 0703624},
keywords = {analysis},
pages = {100231:1-4},
title = {Phonetic Landmark Detection for Automatic Language Identification},
url = {https://www.isca-speech.org/archive/speechprosody_2010/harwath10_speechprosody.html},
year = {2010}
}
Suma Bhat, Mark Hasegawa-Johnson, & Richard Sproat. Automatic Fluency Assessment by Signal-Level Measurement of Spontaneous Speech. INTERSPEECH Satellite Workshop on Second Language Studies: Acquisition, Learning, Education and Technology, 2010
@inproceedings{bhat2010interspeech,
author = {Suma Bhat and Mark Hasegawa-Johnson and Richard Sproat},
booktitle = {INTERSPEECH Satellite Workshop on Second Language Studies: Acquisition, Learning, Education and Technology},
keywords = {analysis},
title = {Automatic Fluency Assessment by Signal-Level Measurement of Spontaneous Speech},
url = {https://www.isca-speech.org/archive/l2ws_2010/bhat10_l2ws.html},
year = {2010}
}
Su-Youn Yoon, Mark Hasegawa-Johnson, & Richard Sproat. Landmark-based Automated Pronunciation Error Detection. Proceedings of Interspeech, pp. 614-617, 2010
@inproceedings{yoon2010landmark-based,
author = {Su-Youn Yoon and Mark Hasegawa-Johnson and Richard Sproat},
booktitle = {Proceedings of Interspeech},
doi = {10.21437/Interspeech.2010-236},
keywords = {analysis},
pages = {614--617},
title = {Landmark-based Automated Pronunciation Error Detection},
year = {2010}
}
Suma Bhat, Richard Sproat, Mark Hasegawa-Johnson, & Fred Davidson. Automatic fluency assessment using thin-slices of spontaneous speech. Language Testing Research Colloquium, Denver, CO, 2010
@inproceedings{bhat2010language,
address = {Denver, CO},
author = {Suma Bhat and Richard Sproat and Mark Hasegawa-Johnson and Fred Davidson},
booktitle = {Language Testing Research Colloquium},
keywords = {analysis},
title = {Automatic fluency assessment using thin-slices of spontaneous speech},
year = {2010}
}
Heejin Kim, Katie Martin, Mark Hasegawa-Johnson, & Adrienne Perlman. Frequency of consonant articulation errors in dysarthric speech. Clinical Linguistics & Phonetics, vol. 24, no. 10, pp. 759-770, 2010
@article{kim2010frequency,
author = {Heejin Kim and Katie Martin and Mark Hasegawa-Johnson and Adrienne Perlman},
doi = {10.3109/02699206.2010.497238},
grant = {NIH DC0032301},
journal = {Clinical Linguistics & Phonetics},
keywords = {analysis},
number = {10},
pages = {759-770},
title = {Frequency of consonant articulation errors in dysarthric speech},
url = {https://www.tandfonline.com/doi/pdf/10.3109/02699206.2010.497238?casa_token=CjxOPSE_L_IAAAAA:EenLYvmMx1Da4-Sp69jHUVHNqTjMhVHxh3-ns6KPTlVx0kUadyZLad9i_kqgnaAeWTXsUViDvvE},
volume = {24},
year = {2010}
}
Heejin Kim, Mark Hasegawa-Johnson, & Adrienne Perlman. Acoustic Cues to Lexical Stress in Spastic Dysarthria. Speech Prosody, pp. 100891:1-4, 2010
@inproceedings{kim2010acoustic,
author = {Heejin Kim and Mark Hasegawa-Johnson and Adrienne Perlman},
booktitle = {Speech Prosody},
grant = {NIH R21-DC008090-A},
keywords = {analysis},
pages = {100891:1-4},
title = {Acoustic Cues to Lexical Stress in Spastic Dysarthria},
url = {https://www.isca-speech.org/archive/speechprosody_2010/kim10_speechprosody.html},
year = {2010}
}
Heejin Kim, Panying Rong, Torrey M. Loucks, & Mark Hasegawa-Johnson. Kinematic Analysis of Tongue Movement Control in Spastic Dysarthria. Proceedings of Interspeech, pp. 2578-2581, 2010
@inproceedings{kim2010kinematic,
author = {Heejin Kim and Panying Rong and Torrey M. Loucks and Mark Hasegawa-Johnson},
booktitle = {Proceedings of Interspeech},
doi = {10.21437/Interspeech.2010-697},
grant = {NSF 0534106},
keywords = {analysis},
pages = {2578--2581},
title = {Kinematic Analysis of Tongue Movement Control in Spastic Dysarthria},
year = {2010}
}
Bryce E Lobdell, Jont B Allen, & Mark A Hasegawa-Johnson. Intelligibility predictors and neural representation of speech. Speech Communication, 2010
@article{lobdell2010intelligibility,
author = {Bryce E Lobdell and Jont B Allen and Mark A Hasegawa-Johnson},
doi = {10.1016/j.specom.2010.08.016},
journal = {Speech Communication},
keywords = {analysis},
title = {Intelligibility predictors and neural representation of speech},
year = {2010}
}
Yoonsook Mo, Jennifer Cole, & Mark Hasegawa-Johnson. Prosodic effects on temporal structure of monosyllabic CVC words in American English. Speech Prosody, pp. 100208:1-4, 2010
@inproceedings{mo2010prosodic,
author = {Yoonsook Mo and Jennifer Cole and Mark Hasegawa-Johnson},
booktitle = {Speech Prosody},
grant = {NSF 0703624},
keywords = {analysis},
pages = {100208:1-4},
title = {Prosodic effects on temporal structure of monosyllabic CVC words in American English},
url = {https://www.isca-speech.org/archive/speechprosody_2010/mo10_speechprosody.html},
year = {2010}
}
Hao Tang, Mark Hasegawa-Johnson, & Thomas S. Huang. Non-Frontal View Facial Expression Recognition. ICME, pp. 1202-1207, 2010
@inproceedings{tang2010non-frontal,
author = {Hao Tang and Mark Hasegawa-Johnson and Thomas S. Huang},
booktitle = {ICME},
doi = {10.1109/ICME.2010.5582576},
keywords = {intelligence},
pages = {1202-1207},
title = {Non-Frontal View Facial Expression Recognition},
year = {2010}
}
Jui-Ting Huang, Po-Sen Huang, Yoonsook Mo, Mark Hasegawa-Johnson, & Jennifer Cole. Prosody-Dependent Acoustic Modeling Using Variable-Parameter Hidden Markov Models. Speech Prosody, pp. 100623:1-4, 2010
@inproceedings{huang2010prosody-dependent,
author = {Jui-Ting Huang and Po-Sen Huang and Yoonsook Mo and Mark Hasegawa-Johnson and Jennifer Cole},
booktitle = {Speech Prosody},
grant = {NSF 0703624},
keywords = {recognition},
pages = {100623:1-4},
title = {Prosody-Dependent Acoustic Modeling Using Variable-Parameter Hidden Markov Models},
url = {https://www.isca-speech.org/archive/speechprosody_2010/huang10_speechprosody.html},
year = {2010}
}
Hao Tang, Mark Hasegawa-Johnson, & Thomas S. Huang. Toward Robust Learning of the Gaussian Mixture State Emission Densities for Hidden Markov Models. ICASSP, 2010
@inproceedings{tang2010toward,
author = {Hao Tang and Mark Hasegawa-Johnson and Thomas S. Huang},
booktitle = {ICASSP},
doi = {10.1109/ICASSP.2010.5494989},
grant = {NSF 0803219},
keywords = {recognition},
title = {Toward Robust Learning of the Gaussian Mixture State Emission Densities for Hidden Markov Models},
year = {2010}
}
Arthur Kantor. Pronunciation modeling for large vocabulary speech recognition. Ph.D. Thesis, University of Illinois, 2010
@phdthesis{kantor2010pronunciation,
author = {Arthur Kantor},
keywords = {recognition},
school = {University of Illinois},
software = {http://mickey.ifp.illinois.edu/speechWiki/index.php/GMTK_parallel_tools},
title = {Pronunciation modeling for large vocabulary speech recognition},
url = {https://www.ideals.illinois.edu/items/18366},
year = {2010}
}
Chi Hu, Xiaodan Zhuang, & Mark Hasegawa-Johnson. FSM-Based Pronunciation Modeling using Articulatory Phonological Code. Proceedings of Interspeech, pp. 2274-2277, 2010
@inproceedings{hu2010interspeech,
author = {Chi Hu and Xiaodan Zhuang and Mark Hasegawa-Johnson},
booktitle = {Proceedings of Interspeech},
doi = {10.21437/Interspeech.2010-624},
grant = {NSF 0703624},
keywords = {recognition},
pages = {2274--2277},
title = {FSM-Based Pronunciation Modeling using Articulatory Phonological Code},
year = {2010}
}
Hosung Nam, Vikramjit Mitra, Mark Tiede, Elliot Saltzman, Louis Goldstein, Carol Espy-Wilson, & Mark Hasegawa-Johnson. A procedure for estimating gestural scores from natural speech. Proceedings of Interspeech, pp. 30-33, 2010
@inproceedings{nam2010a,
author = {Hosung Nam and Vikramjit Mitra and Mark Tiede and Elliot Saltzman and Louis Goldstein and Carol Espy-Wilson and Mark Hasegawa-Johnson},
booktitle = {Proceedings of Interspeech},
doi = {10.21437/Interspeech.2010-4},
grant = {NSF 0703624},
keywords = {recognition},
pages = {30--33},
title = {A procedure for estimating gestural scores from natural speech},
year = {2010}
}
Jui-Ting Huang, & Mark Hasegawa-Johnson. Semi-Supervised Training of Gaussian Mixture Models by Conditional Entropy Minimization. Proceedings of Interspeech, pp. 1353-1356, 2010
@inproceedings{huang2010semi-supervised,
author = {Jui-Ting Huang and Mark Hasegawa-Johnson},
booktitle = {Proceedings of Interspeech},
doi = {10.21437/Interspeech.2008-116},
grant = {NSF 0703624},
keywords = {recognition},
pages = {1353--1356},
title = {Semi-Supervised Training of Gaussian Mixture Models by Conditional Entropy Minimization},
year = {2010}
}
Harsh Vardhan Sharma, & Mark Hasegawa-Johnson. State Transition Interpolation and MAP Adaptation for HMM-based Dysarthric Speech Recognition. HLT/NAACL Workshop on Speech and Language Processing for Assistive Technology (SLPAT), pp. 72-79, 2010
@inproceedings{sharma2010state,
author = {Harsh Vardhan Sharma and Mark Hasegawa-Johnson},
booktitle = {HLT/NAACL Workshop on Speech and Language Processing for Assistive Technology (SLPAT)},
grant = {NSF 0534106},
keywords = {recognition},
pages = {72-79},
title = {State Transition Interpolation and MAP Adaptation for HMM-based Dysarthric Speech Recognition},
url = {http://delivery.acm.org/10.1145/1870000/1867763/p72-sharma.pdf?key1=1867763&key2=2798521031&coll=DL&dl=ACM&ip=65.42.208.134&CFID=15366854&CFTOKEN=94985121},
year = {2010}
}
Xiaodan Zhuang, Lijuan Wang, Frank Soong, & Mark Hasegawa-Johnson. A Minimum Converted Trajectory Error (MCTE) Approach to High Quality Speech-to-Lips Conversion. Proceedings of Interspeech, pp. 1736-1739, 2010
@inproceedings{zhuang2010a,
author = {Xiaodan Zhuang and Lijuan Wang and Frank Soong and Mark Hasegawa-Johnson},
booktitle = {Proceedings of Interspeech},
doi = {10.21437/Interspeech.2010-498},
grant = {NSF 0703624},
keywords = {synthesis},
pages = {1736--1739},
title = {A Minimum Converted Trajectory Error (MCTE) Approach to High Quality Speech-to-Lips Conversion},
year = {2010}
}
Lae-Hoon Kim, & Mark Hasegawa-Johnson. Toward Overcoming Fundamental Limitation in Frequency-Domain Blind Source Separation for Reverberant Speech Mixtures. Proceedings of Asilomar, 2010
@inproceedings{kim2010toward,
author = {Lae-Hoon Kim and Mark Hasegawa-Johnson},
booktitle = {Proceedings of Asilomar},
doi = {10.1109/ACSSC.2010.5757618},
grant = {NSF 0913188},
keywords = {synthesis},
title = {Toward Overcoming Fundamental Limitation in Frequency-Domain Blind Source Separation for Reverberant Speech Mixtures},
year = {2010}
}
Lae-Hoon Kim, Kyung-Tae Kim, & Mark Hasegawa-Johnson. Robust Automatic Speech Recognition with Decoder Oriented Ideal Binary Mask Estimation. Proceedings of Interspeech, pp. 2066-2069, 2010
@inproceedings{kim2010robust,
author = {Lae-Hoon Kim and Kyung-Tae Kim and Mark Hasegawa-Johnson},
booktitle = {Proceedings of Interspeech},
doi = {10.21437/Interspeech.2010-583},
grant = {NSF 0913188},
keywords = {synthesis},
pages = {2066--2069},
title = {Robust Automatic Speech Recognition with Decoder Oriented Ideal Binary Mask Estimation},
year = {2010}
}
Lae-Hoon Kim, Kyungtae Kim, & Mark Hasegawa-Johnson. Speech enhancement beyond minimum mean squared error with perceptual noise shaping. J. Acoust. Soc. Am., vol. 127, no. 3, pp. 1817, 2010
@article{kim2010speech,
author = {Lae-Hoon Kim and Kyungtae Kim and Mark Hasegawa-Johnson},
doi = {10.1121/1.3384190},
journal = {J. Acoust. Soc. Am.},
keywords = {synthesis},
number = {3},
pages = {1817},
title = {Speech enhancement beyond minimum mean squared error with perceptual noise shaping},
volume = {127},
year = {2010}
}
Lae-Hoon Kim, Mark Hasegawa-Johnson, Gerasimos Potamianos, & Vit Libal. Joint Estimation of DOA and Speech Based on EM Beamforming. ICASSP, 2010
@inproceedings{kim2010joint,
author = {Lae-Hoon Kim and Mark Hasegawa-Johnson and Gerasimos Potamianos and Vit Libal},
booktitle = {ICASSP},
doi = {10.1109/ICASSP.2010.5496144},
keywords = {synthesis},
title = {Joint Estimation of DOA and Speech Based on EM Beamforming},
year = {2010}
}
Chi Hu. FSM-Based Pronunciation Modeling using Articulatory Phonological Code. Master’s Thesis, University of Illinois, 2010
@mastersthesis{hu2010fsm-based,
author = {Chi Hu},
school = {University of Illinois},
title = {FSM-Based Pronunciation Modeling using Articulatory Phonological Code},
year = {2010}
}
David Harwath. Phonetic Landmark Detection for Automatic Language Identification. Unpublished B.S. Thesis, University of Illinois, 2010
@unpublished{harwath2010thesis,
author = {David Harwath},
note = {Unpublished B.S. Thesis},
school = {University of Illinois},
title = {Phonetic Landmark Detection for Automatic Language Identification},
url = {https://www.ideals.illinois.edu/items/47011},
year = {2010}
}
Su-Youn Yoon, Mark Hasegawa-Johnson, & Richard Sproat. Automated Pronunciation Scoring using Confidence Scoring and Landmark-based SVM. Proc. Interspeech, Brighton, pp. 1903-1906, Sep, 2009
@inproceedings{yoon2009interspeech,
address = {Brighton},
author = {Su-Youn Yoon and Mark Hasegawa-Johnson and Richard Sproat},
booktitle = {Proc. Interspeech},
doi = {10.21437/Interspeech.2009-551},
keywords = {analysis},
month = {September},
pages = {1903--1906},
title = {Automated Pronunciation Scoring using Confidence Scoring and Landmark-based SVM},
year = {2009}
}
I. Yücel Özbek, Mark Hasegawa-Johnson, & Mübeccel Demirekler. Formant Trajectories for Acoustic-to-Articulatory Inversion. Proc. Interspeech, Brighton, pp. 2807-2810, Sep, 2009
@inproceedings{ozbek2009formant,
address = {Brighton},
author = {I. Yücel Özbek and Mark Hasegawa-Johnson and Mübeccel Demirekler},
booktitle = {Proc. Interspeech},
doi = {10.21437/Interspeech.2009-717},
keywords = {analysis},
month = {September},
pages = {2807--2810},
title = {Formant Trajectories for Acoustic-to-Articulatory Inversion},
year = {2009}
}
Yoonsook Mo, Jennifer Cole, & Mark Hasegawa-Johnson. Prosodic effects on vowel production: evidence from formant structure. Proc. Interspeech, Brighton, pp. 2535-2538, Sep, 2009
@inproceedings{mo2009prosodic,
address = {Brighton},
author = {Yoonsook Mo and Jennifer Cole and Mark Hasegawa-Johnson},
booktitle = {Proc. Interspeech},
doi = {10.21437/Interspeech.2009-668},
grant = {NSF 0703624},
keywords = {analysis},
month = {September},
pages = {2535--2538},
title = {Prosodic effects on vowel production: evidence from formant structure},
year = {2009}
}
Xiaodan Zhuang, Hosung Nam, Mark Hasegawa-Johnson, Louis Goldstein, & Elliot Saltzman. Articulatory Phonological Code for Word Recognition. Proc. Interspeech, Brighton, pp. 2763-2766, Sep, 2009
@inproceedings{zhuang2009articulatory,
address = {Brighton},
author = {Xiaodan Zhuang and Hosung Nam and Mark Hasegawa-Johnson and Louis Goldstein and Elliot Saltzman},
booktitle = {Proc. Interspeech},
doi = {10.21437/Interspeech.2009-706},
grant = {NSF 0703624},
keywords = {recognition},
month = {September},
pages = {2763--2766},
title = {Articulatory Phonological Code for Word Recognition},
year = {2009}
}
Harsh Vardhan Sharma, Mark Hasegawa-Johnson, Jon Gunderson, & Adrienne Perlman. Universal Access: Speech Recognition for Talkers with Spastic Dysarthria. Proc. Interspeech, Brighton, pp. 1451-1454, Sep, 2009
@inproceedings{sharma2009universal,
address = {Brighton},
author = {Harsh Vardhan Sharma and Mark Hasegawa-Johnson and Jon Gunderson and Adrienne Perlman},
booktitle = {Proc. Interspeech},
doi = {10.21437/Interspeech.2009-444},
grant = {NIH R21 DC008090A},
keywords = {recognition},
month = {September},
pages = {1451--1454},
title = {Universal Access: Speech Recognition for Talkers with Spastic Dysarthria},
year = {2009}
}
Bowon Lee, & Mark Hasegawa-Johnson. A Phonemic Restoration Approach for Automatic Speech Recognition with Highly Nonstationary Background Noise. DSP in Cars workshop, Dallas, Jul, 2009
@inproceedings{lee2009a,
address = {Dallas},
author = {Bowon Lee and Mark Hasegawa-Johnson},
booktitle = {DSP in Cars workshop},
keywords = {recognition},
month = {July},
title = {A Phonemic Restoration Approach for Automatic Speech Recognition with Highly Nonstationary Background Noise},
url = {https://citeseerx.ist.psu.edu/document?repid=rep1&type=pdf&doi=66f31946db0456df7be1f6b09528e2f5be8f84e0},
year = {2009}
}
Thomas S. Huang, Mark A. Hasegawa-Johnson, Stephen M. Chu, Zhihong Zeng, & Hao Tang. Sensitive Talking Heads. IEEE Signal Processing Magazine, vol. 26, no. 4, pp. 67-72, Jul, 2009
@article{huang2009sensitive,
author = {Thomas S. Huang and Mark A. Hasegawa-Johnson and Stephen M. Chu and Zhihong Zeng and Hao Tang},
doi = {10.1109/MSP.2009.932562},
journal = {IEEE Signal Processing Magazine},
keywords = {synthesis},
month = {July},
number = {4},
pages = {67-72},
title = {Sensitive Talking Heads},
volume = {26},
year = {2009}
}
Lae-Hoon Kim, & Mark Hasegawa-Johnson. Optimal Multi-Microphone Speech Enhancement in Cars. DSP in Cars workshop, Dallas, Jul, 2009
@inproceedings{kim2009optimal,
address = {Dallas},
author = {Lae-Hoon Kim and Mark Hasegawa-Johnson},
booktitle = {DSP in Cars workshop},
grant = {NSF 0803219},
keywords = {synthesis},
month = {July},
title = {Optimal Multi-Microphone Speech Enhancement in Cars},
url = {https://citeseerx.ist.psu.edu/document?repid=rep1&type=pdf&doi=f43edc454be762b15b7b023f52f406c00f695b59},
year = {2009}
}
Hao Tang, Stephen M. Chu, Mark Hasegawa-Johnson, & Thomas S. Huang. Emotion Recognition from Speech via Boosted Gaussian Mixture Models. International Conference on Multimedia & Expo (ICME’09), pp. 294-297, 2009
@inproceedings{tang2009emotion,
author = {Hao Tang and Stephen M. Chu and Mark Hasegawa-Johnson and Thomas S. Huang},
booktitle = {International Conference on Multimedia & Expo (ICME'09)},
doi = {10.1109/ICME.2009.5202493},
grant = {NIH R21 DC008090 A},
keywords = {analysis},
pages = {294-297},
title = {Emotion Recognition from Speech via Boosted Gaussian Mixture Models},
year = {2009}
}
Mark Hasegawa-Johnson, Camille Goudeseune, Kai-Hsiang Lin, David Cohen, Xi Zhou, Xiaodan Zhuang, Kyungtae Kim, Hank Kaczmarski, & Thomas Huang. Visual Analytics for Audio. NIPS Workshop on Visual Analytics, 2009
@inproceedings{hasegawa-johnson2009visual,
author = {Mark Hasegawa-Johnson and Camille Goudeseune and Kai-Hsiang Lin and David Cohen and Xi Zhou and Xiaodan Zhuang and Kyungtae Kim and Hank Kaczmarski and Thomas Huang},
booktitle = {NIPS Workshop on Visual Analytics},
grant = {NSF 0807329},
keywords = {analysis},
title = {Visual Analytics for Audio},
year = {2009}
}
Mark Hasegawa-Johnson. Pattern Recognition in Acoustic Signal Processing. Unpublished presentation at the Machine Learning Summer School, University of Chicago, 2009
@unpublished{hasegawa-johnson2009pattern,
author = {Mark Hasegawa-Johnson},
grant = {NSF 0807329},
keywords = {analysis},
note = {Unpublished presentation at the Machine Learning Summer School, University of Chicago},
title = {Pattern Recognition in Acoustic Signal Processing},
year = {2009}
}
Mark Hasegawa-Johnson. Tutorial: Pattern Recognition in Signal Processing. J. Acoust. Soc. Am., vol. 125, pp. 2698, 2009
@article{hasegawa-johnson2009tutorial,
author = {Mark Hasegawa-Johnson},
doi = {10.1121/1.4784323},
grant = {NSF 0803219},
journal = {J. Acoust. Soc. Am.},
keywords = {analysis},
pages = {2698},
title = {Tutorial: Pattern Recognition in Signal Processing},
volume = {125},
year = {2009}
}
Mark Hasegawa-Johnson, Xiaodan Zhuang, Xi Zhou, Camille Goudeseune, & Thomas S. Huang. Adaptation of tandem HMMs for non-speech audio event detection. J. Acoust. Soc. Am., vol. 125, pp. 2730, 2009
@article{hasegawa-johnson2009adaptation,
author = {Mark Hasegawa-Johnson and Xiaodan Zhuang and Xi Zhou and Camille Goudeseune and Thomas S. Huang},
doi = {10.1121/1.4784503},
journal = {J. Acoust. Soc. Am.},
keywords = {analysis},
pages = {2730},
title = {Adaptation of tandem HMMs for non-speech audio event detection},
volume = {125},
year = {2009}
}
Xiaodan Zhuang, Jing Huang, Gerasimos Potamianos, & Mark Hasegawa-Johnson. Acoustic Fall Detection using Gaussian Mixture Models and GMM Supervectors. ICASSP, pp. 69-72, 2009
@inproceedings{zhuang2009acoustic,
author = {Xiaodan Zhuang and Jing Huang and Gerasimos Potamianos and Mark Hasegawa-Johnson},
booktitle = {ICASSP},
doi = {10.1109/ICASSP.2009.4959522},
keywords = {analysis},
pages = {69-72},
title = {Acoustic Fall Detection using Gaussian Mixture Models and GMM Supervectors},
year = {2009}
}
Su-Youn Yoon, Mark Hasegawa-Johnson, & Richard Sproat. Automated Pronunciation Scoring for L2 English Learners. CALICO workshop, 2009
@inproceedings{yoon2009calico,
author = {Su-Youn Yoon and Mark Hasegawa-Johnson and Richard Sproat},
booktitle = {CALICO workshop},
keywords = {analysis},
title = {Automated Pronunciation Scoring for L2 English Learners},
year = {2009}
}
David Cohen, Camille Goudeseune, & Mark Hasegawa-Johnson. Efficient Simultaneous Multi-Scale Computation of FFTs. Georgia Tech publication GT-FODAVA-09-01, 2009
@unpublished{cohen2009efficient,
author = {David Cohen and Camille Goudeseune and Mark Hasegawa-Johnson},
keywords = {analysis},
note = {Georgia Tech publication GT-FODAVA-09-01},
software = {https://github.com/camilleg/multiscale-FFT},
title = {Efficient Simultaneous Multi-Scale Computation of FFTs},
year = {2009}
}
Su-Youn Yoon, Lisa Pierce, Amanda Huensch, Eric Juul, Samantha Perkins, Richard Sproat, & Mark Hasegawa-Johnson. Construction of a rated speech corpus of L2 learners’ speech. CALICO journal, 2009
@article{yoon2009construction,
author = {Su-Youn Yoon and Lisa Pierce and Amanda Huensch and Eric Juul and Samantha Perkins and Richard Sproat and Mark Hasegawa-Johnson},
journal = {CALICO journal},
keywords = {analysis},
title = {Construction of a rated speech corpus of L2 learners' speech},
url = {https://www.jstor.org/stable/pdf/calicojournal.26.3.662.pdf?casa_token=fCpVdyBksEoAAAAA:n5lwoOcMttFqYjtsLvYeRcIoGOzjAy5r4_XDSZc4MWe4qPQzCpKEQXnpGTRzy2SyvsLWrcrzAl4Nki-4FPSj36-NIQSqcwffDoms1tPJ8Srti9W1tQ},
year = {2009}
}
Bryce Lobdell. Models of Human Phone Transcription in Noise Based on Intelligibility Predictors. Ph.D. Thesis, University of Illinois, 2009
@phdthesis{lobdell2009models,
author = {Bryce Lobdell},
keywords = {analysis},
school = {University of Illinois},
title = {Models of Human Phone Transcription in Noise Based on Intelligibility Predictors},
url = {https://hdl.handle.net/2142/81127},
year = {2009}
}
Yoonsook Mo, Jennifer Cole, & Mark Hasegawa-Johnson. How do ordinary listeners perceive prosodic prominence? Syntagmatic vs. Paradigmatic comparison.. J. Acoust. Soc. Am., vol. 125, no. 4, pp. 2572, 2009
@article{mo2009how,
author = {Yoonsook Mo and Jennifer Cole and Mark Hasegawa-Johnson},
doi = {10.1121/1.4783760},
grant = {NSF 0703624},
journal = {J. Acoust. Soc. Am.},
keywords = {analysis},
number = {4},
pages = {2572},
title = {How do ordinary listeners perceive prosodic prominence? Syntagmatic vs. Paradigmatic comparison.},
volume = {125},
year = {2009}
}
Sarah Borys. Lovable Indestructible Grad Student of Chaos. Master’s Thesis, University of Illinois, 2009
@phdthesis{borys2009lovable,
author = {Sarah Borys},
keywords = {analysis},
school = {University of Illinois},
title = {Lovable Indestructible Grad Student of Chaos},
year = {2009}
}
Xiaodan Zhuang, Xi Zhou, Mark A. Hasegawa-Johnson, & Thomas S. Huang. Efficient Object Localization with Gaussianized Vector Representation. IMCE, pp. 89-96, 2009
@inproceedings{zhuang2009efficient,
author = {Xiaodan Zhuang and Xi Zhou and Mark A. Hasegawa-Johnson and Thomas S. Huang},
booktitle = {IMCE},
doi = {10.1145/1631040.1631055},
grant = {NSF 0803219},
keywords = {intelligence},
pages = {89-96},
title = {Efficient Object Localization with Gaussianized Vector Representation},
year = {2009}
}
Jui-Ting Huang, Xi Zhou, Mark Hasegawa-Johnson, & Thomas Huang. Kernel Metric Learning for Phonetic Classification. ASRU, pp. 141-145, 2009
@inproceedings{huang2009kernel,
author = {Jui-Ting Huang and Xi Zhou and Mark Hasegawa-Johnson and Thomas Huang},
booktitle = {ASRU},
doi = {10.1109/ASRU.2009.5373389},
grant = {NSF 0703624},
keywords = {recognition},
pages = {141-145},
title = {Kernel Metric Learning for Phonetic Classification},
year = {2009}
}
Jui-Ting Huang, & Mark Hasegawa-Johnson. On semi-supervised learning of Gaussian mixture models for phonetic classification. NAACL HLT Workshop on Semi-Supervised Learning, pp. 75-83, 2009
@inproceedings{huang2009on,
author = {Jui-Ting Huang and Mark Hasegawa-Johnson},
booktitle = {NAACL HLT Workshop on Semi-Supervised Learning},
grant = {NSF 0534106},
keywords = {recognition},
pages = {75-83},
title = {On semi-supervised learning of Gaussian mixture models for phonetic classification},
url = {https://aclanthology.org/W09-2210.pdf},
year = {2009}
}
Xiaodan Zhuang, Jui-Ting Huang, & Mark Hasegawa-Johnson. Speech Retrieval in Unknown Languages: a Pilot Study. NAACL HLT Cross-Lingual Information Access Workshop (CLIAWS), pp. 3-11, 2009
@inproceedings{zhuang2009speech,
author = {Xiaodan Zhuang and Jui-Ting Huang and Mark Hasegawa-Johnson},
booktitle = {NAACL HLT Cross-Lingual Information Access Workshop (CLIAWS)},
grant = {NSF 0703624},
keywords = {recognition},
pages = {3-11},
title = {Speech Retrieval in Unknown Languages: a Pilot Study},
url = {https://aclanthology.org/W09-1602.pdf},
year = {2009}
}
Sujeeth Bharadwaj. Advances in Sparse Classification. Unpublished B.S. Thesis, University of Illinois, 2009
@unpublished{bharadwaj2009advances,
author = {Sujeeth Bharadwaj},
note = {Unpublished B.S. Thesis},
school = {University of Illinois},
title = {Advances in Sparse Classification},
year = {2009}
}
Taejin Yoon, Jennifer Cole, & Mark Hasegawa-Johnson. Detecting Non-Modal Phonation in Telephone Speech. Speech Prosody, pp. 33-36, 2008
@inproceedings{yoon000019detecting,
author = {Taejin Yoon and Jennifer Cole and Mark Hasegawa-Johnson},
booktitle = {Speech Prosody},
grant = {NSF 0414117},
keywords = {analysis},
pages = {33-36},
title = {Detecting Non-Modal Phonation in Telephone Speech},
url = {https://www.isca-speech.org/archive/speechprosody_2008/yoon08_speechprosody.html},
year = {2008}
}
Tae-Jin Yoon, Jennifer Cole, Mark Hasegawa-Johnson, & Chilin Shih. Detecting Non-modal Phonation in Telephone Speech. Proc. Speech Prosody, 2008
@inproceedings{yoon2008detecting,
author = {Tae-Jin Yoon and Jennifer Cole and Mark Hasegawa-Johnson and Chilin Shih},
booktitle = {Proc. Speech Prosody},
grant = {NSF 0414117},
keywords = {analysis},
title = {Detecting Non-modal Phonation in Telephone Speech},
url = {https://www.isca-archive.org/speechprosody_2008/yoon08_speechprosody.pdf},
year = {2008}
}
Xiaodan Zhuang, Xi Zhou, Thomas S. Huang, & Mark Hasegawa-Johnson. Feature Analysis and Selection for Acoustic Event Detection. ICASSP, pp. 17-20, 2008
@inproceedings{zhuang000070feature,
author = {Xiaodan Zhuang and Xi Zhou and Thomas S. Huang and Mark Hasegawa-Johnson},
booktitle = {ICASSP},
doi = {10.1109/ICASSP.2008.4517535},
grant = {NSF 0414117},
keywords = {analysis},
pages = {17-20},
title = {Feature Analysis and Selection for Acoustic Event Detection},
year = {2008}
}
Xi Zhou, Xiaodan Zhuang, Ming Liu, Hao Tang, Mark Hasegawa-Johnson, & Thomas Huang. HMM-Based Acoustic Event Detection with AdaBoost Feature Selection. Lecture Notes in Computer Science, vol. 4625, pp. 345-353, 2008
@article{zhou000071hmm-based,
author = {Xi Zhou and Xiaodan Zhuang and Ming Liu and Hao Tang and Mark Hasegawa-Johnson and Thomas Huang},
doi = {10.1007/978-3-540-68585-2_33},
grant = {NSF 0414117},
journal = {Lecture Notes in Computer Science},
keywords = {analysis},
pages = {345-353},
title = {HMM-Based Acoustic Event Detection with AdaBoost Feature Selection},
volume = {4625},
year = {2008}
}
Bryce Lobdell, Mark Hasegawa-Johnson, & Jont B. Allen. Human Speech Perception and Feature Extraction. Proc. Interspeech, pp. 1797-1800, 2008
@inproceedings{lobdell2008human,
author = {Bryce Lobdell and Mark Hasegawa-Johnson and Jont B. Allen},
booktitle = {Proc. Interspeech},
doi = {10.21437/Interspeech.2008-494},
keywords = {analysis},
pages = {1797--1800},
title = {Human Speech Perception and Feature Extraction},
year = {2008}
}
Yoonsook Mo, Jennifer Cole, & Mark Hasegawa-Johnson. Frequency and repetition effects outweigh phonetic detail in prominence perception. LabPhon 11, pp. 29-30, 2008
@inproceedings{mo2008frequency,
author = {Yoonsook Mo and Jennifer Cole and Mark Hasegawa-Johnson},
booktitle = {LabPhon 11},
keywords = {analysis},
pages = {29-30},
title = {Frequency and repetition effects outweigh phonetic detail in prominence perception},
url = {https://labphon.org/sites/default/files/previous_conferences/LabPhon11.pdf#page=51},
year = {2008}
}
Xiaodan Zhuang, Hosung Nam, Mark Hasegawa-Johnson, Louis Goldstein, & Elliot Saltzman. The Entropy of Articulatory Phonological Code: Recognizing Gestures from Tract Variables. Proc. Interspeech, pp. 1489-1492, 2008
@inproceedings{zhuang2008the,
author = {Xiaodan Zhuang and Hosung Nam and Mark Hasegawa-Johnson and Louis Goldstein and Elliot Saltzman},
booktitle = {Proc. Interspeech},
doi = {10.21437/Interspeech.2008-428},
grant = {NSF 0703624},
keywords = {analysis},
pages = {1489--1492},
title = {The Entropy of Articulatory Phonological Code: Recognizing Gestures from Tract Variables},
year = {2008}
}
Sarah Borys. Lovable Indestructible Grad Student of Chaos. Cartoons published online, 2008
@unpublished{borys2008lovable,
author = {Sarah Borys},
keywords = {analysis},
note = {Cartoons published online},
title = {Lovable Indestructible Grad Student of Chaos},
year = {2008}
}
Xiaodan Zhuang, Xi Zhou, Mark Hasegawa-Johnson, & Thomas Huang. Face Age Estimation Using Patch-based Hidden Markov Model Supervectors. ICPR, pp. 1-4, 2008
@inproceedings{zhuang2008face,
author = {Xiaodan Zhuang and Xi Zhou and Mark Hasegawa-Johnson and Thomas Huang},
booktitle = {ICPR},
doi = {10.1109/ICPR.2008.4761364},
grant = {NSF 0534106},
keywords = {intelligence},
pages = {1-4},
title = {Face Age Estimation Using Patch-based Hidden Markov Model Supervectors},
year = {2008}
}
Xi Zhou, Xiaodan Zhuang, Hao Tang, Mark Hasegawa-Johnson, & Thomas Huang. A Novel Gaussianized Vector Representation for Natural Scene Categorization. ICPR, pp. 1-4, 2008
@inproceedings{zhou2008a,
author = {Xi Zhou and Xiaodan Zhuang and Hao Tang and Mark Hasegawa-Johnson and Thomas Huang},
booktitle = {ICPR},
doi = {10.1109/ICPR.2008.4761665},
grant = {NSF 0534106},
keywords = {intelligence},
pages = {1-4},
title = {A Novel Gaussianized Vector Representation for Natural Scene Categorization},
year = {2008}
}
Xi Zhou, Xiaodan Zhuang, Shuicheng Yan, Shih-Fu Chang, Mark Hasegawa-Johnson, & Thomas S. Huang. SIFT-Bag Kernel for Video Event Analysis. ACM Multimedia, pp. 229-238, 2008
@inproceedings{zhou2008sift-bag,
author = {Xi Zhou and Xiaodan Zhuang and Shuicheng Yan and Shih-Fu Chang and Mark Hasegawa-Johnson and Thomas S. Huang},
booktitle = {ACM Multimedia},
doi = {10.1145/1459359.1459391},
grant = {NSF 0534106},
keywords = {intelligence},
pages = {229-238},
title = {SIFT-Bag Kernel for Video Event Analysis},
year = {2008}
}
Shuicheng Yan, Xi Zhou, Ming Liu, Mark Hasegawa-Johnson, & Thomas S. Huang. Regression from Patch Kernel. IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp. 1-8, 2008
@inproceedings{yan2008regression,
author = {Shuicheng Yan and Xi Zhou and Ming Liu and Mark Hasegawa-Johnson and Thomas S. Huang},
booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
doi = {10.1109/CVPR.2008.4587405},
keywords = {intelligence},
pages = {1-8},
title = {Regression from Patch Kernel},
url = {http://ieeexplore.ieee.org/xpl/freeabs_all.jsp?arnumber=4587405},
year = {2008}
}
Jui-Ting Huang, & Mark Hasegawa-Johnson. Maximum Mutual Information Estimation with Unlabeled Data for Phonetic Classification. Proc. Interspeech, 2008
@inproceedings{huang2008maximum,
author = {Jui-Ting Huang and Mark Hasegawa-Johnson},
booktitle = {Proc. Interspeech},
grant = {NSF 0534133},
keywords = {recognition},
title = {Maximum Mutual Information Estimation with Unlabeled Data for Phonetic Classification},
url = {https://www.isca-archive.org/interspeech_2008/huang08_interspeech.pdf},
year = {2008}
}
Xiaodan Zhuang, Hosung Nam, Mark Hasegawa-Johnson, Louis Goldstein, & Elliot Saltzman. The Entropy of Articulatory Phonological Code: Recognizing Gestures from Tract Variables. Proc. Interspeech, pp. 1489-1492, 2008
@inproceedings{zhuang000061the,
author = {Xiaodan Zhuang and Hosung Nam and Mark Hasegawa-Johnson and Louis Goldstein and Elliot Saltzman},
booktitle = {Proc. Interspeech},
doi = {10.21437/Interspeech.2008-428},
grant = {NSF 0703624},
keywords = {recognition},
pages = {1489--1492},
title = {The Entropy of Articulatory Phonological Code: Recognizing Gestures from Tract Variables},
year = {2008}
}
Arthur Kantor, & Mark Hasegawa-Johnson. Stream Weight Tuning in Dynamic Bayesian Networks. Proc. ICASSP, pp. 4525-4528, 2008
@inproceedings{kantor2008stream,
author = {Arthur Kantor and Mark Hasegawa-Johnson},
booktitle = {Proc. ICASSP},
doi = {10.1109/ICASSP.2008.4518662},
grant = {NSF 0703624},
keywords = {recognition},
pages = {4525-4528},
title = {Stream Weight Tuning in Dynamic Bayesian Networks},
year = {2008}
}
Mark Hasegawa-Johnson, Jennifer Cole, Ken Chen, Partha Lal, Amit Juneja, Taejin Yoon, Sarah Borys, & Xiaodan Zhuang. Prosodically Organized Automatic Speech Recognition. Language and Linguistics Monograph Series, vol. A25, Academia Sinica, Taiwan, pp. 101-128, 2008
@article{hasegawajohnson2008prosodically,
address = {Taiwan},
author = {Mark Hasegawa-Johnson and Jennifer Cole and Ken Chen and Partha Lal and Amit Juneja and Taejin Yoon and Sarah Borys and Xiaodan Zhuang},
grant = {NSF 0414117},
journal = {Language and Linguistics Monograph Series},
keywords = {recognition},
pages = {101-128},
publisher = {Academia Sinica},
title = {Prosodically Organized Automatic Speech Recognition},
volume = {A25},
year = {2008}
}
Taejin Yoon, Xiaodan Zhuang, Jennifer Cole, & Mark Hasegawa-Johnson. Voice Quality Dependent Speech Recognition. Language and Linguistics Monograph Series, vol. A25, Academia Sinica, Taiwan, pp. 77-100, 2008
@article{yoon2008voice,
address = {Taiwan},
author = {Taejin Yoon and Xiaodan Zhuang and Jennifer Cole and Mark Hasegawa-Johnson},
grant = {NSF 0414117},
journal = {Language and Linguistics Monograph Series},
keywords = {recognition},
pages = {77-100},
publisher = {Academia Sinica},
title = {Voice Quality Dependent Speech Recognition},
volume = {A25},
year = {2008}
}
Heejin Kim, Mark Hasegawa-Johnson, Adrienne Perlman, Jon Gunderson, Thomas Huang, Kenneth Watkin, & Simone Frame. Dysarthric Speech Database for Universal Access Research. Proc. Interspeech, pp. 1741-1744, 2008
@inproceedings{kim2008dysarthric,
author = {Heejin Kim and Mark Hasegawa-Johnson and Adrienne Perlman and Jon Gunderson and Thomas Huang and Kenneth Watkin and Simone Frame},
booktitle = {Proc. Interspeech},
doi = {10.21437/Interspeech.2008-480},
grant = {NIH DC008090A},
keywords = {recognition},
pages = {1741-1744},
title = {Dysarthric Speech Database for Universal Access Research},
year = {2008}
}
Hao Tang, Yun Fu, Jilin Tu, Mark Hasegawa-Johnson, & Thomas S. Huang. Humanoid Audio-Visual Avatar with Emotive Text-to-Speech Synthesis. IEEE Trans. Multimedia, vol. 10, no. 6, pp. 969-981, 2008
@article{tang2008humanoid,
author = {Hao Tang and Yun Fu and Jilin Tu and Mark Hasegawa-Johnson and Thomas S. Huang},
doi = {10.1109/TMM.2008.2001355},
journal = {IEEE Trans. Multimedia},
keywords = {synthesis},
number = {6},
pages = {969-981},
title = {Humanoid Audio-Visual Avatar with Emotive Text-to-Speech Synthesis},
volume = {10},
year = {2008}
}
Hao Tang, Yuxiao Hu, Yun Fu, Mark Hasegawa-Johnson, & Thomas S. Huang. Real-time conversion from a single 2D face image to a 3D text-driven emotive audio-visual avatar. IEEE International Conference on Multimedia and Expo (ICME), pp. 1205-1208, 2008
@inproceedings{tang2008real-time,
author = {Hao Tang and Yuxiao Hu and Yun Fu and Mark Hasegawa-Johnson and Thomas S. Huang},
booktitle = {IEEE International Conference on Multimedia and Expo (ICME)},
doi = {10.1109/ICME.2008.4607657},
keywords = {synthesis},
pages = {1205-1208},
title = {Real-time conversion from a single 2D face image to a 3D text-driven emotive audio-visual avatar},
year = {2008}
}
Hao Tang, Xi Zhou, Matthias Odisio, Mark Hasegawa-Johnson, & Thomas Huang. Two-Stage Prosody Prediction for Emotional Text-to-Speech Synthesis. Proc. Interspeech, pp. 2138-2141, 2008
@inproceedings{tang2008two-stage,
author = {Hao Tang and Xi Zhou and Matthias Odisio and Mark Hasegawa-Johnson and Thomas Huang},
booktitle = {Proc. Interspeech},
doi = {10.21437/Interspeech.2008-554},
grant = {NSF 0426227},
keywords = {synthesis},
pages = {2138-2141},
title = {Two-Stage Prosody Prediction for Emotional Text-to-Speech Synthesis},
year = {2008}
}
Hao Tang, Yun Fu, Jilin Tu, Thomas Huang, & Mark Hasegawa-Johnson. EAVA: A 3D Emotive Audio-Visual Avatar. IEEE Workshop on Applications of Computer Vision (IEEE WACV), pp. 1-6, 2008
@inproceedings{tang2008eava:,
author = {Hao Tang and Yun Fu and Jilin Tu and Thomas Huang and Mark Hasegawa-Johnson},
booktitle = {IEEE Workshop on Applications of Computer Vision (IEEE WACV)},
doi = {10.1109/WACV.2008.4544003},
grant = {NSF 0426227},
keywords = {synthesis},
pages = {1-6},
title = {EAVA: A 3D Emotive Audio-Visual Avatar},
year = {2008}
}
Lae-Hoon Kim, Mark Hasegawa-Johnson, Jun-Seok Lim, & Koeng-Mo Sung. Acoustic model for robustness analysis of optimal multipoint room equalization. J. Acoust. Soc. Am., vol. 123, no. 4, pp. 2043-2053, 2008
@article{kim2008acoustic,
author = {Lae-Hoon Kim and Mark Hasegawa-Johnson and Jun-Seok Lim and Koeng-Mo Sung},
doi = {10.1121/1.2837285},
journal = {J. Acoust. Soc. Am.},
keywords = {synthesis},
number = {4},
pages = {2043-2053},
title = {Acoustic model for robustness analysis of optimal multipoint room equalization},
volume = {123},
year = {2008}
}
Lae-Hoon Kim, & Mark Hasegawa-Johnson. Optimal Speech Estimator Considering Room Response as well as Additive Noise: Different Approaches in Low and High Frequency Range. ICASSP, pp. 4573-4576, 2008
@inproceedings{kim2008optimal,
author = {Lae-Hoon Kim and Mark Hasegawa-Johnson},
booktitle = {ICASSP},
doi = {10.1109/ICASSP.2008.4518674},
keywords = {synthesis},
pages = {4573-4576},
title = {Optimal Speech Estimator Considering Room Response as well as Additive Noise: Different Approaches in Low and High Frequency Range},
year = {2008}
}
Sarah Borys. An SVM Front End Landmark Speech Recognition System. Master’s Thesis, University of Illinois, 2008
@mastersthesis{borys2008scm,
author = {Sarah Borys},
keywords = {recognition},
school = {University of Illinois},
title = {An SVM Front End Landmark Speech Recognition System},
year = {2008}
}
Yang Li. Incremental Training and Growth of Artificial Neural Networks. Master’s Thesis, University of Illinois, 2008
@mastersthesis{li2008incremental,
author = {Yang Li},
keywords = {intelligence},
school = {University of Illinois},
title = {Incremental Training and Growth of Artificial Neural Networks},
year = {2008}
}
Taejin Yoon, Jennifer Cole, & Mark Hasegawa-Johnson. On the edge: Acoustic cues to layered prosodic domains. Proc. International Congress on Phonetic Sciences (ICPhS), Saarbrücken, pp. 1264:1017-1020, Aug, 2007
@inproceedings{yoon000168on,
address = {Saarbrücken},
author = {Taejin Yoon and Jennifer Cole and Mark Hasegawa-Johnson},
booktitle = {Proc. International Congress on Phonetic Sciences (ICPhS)},
grant = {NSF 0414117},
keywords = {analysis},
month = {August},
pages = {1264:1017-1020},
title = {On the edge: Acoustic cues to layered prosodic domains},
url = {http://www.icphs2007.de/conference/Papers/1264/1264.pdf},
year = {2007}
}
Ming Liu, Xi Zhou, Mark Hasegawa-Johnson, Thomas S. Huang, & Zhengyou Zhang. Frequency Domain Correspondence for Speaker Normalization. Proc. Interspeech, Antwerp, pp. 274-277, Aug, 2007
@inproceedings{liu2007frequency,
address = {Antwerp},
author = {Ming Liu and Xi Zhou and Mark Hasegawa-Johnson and Thomas S. Huang and Zhengyou Zhang},
booktitle = {Proc. Interspeech},
doi = {10.21437/Interspeech.2007-120},
keywords = {recognition},
month = {August},
pages = {274-277},
title = {Frequency Domain Correspondence for Speaker Normalization},
year = {2007}
}
Mark Hasegawa-Johnson, Karen Livescu, Partha Lal, & Kate Saenko. Audiovisual Speech Recognition with Articulator Positions as Hidden Variables. Proc. International Congress on Phonetic Sciences (ICPhS), Saarbrücken, pp. 1719:297-302, Aug, 2007
@inproceedings{hasegawajohnson2007audiovisual,
address = {Saarbrücken},
author = {Mark Hasegawa-Johnson and Karen Livescu and Partha Lal and Kate Saenko},
booktitle = {Proc. International Congress on Phonetic Sciences (ICPhS)},
grant = {NSF 0121285},
keywords = {recognition},
month = {August},
pages = {1719:297-302},
title = {Audiovisual Speech Recognition with Articulator Positions as Hidden Variables},
url = {https://www.researchgate.net/profile/K-Livescu/publication/228926670_Audiovisual_speech_recognition_with_articulator_positions_as_hidden_variables/links/00b7d5201284197994000000/Audiovisual-speech-recognition-with-articulator-positions-as-hidden-variables.pdf},
year = {2007}
}
Mark Hasegawa-Johnson. Audio-Visual Speech Recognition: Audio Noise, Video Noise, and Pronunciation Variability. talk given to the Signal Processing Society, IEEE Japan, Jun, 2007
@unpublished{hasegawajohnson2007audio-visual,
author = {Mark Hasegawa-Johnson},
grant = {NSF 0534106},
keywords = {recognition},
month = {June},
note = {talk given to the Signal Processing Society, IEEE Japan},
title = {Audio-Visual Speech Recognition: Audio Noise, Video Noise, and Pronunciation Variability},
year = {2007}
}
Bowon Lee, & Mark Hasegawa-Johnson. Minimum Mean Squared Error A Posteriori Estimation of High Variance Vehicular Noise. 2007 Biennial on DSP for In-Vehicle and Mobile Systems, Istanbul, Jun, 2007
@inproceedings{lee000035minimum,
address = {Istanbul},
author = {Bowon Lee and Mark Hasegawa-Johnson},
booktitle = {2007 Biennial on DSP for In-Vehicle and Mobile Systems},
grant = {Motorola RPS19},
keywords = {synthesis},
month = {June},
title = {Minimum Mean Squared Error A Posteriori Estimation of High Variance Vehicular Noise},
url = {https://citeseerx.ist.psu.edu/document?repid=rep1&type=pdf&doi=1d23cdbd3b402db72ed6ea907bdf7f54d3c9dc2e},
year = {2007}
}
Karen Livescu, Ozgur Cetin, Mark Hasegawa-Johnson, Simon King, Chris Bartels, Nash Borges, Arthur Kantor, Partha Lal, Lisa Yung, Ari Bezman, Stephen Dawson-Haggerty, Bronwyn Woods, Joe Frankel, Matthew Magimai-Doss, & Kate Saenko. Articulatory Feature-Based Methods for Acoustic and Audio-Visual Speech Recognition: Summary from the 2006 JHU Summer Workshop. ICASSP, pp. 621-624, May, 2007
@inproceedings{livescu2007articulatory,
author = {Karen Livescu and Ozgur Cetin and Mark Hasegawa-Johnson and Simon King and Chris Bartels and Nash Borges and Arthur Kantor and Partha Lal and Lisa Yung and Ari Bezman and Stephen Dawson-Haggerty and Bronwyn Woods and Joe Frankel and Matthew Magimai-Doss and Kate Saenko},
booktitle = {ICASSP},
doi = {10.1109/ICASSP.2007.366989},
keywords = {recognition},
month = {May},
pages = {621-624},
title = {Articulatory Feature-Based Methods for Acoustic and Audio-Visual Speech Recognition: Summary from the 2006 JHU Summer Workshop},
year = {2007}
}
Taejin Yoon, Jennifer Cole, & Mark Hasegawa-Johnson. On the edge: Acoustic cues to layered prosodic domains.. 81st Annual Meeting of the Linguistic Society of America, Anaheim, CA, Jan, 2007
@inproceedings{yoon2007on,
address = {Anaheim, CA},
author = {Taejin Yoon and Jennifer Cole and Mark Hasegawa-Johnson},
booktitle = {81st Annual Meeting of the Linguistic Society of America},
grant = {NSF 0414117},
keywords = {analysis},
month = {January},
title = {On the edge: Acoustic cues to layered prosodic domains.},
url = {http://www.icphs2007.de/conference/Papers/1264/1264.pdf},
year = {2007}
}
Taejin Yoon. A Predictive Model of Prosody Through Grammatical Interface: A Computational Approach. Ph.D. Thesis, University of Illinois, 2007
@phdthesis{yoon2007a,
author = {Taejin Yoon},
keywords = {analysis},
school = {University of Illinois},
title = {A Predictive Model of Prosody Through Grammatical Interface: A Computational Approach},
url = {https://www.ideals.illinois.edu/items/83925},
year = {2007}
}
Tong Zhang, Mark Hasegawa-Johnson, & Stephen E. Levinson. Extraction of Pragmatic and Semantic Salience from Spontaneous Spoken English. Speech Communication, 2007
@article{zhang2007extraction,
author = {Tong Zhang and Mark Hasegawa-Johnson and Stephen E. Levinson},
doi = {10.1016/j.specom.2005.07.007},
grant = {NSF 0085980},
journal = {Speech Communication},
keywords = {analysis},
title = {Extraction of Pragmatic and Semantic Salience from Spontaneous Spoken English},
year = {2007}
}
Xi Zhou, Yun Fu, Ming Liu, Mark Hasegawa-Johnson, & Thomas Huang. Robust Analysis and Weighting on MFCC Components for Speech Recognition and Speaker Identification. International Conference on Multimedia and Expo, pp. 188-191, 2007
@inproceedings{zhou2007robust,
author = {Xi Zhou and Yun Fu and Ming Liu and Mark Hasegawa-Johnson and Thomas Huang},
booktitle = {International Conference on Multimedia and Expo},
doi = {10.1109/ICME.2007.4284618},
grant = {NSF 0426627},
keywords = {analysis},
pages = {188-191},
title = {Robust Analysis and Weighting on MFCC Components for Speech Recognition and Speaker Identification},
year = {2007}
}
Ming Liu, Zhengyou Zhang, Mark Hasegawa-Johnson, & Thomas Huang. Exploring Discriminative Learning for Text-Independent Speaker Recognition. ICME, pp. 56-59, 2007
@inproceedings{liu2007exploring,
author = {Ming Liu and Zhengyou Zhang and Mark Hasegawa-Johnson and Thomas Huang},
booktitle = {ICME},
doi = {10.1109/ICME.2007.4284585},
grant = {NSF 0426627},
keywords = {analysis},
pages = {56-59},
title = {Exploring Discriminative Learning for Text-Independent Speaker Recognition},
year = {2007}
}
Soo-Eun Chang, Nicoline Ambrose, Kirk Erickson, & Mark Hasegawa-Johnson. Brain Anatomy Differences in Childhood Stuttering. Neuroimage, 2007
@article{chang2007brain,
author = {Soo-Eun Chang and Nicoline Ambrose and Kirk Erickson and Mark Hasegawa-Johnson},
doi = {10.1016/j.neuroimage.2007.09.067},
grant = {NIH DC05210},
journal = {Neuroimage},
keywords = {analysis},
title = {Brain Anatomy Differences in Childhood Stuttering},
year = {2007}
}
Jennifer Cole, Yoonsook Mo, & Mark Hasegawa-Johnson. Signal-based and expectation-based factors in the perception of prosodic prominence. Journal of Laboratory Phonology, 2007
@article{cole2007signal-based,
author = {Jennifer Cole and Yoonsook Mo and Mark Hasegawa-Johnson},
doi = {10.1515/labphon.2010.022},
grant = {NSF 0703624},
journal = {Journal of Laboratory Phonology},
keywords = {analysis},
title = {Signal-based and expectation-based factors in the perception of prosodic prominence},
year = {2007}
}
Jennifer Cole, Heejin Kim, Hansook Choi, & Mark Hasegawa-Johnson. Prosodic effects on acoustic cues to stop voicing and place of articulation: Evidence from Radio News speech. J Phonetics, vol. 35, pp. 180-209, 2007
@article{cole2007prosodic,
author = {Jennifer Cole and Heejin Kim and Hansook Choi and Mark Hasegawa-Johnson},
doi = {10.1016/j.wocn.2006.03.004},
grant = {NSF 0414117},
journal = {J Phonetics},
keywords = {analysis},
pages = {180-209},
title = {Prosodic effects on acoustic cues to stop voicing and place of articulation: Evidence from Radio News speech},
volume = {35},
year = {2007}
}
Mark Hasegawa-Johnson. Multi-Stream Approach to Audiovisual Automatic Speech Recognition. IEEE 9th Workshop on Multimedia Signal Processing (MMSP), pp. 328-331, 2007
@inproceedings{hasegawajohnson2007multi-stream,
author = {Mark Hasegawa-Johnson},
booktitle = {IEEE 9th Workshop on Multimedia Signal Processing (MMSP)},
doi = {10.1109/MMSP.2007.4412884},
keywords = {recognition},
pages = {328-331},
title = {Multi-Stream Approach to Audiovisual Automatic Speech Recognition},
year = {2007}
}
Yun Fu, Xi Zhou, Ming Liu, Mark Hasegawa-Johnson, & Thomas S. Huang. Lipreading by Locality Discriminant Graph. IEEE International Conference on Image Processing (ICIP), pp. III:325-8, 2007
@inproceedings{fu2007lipreading,
author = {Yun Fu and Xi Zhou and Ming Liu and Mark Hasegawa-Johnson and Thomas S. Huang},
booktitle = {IEEE International Conference on Image Processing (ICIP)},
doi = {10.1109/ICIP.2007.4379312},
grant = {NSF 0426627},
keywords = {recognition},
pages = {III:325-8},
title = {Lipreading by Locality Discriminant Graph},
year = {2007}
}
Karen Livescu, Özgür Çetin, Mark Hasegawa-Johnson, Simon King, Chris Bartels, Nash Borges, Arthur Kantor, Partha Lal, Lisa Yung, Ari Bezman, Stephen Dawson-Haggerty, Bronwyn Woods, Joe Frankel, Mathew Magimai-Doss, & Kate Saenko. Articulatory-Feature-Based Methods for Acoustic and Audio-Visual Speech Recognition: 2006 JHU Summer Workshop Final Report. Final report of the WS06 Johns Hopkins Summer Workshop team, 2007
@unpublished{livescu2007articulatory-feature-based,
author = {Karen Livescu and Özgür Çetin and Mark Hasegawa-Johnson and Simon King and Chris Bartels and Nash Borges and Arthur Kantor and Partha Lal and Lisa Yung and Ari Bezman and Stephen Dawson-Haggerty and Bronwyn Woods and Joe Frankel and Mathew Magimai-Doss and Kate Saenko},
keywords = {recognition},
note = {Final report of the WS06 Johns Hopkins Summer Workshop team},
title = {Articulatory-Feature-Based Methods for Acoustic and Audio-Visual Speech Recognition: 2006 JHU Summer Workshop Final Report.},
year = {2007}
}
Ken Chen, Mark Hasegawa-Johnson, & Jennifer Cole. A Factored Language Model for Prosody-Dependent Speech Recognition. Robust Speech Recognition and Understanding, Michael Grimm and Kristian Kroschel, eds., INTECH Publishing, pp. 319-332, 2007
@incollection{chen2007a,
author = {Ken Chen and Mark Hasegawa-Johnson and Jennifer Cole},
booktitle = {Robust Speech Recognition and Understanding},
editor = {Michael Grimm and Kristian Kroschel},
keywords = {recognition},
pages = {319-332},
publisher = {INTECH Publishing},
title = {A Factored Language Model for Prosody-Dependent Speech Recognition},
url = {http://www.intechopen.com/books/show/title/robust_speech_recognition_and_understanding},
year = {2007}
}
Weimo Zhu, Mark Hasegawa-Johnson, Karen Chapman-Novakofski, & Arthur Kantor. Cellphone-Based Nutrition E-Diary. National Nutrient Database Conference, 2007
@inproceedings{zhu2007cellphone-based,
author = {Weimo Zhu and Mark Hasegawa-Johnson and Karen Chapman-Novakofski and Arthur Kantor},
booktitle = {National Nutrient Database Conference},
keywords = {recognition},
title = {Cellphone-Based Nutrition E-Diary},
year = {2007}
}
Weimo Zhu, Mark Hasegawa-Johnson, Arthur Kantor, Dan Roth, Yong Gao, Youngsik Park, & Lin Yang. E-coder for Automatic Scoring Physical Activity Diary Data: Development and Validation. ACSM, 2007
@inproceedings{zhu2007ecoder,
author = {Weimo Zhu and Mark Hasegawa-Johnson and Arthur Kantor and Dan Roth and Yong Gao and Youngsik Park and Lin Yang},
booktitle = {ACSM},
keywords = {recognition},
title = {E-coder for Automatic Scoring Physical Activity Diary Data: Development and Validation},
year = {2007}
}
Mark Hasegawa-Johnson. Phonology and the Art of Automatic Speech Recognition. Director’s Seminar Series, Beckman Institute, University of Illinois at Urbana-Champaign, Nov, 2006
@unpublished{hasegawajohnson2006phonology,
author = {Mark Hasegawa-Johnson},
grant = {NSF 0414117},
keywords = {recognition},
month = {November},
note = {Director's Seminar Series, Beckman Institute, University of Illinois at Urbana-Champaign},
title = {Phonology and the Art of Automatic Speech Recognition},
year = {2006}
}
Rahul Chitturi, & Mark Hasegawa-Johnson. Novel Time-Domain Multi-class SVMs for Landmark Detection. Proc. Interspeech, pp. paper 1904-Thu1CaP.14, Sep, 2006
@inproceedings{chitturi2006time,
author = {Rahul Chitturi and Mark Hasegawa-Johnson},
booktitle = {Proc. Interspeech},
doi = {10.21437/Interspeech.2006-105},
keywords = {analysis},
month = {September},
pages = {paper 1904-Thu1CaP.14},
title = {Novel Time-Domain Multi-class SVMs for Landmark Detection},
year = {2006}
}
Mark Hasegawa-Johnson. Object Tracking and Asynchrony in Audio-Visual Speech Recognition.. talk given to the Artificial Intelligence, Vision, and Robotics seminar series, Aug, 2006
@unpublished{hasegawajohnson2006object,
author = {Mark Hasegawa-Johnson},
grant = {NSF 0534106},
keywords = {recognition},
month = {August},
note = {talk given to the Artificial Intelligence, Vision, and Robotics seminar series},
title = {Object Tracking and Asynchrony in Audio-Visual Speech Recognition.},
year = {2006}
}
Mark Hasegawa-Johnson. Dealing with Acoustic Noise. Part III: Video. Tutorial presentation given at WS06, Center for Language and Speech Processing, Jul, 2006
@unpublished{hasegawajohnson2006dealing,
author = {Mark Hasegawa-Johnson},
grant = {NSF 0121285},
keywords = {recognition},
month = {July},
note = {Tutorial presentation given at WS06, Center for Language and Speech Processing},
title = {Dealing with Acoustic Noise. Part III: Video},
year = {2006}
}
Mark Hasegawa-Johnson. Dealing with Acoustic Noise. Part II: Beamforming.. Tutorial presentation given at WS06, Center for Language and Speech Processing, Jul, 2006
@unpublished{hasegawajohnson2006dealing1,
author = {Mark Hasegawa-Johnson},
keywords = {synthesis},
month = {July},
note = {Tutorial presentation given at WS06, Center for Language and Speech Processing},
title = {Dealing with Acoustic Noise. Part II: Beamforming.},
year = {2006}
}
Mark Hasegawa-Johnson. Dealing with Acoustic Noise. Part I: Spectral Estimation.. Tutorial presentation given at WS06, Center for Language and Speech Processing, Jul, 2006
@unpublished{hasegawajohnson2006dealing2,
author = {Mark Hasegawa-Johnson},
keywords = {synthesis},
month = {July},
note = {Tutorial presentation given at WS06, Center for Language and Speech Processing},
title = {Dealing with Acoustic Noise. Part I: Spectral Estimation.},
year = {2006}
}
Mark Hasegawa-Johnson, Jonathan Gunderson, Adrienne Perlman, & Thomas Huang. HMM-Based and SVM-Based Recognition of the Speech of Talkers with Spastic Dysarthria. ICASSP, pp. III:1060-3, May, 2006
@inproceedings{hasegawajohnson2006hmm-based,
author = {Mark Hasegawa-Johnson and Jonathan Gunderson and Adrienne Perlman and Thomas Huang},
booktitle = {ICASSP},
doi = {10.1109/ICASSP.2006.1660840},
grant = {NSF 0534106},
keywords = {recognition},
month = {May},
pages = {III:1060-3},
title = {HMM-Based and SVM-Based Recognition of the Speech of Talkers with Spastic Dysarthria},
year = {2006}
}
Lae-Hoon Kim, Mark Hasegawa-Johnson, & Koeng-Mo Sung. Generalized Optimal Multi-Microphone Speech Enhancement Using Sequential Minimum Variance Distortionless Response (MVDR) Beamforming and Postfiltering. ICASSP, pp. III:65-8, May, 2006
@inproceedings{kim2006generalized,
author = {Lae-Hoon Kim and Mark Hasegawa-Johnson and Koeng-Mo Sung},
booktitle = {ICASSP},
doi = {10.1109/ICASSP.2006.1660591},
keywords = {synthesis},
month = {May},
pages = {III:65-8},
title = {Generalized Optimal Multi-Microphone Speech Enhancement Using Sequential Minimum Variance Distortionless Response (MVDR) Beamforming and Postfiltering},
year = {2006}
}
Tong Zhang, Mark Hasegawa-Johnson, & Stephen E. Levinson. Cognitive State Classification in a spoken tutorial dialogue system. Speech Communication, vol. 48, no. 6, 2006
@article{zhang2006cognitive,
author = {Tong Zhang and Mark Hasegawa-Johnson and Stephen E. Levinson},
doi = {10.1016/j.specom.2005.09.006},
grant = {NSF 0085980},
journal = {Speech Communication},
keywords = {analysis},
number = {6},
pages = {616-632},
title = {Cognitive State Classification in a spoken tutorial dialogue system},
volume = {48},
year = {2006}
}
Rajiv Reddy, & Mark Hasegawa-Johnson. Analysis of Pitch Contours in Repetition-Disfluency using Stem-ML. Midwest Computational Linguistics Colloquium, 2006
@inproceedings{reddy2006mclc,
author = {Rajiv Reddy and Mark Hasegawa-Johnson},
booktitle = {Midwest Computational Linguistics Colloquium},
keywords = {analysis},
title = {Analysis of Pitch Contours in Repetition-Disfluency using Stem-ML},
url = {https://www.researchgate.net/profile/Rajiv-Reddy-7/publication/228967280_Analysis_of_Pitch_Contours_in_Repetition-Disfluency_using_Stem-ML/links/00b49536b0b12a63e2000000/Analysis-of-Pitch-Contours-in-Repetition-Disfluency-using-Stem-ML.pdf},
year = {2006}
}
Soo-Eun Chang, Kirk I. Erickson, Nicoline G. Ambrose, Mark Hasegawa-Johnson, & C.L. Ludlow. Deficient white matter development in left hemisphere speech-language regions in children who stutter. Society for Neuroscience, Atlanta, GA, 2006
@inproceedings{chang2006deficient,
address = {Atlanta, GA},
author = {Soo-Eun Chang and Kirk I. Erickson and Nicoline G. Ambrose and Mark Hasegawa-Johnson and C.L. Ludlow},
booktitle = {Society for Neuroscience},
grant = {NIH DC05210},
keywords = {analysis},
title = {Deficient white matter development in left hemisphere speech-language regions in children who stutter},
year = {2006}
}
Rahul Chitturi, & Mark Hasegawa-Johnson. Novel entropy based moving average refiners for HMM landmarks. Proc. Interspeech 2006, pp. paper 1911-Wed1FoP.8, 2006
@inproceedings{chitturi2006entropy,
author = {Rahul Chitturi and Mark Hasegawa-Johnson},
booktitle = {Proc. Interspeech 2006},
doi = {10.21437/Interspeech.2006-468},
pages = {paper 1911-Wed1FoP.8},
title = {Novel entropy based moving average refiners for HMM landmarks},
year = {2006}
}
Heejin Kim, Taejin Yoon, Jennifer Cole, & Mark Hasegawa-Johnson. Acoustic differentiation of L- and L-L% in Switchboard and Radio News speech. Proceedings of Speech Prosody, Dresden, 2006
@inproceedings{kim2006acoustic,
address = {Dresden},
author = {Heejin Kim and Taejin Yoon and Jennifer Cole and Mark Hasegawa-Johnson},
booktitle = {Proceedings of Speech Prosody},
grant = {NSF 0414117},
keywords = {analysis},
title = {Acoustic differentiation of L- and L-L% in Switchboard and Radio News speech},
url = {https://www.isca-speech.org/archive/speechprosody_2006/kim06d_speechprosody.html},
year = {2006}
}
Rajiv Reddy. Analysis of Pitch Contours in Repetition-Disfluency Using Stem-ML. B.S. Thesis, University of Illinois, 2006
@unpublished{reddy2006thesis,
author = {Rajiv Reddy},
howpublished = {B.S. Thesis, University of Illinois},
keywords = {analysis},
title = {Analysis of Pitch Contours in Repetition-Disfluency Using Stem-ML},
year = {2006}
}
Bowon Lee. Robust Speech Recognition in a Car Using a Microphone Array. Ph.D. Thesis, University of Illinois, 2006
@phdthesis{lee2006robust,
author = {Bowon Lee},
keywords = {recognition},
school = {University of Illinois},
title = {Robust Speech Recognition in a Car Using a Microphone Array},
url = {https://www.ideals.illinois.edu/items/82279},
year = {2006}
}
Camille Goudeseune, & Bowon Lee. AVICAR: Audio-Visual Speech Recognition in a Car Environment. Promotional Film, 2006
@unpublished{goudeseune2006avicar,
author = {Camille Goudeseune and Bowon Lee},
booktitle = {Promotional Film},
grant = {Motorola RPS19},
keywords = {recognition},
title = {AVICAR: Audio-Visual Speech Recognition in a Car Environment},
year = {2006}
}
Ken Chen, Mark Hasegawa-Johnson, Aaron Cohen, Sarah Borys, Sung-Suk Kim, Jennifer Cole, & Jeung-Yoon Choi. Prosody Dependent Speech Recognition on Radio News Corpus of American English. IEEE Transactions on Speech and Audio Processing, vol. 14, no. 1, pp. 232-245, 2006
@article{chen2006prosody,
author = {Ken Chen and Mark Hasegawa-Johnson and Aaron Cohen and Sarah Borys and Sung-Suk Kim and Jennifer Cole and Jeung-Yoon Choi},
doi = {10.1109/TSA.2005.853208},
grant = {NSF 0132900},
journal = {IEEE Transactions on Speech and Audio Processing},
keywords = {recognition},
number = {1},
pages = {232-245},
title = {Prosody Dependent Speech Recognition on Radio News Corpus of American English},
volume = {14},
year = {2006}
}
Sarah Borys, & Mark Hasegawa-Johnson. Distinctive Feature Based SVM Discriminant Features for Improvements to Phone Recognition on Telephone Band Speech. ISCA Interspeech, Oct, 2005
@inproceedings{borys2005distinctive,
author = {Sarah Borys and Mark Hasegawa-Johnson},
booktitle = {ISCA Interspeech},
doi = {10.21437/Interspeech.2005-200},
grant = {NSF 0132900},
keywords = {recognition},
month = {October},
title = {Distinctive Feature Based SVM Discriminant Features for Improvements to Phone Recognition on Telephone Band Speech},
year = {2005}
}
Lae-Hoon Kim, & Mark Hasegawa-Johnson. Generalized multi-microphone spectral amplitude estimation based on correlated noise model. 119th Convention of the Audio Engineering Society, New York, Oct, 2005
@inproceedings{kim2005generalized,
address = {New York},
author = {Lae-Hoon Kim and Mark Hasegawa-Johnson},
booktitle = {119th Convention of the Audio Engineering Society},
keywords = {synthesis},
month = {October},
title = {Generalized multi-microphone spectral amplitude estimation based on correlated noise model},
year = {2005}
}
Mark Hasegawa-Johnson, James Baker, Sarah Borys, Ken Chen, Emily Coogan, Steven Greenberg, Amit Juneja, Katrin Kirchhoff, Karen Livescu, Srividya Mohan, Jennifer Muller, Kemal Sönmez, & Tianyu Wang. Landmark-Based Speech Recognition: Report of the 2004 Johns Hopkins Summer Workshop. ICASSP, pp. 1213-1216, Mar, 2005
@inproceedings{hasegawajohnson2004landmark,
author = {Mark Hasegawa-Johnson and James Baker and Sarah Borys and Ken Chen and Emily Coogan and Steven Greenberg and Amit Juneja and Katrin Kirchhoff and Karen Livescu and Srividya Mohan and Jennifer Muller and Kemal Sönmez and Tianyu Wang},
booktitle = {ICASSP},
doi = {10.1109/ICASSP.2005.1415088},
grant = {NSF 0121285},
keywords = {recognition},
month = {March},
pages = {1213-1216},
title = {Landmark-Based Speech Recognition: Report of the 2004 Johns Hopkins Summer Workshop},
year = {2005}
}
Weimo Zhu, Mark Hasegawa-Johnson, & Mital Arun Gandhi. Accuracy of Voice-Recognition Technology in Collecting Behavior Diary Data. Association of Test Publishers (ATP): Innovations in Testing, Mar, 2005
@inproceedings{zhu2005accuracy,
author = {Weimo Zhu and Mark Hasegawa-Johnson and Mital Arun Gandhi},
booktitle = {Association of Test Publishers (ATP): Innovations in Testing},
keywords = {recognition},
month = {March},
title = {Accuracy of Voice-Recognition Technology in Collecting Behavior Diary Data},
year = {2005}
}
Tae-Jin Yoon, Jennifer Cole, Mark Hasegawa-Johnson, & Chilin Shih. Acoustic correlates of non-modal phonation in telephone speech. The Journal of the Acoustical Society of America, vol. 117, no. 4, pp. 2621, 2005
@article{yoon2005acoustic,
author = {Tae-Jin Yoon and Jennifer Cole and Mark Hasegawa-Johnson and Chilin Shih},
doi = {10.1121/1.4778287},
grant = {NSF 0414117},
journal = {The Journal of the Acoustical Society of America},
keywords = {analysis},
number = {4},
pages = {2621},
title = {Acoustic correlates of non-modal phonation in telephone speech},
volume = {117},
year = {2005}
}
Taejin Yoon. Mapping Syntax and Prosody. Presentation at the Midwest Computational Linguistics Colloquium, Columbus, OH, 2005
@unpublished{yoon2005mapping,
address = {Columbus, OH},
author = {Taejin Yoon},
grant = {NSF 0414117},
keywords = {analysis},
note = {Presentation at the Midwest Computational Linguistics Colloquium},
title = {Mapping Syntax and Prosody},
year = {2005}
}
Jeung-Yoon Choi, Mark Hasegawa-Johnson, & Jennifer Cole. Finding Intonational Boundaries Using Acoustic Cues Related to the Voice Source. Journal of the Acoustical Society of America, vol. 118, no. 4, pp. 2579-88, 2005
@article{choi2005finding,
author = {Jeung-Yoon Choi and Mark Hasegawa-Johnson and Jennifer Cole},
doi = {10.1121/1.2010288},
journal = {Journal of the Acoustical Society of America},
keywords = {analysis},
number = {4},
pages = {2579-88},
title = {Finding Intonational Boundaries Using Acoustic Cues Related to the Voice Source},
volume = {118},
year = {2005}
}
Jennifer Cole, Mark Hasegawa-Johnson, Chilin Shih, Eun-Kyung Lee, Heejin Kim, H. Lu, Yoonsook Mo, & Tae-Jin Yoon. Prosodic Parallelism as a Cue to Repetition and Hesitation Disfluency. Disfluency In Spontaneous Speech (DISS’05), Aix-en-Provence, France, pp. 53-58, 2005
@inproceedings{cole2005prosodic,
address = {Aix-en-Provence, France},
author = {Jennifer Cole and Mark Hasegawa-Johnson and Chilin Shih and Eun-Kyung Lee and Heejin Kim and H. Lu and Yoonsook Mo and Tae-Jin Yoon},
booktitle = {Disfluency In Spontaneous Speech (DISS'05)},
grant = {NSF 0414117},
keywords = {analysis},
pages = {53-58},
title = {Prosodic Parallelism as a Cue to Repetition and Hesitation Disfluency},
url = {https://www.isca-speech.org/archive/diss_2005/cole05_diss.html},
year = {2005}
}
Yeojin Kim, & Mark Hasegawa-Johnson. Phonetic Segment Rescoring Using SVMs. Midwest Computational Linguistics Colloquium, Columbus, OH, 2005
@inproceedings{kim2005phonetic,
address = {Columbus, OH},
author = {Yeojin Kim and Mark Hasegawa-Johnson},
booktitle = {Midwest Computational Linguistics Colloquium},
grant = {NSF 0132900},
keywords = {recognition},
title = {Phonetic Segment Rescoring Using SVMs},
year = {2005}
}
Mark Hasegawa-Johnson, James Baker, Steven Greenberg, Katrin Kirchhoff, Jennifer Muller, Kemal Sonmez, Sarah Borys, Ken Chen, Amit Juneja, Karen Livescu, Srividya Mohan, Emily Coogan, & Tianyu Wang. Landmark-Based Speech Recognition: Report of the 2004 Johns Hopkins Summer Workshop. no. WS04, Final report of the WS04 Johns Hopkins Summer Workshop team, 2005
@unpublished{hasegawajohnson2005landmark-based,
author = {Mark Hasegawa-Johnson and James Baker and Steven Greenberg and Katrin Kirchhoff and Jennifer Muller and Kemal Sonmez and Sarah Borys and Ken Chen and Amit Juneja and Karen Livescu and Srividya Mohan and Emily Coogan and Tianyu Wang},
keywords = {recognition},
note = {Final report of the WS04 Johns Hopkins Summer Workshop team},
number = {WS04},
title = {Landmark-Based Speech Recognition: Report of the 2004 Johns Hopkins Summer Workshop},
year = {2005}
}
Yanli Zheng. Acoustic Modeling and Feature Extraction for Speech Recognition. Ph.D. Thesis, University of Illinois, 2005
@phdthesis{zheng2005feature,
author = {Yanli Zheng},
keywords = {recognition},
school = {University of Illinois},
title = {Acoustic Modeling and Feature Extraction for Speech Recognition},
url = {https://www.ideals.illinois.edu/items/82196},
year = {2005}
}
Mark Hasegawa-Johnson, Ken Chen, Jennifer Cole, Sarah Borys, Sung-Suk Kim, Aaron Cohen, Tong Zhang, Jeung-Yoon Choi, Heejin Kim, Taejin Yoon, & Sandra Chavarria. Simultaneous Recognition of Words and Prosody in the Boston University Radio Speech Corpus. Speech Communication, vol. 46, no. 3-4, pp. 418-439, 2005
@article{hasegawajohnson2005simultaneous,
author = {Mark Hasegawa-Johnson and Ken Chen and Jennifer Cole and Sarah Borys and Sung-Suk Kim and Aaron Cohen and Tong Zhang and Jeung-Yoon Choi and Heejin Kim and Taejin Yoon and Sandra Chavarria},
doi = {10.1016/j.specom.2005.01.009},
grant = {NSF 0132900},
journal = {Speech Communication},
keywords = {recognition},
number = {3-4},
pages = {418-439},
title = {Simultaneous Recognition of Words and Prosody in the Boston University Radio Speech Corpus},
volume = {46},
year = {2005}
}
Tong Zhang, Mark Hasegawa-Johnson, & Stephen E. Levinson. A Hybrid Model for Spontaneous Speech Understanding. Proceedings of the National Conference on Artificial Intelligence, pp. 10.1.1.80.879:1-8, 2005
@inproceedings{zhang2005a,
author = {Tong Zhang and Mark Hasegawa-Johnson and Stephen E. Levinson},
booktitle = {Proceedings of the National Conference on Artificial Intelligence},
grant = {NSF 0085980},
keywords = {recognition},
pages = {10.1.1.80.879:1-8},
title = {A Hybrid Model for Spontaneous Speech Understanding},
url = {https://citeseerx.ist.psu.edu/document?repid=rep1&type=pdf&doi=8110962ce5089070665624355945933b54b7e8eb},
year = {2005}
}
Arthur Kantor, Weimo Zhu, & Mark Hasegawa-Johnson. Restricted domain speech classification using automatic transcription and SVMs. Midwest Computational Linguistics Colloquium, 2005
@inproceedings{kantor2005restricted,
author = {Arthur Kantor and Weimo Zhu and Mark Hasegawa-Johnson},
booktitle = {Midwest Computational Linguistics Colloquium},
keywords = {recognition},
title = {Restricted domain speech classification using automatic transcription and SVMs},
year = {2005}
}
Christopher Co. Room Reconstruction and Navigation Using Acoustically Obtained Room Impulse Responses and a Mobile Robot Platform. Master’s Thesis, University of Illinois, 2005
@mastersthesis{co2005room,
author = {Christopher Co},
keywords = {analysis},
school = {University of Illinois},
software = {http://isle.illinois.edu/speech_web_lg/software/2013/co2013software.tgz},
title = {Room Reconstruction and Navigation Using Acoustically Obtained Room Impulse Responses and a Mobile Robot Platform},
url = {https://www.ideals.illinois.edu/items/42150},
year = {2005}
}
Soo-Eun Chang, Nicoline Ambrose, & Mark Hasegawa-Johnson. An MRI (DTI) study on children with persistent developmental stuttering. ASHA Convention, Nov, 2004
@inproceedings{chang2004an,
author = {Soo-Eun Chang and Nicoline Ambrose and Mark Hasegawa-Johnson},
booktitle = {ASHA Convention},
keywords = {analysis},
month = {November},
title = {An MRI (DTI) study on children with persistent developmental stuttering},
year = {2004}
}
Mark Hasegawa-Johnson, Stephen E. Levinson, & Tong Zhang. Children’s Emotion Recognition in an Intelligent Tutoring Scenario. Proc. Interspeech, pp. 1441-1444, Oct, 2004
@inproceedings{zhang2004children,
author = {Mark Hasegawa-Johnson and Stephen E. Levinson and Tong Zhang},
booktitle = {Proc. Interspeech},
doi = {10.21437/Interspeech.2004-552},
grant = {NSF 0085980},
keywords = {analysis},
month = {October},
pages = {1441--1444},
title = {Children's Emotion Recognition in an Intelligent Tutoring Scenario},
year = {2004}
}
Yanli Zheng, Mark Hasegawa-Johnson, & Sarah Borys. Stop Consonant Classification by Dynamic Formant Trajectory. Proc. Interspeech, pp. 396-399, Oct, 2004
@inproceedings{zheng2004stop,
author = {Yanli Zheng and Mark Hasegawa-Johnson and Sarah Borys},
booktitle = {Proc. Interspeech},
doi = {10.21437/Interspeech.2004-403},
grant = {NSF 0132900},
keywords = {analysis},
month = {October},
pages = {396-399},
title = {Stop Consonant Classification by Dynamic Formant Trajectory},
year = {2004}
}
Tae-Jin Yoon, Sandra Chavarria, Jennifer Cole, & Mark Hasegawa-Johnson. Intertranscriber Reliability of Prosodic Labeling on Telephone Conversation Using ToBI. Proc. Interspeech, pp. 2729-2732, Oct, 2004
@inproceedings{yoon2004intertranscriber,
author = {Tae-Jin Yoon and Sandra Chavarria and Jennifer Cole and Mark Hasegawa-Johnson},
booktitle = {Proc. Interspeech},
doi = {10.21437/Interspeech.2004-659},
keywords = {analysis},
month = {October},
pages = {2729--2732},
title = {Intertranscriber Reliability of Prosodic Labeling on Telephone Conversation Using ToBI},
year = {2004}
}
Mark Hasegawa-Johnson. Landmark-Based Speech Recognition: The Marriage of High-Dimensional Machine Learning Techniques with Modern Linguistic Representations. talk given at Tsinghua University, Oct, 2004
@unpublished{hasegawajohnson2004landmark-based,
author = {Mark Hasegawa-Johnson},
booktitle = {talk given at Tsinghua University},
grant = {NSF 0132900},
keywords = {recognition},
month = {October},
title = {Landmark-Based Speech Recognition: The Marriage of High-Dimensional Machine Learning Techniques with Modern Linguistic Representations},
year = {2004}
}
Mark Hasegawa-Johnson, & Ameya Deoras. A Factorial HMM Approach to Robust Isolated Digit Recognition in Background Music.. Proc. Interspeech, pp. 2093-2096, Oct, 2004
@inproceedings{hasegawajohnson2004factorial,
author = {Mark Hasegawa-Johnson and Ameya Deoras},
booktitle = {Proc. Interspeech},
doi = {10.21437/Interspeech.2004-637},
grant = {NSF 0132900},
keywords = {recognition},
month = {October},
pages = {2093--2096},
title = {A Factorial HMM Approach to Robust Isolated Digit Recognition in Background Music.},
year = {2004}
}
Ken Chen, & Mark Hasegawa-Johnson. Modeling pronunciation variation using artificial neural networks for English spontaneous speech. Proc. Interspeech, pp. 400-403, Oct, 2004
@inproceedings{chen2004modeling,
author = {Ken Chen and Mark Hasegawa-Johnson},
booktitle = {Proc. Interspeech},
doi = {10.21437/Interspeech.2004-557},
grant = {NSF 0414117},
keywords = {recognition},
month = {October},
pages = {400-403},
title = {Modeling pronunciation variation using artificial neural networks for English spontaneous speech},
year = {2004}
}
Bowon Lee, Mark Hasegawa-Johnson, Camille Goudeseune, Suketu Kamdar, Sarah Borys, Ming Liu, & Thomas Huang. AVICAR: Audio-Visual Speech Corpus in a Car Environment.. Proc. Interspeech, pp. 380-383, Oct, 2004
@inproceedings{lee2004avicar,
author = {Bowon Lee and Mark Hasegawa-Johnson and Camille Goudeseune and Suketu Kamdar and Sarah Borys and Ming Liu and Thomas Huang},
booktitle = {Proc. Interspeech},
doi = {10.21437/Interspeech.2004-424},
grant = {Motorola RPS19},
keywords = {recognition},
month = {October},
pages = {380-383},
title = {AVICAR: Audio-Visual Speech Corpus in a Car Environment.},
year = {2004}
}
Sarah Borys, Mark Hasegawa-Johnson, Ken Chen, & Aaron Cohen. Modeling and Recognition of Phonetic and Prosodic Factors for Improvements to Acoustic Speech Recognition Models. Proc. Interspeech, pp. 3013-3016, Oct, 2004
@inproceedings{borys2004modeling,
author = {Sarah Borys and Mark Hasegawa-Johnson and Ken Chen and Aaron Cohen},
booktitle = {Proc. Interspeech},
doi = {10.21437/Interspeech.2004-756},
grant = {NSF 0132900},
keywords = {recognition},
month = {October},
pages = {3013--3016},
title = {Modeling and Recognition of Phonetic and Prosodic Factors for Improvements to Acoustic Speech Recognition Models},
year = {2004}
}
Mark Hasegawa-Johnson. Speech Recognition Models of the Interdependence Among Syntax, Prosody, and Segmental Acoustics. talk given at Tsinghua University, Oct, 2004
@unpublished{hasegawajohnson2004tsinghua,
author = {Mark Hasegawa-Johnson},
booktitle = {talk given at Tsinghua University},
grant = {NSF 0414117},
keywords = {recognition},
month = {October},
title = {Speech Recognition Models of the Interdependence Among Syntax, Prosody, and Segmental Acoustics},
year = {2004}
}
Mital Gandhi, & Mark Hasegawa-Johnson. Source Separation using Particle Filters. Proc. Interspeech, pp. 2673-2676, Oct, 2004
@inproceedings{gandhi2004source,
author = {Mital Gandhi and Mark Hasegawa-Johnson},
booktitle = {Proc. Interspeech},
doi = {10.21437/Interspeech.2004-710},
keywords = {synthesis},
month = {October},
pages = {2673--2676},
title = {Source Separation using Particle Filters},
year = {2004}
}
Mark Hasegawa-Johnson, Sarah Borys, & Ken Chen. Experiments in Landmark-Based Speech Recognition. Sound to Sense: Workshop in Honor of Kenneth N. Stevens, Jun, 2004
@inproceedings{hasegawajohnson2004experiments,
author = {Mark Hasegawa-Johnson and Sarah Borys and Ken Chen},
booktitle = {Sound to Sense: Workshop in Honor of Kenneth N. Stevens},
grant = {NSF 0132900},
keywords = {recognition},
month = {June},
title = {Experiments in Landmark-Based Speech Recognition},
year = {2004}
}
Mark Hasegawa-Johnson, Jennifer Cole, Chilin Shih, Ken Chen, Aaron Cohen, Sandra Chavarria, Heejin Kim, Taejin Yoon, Sarah Borys, & Jeung-Yoon Choi. Speech Recognition Models of the Interdependence Among Syntax, Prosody, and Segmental Acoustics. HLT/NAACL Workshop on Higher-Level Knowledge in Automatic Speech Recognition and Understanding, pp. 56-63, May, 2004
@inproceedings{hasegawajohnson2004hlt,
author = {Mark Hasegawa-Johnson and Jennifer Cole and Chilin Shih and Ken Chen and Aaron Cohen and Sandra Chavarria and Heejin Kim and Taejin Yoon and Sarah Borys and Jeung-Yoon Choi},
booktitle = {HLT/NAACL Workshop on Higher-Level Knowledge in Automatic Speech Recognition and Understanding},
grant = {NSF 0414117},
keywords = {recognition},
month = {May},
pages = {56-63},
title = {Speech Recognition Models of the Interdependence Among Syntax, Prosody, and Segmental Acoustics},
url = {https://aclanthology.org/W04-3010/},
year = {2004}
}
Ken Chen, Mark Hasegawa-Johnson, Aaron Cohen, & Jennifer Cole. A Maximum Likelihood Prosody Recognizer. SpeechProsody, Nara, Japan, pp. 509-512, Mar, 2004
@inproceedings{chen2004maximum,
address = {Nara, Japan},
author = {Ken Chen and Mark Hasegawa-Johnson and Aaron Cohen and Jennifer Cole},
booktitle = {SpeechProsody},
grant = {NSF 0132900},
keywords = {analysis},
month = {March},
pages = {509-512},
title = {A Maximum Likelihood Prosody Recognizer},
url = {https://www.isca-speech.org/archive/speechprosody_2004/chen04b_speechprosody.html},
year = {2004}
}
Yuexi Ren, Sung-Suk Kim, Mark Hasegawa-Johnson, & Jennifer Cole. Speaker-Independent Automatic Detection of Pitch Accent. Proc. Speech Prosody 2004, Nara, Japan, pp. 521-524, Mar, 2004
@inproceedings{ren2004speaker,
address = {Nara, Japan},
author = {Yuexi Ren and Sung-Suk Kim and Mark Hasegawa-Johnson and Jennifer Cole},
booktitle = {Proc. Speech Prosody 2004},
grant = {NSF 0085980},
keywords = {analysis},
month = {March},
pages = {521--524},
title = {Speaker-Independent Automatic Detection of Pitch Accent},
url = {https://www.isca-speech.org/archive/speechprosody_2004/ren04_speechprosody.html},
year = {2004}
}
Heejin Kim, Jennifer Cole, Hansook Choi, & Mark Hasegawa-Johnson. The Effect of Accent on Acoustic Cues to Stop Voicing and Place of Articulation in Radio News Speech. SpeechProsody, Nara, Japan, pp. 29-32, Mar, 2004
@inproceedings{kim2004the,
address = {Nara, Japan},
author = {Heejin Kim and Jennifer Cole and Hansook Choi and Mark Hasegawa-Johnson},
booktitle = {SpeechProsody},
keywords = {analysis},
month = {March},
pages = {29-32},
title = {The Effect of Accent on Acoustic Cues to Stop Voicing and Place of Articulation in Radio News Speech},
url = {https://www.isca-speech.org/archive/speechprosody_2004/kim04_speechprosody.html},
year = {2004}
}
Sandra Chavarria, Taejin Yoon, Jennifer Cole, & Mark Hasegawa-Johnson. Acoustic differentiation of ip and IP boundary levels: Comparison of L- and L-L% in the Switchboard corpus. Speech Prosody, Nara, Japan, pp. 333-336, Mar, 2004
@inproceedings{chavarria000179acoustic,
address = {Nara, Japan},
author = {Sandra Chavarria and Taejin Yoon and Jennifer Cole and Mark Hasegawa-Johnson},
booktitle = {Speech Prosody},
keywords = {analysis},
month = {March},
pages = {333-336},
title = {Acoustic differentiation of ip and IP boundary levels: Comparison of L- and L-L% in the Switchboard corpus},
url = {https://www.isca-speech.org/archive/speechprosody_2004/chavarria04_speechprosody.html},
year = {2004}
}
Ken Chen, & Mark Hasegawa-Johnson. How Prosody Improves Word Recognition. Speech Prosody, Nara, Japan, pp. 583-586, Mar, 2004
@inproceedings{chen2004how,
address = {Nara, Japan},
author = {Ken Chen and Mark Hasegawa-Johnson},
booktitle = {Speech Prosody},
grant = {NSF 0132900},
keywords = {recognition},
month = {March},
pages = {583-586},
title = {How Prosody Improves Word Recognition},
url = {https://www.isca-speech.org/archive/speechprosody_2004/chen04c_speechprosody.html},
year = {2004}
}
Mark Hasegawa-Johnson, Stephen Levinson, & Tong Zhang. Automatic detection of contrast for speech understanding. Proc. Interspeech 2004, pp. 581-584, 2004
@inproceedings{hasegawajohnson2004automatic,
author = {Mark Hasegawa-Johnson and Stephen Levinson and Tong Zhang},
booktitle = {Proc. Interspeech 2004},
doi = {10.21437/Interspeech.2004-223},
grant = {NSF 0085980},
keywords = {analysis},
pages = {581--584},
title = {Automatic detection of contrast for speech understanding},
year = {2004}
}
Ken Chen, & Mark Hasegawa-Johnson. An Automatic Prosody Labeling System Using ANN-Based Syntactic-Prosodic Model and GMM-Based Acoustic-Prosodic Model. ICASSP, 2004
@inproceedings{chen2004an,
author = {Ken Chen and Mark Hasegawa-Johnson},
booktitle = {ICASSP},
doi = {10.1109/ICASSP.2004.1326034},
grant = {NSF 0132900},
keywords = {analysis},
title = {An Automatic Prosody Labeling System Using ANN-Based Syntactic-Prosodic Model and GMM-Based Acoustic-Prosodic Model},
year = {2004}
}
Sung-Suk Kim, Mark Hasegawa-Johnson, & Ken Chen. Automatic Recognition of Pitch Movements Using Multilayer Perceptron and Time-Delay Recursive Neural Network. IEEE Signal Processing Letters, vol. 11, no. 7, pp. 645-648, 2004
@article{kim2004automatic,
author = {Sung-Suk Kim and Mark Hasegawa-Johnson and Ken Chen},
doi = {10.1109/LSP.2004.830114},
grant = {NSF 0132900},
journal = {IEEE Signal Processing Letters},
keywords = {analysis},
number = {7},
pages = {645-648},
title = {Automatic Recognition of Pitch Movements Using Multilayer Perceptron and Time-Delay Recursive Neural Network},
volume = {11},
year = {2004}
}
Yanli Zheng, & Mark Hasegawa-Johnson. Formant Tracking by Mixture State Particle Filter. ICASSP, 2004
@inproceedings{zheng2004formant,
author = {Yanli Zheng and Mark Hasegawa-Johnson},
booktitle = {ICASSP},
doi = {10.1109/ICASSP.2004.1326048},
grant = {NSF 0132900},
keywords = {analysis},
title = {Formant Tracking by Mixture State Particle Filter},
year = {2004}
}
Tae-Jin Yoon, Heejin Kim, & Sandra Chavarría. Local Acoustic Cues Distinguishing Two Levels of Prosodic Phrasing: Speech Corpus Evidence. Labphon 9, University of Illinois at Urbana-Champaign, 2004
@inproceedings{yoon2004local,
address = {University of Illinois at Urbana-Champaign},
author = {Tae-Jin Yoon and Heejin Kim and Sandra Chavarría},
booktitle = {Labphon 9},
keywords = {analysis},
title = {Local Acoustic Cues Distinguishing Two Levels of Prosodic Phrasing: Speech Corpus Evidence},
url = {https://www.researchgate.net/profile/Tae-Jin-Yoon/publication/238592404_Local_Acoustic_Cues_Distinguish_Two_Levels_of_Prosodic_Phrasing_Speech_Corpus_Evidence/links/5575664408ae753637500253/Local-Acoustic-Cues-Distinguish-Two-Levels-of-Prosodic-Phrasing-Speech-Corpus-Evidence.pdf},
year = {2004}
}
Mohammad Kamal Omar, & Mark Hasegawa-Johnson. Model Enforcement: A Unified Feature Transformation Framework for Classification and Recognition. IEEE Transactions on Signal Processing, vol. 52, no. 10, pp. 2701-2710, 2004
@article{omar2004model,
author = {Mohammad Kamal Omar and Mark Hasegawa-Johnson},
doi = {10.1109/TSP.2004.834344},
grant = {NSF 0132900},
journal = {IEEE Transactions on Signal Processing},
keywords = {intelligence},
number = {10},
pages = {2701-2710},
title = {Model Enforcement: A Unified Feature Transformation Framework for Classification and Recognition},
volume = {52},
year = {2004}
}
Ameya Deoras, & Mark Hasegawa-Johnson. A Factorial HMM Approach to Simultaneous Recognition of Isolated Digits Spoken by Multiple Talkers on One Audio Channel. ICASSP, 2004
@inproceedings{deoras2004icassp,
author = {Ameya Deoras and Mark Hasegawa-Johnson},
booktitle = {ICASSP},
doi = {10.1109/ICASSP.2004.1326122},
grant = {NSF 0132900},
keywords = {recognition},
title = {A Factorial HMM Approach to Simultaneous Recognition of Isolated Digits Spoken by Multiple Talkers on One Audio Channel},
year = {2004}
}
Stefan Geirhofer. Feature Reduction with Linear Discriminant Analysis and its Performance on Phoneme Recognition. Undergraduate research project, 2004
@unpublished{geirhofer2004feature,
author = {Stefan Geirhofer},
keywords = {recognition},
note = {Undergraduate research project},
title = {Feature Reduction with Linear Discriminant Analysis and its Performance on Phoneme Recognition},
year = {2004}
}
Yuexi Ren, Mark Hasegawa-Johnson, & Stephen E. Levinson. Semantic analysis for a speech user interface in an intelligent-tutoring system. Intl. Conf. on Intelligent User Interfaces, Madeira, Portugal, 2004
@inproceedings{ren2004semantic,
address = {Madeira, Portugal},
author = {Yuexi Ren and Mark Hasegawa-Johnson and Stephen E. Levinson},
booktitle = {Intl. Conf. on Intelligent User Interfaces},
grant = {NSF 0085980},
keywords = {recognition},
title = {Semantic analysis for a speech user interface in an intelligent-tutoring system},
url = {http://portal.acm.org/citation.cfm?doid=964442.964516},
year = {2004}
}
Ken Chen. Prosody Dependent Speech Recognition on American Radio News Speech. Ph.D. Thesis, University of Illinois, 2004
@phdthesis{chen2004prosody,
author = {Ken Chen},
keywords = {recognition},
school = {University of Illinois},
title = {Prosody Dependent Speech Recognition on American Radio News Speech},
url = {https://www.ideals.illinois.edu/items/82155},
year = {2004}
}
Aaron Cohen. A Survey of Machine Learning Methods for Predicting Prosody in Radio Speech. Master’s Thesis, University of Illinois, 2004
@mastersthesis{cohen2004survey,
author = {Aaron Cohen},
keywords = {analysis},
school = {University of Illinois},
title = {A Survey of Machine Learning Methods for Predicting Prosody in Radio Speech},
year = {2004}
}
Yanli Zheng, & Mark Hasegawa-Johnson. Particle Filtering Approach to Bayesian Formant Tracking. IEEE Workshop on Statistical Signal Processing, pp. 581-584, Sep, 2003
@inproceedings{zheng2003particle,
author = {Yanli Zheng and Mark Hasegawa-Johnson},
booktitle = {IEEE Workshop on Statistical Signal Processing},
doi = {10.1109/SSP.2003.1289549},
grant = {NSF 0132900},
keywords = {analysis},
month = {September},
pages = {581-584},
title = {Particle Filtering Approach to Bayesian Formant Tracking},
year = {2003}
}
Mohammed Kamal Omar, & Mark Hasegawa-Johnson. Maximum Conditional Mutual Information Projection For Speech Recognition. Proc. Interspeech, pp. 505-508, Sep, 2003
@inproceedings{omar2003maximum,
author = {Mohammed Kamal Omar and Mark Hasegawa-Johnson},
booktitle = {Proc. Interspeech},
doi = {10.21437/Eurospeech.2003-181},
grant = {NSF 0132900},
keywords = {recognition},
month = {September},
pages = {505-508},
title = {Maximum Conditional Mutual Information Projection For Speech Recognition},
year = {2003}
}
Mohammed Kamal Omar, & Mark Hasegawa-Johnson. Non-Linear Maximum Likelihood Feature Transformation For Speech Recognition. Proc. Interspeech, pp. 2497-2500, Sep, 2003
@inproceedings{omar2003interspeech,
author = {Mohammed Kamal Omar and Mark Hasegawa-Johnson},
booktitle = {Proc. Interspeech},
doi = {10.21437/Eurospeech.2003-685},
grant = {NSF 0132900},
keywords = {recognition},
month = {September},
pages = {2497-2500},
title = {Non-Linear Maximum Likelihood Feature Transformation For Speech Recognition},
year = {2003}
}
Ken Chen, Mark Hasegawa-Johnson, Aaron Cohen, Sarah Borys, & Jennifer Cole. Prosody Dependent Speech Recognition with Explicit Duration Modelling at Intonational Phrase Boundaries. Proc. Interspeech, pp. 393-396, Sep, 2003
@inproceedings{chen2003prosody,
author = {Ken Chen and Mark Hasegawa-Johnson and Aaron Cohen and Sarah Borys and Jennifer Cole},
booktitle = {Proc. Interspeech},
doi = {10.21437/Eurospeech.2003-153},
keywords = {recognition},
month = {September},
pages = {393-396},
software = {http://isle.illinois.edu/speech_web_lg/software/2003/Durhmm3.0.zip},
title = {Prosody Dependent Speech Recognition with Explicit Duration Modelling at Intonational Phrase Boundaries},
year = {2003}
}
Tong Zhang, Mark Hasegawa-Johnson, & Stephen E. Levinson. Mental State Detection of Dialogue System Users via Spoken Language. ISCA/IEEE Workshop on Spontaneous Speech Processing and Recognition (SSPR), pp. MAP17.1-4, Apr, 2003
@inproceedings{zhang000013mental,
author = {Tong Zhang and Mark Hasegawa-Johnson and Stephen E. Levinson},
booktitle = {ISCA/IEEE Workshop on Spontaneous Speech Processing and Recognition (SSPR)},
grant = {NSF 0085980},
keywords = {analysis},
month = {April},
pages = {MAP17.1-4},
title = {Mental State Detection of Dialogue System Users via Spoken Language},
url = {https://www.isca-speech.org/archive/sspr_2003/zhang03_sspr.html},
year = {2003}
}
Yanli Zheng, & Mark Hasegawa-Johnson. Acoustic segmentation using switching state Kalman Filter. ICASSP, pp. I:752-755, Apr, 2003
@inproceedings{zheng2003acoustic,
author = {Yanli Zheng and Mark Hasegawa-Johnson},
booktitle = {ICASSP},
doi = {10.1109/ICASSP.2003.1198890},
grant = {NSF 0132900},
keywords = {recognition},
month = {April},
pages = {I:752-755},
title = {Acoustic segmentation using switching state Kalman Filter},
year = {2003}
}
Yanli Zheng, Mark Hasegawa-Johnson, & Shamala Pizza. Analysis of the three-dimensional tongue shape using a three-index factor analysis model. Journal of the Acoustical Society of America, vol. 113, no. 1, pp. 478-486, Jan, 2003
@article{zheng2003parafac,
author = {Yanli Zheng and Mark Hasegawa-Johnson and Shamala Pizza},
doi = {10.1121/1.1520538},
grant = {NIH DC0032301},
journal = {Journal of the Acoustical Society of America},
keywords = {analysis},
month = {January},
number = {1},
pages = {478-486},
title = {Analysis of the three-dimensional tongue shape using a three-index factor analysis model},
volume = {113},
year = {2003}
}
Tong Zhang, Mark Hasegawa-Johnson, & Stephen E. Levinson. An empathic-tutoring system using spoken language. Australian conference on computer-human interaction (OZCHI), pp. 498-501, 2003
@inproceedings{zhang2003an,
author = {Tong Zhang and Mark Hasegawa-Johnson and Stephen E. Levinson},
booktitle = {Australian conference on computer-human interaction (OZCHI)},
grant = {NSF 0085980},
keywords = {analysis},
pages = {498-501},
title = {An empathic-tutoring system using spoken language},
url = {https://www.researchgate.net/profile/Stephen-Levinson-2/publication/228805591_An_Empathic-tutoring_System_Using_Spoken_Language/links/54d389ac0cf2b0c6146dabde/An-Empathic-tutoring-System-Using-Spoken-Language.pdf},
year = {2003}
}
Ken Chen, Mark Hasegawa-Johnson, & Sung-Suk Kim. An Intonational Phrase Boundary and Pitch Accent Dependent Speech Recognizer. International Conference on Systems, Cybernetics, and Intelligence, 2003
@inproceedings{chen2003an,
author = {Ken Chen and Mark Hasegawa-Johnson and Sung-Suk Kim},
booktitle = {International Conference on Systems, Cybernetics, and Intelligence},
keywords = {analysis},
title = {An Intonational Phrase Boundary and Pitch Accent Dependent Speech Recognizer},
url = {https://citeseerx.ist.psu.edu/document?repid=rep1&type=pdf&doi=e9e5670e89cc257c290bc5787bb0c46ffc6061fa},
year = {2003}
}
Ken Chen, & Mark Hasegawa-Johnson. Improving the robustness of prosody dependent language modeling based on prosody syntax cross-correlation. ASRU, 2003
@inproceedings{chen2003improving,
author = {Ken Chen and Mark Hasegawa-Johnson},
booktitle = {ASRU},
doi = {10.1109/ASRU.2003.1318480},
keywords = {analysis},
title = {Improving the robustness of prosody dependent language modeling based on prosody syntax cross-correlation},
year = {2003}
}
Mark Hasegawa-Johnson, Shamala Pizza, Abeer Alwan, Jul Cha, & Katherine Haker. Vowel Category Dependence of the Relationship Between Palate Height, Tongue Height, and Oral Area. Journal of Speech, Language, and Hearing Research, vol. 46, no. 3, pp. 738-753, 2003
@article{hasegawa-johnson2003vowel,
author = {Mark Hasegawa-Johnson and Shamala Pizza and Abeer Alwan and Jul Cha and Katherine Haker},
doi = {10.1044/1092-4388(2003/059)},
grant = {NIH DC0032301},
journal = {Journal of Speech, Language, and Hearing Research},
keywords = {analysis},
number = {3},
pages = {738-753},
title = {Vowel Category Dependence of the Relationship Between Palate Height, Tongue Height, and Oral Area},
volume = {46},
year = {2003}
}
Mark Hasegawa-Johnson. Bayesian Learning for Models of Human Speech Perception. IEEE Workshop on Statistical Signal Processing, St. Louis, MO, pp. 393-396, 2003
@inproceedings{hasegawa-johnson2003bayesian,
address = {St. Louis, MO},
author = {Mark Hasegawa-Johnson},
booktitle = {IEEE Workshop on Statistical Signal Processing},
doi = {10.1109/SSP.2003.1289432},
grant = {NSF 0132900},
keywords = {analysis},
pages = {393-396},
title = {Bayesian Learning for Models of Human Speech Perception},
year = {2003}
}
Jennifer Cole, Hansook Choi, Heejin Kim, & Mark Hasegawa-Johnson. The effect of accent on the acoustic cues to stop voicing in Radio News speech. ICPhS, pp. 2665-2668, 2003
@inproceedings{cole000180the,
author = {Jennifer Cole and Hansook Choi and Heejin Kim and Mark Hasegawa-Johnson},
booktitle = {ICPhS},
keywords = {analysis},
pages = {2665-2668},
title = {The effect of accent on the acoustic cues to stop voicing in Radio News speech},
url = {https://www.internationalphoneticassociation.org/icphs-proceedings/ICPhS2003/papers/p15_2665.pdf},
year = {2003}
}
Ameya Deoras. A Factorial HMM Approach to Robust Isolated Digit Recognition in Non-Stationary Noise.. B.S. Thesis, University of Illinois, 2003
@unpublished{deoras2003a,
author = {Ameya Deoras},
keywords = {recognition},
note = {B.S. Thesis, University of Illinois},
title = {A Factorial HMM Approach to Robust Isolated Digit Recognition in Non-Stationary Noise.},
year = {2003}
}
Mohamed Kamal Mahmoud Omar. Acoustic Feature Design for Speech Recognition: A Statistical Information-Theoretic Approach. Ph.D. Thesis, University of Illinois, 2003
@phdthesis{omar2003acoustic,
author = {Mohamed Kamal Mahmoud Omar},
keywords = {recognition},
school = {University of Illinois},
title = {Acoustic Feature Design for Speech Recognition: A Statistical Information-Theoretic Approach},
url = {https://www.ideals.illinois.edu/items/82130},
year = {2003}
}
Mohammed Kamal Omar, & Mark Hasegawa-Johnson. Approximately Independent Factors of Speech Using Nonlinear Symplectic Transformation. IEEE Transactions on Speech and Audio Processing, vol. 11, no. 6, pp. 660-671, 2003
@article{omar2003approximately,
author = {Mohammed Kamal Omar and Mark Hasegawa-Johnson},
doi = {10.1109/TSA.2003.814457},
grant = {NSF 0132900},
journal = {IEEE Transactions on Speech and Audio Processing},
keywords = {recognition},
number = {6},
pages = {660-671},
title = {Approximately Independent Factors of Speech Using Nonlinear Symplectic Transformation},
volume = {11},
year = {2003}
}
Mohammed Kamal Omar, & Mark Hasegawa-Johnson. Non-Linear Independent Component Analysis for Speech Recognition. International Conference on Computer, Communication and Control Technologies (CCCT ’03), 2003
@inproceedings{omar2003ccct,
author = {Mohammed Kamal Omar and Mark Hasegawa-Johnson},
booktitle = {International Conference on Computer, Communication and Control Technologies (CCCT '03)},
grant = {NSF 0132900},
keywords = {recognition},
title = {Non-Linear Independent Component Analysis for Speech Recognition},
url = {https://citeseerx.ist.psu.edu/document?repid=rep1&type=pdf&doi=d95f5f7198a3463efa9490533550de5ec02234e2},
year = {2003}
}
Mohammed Kamal Omar, & Mark Hasegawa-Johnson. Strong-Sense Class-Dependent Features for Statistical Recognition. IEEE Workshop on Statistical Signal Processing, St. Louis, MO, pp. 473-476, 2003
@inproceedings{omar2003strong-sense,
address = {St. Louis, MO},
author = {Mohammed Kamal Omar and Mark Hasegawa-Johnson},
booktitle = {IEEE Workshop on Statistical Signal Processing},
doi = {10.1109/SSP.2003.1289454},
grant = {NSF 0132900},
keywords = {recognition},
pages = {473-476},
title = {Strong-Sense Class-Dependent Features for Statistical Recognition},
year = {2003}
}
Ken Chen, Mark Hasegawa-Johnson, & Jennifer Cole. Prosody Dependent Speech Recognition on Radio News. IEEE Workshop on Statistical Signal Processing, St. Louis, MO, 2003
@inproceedings{chen2003wssp,
address = {St. Louis, MO},
author = {Ken Chen and Mark Hasegawa-Johnson and Jennifer Cole},
booktitle = {IEEE Workshop on Statistical Signal Processing},
doi = {10.1109/TSA.2005.853208},
keywords = {recognition},
title = {Prosody Dependent Speech Recognition on Radio News},
year = {2003}
}
Sarah Borys. Recognition of Prosodic Factors and Detection of Landmarks for Improvements to Continuous Speech Recognition Systems. B.S. Thesis, University of Illinois, 2003
@unpublished{borys2003recognition,
author = {Sarah Borys},
keywords = {recognition},
note = {B.S. Thesis, University of Illinois},
title = {Recognition of Prosodic Factors and Detection of Landmarks for Improvements to Continuous Speech Recognition Systems},
year = {2003}
}
Sarah Borys, Mark Hasegawa-Johnson, & Jennifer Cole. The Importance of Prosodic Factors in Phoneme Modeling with Applications to Speech Recognition. ACL Student Session, 2003
@inproceedings{borys2003the,
author = {Sarah Borys and Mark Hasegawa-Johnson and Jennifer Cole},
booktitle = {ACL Student Session},
grant = {NSF 0132900},
keywords = {recognition},
title = {The Importance of Prosodic Factors in Phoneme Modeling with Applications to Speech Recognition},
year = {2003}
}
Sarah Borys, Mark Hasegawa-Johnson, & Jennifer Cole. Prosody as a Conditioning Variable in Speech Recognition. Illinois Journal of Undergraduate Research, 2003
@article{borys2003prosody,
author = {Sarah Borys and Mark Hasegawa-Johnson and Jennifer Cole},
journal = {Illinois Journal of Undergraduate Research},
keywords = {recognition},
title = {Prosody as a Conditioning Variable in Speech Recognition},
url = {https://www.researchgate.net/profile/Jennifer-Cole-6/publication/2842059_Prosody_As_A_Conditioning_Variable_In_Speech_Recognition/links/0912f50aa6042dd0bd000000/Prosody-As-A-Conditioning-Variable-In-Speech-Recognition.pdf},
year = {2003}
}
Bowon Lee, Mark Hasegawa-Johnson, & Camille Goudeseune. Open Loop Multichannel Inversion of Room Impulse Response. J. Acoust. Soc. Am., vol. 113, no. 4, pp. 2202-2203, 2003
@article{lee2003open,
author = {Bowon Lee and Mark Hasegawa-Johnson and Camille Goudeseune},
doi = {10.1121/1.4780198},
journal = {J. Acoust. Soc. Am.},
keywords = {synthesis},
number = {4},
pages = {2202-2203},
title = {Open Loop Multichannel Inversion of Room Impulse Response},
volume = {113},
year = {2003}
}
Mark Hasegawa-Johnson, & Abeer Alwan. Speech Coding: Fundamentals and Applications. Wiley Encyclopedia of Telecommunications and Signal Processing, J. Proakis, eds., Wiley and Sons, NY, Dec, 2002
@incollection{hasegawajohnson2002speech,
address = {NY},
author = {Mark Hasegawa-Johnson and Abeer Alwan},
booktitle = {Wiley Encyclopedia of Telecommunications and Signal Processing},
editor = {J. Proakis},
grant = {NSF 0132900},
keywords = {synthesis},
month = {December},
publisher = {Wiley and Sons},
title = {Speech Coding: Fundamentals and Applications},
year = {2002}
}
Mohammed Kamal Omar, Ken Chen, Mark Hasegawa-Johnson, & Yigal Brandman. An Evaluation of using Mutual Information for Selection of Acoustic-Features Representation of Phonemes for Speech Recognition. Proc. Interspeech, Denver, CO, pp. 2129-2132, Sep, 2002
@inproceedings{omar2002evaluation,
address = {Denver, CO},
author = {Mohammed Kamal Omar and Ken Chen and Mark Hasegawa-Johnson and Yigal Brandman},
booktitle = {Proc. Interspeech},
doi = {10.21437/ICSLP.2002-582},
keywords = {recognition},
month = {September},
pages = {2129-2132},
title = {An Evaluation of using Mutual Information for Selection of Acoustic-Features Representation of Phonemes for Speech Recognition},
year = {2002}
}
Stephen E. Levinson, Thomas S. Huang, Mark A. Hasegawa-Johnson, Ken Chen, Stephen Chu, Ashutosh Garg, Zhinian Jing, Danfeng Li, J. Lin, Mohammed Kamal Omar, & Z. Wen. Multimodal Dialog Systems Research at Illinois. ARPA Workshop on Multimodal Speech Recognition and SPINE, Jun, 2002
@inproceedings{levinson2002multimodal,
author = {Stephen E. Levinson and Thomas S. Huang and Mark A. Hasegawa-Johnson and Ken Chen and Stephen Chu and Ashutosh Garg and Zhinian Jing and Danfeng Li and J. Lin and Mohammed Kamal Omar and Z. Wen},
booktitle = {ARPA Workshop on Multimodal Speech Recognition and SPINE},
grant = {NSF 0132900},
keywords = {recognition},
month = {June},
title = {Multimodal Dialog Systems Research at Illinois},
url = {https://apps.dtic.mil/sti/pdfs/ADA415344.pdf#page=39},
year = {2002}
}
Zhinian Jing, & Mark Hasegawa-Johnson. Auditory-Modeling Inspired Methods of Feature Extraction for Robust Automatic Speech Recognition. ICASSP, pp. IV:4176, May, 2002
@inproceedings{jing2002auditory-modeling,
author = {Zhinian Jing and Mark Hasegawa-Johnson},
booktitle = {ICASSP},
doi = {10.1109/ICASSP.2002.5745632},
grant = {NSF 0132900},
keywords = {recognition},
month = {May},
pages = {IV:4176},
title = {Auditory-Modeling Inspired Methods of Feature Extraction for Robust Automatic Speech Recognition},
year = {2002}
}
Mohammed Kamal Omar, & Mark Hasegawa-Johnson. Maximum Mutual Information Based Acoustic Features Representation of Phonological Features for Speech Recognition. ICASSP, pp. I:81-84, May, 2002
@inproceedings{omar2002maximum,
author = {Mohammed Kamal Omar and Mark Hasegawa-Johnson},
booktitle = {ICASSP},
doi = {10.1109/ICASSP.2002.5743659},
keywords = {recognition},
month = {May},
pages = {I:81-84},
title = {Maximum Mutual Information Based Acoustic Features Representation of Phonological Features for Speech Recognition},
year = {2002}
}
David Petruncio. Evaluation of Various Features for Music Genre Classification with Hidden Markov Models. B.S. Thesis, University of Illinois, 2002
@unpublished{petruncio2002evaluation,
author = {David Petruncio},
keywords = {analysis},
note = {B.S. Thesis, University of Illinois},
title = {Evaluation of Various Features for Music Genre Classification with Hidden Markov Models},
year = {2002}
}
Mark Hasegawa-Johnson. Finding the Best Acoustic Measurements for Landmark-Based Speech Recognition. Accumu Magazine, vol. 11, Kyoto Computer Gakuin, Kyoto, Japan, pp. 45-47, 2002
@article{hasegawajohnson2002finding,
address = {Kyoto, Japan},
author = {Mark Hasegawa-Johnson},
journal = {Accumu Magazine},
keywords = {recognition},
pages = {45-47},
publisher = {Kyoto Computer Gakuin},
title = {Finding the Best Acoustic Measurements for Landmark-Based Speech Recognition},
volume = {11},
year = {2002}
}
Zhinian Jing. Voice Index and Frame Index for Recognition of Digits in Speech Background. Master’s Thesis, University of Illinois, 2002
@mastersthesis{jing2002voice,
author = {Zhinian Jing},
keywords = {recognition},
school = {University of Illinois},
title = {Voice Index and Frame Index for Recognition of Digits in Speech Background},
year = {2002}
}
James Beauchamp, Heinrich Taube, Sever Tipei, Scott Wyatt, Lippold Haken, & Mark Hasegawa-Johnson. Acoustics, Audio, and Music Technology Education at the University of Illinois. J. Acoust. Soc. Am., vol. 110, no. 5, pp. 2961, 2001
@article{beauchamp2001acoustics,
author = {James Beauchamp and Heinrich Taube and Sever Tipei and Scott Wyatt and Lippold Haken and Mark Hasegawa-Johnson},
journal = {J. Acoust. Soc. Am.},
keywords = {analysis},
number = {5},
pages = {2961},
title = {Acoustics, Audio, and Music Technology Education at the University of Illinois},
url = {https://asa.scitation.org/doi/abs/10.1121/1.4776867},
volume = {110},
year = {2001}
}
Mark Hasegawa-Johnson. Preliminary Work and Proposed Continuation: Imaging of Speech Anatomy and Behavior.. Unpublished presentation at Universities of Illinois Inter-campus Biomedical Imaging Forum, 2001
@unpublished{hasegawa-johnson2001preliminary,
author = {Mark Hasegawa-Johnson},
grant = {NIH 0032301},
keywords = {analysis},
note = {Unpublished presentation at Universities of Illinois Inter-campus Biomedical Imaging Forum},
title = {Preliminary Work and Proposed Continuation: Imaging of Speech Anatomy and Behavior.},
year = {2001}
}
Mohammed K. Omar, Mark Hasegawa-Johnson, & Stephen E. Levinson. Gaussian Mixture Models of Phonetic Boundaries for Speech Recognition. ASRU, 2001
@inproceedings{omar2001gaussian,
author = {Mohammed K. Omar and Mark Hasegawa-Johnson and Stephen E. Levinson},
booktitle = {ASRU},
doi = {10.1109/ASRU.2001.1034582},
grant = {NSF 0132900},
keywords = {recognition},
title = {Gaussian Mixture Models of Phonetic Boundaries for Speech Recognition},
year = {2001}
}
Wira Gunawan, & Mark Hasegawa-Johnson. PLP Coefficients can be Quantized at 400 bps. ICASSP, Salt Lake City, UT, pp. 2.2.1-4, 2001
@inproceedings{gunawan2001plp,
address = {Salt Lake City, UT},
author = {Wira Gunawan and Mark Hasegawa-Johnson},
booktitle = {ICASSP},
doi = {10.1109/ICASSP.2001.940771},
keywords = {recognition},
pages = {2.2.1-4},
title = {PLP Coefficients can be Quantized at 400 bps},
year = {2001}
}
Mark Hasegawa-Johnson. Line Spectral Frequencies are the Poles and Zeros of a Discrete Matched-Impedance Vocal Tract Model. Journal of the Acoustical Society of America, vol. 108, no. 1, pp. 457-460, 2000
@article{hasegawa-johnson2000line,
author = {Mark Hasegawa-Johnson},
doi = {10.1121/1.429481},
grant = {NIH DC0032301},
journal = {Journal of the Acoustical Society of America},
keywords = {analysis},
number = {1},
pages = {457-460},
title = {Line Spectral Frequencies are the Poles and Zeros of a Discrete Matched-Impedance Vocal Tract Model},
volume = {108},
year = {2000}
}
Yanli Zheng, & Mark Hasegawa-Johnson. Three Dimensional Tongue shape Factor Analysis. ASHA Leader, vol. 5, no. 16, pp. 144, 2000
@article{zheng2000three,
author = {Yanli Zheng and Mark Hasegawa-Johnson},
grant = {NIH 0032301},
journal = {ASHA Leader},
keywords = {analysis},
number = {16},
pages = {144},
title = {Three Dimensional Tongue shape Factor Analysis},
volume = {5},
year = {2000}
}
Mark Hasegawa-Johnson. Time-frequency distribution of partial phonetic information measured using mutual information. Proc. 6th International Conference on Spoken Language Processing (ICSLP 2000), pp. vol. 4, 133-136, 2000
@inproceedings{hasegawajohnson00time,
author = {Mark Hasegawa-Johnson},
booktitle = {Proc. 6th International Conference on Spoken Language Processing (ICSLP 2000)},
doi = {10.21437/ICSLP.2000-769},
keywords = {analysis},
pages = {vol. 4, 133-136},
title = {Time-frequency distribution of partial phonetic information measured using mutual information},
year = {2000}
}
Mark Hasegawa-Johnson. Multivariate-State Hidden Markov Models for Simultaneous Transcription of Phones and Formants. ICASSP, Istanbul, pp. 1323-1326, 2000
@inproceedings{hasegawajohnson2000multivariate-state,
address = {Istanbul},
author = {Mark Hasegawa-Johnson},
booktitle = {ICASSP},
doi = {10.1109/ICASSP.2000.861822},
keywords = {recognition},
pages = {1323-1326},
title = {Multivariate-State Hidden Markov Models for Simultaneous Transcription of Phones and Formants},
year = {2000}
}
Jun Huang, Stephen Levinson, & Mark Hasegawa-Johnson. Signal approximation in Hilbert space and its application on articulatory speech synthesis. Proc. 6th International Conference on Spoken Language Processing (ICSLP 2000), pp. vol. 2, 775-778, 2000
@inproceedings{huang00signal,
author = {Jun Huang and Stephen Levinson and Mark Hasegawa-Johnson},
booktitle = {Proc. 6th International Conference on Spoken Language Processing (ICSLP 2000)},
doi = {10.21437/ICSLP.2000-384},
keywords = {synthesis},
pages = {vol. 2, 775-778},
title = {Signal approximation in Hilbert space and its application on articulatory speech synthesis},
year = {2000}
}
Wira Gunawan. Distributed Speech Recognition. Master’s Thesis, University of Illinois, 2000
@mastersthesis{gunawan2000distributed,
author = {Wira Gunawan},
keywords = {recognition},
school = {University of Illinois},
title = {Distributed Speech Recognition},
year = {2000}
}
Jul Setsu Cha. Articulatory Speech Synthesis of Female and Male Talkers. Master’s Thesis, UCLA, 2000
@mastersthesis{setsu2000articulatory,
author = {Jul Setsu Cha},
keywords = {synthesis},
school = {UCLA},
title = {Articulatory Speech Synthesis of Female and Male Talkers},
year = {2000}
}
Mark Hasegawa-Johnson, Jul Cha, Shamala Pizza, & Katherine Haker. CTMRedit: A case study in human-computer interface design. International Conference On Public Participation and Information Technology, Lisbon, pp. 575-584, 1999
@inproceedings{hasegawa-johnson1999icppit,
address = {Lisbon},
author = {Mark Hasegawa-Johnson and Jul Cha and Shamala Pizza and Katherine Haker},
booktitle = {International Conference On Public Participation and Information Technology},
grant = {NIH DC0032301},
keywords = {analysis},
pages = {575-584},
title = {CTMRedit: A case study in human-computer interface design},
url = {https://citeseerx.ist.psu.edu/document?repid=rep1&type=pdf&doi=4bc8af525ca1f09e6cdc7c5399f382906d3a5365},
year = {1999}
}
Mark Hasegawa-Johnson, Jul Cha, & Katherine Haker. CTMRedit: A Matlab-based tool for segmenting and interpolating MRI and CT images in three orthogonal planes. 21st Annual International Conference of the IEEE/EMBS Society, pp. 1170, 1999
@inproceedings{hasegawa-johnson1999embs,
author = {Mark Hasegawa-Johnson and Jul Cha and Katherine Haker},
booktitle = {21st Annual International Conference of the IEEE/EMBS Society},
grant = {NIH 0032301},
keywords = {analysis},
pages = {1170},
title = {CTMRedit: A Matlab-based tool for segmenting and interpolating MRI and CT images in three orthogonal planes},
year = {1999}
}
Mark Hasegawa-Johnson. Combining magnetic resonance image planes in the Fourier domain for improved spatial resolution. International Conference On Signal Processing Applications and Technology, Orlando, FL, pp. 81.1-5, 1999
@inproceedings{hasegawajohnson1999combining,
address = {Orlando, FL},
author = {Mark Hasegawa-Johnson},
booktitle = {International Conference On Signal Processing Applications and Technology},
keywords = {analysis},
pages = {81.1-5},
title = {Combining magnetic resonance image planes in the Fourier domain for improved spatial resolution},
year = {1999}
}
Tomohiko Taniguchi, & Mark Johnson. Speech coding and decoding system. United States Patent Number 5799131, Aug, 1998
@misc{taniguchi1998speech,
abstract = {Transform stochastic codebook so that, after perceptual weighting, it will be orthogonal to the adaptive codebook},
author = {Tomohiko Taniguchi and Mark Johnson},
howpublished = {United States Patent Number 5799131},
keywords = {synthesis},
month = {August},
title = {Speech coding and decoding system},
url = {https://patentimages.storage.googleapis.com/5e/54/26/3ff3894828a9d3/US5799131.pdf},
year = {1998}
}
Mark Hasegawa-Johnson. Electromagnetic Exposure Safety of the Carstens Articulograph AG100. Journal of the Acoustical Society of America, vol. 104, pp. 2529-2532, 1998
@article{hasegawa-johnson1998electromagnetic,
author = {Mark Hasegawa-Johnson},
doi = {10.1121/1.423775},
grant = {NIH 0032301},
journal = {Journal of the Acoustical Society of America},
keywords = {analysis},
pages = {2529-2532},
title = {Electromagnetic Exposure Safety of the Carstens Articulograph AG100},
volume = {104},
year = {1998}
}
Sumiko Takayanagi, Mark Hasegawa-Johnson, Laurie S. Eisner, & Amy Schaefer-Martinez. Information theory and variance estimation techniques in the analysis of category rating data and paired comparisons. J. Acoust. Soc. Am., vol. 102, pp. 3091, 1997
@article{takayanagi1997information,
author = {Sumiko Takayanagi and Mark Hasegawa-Johnson and Laurie S. Eisner and Amy Schaefer-Martinez},
journal = {J. Acoust. Soc. Am.},
keywords = {analysis},
pages = {3091},
title = {Information theory and variance estimation techniques in the analysis of category rating data and paired comparisons},
volume = {102},
year = {1997}
}
Mark A. Hasegawa-Johnson. Formant and Burst Spectral Measurements with Quantitative Error Models for Speech Sound Classification. Ph.D. Thesis, MIT, 1996
@phdthesis{johnson1996formant,
author = {Mark A. Hasegawa-Johnson},
keywords = {analysis},
school = {MIT},
title = {Formant and Burst Spectral Measurements with Quantitative Error Models for Speech Sound Classification},
url = {https://dspace.mit.edu/handle/1721.1/10636},
year = {1996}
}
Mark A. Hasegawa-Johnson. Burst spectral measures and formant frequencies can be used to accurately discriminate stop place of articulation. J. Acoust. Soc. Am., vol. 98, pp. 2890, 1995
@article{hasegawajohnson1995burst,
author = {Mark A. Hasegawa-Johnson},
doi = {10.1121/1.414301},
journal = {J. Acoust. Soc. Am.},
keywords = {analysis},
pages = {2890},
title = {Burst spectral measures and formant frequencies can be used to accurately discriminate stop place of articulation},
url = {https://asa.scitation.org/doi/pdf/10.1121/1.414301},
volume = {98},
year = {1995}
}
Tomohiko Taniguchi, Mark Johnson, Yasuji Ohta, Hideki Kurihara, Yoshinori Tanaka, & Yoshihito Sakai. Speech coding system having codebook storing differential vectors between each two adjoining code vectors. United States Patent Number 5323486, Jun, 1994
@misc{taniguchi1994speech,
author = {Tomohiko Taniguchi and Mark Johnson and Yasuji Ohta and Hideki Kurihara and Yoshinori Tanaka and Yoshihito Sakai},
howpublished = {United States Patent Number 5323486},
keywords = {synthesis},
month = {June},
title = {Speech coding system having codebook storing differential vectors between each two adjoining code vectors},
url = {https://patentimages.storage.googleapis.com/6e/ac/fb/78352f241fd201/US5323486.pdf},
year = {1994}
}
Mark A. Johnson. A mapping between trainable generalized properties and the acoustic correlates of distinctive features. MIT Speech Communication Group Working Papers, vol. 9, pp. 94-105, 1994
@article{johnson1994a,
author = {Mark A. Johnson},
journal = {MIT Speech Communication Group Working Papers},
keywords = {analysis},
pages = {94-105},
title = {A mapping between trainable generalized properties and the acoustic correlates of distinctive features},
volume = {9},
year = {1994}
}
Mark Johnson. Automatic context-sensitive measurement of the acoustic correlates of distinctive features. ICSLP, Yokohama, pp. 1639-1643, 1994
@inproceedings{johnson1994automatic,
address = {Yokohama},
author = {Mark Johnson},
booktitle = {ICSLP},
pages = {1639-1643},
title = {Automatic context-sensitive measurement of the acoustic correlates of distinctive features},
year = {1994}
}
Tomohiko Taniguchi, & Mark Johnson. Speech coding system. United States Patent Number 5245662, Sep, 1993
@misc{taniguchi1993speech1,
abstract = {hexagonal lattice},
author = {Tomohiko Taniguchi and Mark Johnson},
howpublished = {United States Patent Number 5245662},
keywords = {synthesis},
month = {September},
title = {Speech coding system},
url = {https://patentimages.storage.googleapis.com/67/19/4e/63aa5e1e86ae4e/US5245662.pdf},
year = {1993}
}
Tomohiko Taniguchi, Mark Johnson, Hideki Kurihara, Yoshinori Tanaka, & Yasuji Ohta. Speech coding and decoding system. United States Patent Number 5199076, Mar, 1993
@misc{taniguchi1993speech2,
abstract = {sparse adaptive codebook},
author = {Tomohiko Taniguchi and Mark Johnson and Hideki Kurihara and Yoshinori Tanaka and Yasuji Ohta},
howpublished = {United States Patent Number 5199076},
keywords = {synthesis},
month = {March},
title = {Speech coding and decoding system},
url = {https://patentimages.storage.googleapis.com/f5/8e/8f/fb388df1a25c93/US5199076.pdf},
year = {1993}
}
Mark A. Johnson. A mapping between trainable generalized properties and the acoustic correlates of distinctive features. J. Acoust. Soc. Am., vol. 94, pp. 1865, 1993
@article{johnson1993mapping,
author = {Mark A. Johnson},
journal = {J. Acoust. Soc. Am.},
pages = {1865},
title = {A mapping between trainable generalized properties and the acoustic correlates of distinctive features},
volume = {94},
year = {1993}
}
Mark A. Johnson. Using beam elements to model the vocal fold length in breathy voicing. J. Acoust. Soc. Am., vol. 91, pp. 2420-2421, 1992
@article{johnson1992using,
author = {Mark A. Johnson},
journal = {J. Acoust. Soc. Am.},
keywords = {analysis},
pages = {2420-2421},
title = {Using beam elements to model the vocal fold length in breathy voicing},
volume = {91},
year = {1992}
}
Mark A. Johnson. Analysis of durational rhythms in two poems by Robert Frost. MIT Speech Communication Group Working Papers, vol. 8, pp. 29-42, 1992
@article{johnson1992analysis,
author = {Mark A. Johnson},
journal = {MIT Speech Communication Group Working Papers},
keywords = {analysis},
pages = {29-42},
title = {Analysis of durational rhythms in two poems by Robert Frost},
volume = {8},
year = {1992}
}
Mark Johnson, & Tomohiko Taniguchi. On-line and off-line computational reduction techniques using backward filtering in CELP speech coders. IEEE Transactions on Signal Processing, vol. 40, pp. 2090-2093, 1992
@article{johnson1992on-line,
author = {Mark Johnson and Tomohiko Taniguchi},
doi = {10.1109/78.149977},
journal = {IEEE Transactions on Signal Processing},
keywords = {synthesis},
pages = {2090-2093},
title = {On-line and off-line computational reduction techniques using backward filtering in CELP speech coders},
volume = {40},
year = {1992}
}
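The computational reduction named in this title rests on a standard CELP identity: instead of filtering every candidate codevector through the weighting filter and correlating it with the target, the target can be filtered backwards once and correlated directly with the unfiltered codevectors. The NumPy sketch below demonstrates that identity with a toy FIR weighting filter; the filter, dimensions, and variable names are assumptions for illustration, and the paper's specific on-line/off-line schemes are not reproduced.

```python
import numpy as np

rng = np.random.default_rng(1)
N, K = 40, 64
h = np.array([1.0, -0.6, 0.2])          # toy weighting-filter impulse response
target = rng.normal(size=N)             # weighted target signal for one subframe
codebook = rng.normal(size=(K, N))      # K candidate codevectors

# Direct search: filter every codevector, then correlate with the target.
direct = np.array([np.dot(np.convolve(c, h)[:N], target) for c in codebook])

# Backward filtering: time-reverse the target, filter it once, reverse again;
# every correlation then becomes a plain dot product with the raw codevectors.
backward_target = np.convolve(target[::-1], h)[:N][::-1]
fast = codebook @ backward_target

print(np.allclose(direct, fast))        # True: same correlations, one filtering pass
```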
Mark A. Johnson, & Tomohiko Taniguchi. Low-complexity multi-mode VXC using multi-stage optimization and mode selection. ICASSP, Toronto, Canada, pp. 221-224, 1991
@inproceedings{johnson1991low-complexity,
address = {Toronto, Canada},
author = {Mark A. Johnson and Tomohiko Taniguchi},
booktitle = {ICASSP},
doi = {10.1109/ICASSP.1991.150317},
keywords = {synthesis},
pages = {221-224},
title = {Low-complexity multi-mode VXC using multi-stage optimization and mode selection},
year = {1991}
}
Tomohiko Taniguchi, Mark A. Johnson, & Yasuji Ohta. Pitch sharpening for perceptually improved CELP, and the sparse-delta codebook for reduced computation. ICASSP, Toronto, Canada, pp. 241-244, 1991
@inproceedings{taniguchi1991pitch,
address = {Toronto, Canada},
author = {Tomohiko Taniguchi and Mark A. Johnson and Yasuji Ohta},
booktitle = {ICASSP},
doi = {10.1109/ICASSP.1991.150322},
keywords = {synthesis},
pages = {241-244},
title = {Pitch sharpening for perceptually improved CELP, and the sparse-delta codebook for reduced computation},
year = {1991}
}
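"Pitch sharpening" in CELP coders generally means reinforcing the periodicity of the excitation by feeding back a scaled copy of it at the pitch lag. The sketch below illustrates only that general idea; the exact formulation in the paper, and its sparse-delta codebook, are not reproduced, and the function name and parameters are hypothetical.

```python
import numpy as np

def sharpen_pitch(excitation, pitch_lag, beta=0.8):
    """Reinforce periodicity by adding a scaled copy of the excitation delayed
    by the pitch lag, sample by sample (recursive comb filter 1 / (1 - beta z^-lag)).

    excitation : (N,) innovation/codebook excitation for one subframe
    pitch_lag  : integer lag in samples
    beta       : sharpening gain (0 disables sharpening)
    """
    y = excitation.astype(float).copy()
    for n in range(pitch_lag, len(y)):
        y[n] += beta * y[n - pitch_lag]
    return y

# Toy usage: a sparse two-pulse excitation acquires echoes at multiples of the lag.
exc = np.zeros(40)
exc[3], exc[11] = 1.0, -0.7
print(np.nonzero(sharpen_pitch(exc, pitch_lag=8))[0])   # pulses recur every 8 samples
```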
Tomohiko Taniguchi, Fumio Amano, & Mark A. Johnson. Improving the performance of CELP-based speech coding at low bit rates. International Symposium on Circuits and Systems, Singapore, 1991
@inproceedings{taniguchi1991improving,
address = {Singapore},
author = {Tomohiko Taniguchi and Fumio Amano and Mark A. Johnson},
booktitle = {International Symposium on Circuits and Systems},
doi = {10.1109/ISCAS.1991.176404},
keywords = {synthesis},
title = {Improving the performance of CELP-based speech coding at low bit rates},
year = {1991}
}
Mark A. Johnson, & Tomohiko Taniguchi. Computational reduction in sparse-codebook CELP using backward-weighting of the input. Institute of Electronics, Information and Communication Engineers (IEICE) Symposium DSP 90-15, Hakata, pp. 61-66, 1990
@inproceedings{johnson1990computational,
address = {Hakata},
author = {Mark A. Johnson and Tomohiko Taniguchi},
booktitle = {Institute of Electronics, Information and Communication Engineers (IEICE) Symposium DSP 90-15},
keywords = {synthesis},
pages = {61-66},
title = {Computational reduction in sparse-codebook CELP using backward-weighting of the input},
year = {1990}
}
Tomohiko Taniguchi, Mark A. Johnson, & Yasuji Ohta. Multi-vector pitch-orthogonal LPC: quality speech with low complexity at rates between 4 and 8 kbps. ICSLP, Kobe, pp. 113-116, 1990
@inproceedings{taniguchi1990multi-vector,
address = {Kobe},
author = {Tomohiko Taniguchi and Mark A. Johnson and Yasuji Ohta},
booktitle = {ICSLP},
doi = {10.21437/ICSLP.1990-29},
keywords = {synthesis},
pages = {113-116},
title = {Multi-vector pitch-orthogonal LPC: quality speech with low complexity at rates between 4 and 8 kbps},
year = {1990}
}
Mark A. Johnson, & Tomohiko Taniguchi. Pitch-orthogonal code-excited LPC. IEEE Global Telecommunications Conference (GLOBECOM), San Diego, CA, pp. 542-546, 1990