diff --git a/_config.yml b/_config.yml index 17b8e0603b..819109b9d3 100644 --- a/_config.yml +++ b/_config.yml @@ -6,29 +6,29 @@ # `jekyll serve`. If you change this file, please restart the server process. # Site Settings -title : "Yi Ren (任意)" -description : "Focusing on video and speech large generation model." -repository : "rayeren/rayeren.github.io" +title : "Zhen Wang" +description : "Focusing on computational imaging and image processing." +repository : "wstruggle/wstruggle.github.io" google_scholar_stats_use_cdn : true # google analytics -google_analytics_id : G-SWFCX99KQZ +# google_analytics_id : G-SWFCX99KQZ # SEO Related -google_site_verification : b_BbOIt7d513ZXYDw-YRMicO6O5tthYiouqWDdWjGTU -bing_site_verification : B5AA590E7B9C03956A6F7DEC0F776211 -baidu_site_verification : code-ZxFK3f4pfb +# google_site_verification : b_BbOIt7d513ZXYDw-YRMicO6O5tthYiouqWDdWjGTU +# bing_site_verification : B5AA590E7B9C03956A6F7DEC0F776211 +# baidu_site_verification : code-ZxFK3f4pfb # Site Author author: - name : "Yi Ren (任意)" - avatar : "images/ry_profile.jpeg" + name : "Zhen Wang" + avatar : "images/ZhenWang-2.jpg" bio : "" - location : "Singapore" + location : "Beijing" employer : pubmed : - googlescholar : "https://scholar.google.com/citations?user=4FA6C0AAAAAJ" - email : "rayeren613@gmail.com" + googlescholar : "https://scholar.google.com/citations?user=DexiDloAAAAJ&hl=zh-CN" + email : "wzhstruggle@bit.edu.cn" researchgate : # example: "https://www.researchgate.net/profile/yourprofile" uri : bitbucket : @@ -37,15 +37,15 @@ author: flickr : facebook : foursquare : - github : "RayeRen" + github : #"ZhenWang" google_plus : keybase : instagram : impactstory : #"https://profiles.impactstory.org/u/xxxx-xxxx-xxxx-xxxx" lastfm : - linkedin : "rayeren" - dblp : "https://dblp.org/pid/75/6568-6.html" - orcid : "https://orcid.org/0000-0002-9160-3848" + linkedin : #"rayeren" + dblp : #"https://dblp.org/pid/75/6568-6.html" + orcid : "https://orcid.org/0009-0005-3009-9085" 
pinterest : soundcloud : stackoverflow : # http://stackoverflow.com/users/123456/username diff --git a/_pages/about.md b/_pages/about.md index bfb8807189..bdbfbe3d15 100644 --- a/_pages/about.md +++ b/_pages/about.md @@ -8,15 +8,11 @@ redirect_from: - /about.html --- - + {% include_relative includes/intro.md %} -If you like the template of this homepage, welcome to star and fork my open-sourced template version [AcadHomepage ![](https://img.shields.io/github/stars/RayeRen/acad-homepage.github.io?style=social)](https://github.com/RayeRen/acad-homepage.github.io). - {% include_relative includes/news.md %} {% include_relative includes/pub.md %} {% include_relative includes/honers.md %} - -{% include_relative includes/others.md %} \ No newline at end of file diff --git a/_pages/includes/homepage.md b/_pages/includes/homepage.md index 33e459fe2a..c4e45ac886 100644 --- a/_pages/includes/homepage.md +++ b/_pages/includes/homepage.md @@ -1,5 +1,5 @@ # 📎 Homepages -- Personal Pages: https://rayeren.github.io (updated recently🔥) -- Linkedin: https://www.linkedin.com/in/rayeren -- Google Scholar: https://scholar.google.com/citations?user=4FA6C0AAAAAJ -- DBLP: https://dblp.org/pid/75/6568-6.html +- Personal Pages: [wstruggle.github.io](https://github.com/wstruggle/wstruggle.github.io) (updated recently🔥) +- Google Scholar: https://scholar.google.com/citations?user=DexiDloAAAAJ&hl=zh-CN +- ORCID: https://orcid.org/0009-0005-3009-9085 + diff --git a/_pages/includes/honers.md b/_pages/includes/honers.md index 29237503ef..d83a275855 100644 --- a/_pages/includes/honers.md +++ b/_pages/includes/honers.md @@ -1,10 +1,24 @@ # 🎖 Honors and Awards -- *2021.10* Tencent Scholarship (Top 1%) -- *2021.10* National Scholarship (Top 1%) -- *2020.12* [Baidu Scholarship](https://baike.baidu.com/item/%E7%99%BE%E5%BA%A6%E5%A5%96%E5%AD%A6%E9%87%91/9929412) (10 students in the world each year) -- *2020.12* [AI Chinese new 
stars](https://mp.weixin.qq.com/s?__biz=MzA4NzQ5MTA2NA==&mid=2653639431&idx=1&sn=25b6368c1954419b9090840347d9a27d&chksm=8be75b90bc90d286a5af3ef8e610e822d705dc3cf4382b45e3f14489f3e7ec4fd8c95ed0eceb&mpshare=1&scene=2&srcid=0511LMlj9Qv9DeIZAjMjYAU9&sharer_sharetime=1620731348139&sharer_shareid=631c113940cb81f34895aa25ab14422a#rd) (100 worldwide each year) -- *2020.12* [AI Chinese New Star Outstanding Scholar](https://mp.weixin.qq.com/s?__biz=MzA4NzQ5MTA2NA==&mid=2653639431&idx=1&sn=25b6368c1954419b9090840347d9a27d&chksm=8be75b90bc90d286a5af3ef8e610e822d705dc3cf4382b45e3f14489f3e7ec4fd8c95ed0eceb&mpshare=1&scene=2&srcid=0511LMlj9Qv9DeIZAjMjYAU9&sharer_sharetime=1620731348139&sharer_shareid=631c113940cb81f34895aa25ab14422a#rd) (10 candidates worldwide each year) -- *2020.12* [ByteDance Scholars Program](https://ur.bytedance.com/scholarship) (10 students in China each year) -- *2020.10* Tianzhou Chen Scholarship (Top 1%) -- *2020.10* National Scholarship (Top 1%) -- *2015.10* National Scholarship (Undergraduate) (Top 1%) \ No newline at end of file +- *2025.05* The 14th 'Youth BIT' Annual Role Model Award (Scientific Research & Innovation) (第十四届‘青春北理’年度榜样-科研创新榜样, Top 0.1%) + +- *2025.05* Annual Elite Award of School of Information and Electronics (Research Pioneer, Top 0.5%) + +- *2025.05* "China Moutai · Nation's Pillars" Postgraduate Scholarship (PhD) (“中国茅台·国之栋梁”本硕博 优才计划(研究生),Top 0.1%) + +- *2025.01* **CAST Young Talents Support Program-Doctoral Student Special Track** (博士生青托) + +- *2024.12* **NSFC Basic Research Program for Young Scholars (Ph.D.)** (博士生国自然) + +- *2024.10* "Qingjin Award" by the Chinese Journal of Lasers (激光杂志社“青衿奖”) + +- *2024.10* The Special Grade Graduate Scholarship (Top 5%) + +- *2023.10* Provincial first prize in "Challenge Cup" National Undergraduate Science and Technology Works + +- *2022.05* Third place in Mobile Intelligent Photography and Imaging (MIPI) challenge (ECCV Workshop) + +- *2022.01* The third prize in National Post-Graduate 
Mathematical Contest in Modeling + +- *2021.10* The Special Grade Graduate Scholarship (Top 5%) + + \ No newline at end of file diff --git a/_pages/includes/intro.md b/_pages/includes/intro.md index 59edbf7745..56480e0387 100644 --- a/_pages/includes/intro.md +++ b/_pages/includes/intro.md @@ -1,9 +1,3 @@ -I am now working on audio-driven video generation and text-to-speech research. If you are seeking any form of **academic cooperation**, please feel free to email me at [rayeren613@gmail.com](mailto:rayeren613@gmail.com). We are hiring interns! +I'm Zhen Wang, currently pursuing my Ph.D. degree at Beijing Institute of Technology, under the guidance of [Prof. Liheng Bian](https://scholar.google.com/citations?user=66IFMDEAAAAJ&hl=zh-CN), focusing on computational imaging methods and image processing algorithms. -I graduated from [Chu Kochen Honors College](http://ckc.zju.edu.cn/ckcen/main.htm), Zhejiang University (浙江大学竺可桢学院) with a bachelor's degree and from the Department of Computer Science and Technology, Zhejiang University (浙江大学计算机科学与技术学院) with a master's degree, advised by [Zhou Zhao (赵洲)](https://person.zju.edu.cn/zhaozhou). I also collaborate with [Xu Tan (谭旭)](https://www.microsoft.com/en-us/research/people/xuta/), [Tao Qin (秦涛)](https://www.microsoft.com/en-us/research/people/taoqin/) and [Tie-yan Liu (刘铁岩)](https://www.microsoft.com/en-us/research/people/tyliu/) from [Microsoft Research Asia](https://www.microsoft.com/en-us/research/group/machine-learning-research-group/) closely. 
- -I won the [Baidu Scholarship](https://baike.baidu.com/item/%E7%99%BE%E5%BA%A6%E5%A5%96%E5%AD%A6%E9%87%91/9929412) (10 candidates worldwide each year) and [ByteDance Scholars Program](https://ur.bytedance.com/scholarship) (10 candidates worldwide each year) in 2020 and was selected as one of [the top 100 AI Chinese new stars](https://mp.weixin.qq.com/s?__biz=MzA4NzQ5MTA2NA==&mid=2653639431&idx=1&sn=25b6368c1954419b9090840347d9a27d&chksm=8be75b90bc90d286a5af3ef8e610e822d705dc3cf4382b45e3f14489f3e7ec4fd8c95ed0eceb&mpshare=1&scene=2&srcid=0511LMlj9Qv9DeIZAjMjYAU9&sharer_sharetime=1620731348139&sharer_shareid=631c113940cb81f34895aa25ab14422a#rd) and AI Chinese New Star Outstanding Scholar (10 candidates worldwide each year). - -My research interest includes speech synthesis, neural machine translation and automatic music generation. I have published 50+ papers at the top international AI conferences such as NeurIPS, ICML, ICLR, KDD. - -To promote the communication among the Chinese ML & NLP community, we (along with other 11 young scholars worldwide) founded the [MLNLP community](https://space.bilibili.com/168887299) in 2021. I am honored to be one of the chairs of the MLNLP committee. +My current research interests are mainly **snapshot hyperspectral imaging, polarization imaging and computational imaging**. Specifically, I focus on the efficient acquisition and processing of hyperspectral and polarization images, including spectral coding, polarization filters, sensor integration, and computational reconstruction. I explore various compressive sensing and deep learning algorithms to enhance the performance of high-dimensional imaging. 
diff --git a/_pages/includes/news.md b/_pages/includes/news.md index 62d6068b15..a15c342b6a 100644 --- a/_pages/includes/news.md +++ b/_pages/includes/news.md @@ -1,6 +1,2 @@ # 🔥 News -- *2024.03*: 🎉 Two papers are accepted by ICLR 2024 -- *2023.05*: 🎉 Five papers are accepted by ACL 2023 -- *2023.01*: DiffSinger was introduced in [a very popular video](https://www.bilibili.com/video/BV1uM411t7ZJ) (2000k+ views) in Bilibili! -- *2023.01*: I join TikTok as a speech research scientist in Singapore! -- *2022.02*: I release a modern and responsive academic personal [homepage template](https://github.com/RayeRen/acad-homepage.github.io). Welcome to STAR and FORK! \ No newline at end of file +- *2024.11*: 🎉 Our on-chip hyperspectral image sensor paper, [A broadband hyperspectral image sensor with high spatio-temporal resolution](https://scholar.google.com/citations?view_op=view_citation&hl=zh-CN&user=DexiDloAAAAJ&citation_for_view=DexiDloAAAAJ:W7OEmFMy1HYC), has been published in **Nature**! \ No newline at end of file diff --git a/_pages/includes/others.md b/_pages/includes/others.md index 0d810f2353..a9e6fa98ed 100644 --- a/_pages/includes/others.md +++ b/_pages/includes/others.md @@ -1,19 +1,9 @@ # 📖 Educations -- *2019.06 - 2022.04*, Master, Zhejiang University, Hangzhou. -- *2015.09 - 2019.06*, Undergraduate, Chu Kochen Honors College, Zhejiang Univeristy, Hangzhou. -- *2012.09 - 2015.06*, Luqiao Middle School, Taizhou. +- *2021.09 - Now*, Ph.D., Beijing Institute of Technology, Beijing. # 💬 Invited Talks -- *2022.02*, Hosted MLNLP seminar \| [\[Video\]](https://www.bilibili.com/video/BV1wF411x7qh) -- *2021.06*, Audio & Speech Synthesis, Huawei internal talk -- *2021.03*, Non-autoregressive Speech Synthesis, PaperWeekly & biendata \| [\[video\]](https://www.bilibili.com/video/BV1uf4y1t7Hr/) -- *2020.12*, Non-autoregressive Speech Synthesis, Huawei Noah's Ark Lab internal talk +- NAN # 💻 Internships -- *2021.06 - 2021.09*, Alibaba, Hangzhou. 
-- *2019.05 - 2020.02*, [EnjoyMusic](https://enjoymusic.ai/), Hangzhou. -- *2019.02 - 2019.05*, [YiWise](https://www.yiwise.com/), Hangzhou. -- *2018.08 - 2019.02*, [MSRA, machine learning Group](https://www.microsoft.com/en-us/research/group/machine-learning-research-group/), Beijing. -- *2018.01 - 2018.06*, [NetEase, AI department](https://hr.163.com/zc/12-ai/index.html), Hangzhou. -- *2017.08 - 2018.12*, DashBase (acquired by [Cisco](https://blogs.cisco.com/news/349511)), Hangzhou. +- NAN diff --git a/_pages/includes/pub.md b/_pages/includes/pub.md index e5c8f61622..84b4c9c9bf 100644 --- a/_pages/includes/pub.md +++ b/_pages/includes/pub.md @@ -1,136 +1,7 @@ # 📝 Publications -## 🎙 Speech Synthesis +## Computational imaging +Authors marked with # contributed equally to this work; \* denotes the corresponding author. -
NeurIPS 2019
sym
-
- -[FastSpeech: Fast, Robust and Controllable Text to Speech](https://papers.nips.cc/paper/8580-fastspeech-fast-robust-and-controllable-text-to-speech.pdf) \\ -**Yi Ren**, Yangjun Ruan, Xu Tan, Tao Qin, Sheng Zhao, Zhou Zhao, Tie-Yan Liu - -[**Project**](https://speechresearch.github.io/fastspeech/) - -- FastSpeech is the first fully parallel end-to-end speech synthesis model. -- **Academic Impact**: This work is included by many famous speech synthesis open-source projects, such as [ESPNet ![](https://img.shields.io/github/stars/espnet/espnet?style=social)](https://github.com/espnet/espnet). Our work are promoted by more than 20 media and forums, such as [机器之心](https://mp.weixin.qq.com/s/UkFadiUBy-Ymn-zhJ95JcQ)、[InfoQ](https://www.infoq.cn/article/tvy7hnin8bjvlm6g0myu). -- **Industry Impact**: FastSpeech has been deployed in [Microsoft Azure TTS service](https://techcommunity.microsoft.com/t5/azure-ai/neural-text-to-speech-extends-support-to-15-more-languages-with/ba-p/1505911) and supports 49 more languages with state-of-the-art AI quality. It was also shown as a text-to-speech system acceleration example in [NVIDIA GTC2020](https://resources.nvidia.com/events/GTC2020s21420). -
-
- - -
ICLR 2021
sym
-
- -[FastSpeech 2: Fast and High-Quality End-to-End Text to Speech](https://arxiv.org/abs/2006.04558) \\ -**Yi Ren**, Chenxu Hu, Xu Tan, Tao Qin, Sheng Zhao, Zhou Zhao, Tie-Yan Liu - -[**Project**](https://speechresearch.github.io/fastspeech2/) - - This work is included by many famous speech synthesis open-source projects, such as [PaddlePaddle/Parakeet ![](https://img.shields.io/github/stars/PaddlePaddle/PaddleSpeech?style=social)](https://github.com/PaddlePaddle/PaddleSpeech), [ESPNet ![](https://img.shields.io/github/stars/espnet/espnet?style=social)](https://github.com/espnet/espnet) and [fairseq ![](https://img.shields.io/github/stars/pytorch/fairseq?style=social)](https://github.com/pytorch/fairseq). -
-
- - -
ICLR 2024
sym
-
- -[Mega-TTS 2: Boosting Prompting Mechanisms for Zero-Shot Speech Synthesis](https://openreview.net/forum?id=mvMI3N4AvD) \\ -Ziyue Jiang, Jinglin Liu, **Yi Ren**, et al. - -[**Project**](https://boostprompt.github.io/boostprompt/) - - This work has been deployed on many TikTok products. - - Advandced zero-shot voice cloning model. -
-
- - -
AAAI 2022
sym
-
- -[DiffSinger: Singing Voice Synthesis via Shallow Diffusion Mechanism](https://arxiv.org/abs/2105.02446) \\ -Jinglin Liu, Chengxi Li, **Yi Ren**, Feiyang Chen, Zhou Zhao - -- Many [video demos](https://www.bilibili.com/video/BV1be411N7JA) created by the [DiffSinger community](https://github.com/openvpi) are released. -- DiffSinger was introduced in [a very popular video](https://www.bilibili.com/video/BV1uM411t7ZJ) (1600k+ views) on Bilibili! - -- [**Project**](https://diffsinger.github.io/) \| [![](https://img.shields.io/github/stars/NATSpeech/NATSpeech?style=social&label=DiffSpeech Stars)](https://github.com/NATSpeech/NATSpeech) \| [![](https://img.shields.io/github/stars/MoonInTheRiver/DiffSinger?style=social&label=DiffSinger Stars)](https://github.com/MoonInTheRiver/DiffSinger) \| [![Hugging Face](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-blue?label=Demo)](https://huggingface.co/spaces/NATSpeech/DiffSpeech) -
-
- - -
NeurIPS 2021
sym
-
- -[PortaSpeech: Portable and High-Quality Generative Text-to-Speech](https://arxiv.org/abs/2109.15166) \\ -**Yi Ren**, Jinglin Liu, Zhou Zhao - -[**Project**](https://portaspeech.github.io/) \| [![](https://img.shields.io/github/stars/NATSpeech/NATSpeech?style=social&label=Code+Stars)](https://github.com/NATSpeech/NATSpeech) \| [![Hugging Face](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-blue?label=Demo)](https://huggingface.co/spaces/NATSpeech/PortaSpeech) -
-
- -- `AAAI 2024` [Emotion Rendering for Conversational Speech Synthesis with Heterogeneous Graph-Based Context Modeling](https://arxiv.org/abs/2312.11947), Rui Liu, Yifan Hu, **Yi Ren**, et al. [![](https://img.shields.io/github/stars/walker-hyf/ECSS?style=social&label=Code+Stars)](https://github.com/walker-hyf/ECSS) -- ``ICML 2023`` [Make-An-Audio: Text-To-Audio Generation with Prompt-Enhanced Diffusion Models](https://text-to-audio.github.io/paper.pdf), Rongjie Huang, Jiawei Huang, Dongchao Yang, **Yi Ren**, et al. -- ``ACL 2023`` [CLAPSpeech: Learning Prosody from Text Context with Contrastive Language-Audio Pre-Training](), Zhenhui Ye, Rongjie Huang, **Yi Ren**, et al. -- ``ACL 2023`` [FluentSpeech: Stutter-Oriented Automatic Speech Editing with Context-Aware Diffusion Models](), Ziyue Jiang, Qian Yang, Jialong Zuo, Zhenhui Ye, Rongjie Huang, **Yi Ren** and Zhou Zhao -- ``ACL 2023`` [Revisiting and Incorporating GAN and Diffusion Models in High-Fidelity Speech Synthesis](), Rongjie Huang, **Yi Ren**, Ziyue Jiang, et al. -- ``ACL 2023`` [Improving Prosody with Masked Autoencoder and Conditional Diffusion Model For Expressive Text-to-Speech](), Rongjie Huang, Chunlei Zhang, **Yi Ren**, et al. -- `ICLR 2023` [Bag of Tricks for Unsupervised Text-to-Speech](https://openreview.net/forum?id=SbR9mpTuBn), **Yi Ren**, Chen Zhang, Shuicheng Yan -- `INTERSPEECH 2023` [StyleS2ST: zero-shot style transfer for direct speech-to-speech translation](https://arxiv.org/abs/2305.17732), Kun Song, **Yi Ren**, Yi Lei, et al. -- `INTERSPEECH 2023` [GenerTTS: Pronunciation Disentanglement for Timbre and Style Generalization in Cross-Lingual Text-to-Speech](https://arxiv.org/abs/2306.15304), Yahuan Cong, Haoyu Zhang, Haopeng Lin, Shichao Liu, Chunfeng Wang, **Yi Ren**, et al. -- `NeurIPS 2022` [Dict-TTS: Learning to Pronounce with Prior Dictionary Knowledge for Text-to-Speech](), Ziyue Jiang, Zhe Su, Zhou Zhao, Qian Yang, **Yi Ren**, et al. 
[![](https://img.shields.io/github/stars/Zain-Jiang/Dict-TTS?style=social&label=Code+Stars)](https://github.com/Zain-Jiang/Dict-TTS) -- `NeurIPS 2022` [GenerSpeech: Towards Style Transfer for Generalizable Out-Of-Domain Text-to-Speech](), Rongjie Huang, **Yi Ren**, et al. -- `NeurIPS 2022` [M4Singer: a Multi-Style, Multi-Singer and Musical Score Provided Mandarin Singing Corpus](), Lichao Zhang, Ruiqi Li, Shoutong Wang, Liqun Deng, Jinglin Liu, **Yi Ren**, et al. *(Datasets and Benchmarks Track)* [![](https://img.shields.io/github/stars/M4Singer/M4Singer?style=social&label=Dataset+Stars)](https://github.com/M4Singer/M4Singer) -- ``ACM-MM 2022`` [ProDiff: Progressive Fast Diffusion Model for High-Quality Text-to-Speech](), Rongjie Huang, Zhou Zhao, Huadai Liu, Jinglin Liu, Chenye Cui, **Yi Ren**, [![](https://img.shields.io/github/stars/Rongjiehuang/ProDiff?style=social&label=Code+Stars)](https://github.com/Rongjiehuang/ProDiff) -- ``ACM-MM 2022`` [SingGAN: Generative Adversarial Network For High-Fidelity Singing Voice Generation](https://arxiv.org/abs/2110.07468), Rongjie Huang, Chenye Cui, Chen Feiayng, **Yi Ren**, et al. -- ``IJCAI 2022`` [SyntaSpeech: Syntax-Aware Generative Adversarial Text-to-Speech](), Zhenhui Ye, Zhou Zhao, **Yi Ren**, et al. [![](https://img.shields.io/github/stars/yerfor/SyntaSpeech?style=social&label=Code+Stars)](https://github.com/yerfor/SyntaSpeech) -- ``IJCAI 2022`` (Oral) [EditSinger: Zero-Shot Text-Based Singing Voice Editing System with Diverse Prosody Modeling](), Lichao Zhang, Zhou Zhao, **Yi Ren**, et al. -- ``IJCAI 2022`` [FastDiff: A Fast Conditional Diffusion Model for High-Quality Speech Synthesis](), Rongjie Huang, Max W. Y. 
Lam, Jun Wang, Dan Su, Dong Yu, **Yi Ren**, Zhou Zhao, (Oral), [![](https://img.shields.io/github/stars/Rongjiehuang/FastDiff?style=social&label=Code+Stars)](https://github.com/Rongjiehuang/FastDiff) -- ``NAACL 2022`` [A Study of Syntactic Multi-Modality in Non-Autoregressive Machine Translation](), Kexun Zhang, Rui Wang, Xu Tan, Junliang Guo, **Yi Ren**, et al. -- ``ACL 2022`` [Revisiting Over-Smoothness in Text to Speech](https://arxiv.org/abs/2202.13066), **Yi Ren**, Xu Tan, Tao Qin, et al. -- ``ACL 2022`` [Learning the Beauty in Songs: Neural Singing Voice Beautifier](https://arxiv.org/abs/2202.13277), Jinglin Liu, Chengxi Li, **Yi Ren**, et al. \| [![](https://img.shields.io/github/stars/MoonInTheRiver/NeuralSVB?style=social&label=Code+Stars)](https://github.com/MoonInTheRiver/NeuralSVB) -- ``ICASSP 2022`` [ProsoSpeech: Enhancing Prosody With Quantized Vector Pre-training in Text-to-Speech](https://prosospeech.github.io/), **Yi Ren**, et al. -- ``INTERSPEECH 2021`` [EMOVIE: A Mandarin Emotion Speech Dataset with a Simple Emotional Text-to-Speech Model](https://arxiv.org/abs/2106.09317), Chenye Cui, **Yi Ren**, et al. -- ``INTERSPEECH 2021`` (best student paper award candidate) [WSRGlow: A Glow-based Waveform Generative Model for Audio Super-Resolution](https://arxiv.org/abs/2106.08507), Kexun Zhang, **Yi Ren**, Changliang Xu and Zhou Zhao -- ``ICASSP 2021`` [Denoising Text to Speech with Frame-Level Noise Modeling](https://arxiv.org/abs/2012.09547), Chen Zhang, **Yi Ren**, Xu Tan, et al. \| [**Project**](https://speechresearch.github.io/denoispeech/) -- ``ACM-MM 2021`` [Multi-Singer: Fast Multi-Singer Singing Voice Vocoder With A Large-Scale Corpus](https://arxiv.org/pdf/2112.10358), Rongjie Huang, Feiyang Chen, **Yi Ren**, et al. (Oral) -- ``IJCAI 2021`` [FedSpeech: Federated Text-to-Speech with Continual Learning](https://www.ijcai.org/proceedings/2021/527), Ziyue Jiang, **Yi Ren**, et al. 
-- ``KDD 2020`` [DeepSinger: Singing Voice Synthesis with Data Mined From the Web](https://dl.acm.org/doi/abs/10.1145/3394486.3403249), **Yi Ren**, Xu Tan, Tao Qin, et al. \| [**Project**](https://speechresearch.github.io/deepsinger/) -- ``KDD 2020`` [LRSpeech: Extremely Low-Resource Speech Synthesis and Recognition](https://dl.acm.org/doi/abs/10.1145/3394486.3403331), Jin Xu, Xu Tan, **Yi Ren**, et al. \| [**Project**](https://speechresearch.github.io/lrspeech/) -- ``INTERSPEECH 2020`` [MultiSpeech: Multi-Speaker Text to Speech with Transformer](https://www.isca-speech.org/archive/Interspeech_2020/pdfs/3139.pdf), Mingjian Chen, Xu Tan, **Yi Ren**, et al. \| [**Project**](https://speechresearch.github.io/multispeech/) -- ``ICML 2019`` (Oral) [Almost Unsupervised Text to Speech and Automatic Speech Recognition](https://pdfs.semanticscholar.org/9075/a3e6271e5ef4953491488d1776527e632408.pdf), **Yi Ren**, Xu Tan, Tao Qin, et al. \| [**Project**](https://speechresearch.github.io/unsuper/) - -## 👄 TalkingFace & Avatar - -
ICLR 2024
sym
-
- -[Real3D-Portrait: One-shot Realistic 3D Talking Portrait Synthesis](https://openreview.net/forum?id=7ERQPyR2eb), Zhenhui Ye, Tianyun Zhong, Yi Ren, et al. (Spotlight) [**Project**](https://real3dportrait.github.io/) | [**Code**](https://github.com/yerfor/Real3DPortrait) -
-
- -- `ICLR 2023` [GeneFace: Generalized and High-Fidelity Audio-Driven 3D Talking Face Synthesis](https://openreview.net/forum?id=YfwMIDhPccD), Zhenhui Ye, Ziyue Jiang, **Yi Ren**, et al. -- `AAAI 2024` [AMD: Autoregressive Motion Diffusion](https://arxiv.org/abs/2305.09381), Bo Han, Hao Peng, Minjing Dong, **Yi Ren**, et al. -- ``AAAI 2022`` [Parallel and High-Fidelity Text-to-Lip Generation](https://arxiv.org/abs/2107.06831), Jinglin Liu, Zhiying Zhu, **Yi Ren**, et al. \| [![](https://img.shields.io/github/stars/Dianezzy/ParaLip?style=social&label=ParaLip Stars)](https://github.com/Dianezzy/ParaLip) -- ``AAAI 2022`` [Flow-based Unconstrained Lip to Speech Generation](https://ojs.aaai.org/index.php/AAAI/article/view/19966), Jinzheng He, Zhou Zhao, **Yi Ren**, et al. -- ``ACM-MM 2020`` [FastLR: Non-Autoregressive Lipreading Model with Integrate-and-Fire](https://dl.acm.org/doi/10.1145/3394171.3413740), Jinglin Liu, **Yi Ren**, et al. - -## 📚 Machine Translation -- ``ACL 2023`` [AV-TranSpeech: Audio-Visual Robust Speech-to-Speech Translation](), Rongjie Huang, Huadai Liu, Xize Cheng, **Yi Ren**, et al. -- `ICLR 2023` [TranSpeech: Speech-to-Speech Translation With Bilateral Perturbation](https://openreview.net/forum?id=UVAmFAtC5ye), Rongjie Huang, Jinglin Liu, Huadai Liu, **Yi Ren**, Lichao Zhang, Jinzheng He, Zhou Zhao -- ``AAAI 2021`` [UWSpeech: Speech to Speech Translation for Unwritten Languages](https://arxiv.org/abs/2006.07926), Chen Zhang, Xu Tan, **Yi Ren**, et al. \| [**Project**](https://speechresearch.github.io/uwspeech/) -- ``IJCAI 2020`` [Task-Level Curriculum Learning for Non-Autoregressive Neural Machine Translation](https://www.ijcai.org/Proceedings/2020/0534.pdf), Jinglin Liu, **Yi Ren**, Xu Tan, et al. -- ``ACL 2020`` [SimulSpeech: End-to-End Simultaneous Speech to Text Translation](https://www.aclweb.org/anthology/2020.acl-main.350), **Yi Ren**, Jinglin Liu, Xu Tan, et al. 
-- ``ACL 2020`` [A Study of Non-autoregressive Model for Sequence Generation](https://arxiv.org/abs/2004.10454), **Yi Ren**, Jinglin Liu, Xu Tan, et al. -- ``ICLR 2019`` [Multilingual Neural Machine Translation with Knowledge Distillation](https://openreview.net/forum?id=S1gUsoR9YX), Xu Tan, **Yi Ren**, Di He, et al. - - -## 🎼 Music & Dance Generation -- ``IEEE TMM`` [SDMuse: Stochastic Differential Music Editing and Generation via Hybrid Representation](https://ieeexplore.ieee.org/document/10149095), Chen Zhang, Yi Ren, Kejun Zhang, Shuicheng Yan. -- ``AAAI 2021`` [SongMASS: Automatic Song Writing with Pre-training and Alignment Constraint](https://arxiv.org/abs/2012.05168), Zhonghao Sheng, Kaitao Song, Xu Tan, **Yi Ren**, et al. -- ``ACM-MM 2020`` (Oral) [PopMAG: Pop Music Accompaniment Generation](https://dl.acm.org/doi/10.1145/3394171.3413721), **Yi Ren**, Jinzheng He, Xu Tan, et al. \| [**Project**](https://speechresearch.github.io/popmag/) - -## 🧑‍🎨 Generative Model -- ``ICLR 2022`` [Pseudo Numerical Methods for Diffusion Models on Manifolds](https://openreview.net/forum?id=PlKWVd2yBkY), Luping Liu, **Yi Ren**, Zhijie Lin, Zhou Zhao \| [![](https://img.shields.io/github/stars/luping-liu/PNDM?style=social&label=Code+Stars)](https://github.com/luping-liu/PNDM) \| [![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/pseudo-numerical-methods-for-diffusion-models-1/image-generation-on-celeba-64x64)](https://paperswithcode.com/sota/image-generation-on-celeba-64x64?p=pseudo-numerical-methods-for-diffusion-models-1) - -## Others -- `NeurIPS 2023` [Unsupervised Video Domain Adaptation for Action Recognition: A Disentanglement Perspective](https://openreview.net/forum?id=Rp4PA0ez0m), Pengfei Wei, Lingdong Kong, Xinghua Qu, **Yi Ren**, et al. 
-- ``ACM-MM 2022`` [Video-Guided Curriculum Learning for Spoken Video Grounding](), Yan Xia, Zhou Zhao, Shangwei Ye, Yang Zhao, Haoyuan Li, **Yi Ren** \ No newline at end of file +- `Nature` Liheng Bian#\*, **Zhen Wang#**, Yuzhe Zhang#, Lianjie Li, Yinuo Zhang, Chen Yang, Wen Fang, Jiajun Zhao, Chunli Zhu, Qinghao Meng, Xuan Peng, Jun Zhang\*, '[A broadband hyperspectral image sensor with high spatio-temporal resolution](https://www.nature.com/articles/s41586-024-08109-1)', **Nature**, 2024, vol. 635, no. 8037, pp. 73-81. Cites: diff --git a/_pages/includes/pub_short.md b/_pages/includes/pub_short.md index efa9775b11..14bf86758b 100644 --- a/_pages/includes/pub_short.md +++ b/_pages/includes/pub_short.md @@ -1,32 +1,8 @@ # 💻 Selected Research Papers -My full paper list is shown at [my personal homepage](https://rayeren.github.io). +My full paper list is shown at [my personal homepage](https://wstruggle.github.io/). -#### 🎙 Audio and Speech Processing -- ``ICLR 2021`` [FastSpeech 2: Fast and High-Quality End-to-End Text to Speech](https://arxiv.org/abs/2006.04558), **Yi Ren**, Chenxu Hu, Xu Tan, et al. -- ``NeurIPS 2019`` [FastSpeech: Fast, Robust and Controllable Text to Speech](https://papers.nips.cc/paper/8580-fastspeech-fast-robust-and-controllable-text-to-speech.pdf), **Yi Ren**, Yangjun Ruan, Xu Tan, et al. -- `ICLR 2024` [Mega-TTS 2: Boosting Prompting Mechanisms for Zero-Shot Speech Synthesis](https://openreview.net/forum?id=mvMI3N4AvD), Ziyue Jiang, Jinglin Liu, **Yi Ren**, et al. -- ``AAAI 2022`` [DiffSinger: Singing Voice Synthesis via Shallow Diffusion Mechanism](https://arxiv.org/abs/2105.02446), Jinglin Liu, Chengxi Li, **Yi Ren**, et al. 
[**Project**](https://diffsinger.github.io/) \| [![](https://img.shields.io/github/stars/NATSpeech/NATSpeech?style=social&label=DiffSpeech+Stars)](https://github.com/NATSpeech/NATSpeech) \| [![](https://img.shields.io/github/stars/MoonInTheRiver/DiffSinger?style=social&label=DiffSinger+Stars)](https://github.com/MoonInTheRiver/DiffSinger) \| [![Hugging Face](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-blue?label=Demo)](https://huggingface.co/spaces/NATSpeech/DiffSpeech) -- ``NeurIPS 2021`` [PortaSpeech: Portable and High-Quality Generative Text-to-Speech](https://arxiv.org/abs/2109.15166), **Yi Ren**, Jinglin Liu, Zhou Zhao, [**Project**](https://portaspeech.github.io/) \| [![](https://img.shields.io/github/stars/NATSpeech/NATSpeech?style=social&label=Code+Stars)](https://github.com/NATSpeech/NATSpeech) \| [![Hugging Face](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-blue?label=Demo)](https://huggingface.co/spaces/NATSpeech/PortaSpeech) -- ``ICML 2023`` [Make-An-Audio: Text-To-Audio Generation with Prompt-Enhanced Diffusion Models](https://text-to-audio.github.io/paper.pdf), Rongjie Huang, Jiawei Huang, Dongchao Yang, **Yi Ren**, et al. -- ``ICLR 2023`` [Bag of Tricks for Unsupervised Text-to-Speech](https://openreview.net/forum?id=SbR9mpTuBn), **Yi Ren**, Chen Zhang, Shuicheng Yan -- ``ACL 2022`` [Learning the Beauty in Songs: Neural Singing Voice Beautifier](https://arxiv.org/abs/2202.13277), Jinglin Liu, Chengxi Li, **Yi Ren**, Zhiying Zhu, Zhou Zhao \| [![](https://img.shields.io/github/stars/MoonInTheRiver/NeuralSVB?style=social&label=Code+Stars)](https://github.com/MoonInTheRiver/NeuralSVB) -- ``NeurIPS 2022`` [Dict-TTS: Learning to Pronounce with Prior Dictionary Knowledge for Text-to-Speech](), Ziyue Jiang, Zhe Su, Zhou Zhao, Qian Yang, **Yi Ren**, et al. 
[![](https://img.shields.io/github/stars/Zain-Jiang/Dict-TTS?style=social&label=Code+Stars)](https://github.com/Zain-Jiang/Dict-TTS) - -#### 👄 Talkingface Generation -- ``ICLR 2024`` [Real3D-Portrait: One-shot Realistic 3D Talking Portrait Synthesis](https://openreview.net/forum?id=7ERQPyR2eb), Zhenhui Ye, Tianyun Zhong, **Yi Ren**, et al. -- ``ICLR 2023`` [GeneFace: Generalized and High-Fidelity Audio-Driven 3D Talking Face Synthesis](https://openreview.net/forum?id=YfwMIDhPccD), Zhenhui Ye, Ziyue Jiang`, **Yi Ren**, et al. - -#### 📚 Machine Translation -- ``ACL 2023`` [AV-TranSpeech: Audio-Visual Robust Speech-to-Speech Translation](), Rongjie Huang, Huadai Liu, Xize Cheng, **Yi Ren**, et al. -- ``ICLR 2023`` [TranSpeech: Speech-to-Speech Translation With Bilateral Perturbation](https://openreview.net/forum?id=UVAmFAtC5ye), Rongjie Huang, Jinglin Liu, Huadai Liu, **Yi Ren**, et al. -- ``ACL 2020`` [SimulSpeech: End-to-End Simultaneous Speech to Text Translation](https://www.aclweb.org/anthology/2020.acl-main.350), **Yi Ren**, et al. -- ``ICLR 2019`` [Multilingual Neural Machine Translation with Knowledge Distillation](https://openreview.net/forum?id=S1gUsoR9YX), Xu Tan, **Yi Ren**, et al. - -#### 🎼 Music Generation -- ``ACM-MM 2020`` [PopMAG: Pop Music Accompaniment Generation](https://dl.acm.org/doi/10.1145/3394171.3413721), **Yi Ren**, Jinzheng He, Xu Tan, et al. - -#### 🧑‍🎨 Generative Model -- ``ICLR 2022`` [Pseudo Numerical Methods for Diffusion Models on Manifolds](https://openreview.net/forum?id=PlKWVd2yBkY), Luping Liu, **Yi Ren**, et al. 
\| [![](https://img.shields.io/github/stars/luping-liu/PNDM?style=social&label=Code+Stars)](https://github.com/luping-liu/PNDM) \| [![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/pseudo-numerical-methods-for-diffusion-models-1/image-generation-on-celeba-64x64)](https://paperswithcode.com/sota/image-generation-on-celeba-64x64?p=pseudo-numerical-methods-for-diffusion-models-1) +#### Computational imaging +- `Nature` Liheng Bian#\*, **Zhen Wang#**, Yuzhe Zhang#, Lianjie Li, Yinuo Zhang, Chen Yang, Wen Fang, Jiajun Zhao, Chunli Zhu, Qinghao Meng, Xuan Peng, Jun Zhang\*, '[A broadband hyperspectral image sensor with high spatio-temporal resolution](https://www.nature.com/articles/s41586-024-08109-1)', **Nature**, 2024, vol. 635, no. 8037, pp. 73-81. diff --git a/images/ZhenWang-2.jpg b/images/ZhenWang-2.jpg new file mode 100644 index 0000000000..7ad7598ab0 Binary files /dev/null and b/images/ZhenWang-2.jpg differ diff --git a/images/ZhenWang.jpg b/images/ZhenWang.jpg new file mode 100644 index 0000000000..1c939b4d30 Binary files /dev/null and b/images/ZhenWang.jpg differ diff --git a/images/android-chrome-192x192.jpg b/images/android-chrome-192x192.jpg new file mode 100644 index 0000000000..1c939b4d30 Binary files /dev/null and b/images/android-chrome-192x192.jpg differ diff --git a/images/android-chrome-192x192.png b/images/android-chrome-192x192.png deleted file mode 100755 index 20035d9328..0000000000 Binary files a/images/android-chrome-192x192.png and /dev/null differ diff --git a/images/android-chrome-512x512.jpg b/images/android-chrome-512x512.jpg new file mode 100644 index 0000000000..1c939b4d30 Binary files /dev/null and b/images/android-chrome-512x512.jpg differ diff --git a/images/android-chrome-512x512.png b/images/android-chrome-512x512.png deleted file mode 100755 index 61b9f26be1..0000000000 Binary files a/images/android-chrome-512x512.png and /dev/null differ diff --git a/images/apple-touch-icon.png 
b/images/apple-touch-icon.png deleted file mode 100755 index b67f1bf511..0000000000 Binary files a/images/apple-touch-icon.png and /dev/null differ diff --git a/images/diffsinger.png b/images/diffsinger.png deleted file mode 100644 index a6b2c92069..0000000000 Binary files a/images/diffsinger.png and /dev/null differ diff --git a/images/favicon-16x16.png b/images/favicon-16x16.png deleted file mode 100755 index 5aa57024aa..0000000000 Binary files a/images/favicon-16x16.png and /dev/null differ diff --git a/images/favicon-32x32.png b/images/favicon-32x32.png deleted file mode 100755 index 7ac55b62af..0000000000 Binary files a/images/favicon-32x32.png and /dev/null differ diff --git a/images/favicon.ico b/images/favicon.ico deleted file mode 100755 index 93fda86cd2..0000000000 Binary files a/images/favicon.ico and /dev/null differ diff --git a/images/fs.png b/images/fs.png deleted file mode 100644 index 061988a9ff..0000000000 Binary files a/images/fs.png and /dev/null differ diff --git a/images/fs2.png b/images/fs2.png deleted file mode 100644 index fb07391e40..0000000000 Binary files a/images/fs2.png and /dev/null differ diff --git a/images/mega.png b/images/mega.png deleted file mode 100644 index ee0c274e8f..0000000000 Binary files a/images/mega.png and /dev/null differ diff --git a/images/portaspeech.png b/images/portaspeech.png deleted file mode 100644 index 75ca78fe0a..0000000000 Binary files a/images/portaspeech.png and /dev/null differ diff --git a/images/real3d.png b/images/real3d.png deleted file mode 100644 index 7d13cdef9c..0000000000 Binary files a/images/real3d.png and /dev/null differ diff --git a/images/ry_profile.jpeg b/images/ry_profile.jpeg deleted file mode 100644 index 877e2ec69f..0000000000 Binary files a/images/ry_profile.jpeg and /dev/null differ diff --git a/images/ry_profile.jpg b/images/ry_profile.jpg new file mode 100644 index 0000000000..1c939b4d30 Binary files /dev/null and b/images/ry_profile.jpg differ