% Journal article: Applied Sciences, volume 15, issue 17, article no. 9513 (2025).
% NOTE(review): the doi was derived from the publisher's usual
%   volume/issue/article-number pattern -- confirm that it resolves.
% NOTE(review): date and urldate are both 2025-01-01, which looks like an export
%   placeholder rather than the real publication/access dates -- confirm. year and
%   date are both kept because the exporting tool appears to emit both.
% NOTE(review): tppubtype is not a standard field (presumably added by the
%   exporting tool); unknown fields are ignored by bibliography styles.
% NOTE(review): the key "nokey" is a placeholder; it is left unchanged so that
%   existing citations keep working. Rename only together with all citing documents.
% NOTE(review): the note field carries database-export boilerplate that styles
%   print verbatim -- consider trimming it to the licence statement.
@article{nokey,
  title     = {Diffusion-Inspired Masked Language Modeling for Symbolic Harmony Generation on a Fixed Time Grid},
  author    = {Kaliakatsos-Papakostas, Maximos and Makris, Dimos and Soiledis, Konstantinos and Tsamis, Konstantinos-Theodoros and Katsouros, Vassilis and Cambouropoulos, Emilios},
  journal   = {Applied Sciences},
  volume    = {15},
  number    = {17},
  pages     = {9513},
  year      = {2025},
  date      = {2025-01-01},
  doi       = {10.3390/app15179513},
  url       = {https://www.proquest.com/scholarly-journals/diffusion-inspired-masked-language-modeling/docview/3249675488/se-2},
  urldate   = {2025-01-01},
  abstract  = {We present a novel encoder-only Transformer model for symbolic music harmony generation, based on a fixed time-grid representation of melody and harmony. Inspired by denoising diffusion processes, our model progressively unmasks harmony tokens over a sequence of discrete stages, learning to reconstruct the full harmonic structure from partial context. Unlike autoregressive models, this formulation enables flexible, non-sequential generation and supports explicit control over harmony placement. The model is stage-aware, receiving timestep embeddings analogous to diffusion timesteps, and is conditioned on both a binary piano roll and a pitch class roll to capture melodic context. We explore two unmasking schedules—random token revealing and midpoint doubling—both requiring a fixed and significantly reduced number of model calls at inference time. While our approach achieves competitive performance with strong autoregressive baselines (GPT-2 and BART) across several harmonic metrics, its key advantages lie in controllability, structured decoding with fixed inference steps, and alignment with musical structure. Ablation studies further highlight the role of stage awareness and pitch class conditioning. Our results position this method as a viable and interpretable alternative for symbolic harmony generation and a foundation for future work on structured, controllable musical modeling.},
  note      = {Copyright - © 2025 by the authors. Licensee MDPI, Basel, Switzerland. This article is an open access article distributed under the terms and conditions of the Creative Commons Attribution (CC BY) license (https://creativecommons.org/licenses/by/4.0/). Notwithstanding the ProQuest Terms and Conditions, you may use this content in accordance with the terms of the License; Last updated - 2026-01-26},
  pubstate  = {published},
  tppubtype = {article}
}