Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions Assets/audio-integration.js
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ const AudioLabConfig = {
moonshine_stt: { category: 'audiolab_stt', providerFlag: 'moonshine_stt_params' },
realtimestt_stt: { category: 'audiolab_stt', providerFlag: 'realtimestt_params' },
musicgen_music: { category: 'audiolab_audiogen', providerFlag: 'musicgen_music_params', extraFlags: ['audiocraft_sampling'] },
acestep_music: { category: 'audiolab_audiogen', providerFlag: 'acestep_music_params', extraFlags: ['acestep_lm_params', 'acestep_task_params'] },
acestep_music: { category: 'audiolab_audiogen', providerFlag: 'acestep_music_params', extraFlags: ['acestep_lm_params', 'acestep_task_params'], keepCoreParams: ['steps', 'cfgscale'] },
openvoice_clone: { category: 'audiolab_clone', providerFlag: 'openvoice_clone_params' },
rvc_clone: { category: 'audiolab_clone', providerFlag: 'rvc_clone_params' },
gptsovits_clone: { category: 'audiolab_clone', providerFlag: 'gptsovits_clone_params' },
Expand Down Expand Up @@ -141,10 +141,12 @@ featureSetChangers.push(() => {

const curArch = currentModelHelper.curArch;
const isAudioModel = AudioLabConfig.isAudioModel(curArch);
const archConfig = isAudioModel ? AudioLabConfig.archToCategory[curArch] : null;
const keepCoreParams = archConfig?.keepCoreParams || [];
console.log('[audiolab] featureSetChanger: curArch =', JSON.stringify(curArch), 'isAudioModel =', isAudioModel);

for (const param of gen_param_types) {
if (AudioLabConfig.coreParamsToHide.includes(param.id)) {
if (AudioLabConfig.coreParamsToHide.includes(param.id) && !keepCoreParams.includes(param.id)) {
if (isAudioModel) {
if (!param.hasOwnProperty('original_feature_flag_audiolab')) {
param.original_feature_flag_audiolab = param.feature_flag;
Expand Down
58 changes: 28 additions & 30 deletions AudioBackends/DynamicAudioBackend.cs
Original file line number Diff line number Diff line change
Expand Up @@ -1097,48 +1097,46 @@ private static Dictionary<string, object> BuildEngineArgs(T2IParamInput input, A
break;

case "acestep_music":
// Core DiT params (acestep_music_params)
// Resolve DiT GGUF filename from model config
string ditModel = modelDef?.EngineConfig?.TryGetValue("dit_model", out object dmObj) == true ? dmObj?.ToString() : "acestep-v15-turbo";
args["dit_file"] = AceStepCppManager.GetDitFileName(ditModel);
// AceRequest-compatible params
args["caption"] = input.Get(T2IParamTypes.Prompt, "");
args["lyrics"] = input.TryGet(AudioLabParams.Lyrics, out string ly) ? ly : "[Instrumental]";
args["seed"] = input.TryGet(T2IParamTypes.Seed, out long aceSeed) ? aceSeed : -1L;
args["infer_step"] = input.TryGet(AudioLabParams.InferStep, out int infStep) ? infStep : 8;
args["guidance_scale"] = input.TryGet(AudioLabParams.ACEGuidanceScale, out double aceGuide) ? aceGuide : 7.0;
args["instrumental"] = input.TryGet(AudioLabParams.Instrumental, out string aceInst) ? aceInst : "false";
args["bpm"] = input.TryGet(AudioLabParams.BPM, out int aceBpm) ? aceBpm : 120;
if (input.TryGet(AudioLabParams.KeyScale, out string aceKey) && !string.IsNullOrEmpty(aceKey))
args["key_scale"] = aceKey;
args["time_signature"] = input.TryGet(AudioLabParams.TimeSignature, out string aceTs) ? aceTs : "4";
args["vocal_language"] = input.TryGet(AudioLabParams.VocalLanguage, out string aceVl) ? aceVl : "en";
args["shift"] = input.TryGet(AudioLabParams.ACEShift, out double aceShift) ? aceShift : 3.0;
args["infer_method"] = input.TryGet(AudioLabParams.InferMethod, out string aceIm) ? aceIm : "ode";
args["use_adg"] = input.TryGet(AudioLabParams.UseADG, out string aceAdg) ? aceAdg : "false";
args["cfg_interval_start"] = input.TryGet(AudioLabParams.CFGIntervalStart, out double aceCfgS) ? aceCfgS : 0.0;
args["cfg_interval_end"] = input.TryGet(AudioLabParams.CFGIntervalEnd, out double aceCfgE) ? aceCfgE : 1.0;
args["enable_normalization"] = input.TryGet(AudioLabParams.EnableNormalization, out string aceNorm) ? aceNorm : "true";
args["normalization_db"] = input.TryGet(AudioLabParams.NormalizationDB, out double aceNormDb) ? aceNormDb : -14.0;
// LM planner params (acestep_lm_params) — TODO: integrate with SwarmUI AbstractLLMBackend
args["duration"] = input.TryGet(T2IParamTypes.Text2AudioDuration, out double aceDur) ? aceDur : 30.0;
args["inference_steps"] = input.TryGet(T2IParamTypes.Steps, out int infStep) ? infStep : 0;
args["guidance_scale"] = input.TryGet(T2IParamTypes.CFGScale, out double aceGuide) ? aceGuide : 0.0;
args["bpm"] = input.TryGet(T2IParamTypes.Text2AudioBPM, out long aceBpm) ? (int)aceBpm : 0;
args["keyscale"] = input.TryGet(T2IParamTypes.Text2AudioKeyScale, out string aceKey) ? aceKey : "";
args["timesignature"] = input.TryGet(T2IParamTypes.Text2AudioTimeSignature, out string aceTs) ? aceTs : "";
args["vocal_language"] = input.TryGet(T2IParamTypes.Text2AudioLanguage, out string aceVl) ? aceVl : "";
args["shift"] = input.TryGet(AudioLabParams.ACEShift, out double aceShift) ? aceShift : 0.0;
// LM planner params (acestep_lm_params)
args["lm_model"] = input.TryGet(AudioLabParams.ACELMModel, out string aceLm) ? aceLm : "none";
args["thinking"] = input.TryGet(AudioLabParams.Thinking, out string aceThink) ? aceThink : "true";
args["lm_temperature"] = input.TryGet(AudioLabParams.LMTemperature, out double aceLmTemp) ? aceLmTemp : 0.85;
args["lm_cfg_scale"] = input.TryGet(AudioLabParams.LMCFGScale, out double aceLmCfg) ? aceLmCfg : 2.0;
args["lm_top_k"] = input.TryGet(AudioLabParams.LMTopK, out int aceLmTopK) ? aceLmTopK : 0;
args["lm_top_p"] = input.TryGet(AudioLabParams.LMTopP, out double aceLmTopP) ? aceLmTopP : 0.9;
if (input.TryGet(AudioLabParams.LMNegativePrompt, out string aceLmNeg) && !string.IsNullOrEmpty(aceLmNeg))
args["lm_negative_prompt"] = aceLmNeg;
args["use_cot_metas"] = input.TryGet(AudioLabParams.UseCotMetas, out string aceCotM) ? aceCotM : "true";
args["use_cot_caption"] = input.TryGet(AudioLabParams.UseCotCaption, out string aceCotC) ? aceCotC : "true";
args["use_cot_language"] = input.TryGet(AudioLabParams.UseCotLanguage, out string aceCotL) ? aceCotL : "true";
// Task params (acestep_task_params)
args["task_type"] = input.TryGet(AudioLabParams.ACETaskType, out string aceTask) ? aceTask : "text2music";
args["use_cot_caption"] = input.TryGet(AudioLabParams.UseCotCaption, out string aceCotC) && aceCotC == "true";
// Task params — source audio determines task mode (acestep_task_params)
string aceSrcAudio = GetBase64Audio(input, AudioLabParams.ACESourceAudio);
if (!string.IsNullOrEmpty(aceSrcAudio))
{
args["src_audio"] = aceSrcAudio;
string aceRefAudio = GetBase64Audio(input, AudioLabParams.ACEReferenceAudio);
if (!string.IsNullOrEmpty(aceRefAudio))
args["reference_audio"] = aceRefAudio;
args["repaint_start"] = input.TryGet(AudioLabParams.RepaintStart, out double aceRpS) ? aceRpS : 0.0;
args["repaint_end"] = input.TryGet(AudioLabParams.RepaintEnd, out double aceRpE) ? aceRpE : -1.0;
args["cover_strength"] = input.TryGet(AudioLabParams.CoverStrength, out double aceCovStr) ? aceCovStr : 1.0;
args["cover_noise_strength"] = input.TryGet(AudioLabParams.CoverNoiseStrength, out double aceCovNs) ? aceCovNs : 0.0;
string taskType = input.TryGet(AudioLabParams.ACETaskType, out string aceTask) ? aceTask : "cover";
if (taskType == "cover")
args["audio_cover_strength"] = input.TryGet(AudioLabParams.CoverStrength, out double aceCovStr) ? aceCovStr : 0.5;
if (taskType == "repaint")
{
args["repainting_start"] = input.TryGet(AudioLabParams.RepaintStart, out double aceRpS) ? aceRpS : -1.0;
args["repainting_end"] = input.TryGet(AudioLabParams.RepaintEnd, out double aceRpE) ? aceRpE : -1.0;
}
if (taskType == "lego")
args["lego"] = "vocals";
}
break;

case "musicgen_music":
Expand Down
Loading