Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 18 additions & 4 deletions sdk/voicelive/azure-ai-voicelive/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,15 +1,29 @@
# Release History

## 1.1.0-beta.1 (Unreleased)
## 1.1.0-beta.1 (2026-06-02)

### Features Added

Comment thread
xitzhang marked this conversation as resolved.
### Breaking Changes

### Bugs Fixed
- **WebRTC SDP negotiation** for browser/native WebRTC clients:
  - New `ClientEventRtcCallSdpCreate` (`rtc.call.sdp.create`) to send an SDP offer with an optional initial `VoiceLiveSessionOptions`
  - New `ServerEventRtcCallSdpCreated` (`rtc.call.sdp.created`) carrying the SDP answer and `rtcCallId`
  - New `ServerEventRtcCallError` (`rtc.call.error`) with structured `RtcCallErrorDetails` (type/code/message) and originating operation
- **Streaming input text** into a conversation item:
  - New `ClientEventInputTextDelta` (`input_text.delta`) and `ClientEventInputTextDone` (`input_text.done`), each with optional `contentIndex`
- **Output audio buffer playback lifecycle**:
  - New `ServerEventOutputAudioBufferStarted` (`output_audio_buffer.started`) and `ServerEventOutputAudioBufferStopped` (`output_audio_buffer.stopped`), each exposing `responseId`
- **Smart audio-based end-of-turn detection**: new `SmartEndOfTurnDetection` (`smart_end_of_turn_detection`) `EouDetection` variant with `thresholdLevel` and `timeoutMs`; new `EouDetectionModel.SMART_END_OF_TURN_DETECTION` value
- **Echo cancellation reference source**: `AudioEchoCancellation` is now `@Fluent` and exposes `referenceSource` (new `EchoCancellationReferenceSource` enum with `SERVER` / `CLIENT`) and `channels` for stereo input where channel 1 is the client-supplied echo reference
- **Azure realtime native voices**: new `AzureRealtimeNativeVoice` (type `azure-realtime-native`) and `AzureRealtimeNativeVoiceName` expandable enum (`AARTI`, `ANDREW`, `AVA`, `DENISE`, `DIYA`, `ELSA`, `FLORIAN`, `FRANCISCA`, `MEERA`, `XIAOXIAO`, `YUNXI`, `XIMENA`) for use with the `azure-realtime` model
- **Parallel tool calls**: `VoiceLiveSessionOptions.setParallelToolCalls(Boolean)` / `isParallelToolCalls()` (and matching getter on `VoiceLiveSessionResponse`) to control whether the model may invoke tools in parallel
- **Hosted agent invocation passthrough**:
  - `ResponseCreateParams.setInvokeInput(Map<String, BinaryData>)` to attach input data for a hosted agent invocation on a single response request (preview)
  - New `ServerEventResponseInvocationDelta` (`response.invocation.delta`) passes through non-speech SSE events from the hosted agent

### Other Changes

- Regenerated against `2026-06-01-preview` VoiceLive TypeSpec. The default service API version used by `VoiceLiveClientBuilder` is unchanged (`V2026_04_10`, GA).

## 1.0.0 (2026-06-01)

This is the first General Availability (GA) release of the Azure VoiceLive client library for Java.
Expand Down
36 changes: 36 additions & 0 deletions sdk/voicelive/azure-ai-voicelive/cspell.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
{
"version": "0.2",
"language": "en",
"words": [
"Aarti",
"AARTI",
"aiservices",
"AIservices",
"Dexec",
"Diya",
"DIYA",
"filler",
"FILLER",
"foundry",
"FOUNDRY",
"genai",
"GENAI",
"HDOMNI",
"Meera",
"MEERA",
"SSML",
"Unpooled",
"viseme",
"VISEME",
"webrtc",
"WEBRTC",
"Xiaoxiao",
"XIAOXIAO",
"Ximena",
"XIMENA",
"xhigh",
"XHIGH",
"Yunxi",
"YUNXI"
]
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
// Code generated by Microsoft (R) TypeSpec Code Generator.
package com.azure.ai.voicelive.models;

import com.azure.core.annotation.Fluent;
import com.azure.core.annotation.Generated;
import com.azure.core.annotation.Immutable;
import com.azure.json.JsonReader;
import com.azure.json.JsonSerializable;
import com.azure.json.JsonToken;
Expand All @@ -14,7 +14,7 @@
/**
* Echo cancellation configuration for server-side audio processing.
*/
@Immutable
@Fluent
public final class AudioEchoCancellation implements JsonSerializable<AudioEchoCancellation> {

/*
Expand Down Expand Up @@ -48,6 +48,9 @@ public String getType() {
public JsonWriter toJson(JsonWriter jsonWriter) throws IOException {
    // Emits the fields in a fixed order: "type", "reference_source", "channels".
    jsonWriter.writeStartObject();
    jsonWriter.writeStringField("type", this.type);

    // The enum is written through its string form; an unset reference source is handed to the writer as null.
    final String referenceSourceText;
    if (this.referenceSource != null) {
        referenceSourceText = this.referenceSource.toString();
    } else {
        referenceSourceText = null;
    }
    jsonWriter.writeStringField("reference_source", referenceSourceText);

    jsonWriter.writeNumberField("channels", this.channels);
    return jsonWriter.writeEndObject();
}

Expand All @@ -67,9 +70,93 @@ public static AudioEchoCancellation fromJson(JsonReader jsonReader) throws IOExc
while (reader.nextToken() != JsonToken.END_OBJECT) {
String fieldName = reader.getFieldName();
reader.nextToken();
reader.skipChildren();
if ("reference_source".equals(fieldName)) {
deserializedAudioEchoCancellation.referenceSource
= EchoCancellationReferenceSource.fromString(reader.getString());
} else if ("channels".equals(fieldName)) {
deserializedAudioEchoCancellation.channels = reader.getNullable(JsonReader::getInt);
} else {
reader.skipChildren();
}
}
return deserializedAudioEchoCancellation;
});
}

/*
* The source of the echo cancellation reference signal.
* - `server`: EC uses the internal TTS loopback as the reference signal (default, existing behavior).
* - `client`: EC uses the client-supplied reference channel (ch1 of stereo input). Internal TTS loopback is
* skipped.
*/
@Generated
private EchoCancellationReferenceSource referenceSource;

/*
* Number of input audio channels.
* - `1`: Mono input (default).
* - `2`: Interleaved stereo input where channel 0 is the microphone signal and channel 1 is the echo reference
* signal.
* When set to 2, `reference_source` must be `client` and `input_audio_format` must be `pcm16`.
*/
@Generated
private Integer channels;

/**
* Gets the source of the echo cancellation reference signal.
* <ul>
* <li>{@code server}: EC uses the internal TTS loopback as the reference signal (default, existing behavior).</li>
* <li>{@code client}: EC uses the client-supplied reference channel (ch1 of stereo input); the internal TTS
* loopback is skipped.</li>
* </ul>
*
* @return the referenceSource value; may be {@code null} if no value has been assigned.
*/
@Generated
public EchoCancellationReferenceSource getReferenceSource() {
return this.referenceSource;
}

/**
* Sets the source of the echo cancellation reference signal.
* <ul>
* <li>{@code server}: EC uses the internal TTS loopback as the reference signal (default, existing behavior).</li>
* <li>{@code client}: EC uses the client-supplied reference channel (ch1 of stereo input); the internal TTS
* loopback is skipped.</li>
* </ul>
* The value is stored as given; this setter performs no validation.
*
* @param referenceSource the referenceSource value to set.
* @return the AudioEchoCancellation object itself, to allow call chaining.
*/
@Generated
public AudioEchoCancellation setReferenceSource(EchoCancellationReferenceSource referenceSource) {
this.referenceSource = referenceSource;
return this;
}

/**
* Gets the number of input audio channels.
* <ul>
* <li>{@code 1}: Mono input (default).</li>
* <li>{@code 2}: Interleaved stereo input where channel 0 is the microphone signal and channel 1 is the echo
* reference signal.</li>
* </ul>
* When set to 2, {@code reference_source} must be {@code client} and {@code input_audio_format} must be
* {@code pcm16}.
*
* @return the channels value; may be {@code null} if no value has been assigned.
*/
@Generated
public Integer getChannels() {
return this.channels;
}

/**
* Sets the number of input audio channels.
* <ul>
* <li>{@code 1}: Mono input (default).</li>
* <li>{@code 2}: Interleaved stereo input where channel 0 is the microphone signal and channel 1 is the echo
* reference signal.</li>
* </ul>
* When set to 2, {@code reference_source} must be {@code client} and {@code input_audio_format} must be
* {@code pcm16}. This setter stores the value as given and does not itself check that constraint.
*
* @param channels the channels value to set.
* @return the AudioEchoCancellation object itself, to allow call chaining.
*/
@Generated
public AudioEchoCancellation setChannels(Integer channels) {
this.channels = channels;
return this;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
// Code generated by Microsoft (R) TypeSpec Code Generator.
package com.azure.ai.voicelive.models;

import com.azure.core.annotation.Generated;
import com.azure.core.annotation.Immutable;
import com.azure.json.JsonReader;
import com.azure.json.JsonSerializable;
import com.azure.json.JsonToken;
import com.azure.json.JsonWriter;
import java.io.IOException;

/**
 * Voice configuration that selects one of the Azure realtime native voices.
 *
 * <p>These voices are natively supported by the {@code azure-realtime} model and offer higher quality speech
 * synthesis than standard Azure voices. They are only valid when using the {@code azure-realtime} model.</p>
 */
@Immutable
public final class AzureRealtimeNativeVoice implements JsonSerializable<AzureRealtimeNativeVoice> {

    /*
     * The type of the voice. Fixed for this class.
     */
    @Generated
    private final String type = "azure-realtime-native";

    /*
     * Which Azure realtime native voice is selected.
     */
    @Generated
    private final AzureRealtimeNativeVoiceName name;

    /**
     * Creates an instance of AzureRealtimeNativeVoice class.
     *
     * @param name the name value to set; stored as given, without validation.
     */
    @Generated
    public AzureRealtimeNativeVoice(AzureRealtimeNativeVoiceName name) {
        this.name = name;
    }

    /**
     * Get the type property: The type of the voice.
     *
     * @return the type value, always {@code "azure-realtime-native"}.
     */
    @Generated
    public String getType() {
        return type;
    }

    /**
     * Get the name property: The name of the Azure realtime native voice.
     *
     * @return the name value supplied at construction.
     */
    @Generated
    public AzureRealtimeNativeVoiceName getName() {
        return name;
    }

    /**
     * {@inheritDoc}
     */
    @Generated
    @Override
    public JsonWriter toJson(JsonWriter jsonWriter) throws IOException {
        // Fields are emitted in a fixed order: "type" first, then "name".
        jsonWriter.writeStartObject();
        jsonWriter.writeStringField("type", this.type);

        // The voice name is written through its string form; a null name is handed to the writer as null.
        final String voiceName;
        if (this.name != null) {
            voiceName = this.name.toString();
        } else {
            voiceName = null;
        }
        jsonWriter.writeStringField("name", voiceName);

        return jsonWriter.writeEndObject();
    }

    /**
     * Reads an instance of AzureRealtimeNativeVoice from the JsonReader.
     *
     * <p>Only the {@code name} property is consumed; every other property, including {@code type}, is skipped.
     * If {@code name} is absent, the returned instance carries a {@code null} name.</p>
     *
     * @param jsonReader The JsonReader being read.
     * @return An instance of AzureRealtimeNativeVoice if the JsonReader was pointing to an instance of it, or null if
     * it was pointing to JSON null.
     * @throws IllegalStateException Declared by the generated contract for missing required properties; note that
     * this method itself performs no required-property check.
     * @throws IOException If an error occurs while reading the AzureRealtimeNativeVoice.
     */
    @Generated
    public static AzureRealtimeNativeVoice fromJson(JsonReader jsonReader) throws IOException {
        return jsonReader.readObject(in -> {
            AzureRealtimeNativeVoiceName parsedName = null;
            for (JsonToken token = in.nextToken(); token != JsonToken.END_OBJECT; token = in.nextToken()) {
                final String property = in.getFieldName();
                // Advance from the field name to its value before deciding what to do with it.
                in.nextToken();
                if (!"name".equals(property)) {
                    in.skipChildren();
                    continue;
                }
                parsedName = AzureRealtimeNativeVoiceName.fromString(in.getString());
            }
            return new AzureRealtimeNativeVoice(parsedName);
        });
    }
}
Loading