Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 19 additions & 2 deletions packages/components/src/speechToText.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,19 @@ const SpeechToTextType = {
GROQ_WHISPER: 'groqWhisper'
}

/**
 * Builds the request URL for the Azure Speech-to-Text transcription endpoint.
 *
 * When `baseUrl` is missing or blank, the regional default
 * `https://{serviceRegion}.cognitiveservices.azure.com/speechtotext/transcriptions:transcribe`
 * is used. Otherwise the custom URL is used after trimming whitespace and
 * removing trailing slashes. An `api-version` query parameter is appended
 * unless the URL's query string already carries one.
 *
 * @param serviceRegion - Region name interpolated into the default host.
 * @param apiVersion - Value for the `api-version` parameter (URI-encoded when appended).
 * @param baseUrl - Optional custom endpoint URL.
 * @returns The endpoint URL including an `api-version` query parameter.
 */
export const buildAzureSpeechToTextUrl = (serviceRegion: string, apiVersion: string, baseUrl?: string) => {
    const trimmedBaseUrl = baseUrl?.trim()
    const endpoint = trimmedBaseUrl || `https://${serviceRegion}.cognitiveservices.azure.com/speechtotext/transcriptions:transcribe`

    // Split off any fragment first: text after '#' is not part of the query, so
    // appending to (or searching in) the full string would misplace api-version.
    const hashIndex = endpoint.indexOf('#')
    const fragment = hashIndex === -1 ? '' : endpoint.slice(hashIndex)
    const beforeFragment = hashIndex === -1 ? endpoint : endpoint.slice(0, hashIndex)
    const base = beforeFragment.replace(/\/+$/, '')

    // Respect an api-version the caller already put in the query string.
    if (/[?&]api-version=/.test(base)) {
        return `${base}${fragment}`
    }

    // Pick the joiner: start a query, extend one, or reuse a dangling '?' / '&'.
    let separator = '&'
    if (!base.includes('?')) {
        separator = '?'
    } else if (/[?&]$/.test(base)) {
        separator = ''
    }

    return `${base}${separator}api-version=${encodeURIComponent(apiVersion)}${fragment}`
}
Comment on lines +17 to +28
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

While the current string-based URL construction works for standard cases, it is susceptible to issues with trailing separators (e.g., ?&) or fragments (e.g., #frag?api-version=...). Using the native URL API would make this helper more robust and maintainable by automatically handling query parameter merging and encoding.

/**
 * Resolves the Azure Speech-to-Text endpoint and makes sure it carries an
 * `api-version` query parameter.
 *
 * A non-blank `baseUrl` (trimmed, trailing slashes removed) takes precedence
 * over the regional default built from `serviceRegion`. The endpoint is parsed
 * with the `URL` API; if parsing throws, the parameter is appended by plain
 * string concatenation instead.
 *
 * @param serviceRegion - Region name interpolated into the default host.
 * @param apiVersion - Value used for `api-version` when the endpoint has none.
 * @param baseUrl - Optional custom endpoint URL.
 * @returns The endpoint URL as a string.
 */
export const buildAzureSpeechToTextUrl = (serviceRegion: string, apiVersion: string, baseUrl?: string) => {
    const customEndpoint = baseUrl?.trim()

    let endpoint: string
    if (customEndpoint) {
        endpoint = customEndpoint.replace(/\/+$/, '')
    } else {
        endpoint = `https://${serviceRegion}.cognitiveservices.azure.com/speechtotext/transcriptions:transcribe`
    }

    try {
        const parsed = new URL(endpoint)
        const { searchParams } = parsed
        if (searchParams.has('api-version')) {
            // Keep whatever version the caller already specified.
            return parsed.toString()
        }
        searchParams.set('api-version', apiVersion)
        return parsed.toString()
    } catch {
        // Endpoint is not an absolute URL the parser accepts: fall back to string handling.
        const alreadyVersioned = /[?&]api-version=/.test(endpoint)
        if (alreadyVersioned) {
            return endpoint
        }
        const separator = endpoint.includes('?') ? '&' : '?'
        return `${endpoint}${separator}api-version=${encodeURIComponent(apiVersion)}`
    }
}


export const convertSpeechToText = async (upload: IFileUpload, speechToTextConfig: ICommonObject, options: ICommonObject) => {
if (speechToTextConfig) {
const credentialId = speechToTextConfig.credentialId as string
Expand Down Expand Up @@ -76,8 +89,12 @@ export const convertSpeechToText = async (upload: IFileUpload, speechToTextConfi
}
case SpeechToTextType.AZURE_COGNITIVE: {
try {
const baseUrl = `https://${credentialData.serviceRegion}.cognitiveservices.azure.com/speechtotext/transcriptions:transcribe`
const apiVersion = credentialData.apiVersion || '2024-05-15-preview'
const azureSpeechToTextUrl = buildAzureSpeechToTextUrl(
credentialData.serviceRegion,
apiVersion,
speechToTextConfig?.baseUrl
)

const formData = new FormData()
const audioBlob = new Blob([new Uint8Array(audio_file)], { type: upload.type })
Expand All @@ -93,7 +110,7 @@ export const convertSpeechToText = async (upload: IFileUpload, speechToTextConfi
}
formData.append('definition', JSON.stringify(definition))

const response = await axios.post(`${baseUrl}?api-version=${apiVersion}`, formData, {
const response = await axios.post(azureSpeechToTextUrl, formData, {
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

security-high high

Since the baseUrl is now user-configurable via the UI, using the raw axios client poses a security risk as it bypasses Server-Side Request Forgery (SSRF) protections. It is highly recommended to use the secureAxiosRequest wrapper from ./httpSecurity, which validates the target URL against the repository's deny list (e.g., internal network ranges). Note that you will need to import secureAxiosRequest from ./httpSecurity and adjust the call to use the configuration object syntax.

headers: {
'Ocp-Apim-Subscription-Key': credentialData.azureSubscriptionKey,
Accept: 'application/json'
Expand Down
32 changes: 32 additions & 0 deletions packages/components/src/utils.test.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import { removeInvalidImageMarkdown, convertRequireToImport, COMMONJS_REQUIRE_REGEX, IMPORT_EXTRACTION_REGEX } from './utils'
import { buildAzureSpeechToTextUrl } from './speechToText'

describe('removeInvalidImageMarkdown', () => {
describe('strips non-http/https image markdown', () => {
Expand Down Expand Up @@ -229,3 +230,34 @@ describe('Import extraction regex (utils.ts line 1596 pattern)', () => {
expect(extractModules('console.log("hello")')).toEqual([])
})
})

// Covers the three supported inputs of buildAzureSpeechToTextUrl: no custom
// endpoint, a custom endpoint without api-version, and one that already has it.
describe('buildAzureSpeechToTextUrl', () => {
    const region = 'eastus'
    const defaultApiVersion = '2024-05-15-preview'

    it('builds default regional URL', () => {
        // No baseUrl: the host is derived from the region.
        expect(buildAzureSpeechToTextUrl(region, defaultApiVersion)).toBe(
            'https://eastus.cognitiveservices.azure.com/speechtotext/transcriptions:transcribe?api-version=2024-05-15-preview'
        )
    })

    it('uses custom baseUrl and appends api-version', () => {
        // The trailing slash on the custom endpoint is expected to be dropped.
        const customEndpoint = 'https://custom.example.com/speechtotext/transcriptions:transcribe/'
        expect(buildAzureSpeechToTextUrl(region, defaultApiVersion, customEndpoint)).toBe(
            'https://custom.example.com/speechtotext/transcriptions:transcribe?api-version=2024-05-15-preview'
        )
    })

    it('keeps existing api-version in custom baseUrl', () => {
        // An api-version already present in the endpoint wins over the argument.
        const versionedEndpoint = 'https://custom.example.com/speechtotext/transcriptions:transcribe?api-version=2023-10-01'
        expect(buildAzureSpeechToTextUrl(region, defaultApiVersion, versionedEndpoint)).toBe(
            'https://custom.example.com/speechtotext/transcriptions:transcribe?api-version=2023-10-01'
        )
    })
})
9 changes: 9 additions & 0 deletions packages/ui/src/ui-component/extended/SpeechToText.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,15 @@ const speechToTextProviders = {
placeholder: 'en-US',
optional: true
},
{
label: 'Base URL',
name: 'baseUrl',
type: 'string',
description:
'Optional custom Azure Speech endpoint URL. Leave blank to use the default regional endpoint.',
placeholder: 'https://{region}.cognitiveservices.azure.com/speechtotext/transcriptions:transcribe',
optional: true
},
{
label: 'Profanity Filter Mode',
name: 'profanityFilterMode',
Expand Down