Skip to main content

Python

This sample script demonstrates how to interact with the Recordly API by performing two operations:

  1. Starting a transcription (POST /transcription)
  2. Retrieving the transcription result (GET /transcription/<job_id>)

API Endpoints

  1. POST /transcription

    • URL: https://platform.recordly.ai/transcription
    • Headers:
      • Content-Type: application/json
      • Authorization: Bearer <TOKEN>
    • Body Parameters:
      • audio_path (required): The URL of the audio to be transcribed. This can be a public video URL.
      • task (optional, default: "transcribe"): Specifies the task to perform.
      • language (optional, default: "en"): The language of the transcription.
      • initial_prompt (optional, default: None): An initial text prompt to guide the transcription.
      • align (optional, default: true): Enables word alignment in the transcript.
      • diarize (optional, default: true): Enables speaker diarization to identify different speakers.
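  2. GET /transcription/<job_id>

    • URL: https://platform.recordly.ai/transcription/<job_id>
    • Headers:
      • Authorization: Bearer <TOKEN>
    • Path Parameters:
      • job_id (required): The job identifier returned by POST /transcription.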

Usage

The example below shows how to start a transcription request and then retrieve the result using the returned job_id.

import requests
import json
import time

BASE_URL = "https://platform.recordly.ai"
TOKEN = "<TOKEN>"

def start_transcription(audio_path: str = None, task: str = "transcribe",
                        language: str = "en", initial_prompt=None,
                        align: bool = True, diarize: bool = True,
                        timeout: float = 30.0):
    """
    Starts a transcription task.

    Parameters:
    -----------
    audio_path : str
        The URL of the audio file to transcribe. Required, even though the
        signature keeps a None default for backward compatibility.
    task : str, optional
        The task to perform. Default is "transcribe".
    language : str, optional
        The language of the transcription. Default is "en".
    initial_prompt : str, optional
        An initial prompt to guide the transcription process. Default is None.
    align : bool, optional
        If true, enables word alignment. Default is True.
    diarize : bool, optional
        If true, enables speaker diarization. Default is True.
    timeout : float, optional
        Seconds to wait for the server before giving up. Default is 30.0.

    Returns:
    --------
    dict : The JSON response from the API.

    Raises:
    -------
    ValueError
        If audio_path is None or empty.
    requests.HTTPError
        If the API answers with an error status code.
    """
    # Fail fast on a missing URL instead of sending a request that cannot succeed.
    if not audio_path:
        raise ValueError(
            "audio_path is required: pass the URL of the audio to transcribe."
        )

    endpoint = f"{BASE_URL}/transcription"
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {TOKEN}",
    }
    payload = {
        "audio_path": audio_path,
        "task": task,
        "language": language,
        "initial_prompt": initial_prompt,
        "align": align,
        "diarize": diarize,
    }

    print("Starting transcription with payload:")
    print(json.dumps(payload, indent=2))

    # requests never times out on its own; without this an unresponsive
    # server would block the caller indefinitely.
    response = requests.post(endpoint, headers=headers, json=payload,
                             timeout=timeout)
    response.raise_for_status()
    return response.json()

def get_transcription_result(job_id: str, timeout: float = 30.0):
    """
    Retrieves the transcription result for the given job_id.

    Parameters:
    -----------
    job_id : str
        The unique identifier of the transcription task.
    timeout : float, optional
        Seconds to wait for the server before giving up. Default is 30.0.

    Returns:
    --------
    dict : The JSON response from the API.

    Raises:
    -------
    ValueError
        If job_id is None or empty.
    requests.HTTPError
        If the API answers with an error status code.
    """
    # An empty job_id would build ".../transcription/" (or ".../transcription/None"),
    # which addresses a different endpoint; reject it up front.
    if not job_id:
        raise ValueError("job_id is required to fetch a transcription result.")

    endpoint = f"{BASE_URL}/transcription/{job_id}"
    headers = {
        "Authorization": f"Bearer {TOKEN}",
    }

    # requests never times out on its own; bound the wait explicitly.
    response = requests.get(endpoint, headers=headers, timeout=timeout)
    response.raise_for_status()
    return response.json()

def main():
    """Run the sample end to end: submit a job, pause, then fetch its result."""
    # Public URL of the media to transcribe (a video URL is accepted too).
    audio_path = "<AUDIO_PATH>"  # Replace with the actual audio URL

    # Step 1: submit the transcription request and echo the raw reply.
    print("Starting transcription process...")
    started = start_transcription(audio_path=audio_path)
    print("Transcription initiated:")
    print(json.dumps(started, indent=2))

    # Step 2: the reply is expected to carry the identifier of the new job.
    job_id = started.get("job_id")
    if not job_id:
        print("Error: No job_id was returned when starting the transcription.")
        return

    print(f"\nJob ID: {job_id} - Fetching transcription result...")
    # Single fixed pause before fetching; lengthen it for longer recordings.
    time.sleep(10)

    # Step 3: fetch and display whatever the service has for this job.
    outcome = get_transcription_result(job_id)
    print("Transcription result:")
    print(json.dumps(outcome, indent=2))


if __name__ == "__main__":
    main()