Source code for audinota.utils

# -*- coding: utf-8 -*-

"""
Audio Processing Utilities

This module provides utilities for audio segmentation and metadata extraction.
It uses soundfile for direct audio I/O to avoid deprecated audioread dependencies.
"""

import typing as T
import io
import math

import soundfile


def segment_audio_by_count(
    audio: T.BinaryIO,
    n_seg: int,
) -> list[bytes]:
    """
    Split audio into a fixed number of segments with equal duration.

    The decoded samples are divided into ``n_seg`` consecutive slices of
    ``total_samples // n_seg`` samples each. The last segment additionally
    receives any remainder, so it may be slightly longer than the others.
    If ``n_seg`` is larger than the number of decoded samples, the leading
    segments contain no samples and the last one holds all of the audio.

    :param audio: Audio data as a binary stream (e.g., io.BytesIO from file bytes)
    :param n_seg: Number of segments to create (must be positive integer)

    :return: List of WAV audio segments as bytes, ready for further processing

    :raises ValueError: if ``n_seg`` is smaller than 1

    Example:
        >>> audio_bytes = Path("audio.mp3").read_bytes()
        >>> audio_stream = io.BytesIO(audio_bytes)
        >>> segments = segment_audio_by_count(audio_stream, 4)
        >>> print(f"Created {len(segments)} segments")
    """
    # Fail fast, before touching the stream: zero would otherwise surface as
    # a ZeroDivisionError below, and a negative count would silently yield [].
    if n_seg < 1:
        raise ValueError(f"n_seg must be a positive integer, got {n_seg!r}")

    # Reset stream to beginning to ensure we read from start
    audio.seek(0)

    # Load audio data directly with soundfile (avoids deprecated audioread)
    audio_data, sample_rate = soundfile.read(audio)

    # Calculate samples per segment for equal distribution
    total_samples = len(audio_data)
    samples_per_segment = total_samples // n_seg

    segments = []
    for segment_idx in range(n_seg):
        start_sample = segment_idx * samples_per_segment
        # Last segment includes any remaining samples to avoid data loss
        if segment_idx == n_seg - 1:
            end_sample = total_samples
        else:
            end_sample = start_sample + samples_per_segment

        # Encode this slice as a standalone WAV file held in memory
        segment_buffer = io.BytesIO()
        soundfile.write(
            segment_buffer,
            audio_data[start_sample:end_sample],
            sample_rate,
            format="WAV",
        )
        # getvalue() returns the whole buffer regardless of stream position
        segments.append(segment_buffer.getvalue())

    return segments
def get_audio_duration(audio: T.BinaryIO) -> float:
    """
    Return the duration of an audio stream in seconds.

    The duration is taken from the metadata object returned by
    ``soundfile.info`` rather than by decoding the samples into an array,
    which keeps the call cheap when only the length is needed. The stream
    is rewound to position 0 both before and after the query, so callers
    can read the same stream again right away.

    :param audio: Audio data as a binary stream (e.g., io.BytesIO from file bytes)

    :return: Audio duration in seconds as a floating-point number

    Example:
        >>> audio_bytes = Path("recording.wav").read_bytes()
        >>> audio_stream = io.BytesIO(audio_bytes)
        >>> duration = get_audio_duration(audio_stream)
        >>> print(f"Audio is {duration:.1f} seconds long")
    """
    # Start from the beginning so the file header is what gets parsed
    audio.seek(0)
    duration_seconds = soundfile.info(audio).duration

    # Leave the stream rewound for whoever consumes it next
    audio.seek(0)
    return duration_seconds
def segment_audio_by_duration(
    audio: T.BinaryIO,
    duration: float,
) -> list[bytes]:
    """
    Split audio into segments with a target duration per segment.

    The number of segments is ``ceil(total_duration / duration)`` and the
    audio is then split into that many equally sized segments by
    :func:`segment_audio_by_count`. Each segment is therefore at most
    approximately ``duration`` seconds long, and may be shorter when the
    total duration is not evenly divisible.

    :param audio: Audio data as a binary stream (e.g., io.BytesIO from file bytes)
    :param duration: Target duration for each segment in seconds (can be
        fractional, must be greater than zero)

    :return: List of WAV audio segments as bytes, ready for further processing

    :raises ValueError: if ``duration`` is zero, negative, or NaN

    Example:
        >>> audio_bytes = Path("lecture.mp3").read_bytes()
        >>> audio_stream = io.BytesIO(audio_bytes)
        >>> # Split into 2-minute segments
        >>> segments = segment_audio_by_duration(audio_stream, 120.0)
        >>> print(f"Created {len(segments)} segments of ~2 minutes each")
    """
    # Reject unusable targets up front. Written as ``not (x > 0)`` so that
    # NaN is rejected too. Without this check, zero fails with a bare
    # ZeroDivisionError and a negative value produces a negative segment
    # count that silently yields no segments.
    if not (duration > 0):
        raise ValueError(
            f"duration must be a positive number of seconds, got {duration!r}"
        )

    # Get total audio duration from metadata
    total_duration = get_audio_duration(audio)

    # Calculate number of segments needed to achieve target duration
    num_segments = math.ceil(total_duration / duration)

    # Delegate to count-based segmentation for consistent behavior
    return segment_audio_by_count(audio, num_segments)