# -*- coding: utf-8 -*-"""Audio Processing UtilitiesThis module provides utilities for audio segmentation and metadata extraction.It uses soundfile for direct audio I/O to avoid deprecated audioread dependencies."""importtypingasTimportioimportmathimportsoundfile
[docs]defsegment_audio_by_count(audio:T.BinaryIO,n_seg:int,)->list[bytes]:""" Split audio into a fixed number of segments with equal duration. Each segment will have approximately the same duration, with the last segment potentially being slightly longer to include any remaining samples. :param audio: Audio data as a binary stream (e.g., io.BytesIO from file bytes) :param n_seg: Number of segments to create (must be positive integer) :return: List of WAV audio segments as bytes, ready for further processing Example: >>> audio_bytes = Path("audio.mp3").read_bytes() >>> audio_stream = io.BytesIO(audio_bytes) >>> segments = segment_audio_by_count(audio_stream, 4) >>> print(f"Created {len(segments)} segments") """# Reset stream to beginning to ensure we read from startaudio.seek(0)# Load audio data directly with soundfile (avoids deprecated audioread)audio_data,sample_rate=soundfile.read(audio)# Calculate samples per segment for equal distributiontotal_samples=len(audio_data)samples_per_segment=total_samples//n_segsegments=[]# Create segments with equal sample countsforsegment_idxinrange(n_seg):# Calculate segment boundaries in sample indicesstart_sample=segment_idx*samples_per_segment# Last segment includes any remaining samples to avoid data lossifsegment_idx==n_seg-1:end_sample=total_sampleselse:end_sample=(segment_idx+1)*samples_per_segment# Extract audio data for this segmentsegment_audio_data=audio_data[start_sample:end_sample]# Convert segment to WAV bytes for compatibilitysegment_buffer=io.BytesIO()soundfile.write(segment_buffer,segment_audio_data,sample_rate,format="WAV")segment_buffer.seek(0)# Store the complete WAV file as bytessegments.append(segment_buffer.getvalue())returnsegments
[docs]defget_audio_duration(audio:T.BinaryIO)->float:""" Get audio duration in seconds from audio metadata without loading audio data. This function reads only the audio file header to extract duration information, making it efficient for large audio files where you only need the duration. :param audio: Audio data as a binary stream (e.g., io.BytesIO from file bytes) :return: Audio duration in seconds as a floating-point number Example: >>> audio_bytes = Path("recording.wav").read_bytes() >>> audio_stream = io.BytesIO(audio_bytes) >>> duration = get_audio_duration(audio_stream) >>> print(f"Audio is {duration:.1f} seconds long") """# Reset stream to beginning for reliable metadata readingaudio.seek(0)# Extract audio metadata efficiently (header-only, no data loading)audio_info=soundfile.info(audio)# Reset stream position for subsequent operationsaudio.seek(0)returnaudio_info.duration
[docs]defsegment_audio_by_duration(audio:T.BinaryIO,duration:float,)->list[bytes]:""" Split audio into segments with a target duration per segment. The audio will be divided into segments where each segment (except possibly the last one) has approximately the specified duration. The last segment may be shorter if the total duration is not evenly divisible. :param audio: Audio data as a binary stream (e.g., io.BytesIO from file bytes) :param duration: Target duration for each segment in seconds (can be fractional) :return: List of WAV audio segments as bytes, ready for further processing Example: >>> audio_bytes = Path("lecture.mp3").read_bytes() >>> audio_stream = io.BytesIO(audio_bytes) >>> # Split into 2-minute segments >>> segments = segment_audio_by_duration(audio_stream, 120.0) >>> print(f"Created {len(segments)} segments of ~2 minutes each") """# Get total audio duration from metadatatotal_duration=get_audio_duration(audio)# Calculate number of segments needed to achieve target durationnum_segments=math.ceil(total_duration/duration)# Delegate to count-based segmentation for consistent behaviorreturnsegment_audio_by_count(audio,num_segments)