Dup Goto 📝

WavToSvg

DAW/scripts/python 02-06 00:56:49
To
100 lines, 375 words, 3741 chars Friday 2026-02-06 00:56:49

This is something I got Gemini to write. It takes a .wav file and produces an SVG of its envelope (peak and/or RMS). It requires scipy and numpy.

#!/usr/bin/env python3

import argparse
import numpy as np
from scipy.io import wavfile

def get_envelopes(wav_path, num_points, smooth_window):
    """Compute peak and RMS envelopes for the first channel of a WAV file.

    The samples are split into equal-sized buckets of consecutive samples,
    one bucket per envelope point. Trailing samples that do not fill a whole
    bucket are dropped.

    Args:
        wav_path: Path of the .wav file, read with ``scipy.io.wavfile.read``.
        num_points: Requested number of envelope points. When the file holds
            fewer samples than this, one point per sample is produced instead.
        smooth_window: Length of the moving-average window applied to the RMS
            envelope. Values of 1 or less disable smoothing; larger values
            are capped at the envelope length.

    Returns:
        A ``(peaks, rms)`` tuple of float64 arrays of equal length. Both are
        empty when the file contains no samples.

    Raises:
        ValueError: If ``num_points`` is less than 1.
    """
    if num_points < 1:
        raise ValueError("num_points must be at least 1")

    _sample_rate, data = wavfile.read(wav_path)

    # If stereo/multi-channel, take the first channel (Left)
    if data.ndim > 1:
        data = data[:, 0]

    # 8-bit PCM is stored unsigned with silence at 128; every other format
    # returned by wavfile.read is already centred on zero.
    is_unsigned_8bit = data.dtype == np.uint8

    # Convert to float64 to prevent overflow during squaring
    data = data.astype(np.float64)
    if is_unsigned_8bit:
        data -= 128.0

    total_frames = len(data)
    if total_frames == 0:
        # Nothing to measure: return empty envelopes instead of letting the
        # reductions below fail on a zero-size array.
        return np.zeros(0, dtype=np.float64), np.zeros(0, dtype=np.float64)

    # A file shorter than the requested resolution would otherwise produce
    # zero-width buckets, so fall back to one sample per point.
    num_points = min(num_points, total_frames)
    samples_per_pixel = total_frames // num_points

    # Truncate to a perfect multiple for reshaping
    truncated_len = num_points * samples_per_pixel
    reshaped_data = data[:truncated_len].reshape((num_points, samples_per_pixel))

    # Vectorized Peak calculation
    peaks = np.max(np.abs(reshaped_data), axis=1)

    # Vectorized RMS calculation
    rms = np.sqrt(np.mean(np.square(reshaped_data), axis=1))

    # Apply smoothing to RMS via convolution. mode='same' yields
    # max(len(rms), len(kernel)) values, so cap the window at the envelope
    # length to keep rms and peaks the same size.
    window = min(smooth_window, len(rms))
    if window > 1:
        kernel = np.ones(window) / window
        rms = np.convolve(rms, kernel, mode='same')

    return peaks, rms

def generate_poly_points(data, width, height, max_val):
    """Build the SVG ``points`` attribute for a mirrored envelope polygon.

    Each envelope value is drawn as a vertical extent centred on the
    horizontal mid-line of the drawing: the outline runs along the upper
    edge from left to right and returns along the lower edge from right to
    left, so the polygon closes on itself.

    Args:
        data: Array of envelope values, one per horizontal position.
        width: Drawing width; x coordinates are spread evenly over 0..width.
        height: Drawing height; the mid-line sits at ``height / 2``.
        max_val: Value mapped to half the height. When it is not positive,
            values are used unscaled.

    Returns:
        Space-separated ``x,y`` pairs with two decimals each, or an empty
        string when ``data`` has no elements.
    """
    if len(data) < 1:
        return ""

    center = height / 2
    if max_val > 0:
        gain = center / max_val
    else:
        gain = 1

    offsets = data * gain
    xs = np.linspace(0, width, len(data))

    # Upper edge left->right followed by lower edge right->left.
    outline_x = np.concatenate((xs, xs[::-1]))
    outline_y = np.concatenate((center - offsets, (center + offsets)[::-1]))

    return " ".join(f"{x:.2f},{y:.2f}" for x, y in zip(outline_x, outline_y))

def main():
    """Command-line entry point: render a WAV file's envelope as an SVG.

    Parses the command line, obtains the peak and RMS envelopes from
    ``get_envelopes``, turns the selected ones into ``<polygon>`` elements
    via ``generate_poly_points`` and writes the resulting SVG document to
    the output path. Both envelopes are scaled against the largest peak so
    they share one vertical scale.

    Exits with status 2 (through ``argparse``) when ``--width`` or
    ``--height`` is not positive, and with status 1 and a message on stderr
    when reading the audio or writing the SVG fails.
    """
    parser = argparse.ArgumentParser(description="SVG Waveform: Peak and RMS visualizer.")
    parser.add_argument("input", help="Input .wav file")
    parser.add_argument("output", help="Output .svg file")
    parser.add_argument("--width", type=int, default=1024, help="SVG width")
    parser.add_argument("--height", type=int, default=512, help="SVG height")
    parser.add_argument("--smooth", type=int, default=5, help="Smoothing window for RMS")
    parser.add_argument("--mode", choices=['peak', 'rms', 'both'], default='both',
                        help="Which envelopes to display (default: both)")

    args = parser.parse_args()

    # The width doubles as the number of envelope points, so a non-positive
    # value cannot produce a drawing; reject it up front with a usage error.
    if args.width < 1 or args.height < 1:
        parser.error("--width and --height must be positive integers")

    try:
        peaks, rms_vals = get_envelopes(args.input, args.width, args.smooth)
        global_max = np.max(peaks) if len(peaks) > 0 else 1

        polygons = []

        # Define the Peak polygon (Grey)
        if args.mode in ['peak', 'both']:
            p_pts = generate_poly_points(peaks, args.width, args.height, global_max)
            polygons.append(f'<polygon points="{p_pts}" fill="#E8E8E8" stroke="#CCCCCC" stroke-width="0.5" />')

        # Define the RMS polygon (Black), drawn after the peak polygon so it
        # appears on top of it.
        if args.mode in ['rms', 'both']:
            r_pts = generate_poly_points(rms_vals, args.width, args.height, global_max)
            polygons.append(f'<polygon points="{r_pts}" fill="black" />')

        svg_content = f'''<svg viewBox="0 0 {args.width} {args.height}" xmlns="http://www.w3.org/2000/svg">
  {"".join(polygons)}
</svg>'''

        with open(args.output, 'w', encoding='utf-8') as f:
            f.write(svg_content)

    except Exception as e:
        # Top-level boundary: report the failure on stderr and exit non-zero
        # so shells and build scripts can detect that no SVG was produced.
        raise SystemExit(f"Error processing audio: {e}") from e

    print(f"Success: {args.output} generated in '{args.mode}' mode.")


if __name__ == "__main__":
    main()