This is a script I had Gemini write. It takes a .wav file and produces an
SVG of its amplitude envelope (peak and/or RMS). It requires SciPy and NumPy.
```python
#!/usr/bin/env python3
import argparse
import numpy as np
from scipy.io import wavfile
def get_envelopes(wav_path, num_points, smooth_window):
    """Compute peak and RMS envelopes of the first channel of a WAV file.

    The samples are split into ``num_points`` consecutive, equal-sized
    chunks; trailing samples that do not fill a whole chunk are dropped.
    When the file holds fewer samples than ``num_points``, one chunk per
    sample is used instead, so fewer values are returned than requested.

    Args:
        wav_path: Path of the WAV file, passed to ``scipy.io.wavfile.read``.
        num_points: Requested number of envelope values (must be >= 1).
        smooth_window: Width, in envelope values, of the moving average
            applied to the RMS envelope. Values of 1 or less disable
            smoothing; values larger than the envelope are clamped to its
            length.

    Returns:
        A ``(peaks, rms)`` tuple of equal-length 1-D float64 arrays: the
        maximum absolute sample and the (optionally smoothed) RMS level of
        each chunk. Both are empty when the file contains no samples.

    Raises:
        ValueError: If ``num_points`` is less than 1.
    """
    if num_points < 1:
        raise ValueError(f"num_points must be at least 1, got {num_points}")

    _sample_rate, data = wavfile.read(wav_path)

    # Multi-channel audio: only the first channel is analysed.
    if data.ndim > 1:
        data = data[:, 0]

    # float64 keeps the squaring below from overflowing integer sample types.
    data = data.astype(np.float64)

    total_frames = len(data)
    if total_frames == 0:
        return np.zeros(0, dtype=np.float64), np.zeros(0, dtype=np.float64)

    # Asking for more chunks than samples would give zero-width chunks, and
    # reducing over an empty axis raises; fall back to one chunk per sample.
    num_points = min(num_points, total_frames)
    samples_per_pixel = total_frames // num_points

    # Truncate to an exact multiple of the chunk size so the reshape is valid.
    truncated_len = num_points * samples_per_pixel
    chunks = data[:truncated_len].reshape((num_points, samples_per_pixel))

    peaks = np.max(np.abs(chunks), axis=1)
    rms = np.sqrt(np.mean(np.square(chunks), axis=1))

    if smooth_window > 1:
        # With mode='same' the result has the length of the *longer* input,
        # so an oversized kernel would change the envelope length; clamp it.
        window = min(smooth_window, len(rms))
        kernel = np.ones(window) / window
        rms = np.convolve(rms, kernel, mode="same")

    return peaks, rms
def generate_poly_points(data, width, height, max_val):
    """Build the ``points`` string of a polygon mirrored about mid-height.

    Args:
        data: Array of envelope values, one per polygon column.
        width: Horizontal extent; columns are spread evenly from 0 to width.
        height: Vertical extent; the polygon is centred on ``height / 2``.
        max_val: Value that maps to half the height. When it is not
            positive, values are used unscaled.

    Returns:
        Space-separated ``x,y`` pairs with two decimals each, tracing the
        upper edge left to right and then the lower edge right to left.
        An empty string when ``data`` is empty.
    """
    if not len(data):
        return ""

    centre = height / 2
    # Without a positive maximum there is nothing to normalise against.
    factor = centre / max_val if max_val > 0 else 1

    xs = np.linspace(0, width, len(data))
    offsets = data * factor

    # Upper edge forwards, then lower edge backwards, so the outline closes
    # on itself without crossing.
    outline_x = np.concatenate((xs, xs[::-1]))
    outline_y = np.concatenate((centre - offsets, (centre + offsets)[::-1]))

    return " ".join(
        f"{x:.2f},{y:.2f}"
        for x, y in zip(outline_x.tolist(), outline_y.tolist())
    )
def main():
    """Command-line entry point: render a WAV file's envelope(s) as an SVG.

    Parses the command line, computes the peak and RMS envelopes with
    ``get_envelopes`` (requesting one value per unit of ``--width``), and
    writes an SVG holding one filled polygon per selected envelope. Both
    polygons are scaled against the largest peak value so they share one
    vertical scale.

    Failures while reading the audio or writing the SVG are reported on
    stderr and end the process with exit status 1.
    """
    import sys  # local import: only the error path of this function needs it

    parser = argparse.ArgumentParser(description="SVG Waveform: Peak and RMS visualizer.")
    parser.add_argument("input", help="Input .wav file")
    parser.add_argument("output", help="Output .svg file")
    parser.add_argument("--width", type=int, default=1024, help="SVG width")
    parser.add_argument("--height", type=int, default=512, help="SVG height")
    parser.add_argument("--smooth", type=int, default=5, help="Smoothing window for RMS")
    parser.add_argument("--mode", choices=['peak', 'rms', 'both'], default='both',
                        help="Which envelopes to display (default: both)")
    args = parser.parse_args()

    if args.width < 1 or args.height < 1:
        parser.error("--width and --height must be positive integers")

    try:
        peaks, rms_vals = get_envelopes(args.input, args.width, args.smooth)
        global_max = np.max(peaks) if len(peaks) > 0 else 1

        # NOTE(review): the polygon and <svg> markup was empty in the code as
        # received; it is reconstructed here, with fill colours following the
        # grey (peak) / black (RMS) intent of the original comments.
        polygons = []

        # Peak polygon (grey) goes first so the RMS polygon is drawn on top.
        if args.mode in ('peak', 'both'):
            p_pts = generate_poly_points(peaks, args.width, args.height, global_max)
            polygons.append(f'<polygon points="{p_pts}" fill="grey" />')

        # RMS polygon (black).
        if args.mode in ('rms', 'both'):
            r_pts = generate_poly_points(rms_vals, args.width, args.height, global_max)
            polygons.append(f'<polygon points="{r_pts}" fill="black" />')

        body = "\n".join(f"  {polygon}" for polygon in polygons)
        svg_content = (
            f'<svg xmlns="http://www.w3.org/2000/svg" '
            f'width="{args.width}" height="{args.height}" '
            f'viewBox="0 0 {args.width} {args.height}">\n'
            f'{body}\n'
            f'</svg>\n'
        )

        with open(args.output, 'w', encoding='utf-8') as f:
            f.write(svg_content)
    except Exception as e:
        # Top-level boundary: report the problem and signal failure to the shell.
        print(f"Error processing audio: {e}", file=sys.stderr)
        sys.exit(1)

    print(f"Success: {args.output} generated in '{args.mode}' mode.")
# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
```