3
votes

I am getting the peak frequency from wav files

My code for getting the peak frequency from a wav file is:

import wave
import struct
import numpy as np
import wave
import contextlib

if __name__ == '__main__':
    fname = "test.wav"
    frate = 0
    data_size = 0
    with contextlib.closing(wave.open(fname,'r')) as f:
        frate = f.getframerate()
        data_size = f.getnframes()
    wav_file = wave.open(fname, 'r')
    data = wav_file.readframes(data_size)
    data_size = data_size * wav_file.getnchannels()
    print wav_file.getparams()
    wav_file.close()
    data = struct.unpack('{n}h'.format(n=data_size), data)
    data = np.array(data)

    w = np.fft.fft(data)
    freqs = np.fft.fftfreq(len(w))
    print(freqs.min(), freqs.max())

    # Find the peak in the coefficients
    idx = np.argmax(np.abs(w))
    freq = freqs[idx]
    freq_in_hertz = abs(freq * frate)
    print(freq_in_hertz)

I recorded a wav file with 48000 sample rate, 16 bitwidth, 2 channels. In that file I have a sine tone with 1000Hz. But the script outputting only 500Hz. I dont know where I went wrong. But for single channel and generated wav file with 48000 sample rate, 16 bitwidth, 2 channels it is working fine.

I generated the wav file using the following script

import math
import wave
import struct

if __name__ == '__main__':
    # http://stackoverflow.com/questions/3637350/how-to-write-stereo-wav-files-in-python
    # http://www.sonicspot.com/guide/wavefiles.html
    freq = 1000
    data_size = 454656 * 2
    fname = "test.wav"
    frate = 48000.0
    amp = 64000.0
    nchannels = 2
    sampwidth = 2
    framerate = int(frate)
    nframes = data_size
    comptype = "NONE"
    compname = "not compressed"
    data = [math.sin(2 * math.pi * freq * (x / frate))
            for x in range(data_size)]
    wav_file = wave.open(fname, 'w')
    wav_file.setparams(
        (nchannels, sampwidth, framerate, nframes, comptype,     compname))
    for v in data:
        wav_file.writeframes(struct.pack('h', int(v * amp / 2)))
    wav_file.close()

I dont know where I did wrong. I uploaded my wav files at script generated wav script_gen.wav with 48000 sample rate, 2 channels, 16 bit. Recorded wavs: 2 channel wav with 48000 sample rate, 2 channels, 16 bit 1 channel wav(Not allowing to post the link here, so will post in the comments) with 48000 sample rate, 1 channel, 16 bit.

I checked all these peak frequency in audacity, it showing 1000Khz only.

But when I tried with my scirpt I am getting correct output for 1 channel wav and failing for 2 channel wav.

update: I am getting the half of the peak frequency as the output for 2 channels.

I am feeling that I missed something. Can anyone help me in this?

2
1 channel wavuser6464594

2 Answers

1
votes

Why so complicated? Consider the following

#!/usr/bin/env python3
import numpy as np
from numpy import fft
import scipy.io.wavfile as wf
import matplotlib.pyplot as plt

sr = 44100    # sample rate
len_sig = 2   # length of resulting signal in seconds

f = 1000      # frequency in Hz

# set you time axis
t = np.linspace(0, len_sig, sr*len_sig)

# set your signal
mono_data = np.sin(2*np.pi*t*f)

# write single channel .wav file
wf.write('mono.wav', sr, mono_data)

# write two-channel .wav file 
stereo_data = np.vstack((mono_data, mono_data)).T
wf.write('stereo.wav', sr, stereo_data)

Now test it by loading and analyzing the data

# Load data
mono_sr, mono_data = wf.read('mono.wav')
stereo_sr, stereo_data = wf.read('stereo.wav')

# analyze the data
X_mono = fft.fft(mono_data) / len(mono_data)    # remember to normalize your amplitudes

# Remember that half of energy of the signal is distributed over the 
# positive frequencies and the other half over the negative frequencies.
# 
# Commonly you want see a magnitude spectrum. That means, we ignore the phases. Hence, we
# simply multiply the spectrum by 2 and consider ONLY the first half of it.
freq_nq = len(X_mono) // 2
X_mono = abs(X_mono[:freq_nq]) * 2
freqs_mono = fft.fftfreq(len(mono_data), 1/mono_sr)[:freq_nq]

# in order the analyze a stereo signal you first have to add both channels
sum_stereo = stereo_data.sum(axis=1) / 2

# and now the same way as above
freq_nq = len(sum_stereo) // 2
X_stereo= abs(fft.fft(sum_stereo))[:freq_nq] / len(stereo_data) * 2
freqs_stereo = fft.fftfreq(len(stereo_data), 1/stereo_sr)[:freq_nq]

Peak picking:

freqs_mono[np.argmax(X_mono)]        # == 1000.0
freqs_stereo[np.argmax(X_stereo)]    # == 1000.0

Plot the result:

fig, (ax1, ax2) = plt.subplots(2, figsize=(10,5), sharex=True, sharey=True)
ax1.set_title('mono signal')
ax1.set_xlim([0, 2000])
ax1.plot(freqs_mono, X_mono, 'b', lw=2)

ax2.set_title('stereo signal')
ax2.plot(freqs_stereo, X_stereo, 'g', lw=2)
ax2.set_xlim([0, 2000])
plt.tight_layout()
plt.show()

Mono and stereo peaks

0
votes

I think this will help you on the way. Just added few more stuff to work with the way you looking. Used MaxPowers logic. You need to convert the 24 bit data to 32 bit and then this will work for 24 bit too.

import sys
import wave
import struct
import numpy as np
import wave
import argparse

def parse_arguments():
    """Parses command line arguments."""
    parser = argparse.ArgumentParser(description='Tool to get peak frequency')
    parser.add_argument('fname', metavar='test.wav', type=str,
                        help='Path to a wav file')
    args = parser.parse_args()
    return args


def main():
    args = parse_arguments()
    fname = args.fname
    wav_file = wave.open(fname, 'r')
    frate = wav_file.getframerate()
    data_size = wav_file.getnframes()
    data = wav_file.readframes(data_size)
    nChannels = wav_file.getnchannels()
    nSample = wav_file.getsampwidth()
    data_size = data_size * nChannels * nSample
    wav_file.close()
    if nSample == 2:
        fmt = "<i2"
    else :
        fmt = "<i4"
    data = np.frombuffer(data,dtype=fmt)
    if nChannels == 2 :
        data = data.reshape(-1,nChannels)
        data = data.sum(axis=1) / 2
    # and now the same way as above as said by maxpowers
    freq_nq = len(data) // 2
    X= abs(np.fft.fft(data))[:freq_nq] / len(data) * 2
    freqs = np.fft.fftfreq(len(data), 1./frate)[:freq_nq]
    print freqs[np.argmax(X)] 

if __name__ == '__main__':
    try:
        main()
    except (Exception) as e:
        print str(e)
        sys.exit(255)