I ran the code from the GitHub repo sound_classification_ml_production and got good accuracy, but found that the .npy features were pre-produced by the author. So I want to produce my own .npy files, and I added the code below:
# FeatureExtractor class including librosa audio processing functions
class FeatureExtractor:
...
def compute_save_features(self,
                          mode='mfcc',
                          sample_rate=22050,
                          n_fft=2048,
                          hop_length=512,
                          n_mfcc=40,
                          output_path='features',
                          deltas=False
                          ):
    """Extract features for every file in ``self.dataset_df`` and save each as ``.npy``.

    Each path in ``self.dataset_df['filepath']`` is prefixed with the
    module-level ``base_path``, loaded with ``librosa.load`` and converted
    to the representation selected by ``mode``. The result is zero-padded
    on the right along axis 1 up to ``max_pad`` columns and written to
    ``output_path`` under the audio file's base name with a ``.npy``
    extension.

    Args:
        mode: One of ``'mfcc'``, ``'stft'`` or ``'mel-spectogram'``.
        sample_rate: Target sampling rate passed to ``librosa.load``.
        n_fft: FFT window size forwarded to the feature helpers.
        hop_length: Hop length forwarded to the feature helpers.
        n_mfcc: Number of MFCC coefficients (used when ``mode == 'mfcc'``).
        output_path: Folder receiving the ``.npy`` files; created if missing.
        deltas: Forwarded to ``self.compute_mfcc``.

    Raises:
        ValueError: If ``mode`` is not one of the supported values.
    """
    # Reject an unsupported mode up front. Without this check the loop would
    # reach np.pad with `audio_features` unbound and fail with a NameError
    # only after the output folder was created and a file was decoded.
    if mode not in ('mfcc', 'stft', 'mel-spectogram'):
        raise ValueError(
            "Unsupported mode {!r}; expected 'mfcc', 'stft' or "
            "'mel-spectogram'".format(mode))

    dataset_features = []
    max_pad = self._compute_max_pad_length(self.max_audio_duration,
                                           sample_rate=sample_rate,
                                           n_fft=n_fft,
                                           hop_length=hop_length)
    print('Max Padding = ', max_pad)
    if not os.path.exists(output_path):
        print('Creating output folder: ', output_path)
        os.makedirs(output_path)
    else:
        print('Output folder already existed')
    print('Saving features in ', output_path)
    i = 0
    t = time.time()
    features_path = []
    for relative_filepath in self.dataset_df['filepath']:
        filepath = base_path + relative_filepath
        print('compute_save_features, filepath = ' + str(filepath))
        if i % 100 == 0:
            print('{} files processed in {}s'.format(i, time.time() - t))
        print('compute_save_features, librosa.load, filepath = ' + str(filepath))
        audio_file, sample_rate = librosa.load(filepath, sr=sample_rate, res_type='kaiser_fast')
        if mode == 'mfcc':
            audio_features = self.compute_mfcc(audio_file, sample_rate, n_fft, hop_length, n_mfcc, deltas)
        elif mode == 'stft':
            audio_features = self.compute_stft(audio_file, sample_rate, n_fft, hop_length)
        elif mode == 'mel-spectogram':
            audio_features = self.compute_mel_spectogram(audio_file, sample_rate, n_fft, hop_length)
        # Right-pad axis 1 with zeros so every saved array has max_pad columns.
        # NOTE(review): np.pad raises ValueError if a clip yields more than
        # max_pad columns (negative pad width) - confirm max_audio_duration
        # covers the longest clip.
        audio_features = np.pad(audio_features, pad_width=((0, 0), (0, max_pad - audio_features.shape[1])))
        print('compute_save_features, audio_features = ' + str(type(audio_features)) + ', ' + str(audio_features))
        # Save this file's features to its own .npy. Swap only the extension:
        # str.replace('wav', 'npy') would also rewrite any "wav" inside the
        # file name itself and would miss an upper-case ".WAV".
        stem = os.path.splitext(os.path.basename(filepath))[0]
        npy_path = os.path.join(output_path, stem + '.npy')
        print('compute_save_features, npy_path = ' + str(npy_path))
        np.save(npy_path, audio_features)
...
# Build the extractor, passing the path of the UrbanSound8K metadata CSV under base_path.
fe = FeatureExtractor(base_path + 'UrbanSound8K/metadata/UrbanSound8K.csv')
# Compute MFCC features (n_mfcc=13, deltas=True) and write one .npy per audio file
# into base_path + 'my_features_mfcc'.
# NOTE(review): the method's return statement is not shown in this excerpt - confirm
# what dataset_df actually holds.
dataset_df = fe.compute_save_features(mode='mfcc', n_mfcc=13, output_path=base_path + 'my_features_mfcc', deltas=True)
...
After comparing my output with the author's original .npy files, there are many differences:
Could anyone please tell me how to produce exactly the same .npy features as the ones provided in the repo? Thanks in advance.