# 1. Reference (tutorial walkthrough): https://www.jiqizhixin.com/articles/2019-01-11-25
# Music-genre classification script.
#
# Stages (the first three are kept commented out because their outputs --
# img_data/, data.csv and music_model.h5 -- are expected to exist already):
#   1. Render a spectrogram image for every audio clip.
#   2. Extract per-clip audio features with librosa into data.csv.
#   3. Train a dense Keras classifier and save it as music_model.h5.
#   4. Load the saved model and report its accuracy on a held-out split.

# Feature extraction and data preprocessing
import csv
import os
import pathlib
import warnings

import librosa
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from PIL import Image

# Preprocessing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler

# Keras
import keras
from keras import layers
from keras import models
from keras.models import load_model

warnings.filterwarnings('ignore')

# Colormap and figure are only consumed by the commented-out spectrogram
# stage below; they are kept so that stage can be re-enabled unchanged.
cmap = plt.get_cmap('inferno')
plt.figure(figsize=(10,10))

# ---------------------------------------------------------------------------
# Stage 1 (disabled): convert each clip into its spectrogram and save it
# under the img_data folder.
# ---------------------------------------------------------------------------
# genres = 'blues classical country disco hiphop jazz metal pop reggae rock'.split()
# for g in genres:
#     pathlib.Path(f'img_data/{g}').mkdir(parents=True, exist_ok=True)
#     for filename in os.listdir(f'./music/{g}'):
#         songname = f'./music/{g}/{filename}'
#         y, sr = librosa.load(songname, mono=True, duration=5)
#         plt.specgram(y, NFFT=2048, Fs=2, Fc=0, noverlap=128, cmap=cmap, sides='default', mode='default', scale='dB');
#         plt.axis('off')
#         plt.savefig(f'img_data/{g}/{filename[:-3].replace(".", "")}.png')
#         plt.clf()

# ---------------------------------------------------------------------------
# Stage 2 (disabled): extract the features of every audio file into data.csv.
# ---------------------------------------------------------------------------
# header = 'filename chroma_stft rmse spectral_centroid spectral_bandwidth rolloff zero_crossing_rate'
# for i in range(1, 21):
#     header += f' mfcc{i}'
# header += ' label'
# header = header.split()
# file = open('data.csv', 'w', newline='')
# with file:
#     writer = csv.writer(file)
#     writer.writerow(header)
# genres = 'blues classical country disco hiphop jazz metal pop reggae rock'.split()
# for g in genres:
#     for filename in os.listdir(f'./music/{g}'):
#         songname = f'./music/{g}/{filename}'
#         y, sr = librosa.load(songname, mono=True, duration=30)
#         chroma_stft = librosa.feature.chroma_stft(y=y, sr=sr)
#         rmse=librosa.feature.rms(y=y)
#         spec_cent = librosa.feature.spectral_centroid(y=y, sr=sr)
#         spec_bw = librosa.feature.spectral_bandwidth(y=y, sr=sr)
#         rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)
#         zcr = librosa.feature.zero_crossing_rate(y)
#         mfcc = librosa.feature.mfcc(y=y, sr=sr)
#         to_append = f'{filename} {np.mean(chroma_stft)} {np.mean(rmse)} {np.mean(spec_cent)} {np.mean(spec_bw)} {np.mean(rolloff)} {np.mean(zcr)}'
#         for e in mfcc:
#             to_append += f' {np.mean(e)}'
#         to_append += f' {g}'
#         file = open('data.csv', 'a', newline='')
#         with file:
#             writer = csv.writer(file)
#             writer.writerow(to_append.split())

# ---------------------------------------------------------------------------
# Stage 3: prepare the data (live) and train the model with Keras (disabled).
# ---------------------------------------------------------------------------
data = pd.read_csv('data.csv')

# The last column holds the genre label; encode it as integers
# (0-9 for the ten genres listed in the extraction stage above).
genre_list = data.iloc[:, -1]
encoder = LabelEncoder()
y = encoder.fit_transform(genre_list)

# Standardize the feature columns. Column 0 (the filename, per the header
# written by the extraction stage) and the final label column are excluded.
scaler = StandardScaler()
X = scaler.fit_transform(np.array(data.iloc[:, 1:-1], dtype = float))

# Split the dataset: 80% train / 20% test.
# NOTE(review): no random_state is set, so this split differs on every run.
# If music_model.h5 was trained by the disabled code below in an earlier run,
# X_test here can contain samples the model was trained on, which makes the
# reported accuracy optimistic. Confirm before relying on the number.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# model = models.Sequential()
# model.add(layers.Dense(256, activation='relu', input_shape=(X_train.shape[1],)))
# model.add(layers.Dense(128, activation='relu'))
# model.add(layers.Dense(64, activation='relu'))
# model.add(layers.Dense(10, activation='softmax'))
# model.compile(optimizer='adam',
#               loss='sparse_categorical_crossentropy',
#               metrics=['accuracy'])
# history = model.fit(X_train,
#                     y_train,
#                     epochs=20,
#                     batch_size=128)
# test_loss, test_acc = model.evaluate(X_test,y_test)
# print()
# print('test_acc: ',test_acc)
# print('test_loss: ',test_loss)
# model.save('music_model.h5')

# ---------------------------------------------------------------------------
# Stage 4: validation with the previously saved model.
# ---------------------------------------------------------------------------
model = load_model('music_model.h5')

predictions = model.predict(X_test)

# One predicted class index per test sample (highest-scoring output unit).
predicted_labels = np.argmax(predictions, axis=1)

# Print each sample's own prediction beside its true label. The original
# printed predictions[0] on every line, i.e. always the first sample's
# prediction, which did not correspond to the label shown next to it.
for predicted_label, true_label in zip(predicted_labels, y_test):
    print("预测:", predicted_label, "真实:", true_label)

# `total` replaces the original name `sum`, which shadowed the builtin.
total = len(predictions)
correct = int(np.count_nonzero(predicted_labels == y_test))
print("正确率:", correct / total)