"""Demo: remove outliers from a 1-D sample using the modified z-score.

Running this file as a script plots a histogram of the raw sample and a
histogram of the sample with outliers removed. Importing it only defines
``is_outlier`` and ``main``; nothing is plotted on import.
"""
import numpy as np


def is_outlier(points, threshold=3.5):
    """Return a boolean mask marking which observations are outliers.

    Each observation's distance from the per-column median is scaled by
    the median of those distances (the median absolute deviation, MAD)
    and by the constant 0.6745, giving a "modified z-score". Observations
    whose score exceeds ``threshold`` are flagged.

    Args:
        points: Array-like of shape ``(n,)`` or ``(n, d)``. A 1-D input is
            treated as ``n`` one-dimensional observations.
        threshold: Modified z-score above which an observation counts as
            an outlier.

    Returns:
        Boolean ``numpy.ndarray`` with one entry per observation; ``True``
        marks an outlier.
    """
    # Accept lists/tuples as well as ndarrays.
    points = np.asarray(points)
    if points.ndim == 1:
        points = points[:, None]

    # Nothing to compare against: return an empty/all-False mask instead
    # of taking the median of an empty array (which yields NaN + warnings).
    if points.size == 0:
        return np.zeros(points.shape[:-1], dtype=bool)

    # Per-column median of the observations.
    median = np.median(points, axis=0)

    # Euclidean distance of every observation from the median.
    diff = np.sqrt(np.sum((points - median) ** 2, axis=-1))
    mad = np.median(diff)

    # MAD is zero when more than half of the observations coincide with
    # the median. Dividing would produce inf for every other observation
    # and NaN for the coincident ones; state that outcome explicitly
    # rather than relying on a division by zero.
    if mad == 0:
        return diff > 0

    modified_z_score = 0.6745 * diff / mad
    return modified_z_score > threshold


def main():
    """Plot histograms of a random sample before and after outlier removal."""
    # Imported here so that importing this module for ``is_outlier`` does
    # not require a plotting backend.
    import matplotlib.pyplot as plt

    # Create 100 random numbers in [0, 1).
    x = np.random.random(100)

    # Number of histogram buckets.
    buckets = 50

    # Add in a few obvious outliers.
    x = np.r_[x, -49, 95, 100, -100]

    # is_outlier() returns a boolean array; indexing with its negation
    # keeps only the elements that are NOT outliers. For example:
    #   x = np.array([1, 2, 3, 4])
    #   y = x[np.array([False, True, True, False])]
    #   y is [2, 3]
    filtered = x[~is_outlier(x)]

    # Create a new figure.
    plt.figure()

    # Upper subplot (2 rows, 1 column, index 1): histogram of the raw data.
    plt.subplot(211)
    plt.hist(x, buckets)
    plt.xlabel('Raw')

    # Lower subplot (index 2): histogram with the outliers removed.
    plt.subplot(212)
    plt.hist(filtered, buckets)
    plt.xlabel('Cleaned')

    # Show the figure.
    plt.show()


if __name__ == "__main__":
    main()