zoukankan      html  css  js  c++  java
  • [Python] Making a package

    In this next section, you'll convert the Distributions code into a Python package. A package is a collection of Python modules. Although the previous code might already seem like it was a Python package because it contained multiple files, a Python package also needs an __init__.py file. In this section, you'll learn how to create this __init__.py file and then pip install the package into your local Python installation.

    Folder structure:

    |- distribution

      | - __init__.py

      | - Gaussiandistribution.py

      | - Generaldistribution.py

    |- setup.py

    Gaussiandistribution.py:

    import math
    import matplotlib.pyplot as plt
    from .Generaldistribution import Distribution # need a . for python 3
    
    class Gaussian(Distribution):
        """Gaussian distribution class for calculating and visualizing a
        Gaussian distribution.

        Attributes:
            mean (float): mean value of the distribution
            stdev (float): standard deviation of the distribution
            data (list of numbers): values loaded by read_data_file
        """

        def __init__(self, mu=0, sigma=1):
            """Create a Gaussian with the given mean and standard deviation.

            Args:
                mu (float): mean of the distribution
                sigma (float): standard deviation of the distribution
            """
            super().__init__(mu, sigma)

        def calculate_mean(self):
            """Calculate the mean of the data set and store it in self.mean.

            Args:
                None

            Returns:
                float: mean of the data set

            Raises:
                ZeroDivisionError: if self.data is empty
            """
            self.mean = sum(self.data) / len(self.data)
            return self.mean

        def calculate_stdev(self, sample=True):
            """Calculate the standard deviation of the data set and store it
            in self.stdev.

            The mean is recomputed from the current data first (updating
            self.mean), so the result is correct even when self.data was
            changed after the mean was last calculated.

            Args:
                sample (bool): whether the data represents a sample (divide by
                    n - 1) or a population (divide by n)

            Returns:
                float: standard deviation of the data set

            Raises:
                ZeroDivisionError: if the divisor is zero (empty data, or a
                    single data point with sample=True)
            """
            n = len(self.data) - 1 if sample else len(self.data)

            # Deviations must be measured from the mean of the current data,
            # not from whatever value self.mean happened to hold.
            mean = self.calculate_mean()
            squared_deviations = sum((d - mean) ** 2 for d in self.data)

            self.stdev = math.sqrt(squared_deviations / n)
            return self.stdev

        def read_data_file(self, file_name, sample=True):
            """Read data from a txt file, then update the mean and standard
            deviation from it.

            Parsing is delegated to Distribution.read_data_file, which stores
            the numbers in the data attribute.

            Args:
                file_name (string): name of a file to read from
                sample (bool): whether the data represents a sample or a
                    population; forwarded to calculate_stdev

            Returns:
                None
            """
            # Reuse the base-class parser instead of duplicating it here.
            super().read_data_file(file_name)

            # calculate_* store their results on the instance themselves.
            self.calculate_mean()
            self.calculate_stdev(sample)

        def plot_histogram(self):
            """Output a histogram of the instance variable data using the
            matplotlib pyplot library.

            Args:
                None

            Returns:
                None
            """
            plt.hist(self.data)
            plt.title('Histogram of Data')
            plt.xlabel('data')
            plt.ylabel('count')

        def pdf(self, x):
            """Probability density function calculator for the gaussian
            distribution.

            Args:
                x (float): point for calculating the probability density
                    function

            Returns:
                float: probability density function output
            """
            coefficient = 1.0 / (self.stdev * math.sqrt(2 * math.pi))
            exponent = -0.5 * ((x - self.mean) / self.stdev) ** 2
            return coefficient * math.exp(exponent)

        def plot_histogram_pdf(self, n_spaces=50):
            """Plot the normalized histogram of the data and a plot of the
            probability density function along the same range.

            Args:
                n_spaces (int): number of data points

            Returns:
                list: x values for the pdf plot
                list: y values for the pdf plot
            """
            min_range = min(self.data)
            max_range = max(self.data)

            # interval between consecutive x values
            interval = 1.0 * (max_range - min_range) / n_spaces

            # x values start at the minimum and stop one interval short of
            # the maximum; y holds the density at each x
            x = [min_range + interval * i for i in range(n_spaces)]
            y = [self.pdf(value) for value in x]

            # make the plots
            fig, axes = plt.subplots(2, sharex=True)
            fig.subplots_adjust(hspace=.5)
            axes[0].hist(self.data, density=True)
            axes[0].set_title('Normed Histogram of Data')
            axes[0].set_ylabel('Density')

            axes[1].plot(x, y)
            axes[1].set_title('Normal Distribution for \n Sample Mean and Sample Standard Deviation')
            # label the pdf axes (the histogram axes were labelled above)
            axes[1].set_ylabel('Density')
            plt.show()

            return x, y

        def __add__(self, other):
            """Add together two Gaussian distributions.

            The means add, and the standard deviations combine as the square
            root of the sum of their squares.

            Args:
                other (Gaussian): Gaussian instance

            Returns:
                Gaussian: Gaussian distribution
            """
            result = Gaussian()
            result.mean = self.mean + other.mean
            result.stdev = math.sqrt(self.stdev ** 2 + other.stdev ** 2)

            return result

        def __repr__(self):
            """Output the characteristics of the Gaussian instance.

            Args:
                None

            Returns:
                string: characteristics of the Gaussian
            """
            return "mean {}, standard deviation {}".format(self.mean, self.stdev)
    

      

    Generaldistribution.py:

    class Distribution:
        """Generic distribution class for calculating and visualizing a
        probability distribution.

        Attributes:
            mean (float): mean value of the distribution
            stdev (float): standard deviation of the distribution
            data (list of numbers): values loaded by read_data_file
        """

        def __init__(self, mu=0, sigma=1):
            """Store the distribution parameters and start with no data.

            Args:
                mu (float): mean of the distribution
                sigma (float): standard deviation of the distribution
            """
            self.mean = mu
            self.stdev = sigma
            self.data = []

        def read_data_file(self, file_name):
            """Read data from a txt file with one number per line and store
            the numbers in the data attribute.

            Integer lines are stored as int and any other numeric line as
            float. Blank lines are skipped.

            Args:
                file_name (string): name of a file to read from

            Returns:
                None

            Raises:
                OSError: if the file cannot be opened
                ValueError: if a non-blank line is not a number
            """
            data_list = []
            # the with statement closes the file; no explicit close() needed
            with open(file_name) as file:
                for line in file:
                    text = line.strip()
                    if not text:
                        # tolerate blank lines, e.g. a trailing empty line
                        continue
                    try:
                        # keep whole numbers as int, as before
                        value = int(text)
                    except ValueError:
                        value = float(text)
                    data_list.append(value)

            self.data = data_list
    

      

    __init__.py:

    from .Gaussiandistribution import Gaussian

    setup.py:

    from setuptools import setup
    
    # `packages` must list the directory that contains __init__.py; here that
    # folder is "distribution", the same as the project name.
    setup(
        name='distribution',
        version='1.0',
        description='Gaussian distributions',
        packages=['distribution'],
        zip_safe=False,
    )

    Run:

    pip install . # install package locally

    Update and reinstall:

    pip install --upgrade .
  • 相关阅读:
    elasticsearch之python操作
    Elasticsearch之性能优化
    Elasticsearch之基本使用
    Redis主从哨兵集群搭建
    Docker Swarm集群
    Elasticsearch之权限验证(Basic)
    docker之网络与数据管理
    docker之可视化工具
    基于Github gist的代码片段管理工具Lepton
    Redis工具之redis_rdb_tools
  • 原文地址:https://www.cnblogs.com/Answer1215/p/13037297.html
Copyright © 2011-2022 走看看