![](https://img2018.cnblogs.com/blog/1020928/201907/1020928-20190723184916749-1465174472.png)
# import pandas
import pandas as pd
# creating a DataFrame from a dict:
# each key becomes a column name, each list holds that column's values
pd.DataFrame({'Yes': [50, 31], 'No': [101, 2]})
![](https://img2018.cnblogs.com/blog/1020928/201907/1020928-20190723185035990-27269693.png)
# another example of creating a dataframe
pd.DataFrame({'Bob': ['I liked it.', 'It was awful.'], 'Sue': ['Pretty good.', 'Bland']})
![](https://img2018.cnblogs.com/blog/1020928/201907/1020928-20190723185117654-1319705596.png)
# same kind of DataFrame, but with explicit row labels passed via 'index'
pd.DataFrame(
    {
        'Bob': ['I liked it.', 'It was awful.'],
        'Sue': ['Pretty good.', 'Bland.'],
    },
    index=['Product A', 'Product B'],
)
![](https://img2018.cnblogs.com/blog/1020928/201907/1020928-20190723185151748-722999051.png)
# creating a pandas series
pd.Series([1, 2, 3, 4, 5])
![](https://img2018.cnblogs.com/blog/1020928/201907/1020928-20190723185226924-936151662.png)
# we can think of a Series as a column of a DataFrame.
# we can assign index values to Series in same way as pandas DataFrame
pd.Series([10, 20, 30], index=['2015 sales', '2016 sales', '2017 sales'], name='Product A')
![](https://img2018.cnblogs.com/blog/1020928/201907/1020928-20190723185303527-1691574205.png)
# reading a csv file and storing it in a variable
# (raw string: the backslashes of the Windows path are kept literally instead
# of being parsed as escape sequences such as "\k" or "\w")
wine_reviews = pd.read_csv(r"F:\kaggleDataSet\wine-reviews\winemag-data-130k-v2.csv")
# we can use the 'shape' attribute to check size of dataset
wine_reviews.shape
![](https://img2018.cnblogs.com/blog/1020928/201907/1020928-20190723185539013-1685392395.png)
# To show first five rows of data, use 'head()' method
wine_reviews.head()
![](https://img2018.cnblogs.com/blog/1020928/201907/1020928-20190723185609909-1211134001.png)
# re-read the file, this time using the first CSV column as the row index
# (index_col=0); raw string keeps the Windows path backslashes literal
wine_reviews = pd.read_csv(r"F:\kaggleDataSet\wine-reviews\winemag-data-130k-v2.csv", index_col=0)
wine_reviews.head()
![](https://img2018.cnblogs.com/blog/1020928/201907/1020928-20190723185713477-1575701889.png)
# save the first five rows to a new csv file
# (raw string so that "\w" in the Windows path is not treated as an escape)
wine_reviews.head().to_csv(r"F:\wine_reviews.csv")
![](https://img2018.cnblogs.com/blog/1020928/201907/1020928-20190723185940089-1320333523.png)
import pandas as pd
# load the wine reviews, using the first CSV column as the row index;
# raw string keeps the Windows path backslashes literal
reviews = pd.read_csv(r"F:\kaggleDataSet\wine-reviews\winemag-data-130k-v2.csv", index_col=0)
# limit how many rows pandas prints when displaying a DataFrame or Series
pd.set_option("display.max_rows", 5)
![](https://img2018.cnblogs.com/blog/1020928/201907/1020928-20190723190114394-752949169.png)
# access 'country' property (or column) of 'reviews'
reviews.country
![](https://img2018.cnblogs.com/blog/1020928/201907/1020928-20190723190147001-1241297823.png)
# Another way to do the above operation.
# When a column name contains a space, we have to use this method.
reviews['country']
![](https://img2018.cnblogs.com/blog/1020928/201907/1020928-20190723190220648-1166345904.png)
# To access first row of country column
reviews['country'][0]
![](https://img2018.cnblogs.com/blog/1020928/201907/1020928-20190723190249197-1963800511.png)
# returns first row
reviews.iloc[0]
![](https://img2018.cnblogs.com/blog/1020928/201907/1020928-20190723190317540-1522502706.png)
# returns first column (country) (all rows due to ':')
reviews.iloc[:, 0]
![](https://img2018.cnblogs.com/blog/1020928/201907/1020928-20190723190341757-377550320.png)
# returns first 3 rows of first column
reviews.iloc[:3, 0]
![](https://img2018.cnblogs.com/blog/1020928/201907/1020928-20190723190408774-898975367.png)
# we can pass a list of indices of rows/columns to select
reviews.iloc[[0, 1, 2, 3], 0]
![](https://img2018.cnblogs.com/blog/1020928/201907/1020928-20190723190446441-221075594.png)
# We can also pass negative numbers as we do in Python
reviews.iloc[-5:]
![](https://img2018.cnblogs.com/blog/1020928/201907/1020928-20190723190517388-1262770591.png)
# To select first entry in country column
# ('loc' is label-based: 0 here is the row's index label, not its position;
# presumably the index read from the csv starts at 0 - verify against the data)
reviews.loc[0, 'country']
![](https://img2018.cnblogs.com/blog/1020928/201907/1020928-20190723190548179-1376791574.png)
# select columns by name using 'loc'
reviews.loc[:, ['taster_name', 'taster_twitter_handle', 'points']]
![](https://img2018.cnblogs.com/blog/1020928/201907/1020928-20190723190612774-160446168.png)
# use 'set_index' to make the 'title' column the row index
# (the result is only displayed here; it is not assigned back to 'reviews')
reviews.set_index('title')
![](https://img2018.cnblogs.com/blog/1020928/201907/1020928-20190723190638713-315251273.png)
# 1. Find out whether wine is produced in Italy
reviews.country == 'Italy'
![](https://img2018.cnblogs.com/blog/1020928/201907/1020928-20190723190705254-170965673.png)
# 2. Now select all wines produced in Italy
reviews.loc[reviews.country == 'Italy'] #reviews[reviews.country == 'Italy']
![](https://img2018.cnblogs.com/blog/1020928/201907/1020928-20190723190730189-1328571480.png)
# Add one more condition for points to find better than average wines produced in Italy
reviews.loc[(reviews.country == 'Italy') & (reviews.points >= 90)] # use | for 'OR' condition
![](https://img2018.cnblogs.com/blog/1020928/201907/1020928-20190723190800493-1099970108.png)
reviews.loc[reviews.country.isin(['Italy', 'France'])]
![](https://img2018.cnblogs.com/blog/1020928/201907/1020928-20190723190833714-426365324.png)
reviews.loc[reviews.price.notnull()]
![](https://img2018.cnblogs.com/blog/1020928/201907/1020928-20190723190904944-352628511.png)
# assigning a constant value creates (or overwrites) a column
# with that value repeated in every row
reviews['critic'] = 'everyone'
reviews.critic
![](https://img2018.cnblogs.com/blog/1020928/201907/1020928-20190723190935028-752761877.png)
# using iterable for assigning
reviews['index_backwards'] = range(len(reviews), 0, -1)
reviews['index_backwards']
![](https://img2018.cnblogs.com/blog/1020928/201907/1020928-20190723190958183-1063889629.png)