Import modules
import pandas as pd
Create some dummy data
创建一些虚拟数据
# Sample records used by every selection example: one dict key per column,
# each mapped to a list holding that column's four values.
raw_data = {
    'name': ['Willard Morris', 'Al Jennings', 'Omar Mullins', 'Spencer McDaniel'],
    'age': [20, 19, 22, 21],
    'favorite_color': ['blue', 'blue', 'yellow', 'green'],
    'grade': [88, 92, 95, 70],
}

# Build the DataFrame from the column dict.
df = pd.DataFrame(raw_data)

# Preview the first rows (displayed when run as the last expression of a
# notebook cell; a no-op when run as a plain script).
df.head()
根据列值选择行:
# To select rows whose column value equals a scalar, compare the column with
# == to get a boolean mask, then pass that mask to .loc.
df.loc[df['favorite_color'] == 'yellow']
选择列值在可迭代数组中的行:
# Row selection by membership: Series.isin marks each row whose
# favorite_color appears in the list of accepted values (named `array` here),
# and .loc keeps the marked rows together with every column.
array = [
    'yellow',
    'green',
]
df.loc[df['favorite_color'].isin(array), :]
根据多列条件选择行:
# To select rows based on multiple conditions, combine the boolean masks
# element-wise with &.
array = ['yellow', 'green']
# The comparison is parenthesised because & binds more tightly than ==.
df.loc[(df['age'] == 21) & df['favorite_color'].isin(array)]
选择列不等于值的行:
# To select rows where a column value does not equal a given value, build the
# boolean mask with != and pass it to .loc.
df.loc[df['favorite_color'] != 'yellow']
Select rows whose column value is not in an iterable array:
选择列值不在可迭代数组中的行:
# To return rows whose column value is NOT in an iterable, build the
# membership mask with isin and invert it by putting ~ in front of the mask.
array = ['yellow', 'green']
df.loc[~df['favorite_color'].isin(array)]