Create an empty Data frame with date index:
import pandas as pd def test_run(): start_date='2017-11-24' end_data='2017-11-28' dates=pd.date_range(start_date, end_data) df1=pd.DataFrame(index=dates) print(df1) """ Empty DataFrame Columns: [] Index: [2010-01-22 00:00:00, 2010-01-23 00:00:00, 2010-01-24 00:00:00, 2010-01-25 00:00:00, 2010-01-26 00:00:00] """
Now we want to load SPY.csv and get 'Adj Close' column value and copy the range (11-21, 11-28) data to the empty data frame:
import pandas as pd def test_run(): start_date='2017-11-24' end_data='2017-11-28' dates=pd.date_range(start_date, end_data) # Create an empty data frame df1=pd.DataFrame(index=dates) # Load csv file dspy=pd.read_csv('data/SPY.csv', index_col="Date", parse_dates=True, usecols=['Date', 'Adj Close'], na_values=['nan']) # print(dspy) """ Adj Close Date 2017-11-16 258.619995 2017-11-17 257.859985 2017-11-20 258.299988 """ # join the table df1=df1.join(dspy) #print(df1) """ Adj Close 2017-11-24 260.359985 2017-11-25 NaN 2017-11-26 NaN 2017-11-27 260.230011 """ # drop the nan row df1=df1.dropna() print(df1) """ Adj Close 2017-11-24 260.359985 2017-11-27 260.230011 2017-11-28 262.869995 """ if __name__ == '__main__': test_run()
There is a simpy way to drop the data which index is not present in dspy:
df1=df1.join(dspy, how='inner')
We can also rename the 'Adj Close' to prevent conflicts:
# rename the column dspy=dspy.rename(columns={'Adj Close': 'SPY'})
Load more stocks:
import pandas as pd def test_run(): start_date='2017-11-24' end_data='2017-11-28' dates=pd.date_range(start_date, end_data) # Create an empty data frame df1=pd.DataFrame(index=dates) # Load csv file dspy=pd.read_csv('data/spy.csv', index_col="Date", parse_dates=True, usecols=['Date', 'Adj Close'], na_values=['nan']) # print(dspy) """ Adj Close Date 2017-11-16 258.619995 2017-11-17 257.859985 2017-11-20 258.299988 """ # rename the column dspy=dspy.rename(columns={'Adj Close': 'spy'}) # join the table df1=df1.join(dspy, how='inner') # print(df1) """ Adj Close 2017-11-24 260.359985 2017-11-27 260.230011 2017-11-28 262.869995 """ symbols=['aapl', 'ibm'] for symbol in symbols: temp=pd.read_csv('data/{0}.csv'.format(symbol), index_col="Date", parse_dates=True, usecols=['Date', 'Adj Close'], na_values=['nan']) temp=temp.rename(columns={'Adj Close': symbol}) df1=df1.join(temp) print(df1) """ spy aapl ibm 2017-11-24 260.359985 174.970001 151.839996 2017-11-27 260.230011 174.089996 151.979996 2017-11-28 262.869995 173.070007 152.470001 """ if __name__ == '__main__': test_run()