55.. ipython :: python
66 :suppress:
77
8+ import os
89 import numpy as np
10+
911 import pandas as pd
10- import os
12+
1113 np.random.seed(123456 )
1214 np.set_printoptions(precision = 4 , suppress = True )
13- import matplotlib
14- # matplotlib.style.use('default')
1515 pd.options.display.max_rows = 15
1616
17- # ### portions of this were borrowed from the
18- # ### Pandas cheatsheet
19- # ### created during the PyData Workshop-Sprint 2012
20- # ### Hannah Chen, Henry Chow, Eric Cox, Robert Mauriello
17+ # portions of this were borrowed from the
18+ # Pandas cheatsheet
19+ # created during the PyData Workshop-Sprint 2012
20+ # Hannah Chen, Henry Chow, Eric Cox, Robert Mauriello
2121
2222
2323********************
@@ -31,9 +31,8 @@ Customarily, we import as follows:
3131
3232.. ipython :: python
3333
34- import pandas as pd
3534 import numpy as np
36- import matplotlib.pyplot as plt
35+ import pandas as pd
3736
3837 Object Creation
3938---------------
@@ -55,7 +54,7 @@ and labeled columns:
5554
5655 dates = pd.date_range(' 20130101' , periods = 6 )
5756 dates
58- df = pd.DataFrame(np.random.randn(6 ,4 ), index = dates, columns = list (' ABCD' ))
57+ df = pd.DataFrame(np.random.randn(6 , 4 ), index = dates, columns = list (' ABCD' ))
5958 df
6059
6160 Creating a ``DataFrame`` by passing a dict of objects that can be converted to a series-like structure.
@@ -64,7 +63,7 @@ Creating a ``DataFrame`` by passing a dict of objects that can be converted to s
6463
6564 df2 = pd.DataFrame({' A' : 1 .,
6665 ' B' : pd.Timestamp(' 20130102' ),
67- ' C' : pd.Series(1 , index = list (range (4 )),dtype = ' float32' ),
66+ ' C' : pd.Series(1 , index = list (range (4 )), dtype = ' float32' ),
6867 ' D' : np.array([3 ] * 4 , dtype = ' int32' ),
6968 ' E' : pd.Categorical([" test" , " train" , " test" , " train" ]),
7069 ' F' : ' foo' })
@@ -190,31 +189,31 @@ Selecting on a multi-axis by label:
190189
191190.. ipython :: python
192191
193- df.loc[:,[' A' ,' B' ]]
192+ df.loc[:, [' A' , ' B' ]]
194193
195194 Showing label slicing, both endpoints are *included*:
196195
197196.. ipython :: python
198197
199- df.loc[' 20130102' :' 20130104' ,[' A' ,' B' ]]
198+ df.loc[' 20130102' :' 20130104' , [' A' , ' B' ]]
200199
201200 Reduction in the dimensions of the returned object:
202201
203202.. ipython :: python
204203
205- df.loc[' 20130102' ,[' A' ,' B' ]]
204+ df.loc[' 20130102' , [' A' , ' B' ]]
206205
207206 For getting a scalar value:
208207
209208.. ipython :: python
210209
211- df.loc[dates[0 ],' A' ]
210+ df.loc[dates[0 ], ' A' ]
212211
213212 For getting fast access to a scalar (equivalent to the prior method):
214213
215214.. ipython :: python
216215
217- df.at[dates[0 ],' A' ]
216+ df.at[dates[0 ], ' A' ]
218217
219218 Selection by Position
220219~~~~~~~~~~~~~~~~~~~~~
@@ -231,37 +230,37 @@ By integer slices, acting similar to numpy/python:
231230
232231.. ipython :: python
233232
234- df.iloc[3 :5 ,0 :2 ]
233+ df.iloc[3 :5 , 0 :2 ]
235234
236235 By lists of integer position locations, similar to the numpy/python style:
237236
238237.. ipython :: python
239238
240- df.iloc[[1 ,2 , 4 ],[0 ,2 ]]
239+ df.iloc[[1 , 2 , 4 ], [0 , 2 ]]
241240
242241 For slicing rows explicitly:
243242
244243.. ipython :: python
245244
246- df.iloc[1 :3 ,:]
245+ df.iloc[1 :3 , :]
247246
248247 For slicing columns explicitly:
249248
250249.. ipython :: python
251250
252- df.iloc[:,1 :3 ]
251+ df.iloc[:, 1 :3 ]
253252
254253 For getting a value explicitly:
255254
256255.. ipython :: python
257256
258- df.iloc[1 ,1 ]
257+ df.iloc[1 , 1 ]
259258
260259 For getting fast access to a scalar (equivalent to the prior method):
261260
262261.. ipython :: python
263262
264- df.iat[1 ,1 ]
263+ df.iat[1 , 1 ]
265264
266265 Boolean Indexing
267266~~~~~~~~~~~~~~~~
@@ -303,19 +302,19 @@ Setting values by label:
303302
304303.. ipython :: python
305304
306- df.at[dates[0 ],' A' ] = 0
305+ df.at[dates[0 ], ' A' ] = 0
307306
308307 Setting values by position:
309308
310309.. ipython :: python
311310
312- df.iat[0 ,1 ] = 0
311+ df.iat[0 , 1 ] = 0
313312
314313 Setting by assigning with a NumPy array:
315314
316315.. ipython :: python
317316
318- df.loc[:,' D' ] = np.array([5 ] * len (df))
317+ df.loc[:, ' D' ] = np.array([5 ] * len (df))
319318
320319 The result of the prior setting operations.
321320
@@ -345,7 +344,7 @@ returns a copy of the data.
345344.. ipython :: python
346345
347346 df1 = df.reindex(index = dates[0 :4 ], columns = list (df.columns) + [' E' ])
348- df1.loc[dates[0 ]:dates[1 ],' E' ] = 1
347+ df1.loc[dates[0 ]:dates[1 ], ' E' ] = 1
349348 df1
350349
351350 To drop any rows that have missing data:
@@ -653,7 +652,8 @@ pandas can include categorical data in a ``DataFrame``. For full docs, see the
653652
654653.. ipython :: python
655654
656- df = pd.DataFrame({" id" :[1 , 2 , 3 , 4 , 5 , 6 ], " raw_grade" :[' a' , ' b' , ' b' , ' a' , ' a' , ' e' ]})
655+ df = pd.DataFrame({" id" : [1 , 2 , 3 , 4 , 5 , 6 ],
656+ " raw_grade" : [' a' , ' b' , ' b' , ' a' , ' a' , ' e' ]})
657657
658658 Convert the raw grades to a categorical data type.
659659
@@ -674,7 +674,8 @@ Reorder the categories and simultaneously add the missing categories (methods un
674674
675675.. ipython :: python
676676
677- df[" grade" ] = df[" grade" ].cat.set_categories([" very bad" , " bad" , " medium" , " good" , " very good" ])
677+ df[" grade" ] = df[" grade" ].cat.set_categories([" very bad" , " bad" , " medium" ,
678+ " good" , " very good" ])
678679 df[" grade" ]
679680
680681 Sorting is per order in the categories, not lexical order.
@@ -703,7 +704,8 @@ See the :ref:`Plotting <visualization>` docs.
703704
704705 .. ipython :: python
705706
706- ts = pd.Series(np.random.randn(1000 ), index = pd.date_range(' 1/1/2000' , periods = 1000 ))
707+ ts = pd.Series(np.random.randn(1000 ),
708+ index = pd.date_range(' 1/1/2000' , periods = 1000 ))
707709 ts = ts.cumsum()
708710
709711 @savefig series_plot_basic.png
@@ -718,8 +720,10 @@ of the columns with labels:
718720 columns = [' A' , ' B' , ' C' , ' D' ])
719721 df = df.cumsum()
720722
723+ plt.figure()
724+ df.plot()
721725 @savefig frame_plot_basic.png
722- plt.figure(); df.plot(); plt. legend(loc = ' best' )
726+ plt.legend(loc = ' best' )
723727
724728 Getting Data In/Out
725729-------------------
0 commit comments