@@ -507,6 +507,15 @@ def normalize(series):
507507def andrews_curves (frame , class_column , ax = None , samples = 200 , color = None ,
508508 colormap = None , ** kwds ):
509509 """
510+ Generates a matplotlib plot of Andrews curves, for visualising clusters of multivariate data.
511+
512+ Andrews curves have the functional form:
513+
514+ f(t) = x_1/sqrt(2) + x_2 sin(t) + x_3 cos(t) + x_4 sin(2t) + x_5 cos(2t) + ...
515+
516+ where the coefficients x_i are the values of each dimension (column) of a row and t is linearly spaced between
517+ -pi and +pi. Each row of frame then corresponds to a single curve.
518+
510519 Parameters:
511520 -----------
512521 frame : DataFrame
@@ -527,28 +536,34 @@ def andrews_curves(frame, class_column, ax=None, samples=200, color=None,
527536 ax: Matplotlib axis object
528537
529538 """
530- from math import sqrt , pi , sin , cos
539+ from math import sqrt , pi
531540 import matplotlib .pyplot as plt
532541
def function(amplitudes):
    """
    Build the Andrews curve for a single observation.

    The returned callable evaluates

        f(t) = x_1/sqrt(2) + x_2 sin(t) + x_3 cos(t) + x_4 sin(2t) + x_5 cos(2t) + ...

    where x_1, x_2, ... are the entries of ``amplitudes``.

    Parameters:
    -----------
    amplitudes : 1-D sequence of numbers
        Coefficients of one curve. It is not modified.

    Returns:
    --------
    f : callable
        ``f(t)`` takes a 1-D array of sample points and returns an array of
        the same length holding the curve values.
    """
    # Work on a flat float array; slicing below creates views only and the
    # caller's data is never written to.
    amplitudes = np.asarray(amplitudes, dtype=float).ravel()
    constant = amplitudes[0] / np.sqrt(2.0)

    # The remaining coefficients are consumed in (sin, cos) pairs, one pair
    # per harmonic.  Integer division keeps the row count an int, and a
    # zero-filled buffer pads the missing cos term when the number of
    # remaining coefficients is odd.
    rest = amplitudes[1:]
    n_harmonics = (rest.size + 1) // 2
    coeffs = np.zeros((n_harmonics, 2))
    coeffs.flat[:rest.size] = rest

    # Harmonic numbers 1, 2, ..., one per (sin, cos) pair.
    harmonics = np.arange(1, n_harmonics + 1)

    def f(t):
        # Row k of trig_args holds (k + 1) * t for every sample point.
        trig_args = np.outer(harmonics, t)
        return constant + np.sum(
            coeffs[:, 0, np.newaxis] * np.sin(trig_args) +
            coeffs[:, 1, np.newaxis] * np.cos(trig_args),
            axis=0)
    return f
546561
547562 n = len (frame )
548563 class_col = frame [class_column ]
549564 classes = frame [class_column ].drop_duplicates ()
550565 df = frame .drop (class_column , axis = 1 )
551- x = [ - pi + 2.0 * pi * ( t / float ( samples )) for t in range ( samples )]
566+ t = np . linspace ( - pi , pi , samples )
552567 used_legends = set ([])
553568
554569 color_values = _get_standard_colors (num_colors = len (classes ),
@@ -560,14 +575,14 @@ def f(x):
560575 for i in range (n ):
561576 row = df .iloc [i ].values
562577 f = function (row )
563- y = [ f (t ) for t in x ]
578+ y = f (t )
564579 kls = class_col .iat [i ]
565580 label = com .pprint_thing (kls )
566581 if label not in used_legends :
567582 used_legends .add (label )
568- ax .plot (x , y , color = colors [kls ], label = label , ** kwds )
583+ ax .plot (t , y , color = colors [kls ], label = label , ** kwds )
569584 else :
570- ax .plot (x , y , color = colors [kls ], ** kwds )
585+ ax .plot (t , y , color = colors [kls ], ** kwds )
571586
572587 ax .legend (loc = 'upper right' )
573588 ax .grid ()
0 commit comments