@@ -518,97 +518,6 @@ def nancorr_spearman(ndarray[float64_t, ndim=2] mat, Py_ssize_t minp=1) -> ndarr
518518 return result
519519
520520
521- # ----------------------------------------------------------------------
522- # Kendall correlation
523- # Wikipedia article: https://en.wikipedia.org/wiki/Kendall_rank_correlation_coefficient
524-
@cython.boundscheck(False)
@cython.wraparound(False)
def nancorr_kendall(ndarray[float64_t, ndim=2] mat, Py_ssize_t minp=1) -> ndarray:
    """
    Compute the pairwise Kendall rank correlation (tau-b) between columns.

    For every pair of columns only the rows in which both values are finite
    are used. Tied pairs are handled with the tau-b correction, i.e.

        tau_b = (P - Q) / sqrt((P + Q + T) * (P + Q + U))

    where P / Q are the numbers of concordant / discordant pairs, T is the
    number of pairs tied only in the first column and U the number of pairs
    tied only in the second column. Without ties this reduces to
    (P - Q) / C(n, 2).

    Parameters
    ----------
    mat : np.ndarray[float64_t, ndim=2]
        Array to compute kendall correlation on; columns are the variables.
    minp : int, default 1
        Minimum number of observations required per pair of columns
        to have a valid result.

    Returns
    -------
    numpy.ndarray[float64_t, ndim=2]
        Symmetric (K, K) correlation matrix. An off-diagonal entry is NaN
        when fewer than ``minp`` jointly finite observations exist or when
        tau-b is undefined (fewer than two observations, or one of the two
        columns is constant over the jointly finite rows). A diagonal entry
        is 1 when the column has at least ``minp`` finite values, else NaN.
    """
    cdef:
        Py_ssize_t j, k, xi, yi, N, K
        Py_ssize_t n_obs
        ndarray[float64_t, ndim=2] result
        ndarray[uint8_t, ndim=2] mask
        int64_t n_concordant, n_discordant, n_tied_x, n_tied_y
        float64_t xj, yj, xk, yk
        float64_t denom

    N, K = (<object>mat).shape

    result = np.empty((K, K), dtype=np.float64)
    # View the boolean mask as uint8 so it matches the declared buffer dtype.
    mask = np.isfinite(mat).view(np.uint8)

    for xi in range(K):
        for yi in range(xi + 1, K):
            # Number of rows where both columns hold a finite value.
            n_obs = 0
            for j in range(N):
                if mask[j, xi] and mask[j, yi]:
                    n_obs += 1

            if n_obs < minp:
                result[xi, yi] = NaN
                result[yi, xi] = NaN
                continue

            n_concordant = 0
            n_discordant = 0
            n_tied_x = 0
            n_tied_y = 0

            # Classify every unordered pair of jointly valid rows.
            for j in range(N):
                if not (mask[j, xi] and mask[j, yi]):
                    continue
                xj = mat[j, xi]
                yj = mat[j, yi]
                for k in range(j + 1, N):
                    if not (mask[k, xi] and mask[k, yi]):
                        continue
                    xk = mat[k, xi]
                    yk = mat[k, yi]
                    if xk == xj:
                        # Pairs tied in both columns contribute to neither
                        # factor of the tau-b denominator.
                        if yk != yj:
                            n_tied_x += 1
                    elif yk == yj:
                        n_tied_y += 1
                    elif (xk > xj) == (yk > yj):
                        n_concordant += 1
                    else:
                        n_discordant += 1

            # Cast before multiplying: the product of two pair counts can
            # exceed the int64 range for large N.
            denom = np.sqrt(
                <float64_t>(n_concordant + n_discordant + n_tied_x)
                * <float64_t>(n_concordant + n_discordant + n_tied_y)
            )

            if denom == 0:
                # Fewer than two observations or a constant column:
                # tau-b is undefined; avoid dividing by zero.
                result[xi, yi] = NaN
                result[yi, xi] = NaN
            else:
                result[xi, yi] = (n_concordant - n_discordant) / denom
                result[yi, xi] = result[xi, yi]

        # Same threshold as the off-diagonal entries: at least ``minp``
        # finite observations make the self-correlation valid.
        if mask[:, xi].sum() >= minp:
            result[xi, xi] = 1
        else:
            result[xi, xi] = NaN

    return result
610-
611-
612521# ----------------------------------------------------------------------
613522
614523ctypedef fused algos_t:
0 commit comments