FunctionScripts/diffprop_functions.R at master · Jihong528/FunctionScripts · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125

  # Differential proportion analysis of single-cell populations

  ###
  # Simulation of a data set with different number of cells and their cell types
  ###
  simCells<-function(n=10^7, prop=c(0.6, 0.3, 0.05, 0.03, 0.01, 0.005, 0.005)){
  	ct<-paste("Cell_Type_",1:length(prop),sep="");
  	nct<-round(n*prop);
  	lab<-rep(ct,nct);
  	return(lab);
  }

  ###
  # Simulate a data-set with some provided error rate to simulate experimental/biological noise
  ###
  simCellsNoisy<-function(n=10^7, prop, error = 0.05){
    # Create a 'noisy' distribution for sampling cells
    prop.noisy = unlist(lapply(prop, function(x)
                       {ifelse(sample(c(1:2), size=1) == 1, x+x*error, x-x*error)}))
    prop.noisy = prop.noisy/sum(prop.noisy)

    # Sample as before but with a modified distribution
    ct<-paste("Cell_Type_",1:length(prop.noisy),sep="");
    nct<-round(n*prop.noisy);
    lab<-rep(ct,nct);
    return(lab);
  }

  ###
  # Subsampling of cells
  ###
  subCells<-function(cells, n=round(length(cells)/10)){
  	sample(cells,n);
  }


  ###
  # Create the condition and cluster variables from the observed contingency table
  ###
  makeVariables<-function(obs.table){
     cond_names<-dimnames(obs.table)[[1]];
     clus_names<-dimnames(obs.table)[[2]];

     cond<-rep(cond_names, apply(obs.table,1,sum) );
     clus_list<-apply(obs.table,1,function(x){
       list(rep(clus_names, x ));
     })
     #clus <- base::do.call(c,clus_list);
     #table(cond,clus);
     clus <- unlist(clus_list);
     return(list(cond=cond, clus=clus));
  }

  ###
  # Generate null distributions based on a permutation procedure
  ###
  generateNull<-function(obs.table, n=10000, p=0.2){
      obs<-makeVariables(obs.table);
      all.exp<-array(NA,dim=c(dim(obs.table)[1],dim(obs.table)[2],n))
      dimnames(all.exp)[[1]]<-dimnames(obs.table)[[1]];
      dimnames(all.exp)[[2]]<-dimnames(obs.table)[[2]];
      dimnames(all.exp)[[3]]<-1:n;
      clus_names<-dimnames(obs.table)[[2]];
      cond_names<-dimnames(obs.table)[[1]];

      v<-makeVariables(obs.table);

      ### Perform permutation n times
      for(i in 1:n){
        pv<-v;

        if (i %% 1000 == 0){
          print(i);}

        ### Permutation: Randomly select p*100% of points to be re-sampled from the background
        randInd<-sample(1:length(pv$clus),round(length(pv$clus)*p));
        pv$clus[randInd]<-sample(v$clus,length(randInd),replace=F);

        this.exp<-table(pv$cond,pv$clus);
        exp.table<-obs.table;
        exp.table[1:dim(exp.table)[1],1:dim(exp.table)[2]]<-NA
        exp.table[dimnames(this.exp)[[1]],dimnames(this.exp)[[2]]]<-this.exp;
        exp.table
        all.exp[,,i]<-exp.table;
      }
      return(all.exp);
  }

  ###
  # Perform sum by ignoring NA
  ###
  sumNoNA<-function(x){
    sum(x[which(!is.na(x))])
  }

  ###
  # Perform a two-class test of significance
  ###
  two.class.test<-function(obs.table, all.exp, cond.control="C", cond.treatment="PA",to.plot=T){
    clus_names<-dimnames(obs.table)[[2]]
    pp<-array(-1,length(clus_names));
    names(pp)<-clus_names;

    if(to.plot){
       par(mfrow=c(3,ceiling(length(clus_names)/3)));
    }
    for(this_clus in clus_names){
      obs.diff<-obs.table[cond.treatment,this_clus]/sum(obs.table[cond.treatment,]) -
                obs.table[cond.control,this_clus]/sum(obs.table[cond.control,]);
      all.diff<-all.exp[cond.treatment,this_clus,]/apply(all.exp[cond.treatment,,],2,sumNoNA) -
                all.exp[cond.control,this_clus,]/apply(all.exp[cond.control,,],2,sumNoNA);
      if(to.plot){
         hist(all.diff,breaks=50,col="grey",border="grey",main=this_clus)
         abline(v=obs.diff,col="red")
      }
      p.r<-length(which(obs.diff>all.diff))/length(which(!is.na(all.diff)));
      p.l<-length(which(obs.diff<all.diff))/length(which(!is.na(all.diff)));
      pp[this_clus]<-min(p.r,p.l);
    }
    return(pp);
  }