From 867507932dd1ad5dca24a70a29a95a0ca0f3d884 Mon Sep 17 00:00:00 2001 From: ACCakut <7684542+ACCakut@users.noreply.github.com> Date: Fri, 10 Nov 2023 11:28:13 +0100 Subject: [PATCH] A0c_AccessoryGenome.m rename recipsize -> dictsize MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since “recipient” is not unambiguous, it should be replaced with a more precise naming, like “dict”. Depending on whether one generates accessory genome for A1_SNP2CNP or A4_Cov2Ins, the dictionary is either from donor or recipient strain, so that the old name is quite misleading. +1 for this PR from MF ;-) --- 1_Detection/A0c_AccessoryGenome.m | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/1_Detection/A0c_AccessoryGenome.m b/1_Detection/A0c_AccessoryGenome.m index f4da8de..cad87cb 100644 --- a/1_Detection/A0c_AccessoryGenome.m +++ b/1_Detection/A0c_AccessoryGenome.m @@ -9,8 +9,11 @@ path = "/home/isabel/Documents/Doktorarbeit_Mai2022/1_kleinesPaper_BigData/Donor2Bs166/"; fileName = "Bmoj_2NCe"; fileSuffix = "_bcf.vcf"; + + % genome size of the .dict that .vcf was mapped to = number of letters in .fasta file + dictsize = 4215607; % B.sub Bs166 + %dictsize = 4027680; % B.spiz W23 WT - recipsize = 4215607; maxDP = 50; % Everything covered >= maxDP, is not included in the % accessory genome minLength = 0; % Only regions longer minLength bps are considered @@ -49,7 +52,7 @@ % 2. Find positions with no entry in the bcf - no read mapped, position % with no cover --> posMissingMask == 1 -posRecip = [1 : recipsize]; +posRecip = [1 : dictsize]; posMissingMask = ~ismember(posRecip, posAll); % include 1.: set all pos where DP < 50: posMissingMask == 1 @@ -82,5 +85,5 @@ save(savePath + saveName + "_allBP.mat","accList") % -accessoryFraction = sum(totalLengthFilt)/recipsize; +accessoryFraction = sum(totalLengthFilt)/dictsize; disp("For " + fileName + ", "+ num2str(accessoryFraction*100, "%0.1f")+ " % of the genome is accessory.");