diff --git a/bin/x_04b_all_vars b/bin/x_04b_all_vars index aa8a8380..bdf2158c 100755 --- a/bin/x_04b_all_vars +++ b/bin/x_04b_all_vars @@ -15,7 +15,8 @@ i=0 echo "Visible phase space" for var in MET HT ST WPT lepton_pt abs_lepton_eta NJets; do echo "Plotting diff. x-section for distribution: $var" - nohup time python dps/analysis/xsection/04_make_plots_matplotlib.py --visiblePS --show-generator-ratio -v $var -c 13 -p data/normalisation/background_subtraction/ -o plots/background_subtraction &> logs/04_${var}_plot_Vis_13TeV.log & # -a <--add this option for additional plots + # nohup time python dps/analysis/xsection/04_make_plots_matplotlib.py --visiblePS --show-generator-ratio -v $var -c 13 -p data/normalisation/background_subtraction/ -o plots/background_subtraction &> logs/04_${var}_plot_Vis_13TeV.log & # -a <--add this option for additional plots + nohup time python dps/analysis/xsection/04_make_plots_matplotlib.py --visiblePS -v $var -c 13 -p data/normalisation/background_subtraction/ -o plots/background_subtraction &> logs/04_${var}_plot_Vis_13TeV.log & # -a <--add this option for additional plots let i+=1 shallIwait $i $N_JOBS done diff --git a/bin/x_05b_all_vars b/bin/x_05b_all_vars index 1e5b5875..4e9f9ae5 100755 --- a/bin/x_05b_all_vars +++ b/bin/x_05b_all_vars @@ -20,7 +20,7 @@ echo "Now visible phase space" for var in MET HT ST WPT lepton_pt abs_lepton_eta NJets; do echo "Tabulating diff. 
x-section for distribution: $var" - nohup time python dps/analysis/xsection/05_make_tables.py -v $var -c 13 -p data/normalisation/background_subtraction/ -a --visiblePS -o tables/background_subtraction &> logs/05_${var}_table_13TeV.log & + nohup time python dps/analysis/xsection/05_make_systematic_plots.py -v $var --visiblePS &> logs/05_${var}_table_13TeV.log & let i+=1 if (( $i % N_JOBS == 0 )) then diff --git a/dps/analysis/BLTUnfold/produceUnfoldingHistograms.py b/dps/analysis/BLTUnfold/produceUnfoldingHistograms.py index 443a473b..3bfd4b87 100644 --- a/dps/analysis/BLTUnfold/produceUnfoldingHistograms.py +++ b/dps/analysis/BLTUnfold/produceUnfoldingHistograms.py @@ -1,15 +1,13 @@ from rootpy.plotting import Hist, Hist2D from rootpy.io import root_open #from rootpy.interactive import wait -from optparse import OptionParser +from argparse import ArgumentParser from dps.config.xsection import XSectionConfig from dps.config.variable_binning import bin_edges_vis, reco_bin_edges_vis from dps.config.variableBranchNames import branchNames, genBranchNames_particle, genBranchNames_parton from dps.utils.file_utilities import make_folder_if_not_exists from math import trunc, exp, sqrt -from scaleFactors import * - import ROOT as ROOT ROOT.gROOT.SetBatch(True) ROOT.gROOT.ProcessLine( 'gErrorIgnoreLevel = 2001;' ) @@ -30,7 +28,7 @@ def calculateTopEtaWeight( lepTopRap, hadTopRap, whichWayToWeight = 1): else : return 1 -def calculateTopPtWeight( lepTopPt, hadTopPt, whichWayToWeight = 1 ): +def calculateTopPtWeight( lepTopPt, hadTopPt, whichWayToWeight = 1 ): if whichWayToWeight == -1 : return max ( (-0.001 * lepTopPt + 1.1 ) * (-0.001 * hadTopPt + 1.1), 0.1 ) elif whichWayToWeight == 1 : @@ -38,6 +36,23 @@ def calculateTopPtWeight( lepTopPt, hadTopPt, whichWayToWeight = 1 ): else : return 1 +def calculateTopPtSystematicWeight( lepTopPt, hadTopPt ): + ''' + Calculating the top pt weight + ______________ A + B.Pt + W = / SF(t)SF(tbar) , SF(t) = e + + A = 0.0615 + B = -0.0005 + 
''' + lepTopWeight = ptWeight( lepTopPt ) + hadTopWeight = ptWeight( hadTopPt ) + return sqrt( lepTopWeight * hadTopWeight ) + +def ptWeight( pt ): + return exp( 0.0615 - 0.0005 * pt ) + + def calculateTopPtSystematicWeight( lepTopPt, hadTopPt ): lepTopWeight = ptWeight( lepTopPt ) hadTopWeight = ptWeight( hadTopPt ) @@ -49,103 +64,162 @@ def ptWeight( pt ): def getFileName( com, sample, measurementConfig ) : fileNames = { - '13TeV' : { - 'central' : measurementConfig.ttbar_category_templates_trees['central'], - 'amcatnlo' : measurementConfig.ttbar_amc_category_templates_trees, - 'madgraph' : measurementConfig.ttbar_madgraph_category_templates_trees, - 'powhegherwigpp' : measurementConfig.ttbar_powhegherwigpp_category_templates_trees, - 'amcatnloherwigpp' : measurementConfig.ttbar_amcatnloherwigpp_category_templates_trees, - 'massdown' : measurementConfig.ttbar_mtop1695_category_templates_trees, - 'massup' : measurementConfig.ttbar_mtop1755_category_templates_trees, - 'topPtSystematic' : measurementConfig.ttbar_category_templates_trees['central'], - 'fsrup' : measurementConfig.ttbar_fsrup_category_templates_trees, - 'fsrdown' : measurementConfig.ttbar_fsrdown_category_templates_trees, - 'isrup' : measurementConfig.ttbar_isrup_category_templates_trees, - 'isrdown' : measurementConfig.ttbar_isrdown_category_templates_trees, - 'ueup' : measurementConfig.ttbar_ueup_category_templates_trees, - 'uedown' : measurementConfig.ttbar_uedown_category_templates_trees, - - 'jesdown' : measurementConfig.ttbar_jesdown_category_templates_trees, - 'jesup' : measurementConfig.ttbar_jesup_category_templates_trees, - 'jerdown' : measurementConfig.ttbar_jerdown_category_templates_trees, - 'jerup' : measurementConfig.ttbar_jerup_category_templates_trees, - 'bjetdown' : measurementConfig.ttbar_category_templates_trees['central'], - 'bjetup' : measurementConfig.ttbar_category_templates_trees['central'], - 'lightjetdown' : measurementConfig.ttbar_category_templates_trees['central'], - 
'lightjetup' : measurementConfig.ttbar_category_templates_trees['central'], - 'leptondown' : measurementConfig.ttbar_category_templates_trees['central'], - 'leptonup' : measurementConfig.ttbar_category_templates_trees['central'], - 'pileupUp' : measurementConfig.ttbar_category_templates_trees['central'], - 'pileupDown' : measurementConfig.ttbar_category_templates_trees['central'], - - - 'ElectronEnUp' : measurementConfig.ttbar_category_templates_trees['central'], - 'ElectronEnDown' : measurementConfig.ttbar_category_templates_trees['central'], - 'MuonEnUp' : measurementConfig.ttbar_category_templates_trees['central'], - 'MuonEnDown' : measurementConfig.ttbar_category_templates_trees['central'], - 'TauEnUp' : measurementConfig.ttbar_category_templates_trees['central'], - 'TauEnDown' : measurementConfig.ttbar_category_templates_trees['central'], - 'UnclusteredEnUp' : measurementConfig.ttbar_category_templates_trees['central'], - 'UnclusteredEnDown' : measurementConfig.ttbar_category_templates_trees['central'], - }, - } + '13TeV' : { + 'central' : measurementConfig.ttbar_trees['central'], + + 'amcatnlo' : measurementConfig.ttbar_amc_trees, + 'madgraph' : measurementConfig.ttbar_madgraph_trees, + 'powhegherwigpp' : measurementConfig.ttbar_powhegherwigpp_trees, + + + 'ueup' : measurementConfig.ttbar_ueup_trees, + 'uedown' : measurementConfig.ttbar_uedown_trees, + 'isrup' : measurementConfig.ttbar_isrup_trees, + 'isrdown' : measurementConfig.ttbar_isrdown_trees, + 'fsrup' : measurementConfig.ttbar_fsrup_trees, + 'fsrdown' : measurementConfig.ttbar_fsrdown_trees, + + 'massdown' : measurementConfig.ttbar_mtop1695_trees, + 'massup' : measurementConfig.ttbar_mtop1755_trees, + + 'jesdown' : measurementConfig.ttbar_jesdown_trees, + 'jesup' : measurementConfig.ttbar_jesup_trees, + 'jerdown' : measurementConfig.ttbar_jerdown_trees, + 'jerup' : measurementConfig.ttbar_jerup_trees, + + 'bjetdown' : measurementConfig.ttbar_trees['central'], + 'bjetup' : 
measurementConfig.ttbar_trees['central'], + 'lightjetdown' : measurementConfig.ttbar_trees['central'], + 'lightjetup' : measurementConfig.ttbar_trees['central'], + + 'leptondown' : measurementConfig.ttbar_trees['central'], + 'leptonup' : measurementConfig.ttbar_trees['central'], + 'pileupUp' : measurementConfig.ttbar_trees['central'], + 'pileupDown' : measurementConfig.ttbar_trees['central'], + + 'ElectronEnUp' : measurementConfig.ttbar_trees['central'], + 'ElectronEnDown' : measurementConfig.ttbar_trees['central'], + 'MuonEnUp' : measurementConfig.ttbar_trees['central'], + 'MuonEnDown' : measurementConfig.ttbar_trees['central'], + 'TauEnUp' : measurementConfig.ttbar_trees['central'], + 'TauEnDown' : measurementConfig.ttbar_trees['central'], + 'UnclusteredEnUp' : measurementConfig.ttbar_trees['central'], + 'UnclusteredEnDown' : measurementConfig.ttbar_trees['central'], + + 'topPtSystematic' : measurementConfig.ttbar_trees['central'], + + }, + } return fileNames[com][sample] channels = [ - channel( 'ePlusJets', 'rootTupleTreeEPlusJets', 'electron'), - channel( 'muPlusJets', 'rootTupleTreeMuPlusJets', 'muon') - ] + channel( 'ePlusJets', 'rootTupleTreeEPlusJets', 'electron'), + channel( 'muPlusJets', 'rootTupleTreeMuPlusJets', 'muon'), +] + + + +def parse_arguments(): + parser = ArgumentParser(__doc__) + parser.add_argument('--topPtReweighting', + action='store_true', + dest='applyTopPtReweighting', + default=False + ) + parser.add_argument('--topEtaReweighting', + dest='applyTopEtaReweighting', + type=int, + default=0 + ) + parser.add_argument('-c', '--centreOfMassEnergy', + dest='centreOfMassEnergy', + type=int, + default=13 + ) + parser.add_argument('--pdfWeight', + type=int, + dest='pdfWeight', + default=-1 + ) + parser.add_argument('--muFmuRWeight', + type=int, + dest='muFmuRWeight', + default=-1 + ) + parser.add_argument('--alphaSWeight', + type=int, + dest='alphaSWeight', + default=-1 + ) + parser.add_argument('--matchingWeight', + type=int, + 
dest='matchingWeight', + default=-1 + ) + parser.add_argument('--nGeneratorWeights', + type=int, + dest='nGeneratorWeights', + default=1 + ) + parser.add_argument('-s', '--sample', + dest='sample', + default='central' + ) + parser.add_argument('-d', '--debug', + action='store_true', + dest='debug', + default=False + ) + parser.add_argument('-n', + action='store_true', + dest='donothing', + default=False + ) + parser.add_argument('-e', + action='store_true', + dest='extraHists', + default=False + ) + parser.add_argument('-f', + action='store_true', + dest='fineBinned', + default=False + ) + args = parser.parse_args() + return args def main(): - - parser = OptionParser() - parser.add_option('--topPtReweighting', dest='applyTopPtReweighting', type='int', default=0 ) - parser.add_option('--topEtaReweighting', dest='applyTopEtaReweighting', type='int', default=0 ) - parser.add_option('-c', '--centreOfMassEnergy', dest='centreOfMassEnergy', type='int', default=13 ) - parser.add_option('--pdfWeight', type='int', dest='pdfWeight', default=-1 ) - parser.add_option('--muFmuRWeight', type='int', dest='muFmuRWeight', default=-1 ) - parser.add_option('--nGeneratorWeights', type='int', dest='nGeneratorWeights', default=1 ) - parser.add_option('-s', '--sample', dest='sample', default='central') - parser.add_option('-d', '--debug', action='store_true', dest='debug', default=False) - parser.add_option('-n', action='store_true', dest='donothing', default=False) - parser.add_option('-e', action='store_true', dest='extraHists', default=False) - parser.add_option('-f',action='store_true', dest='fineBinned', default=False) - - (options, _) = parser.parse_args() - - measurement_config = XSectionConfig( options.centreOfMassEnergy ) + args = parse_arguments() + + measurement_config = XSectionConfig( args.centreOfMassEnergy ) # Input file name file_name = 'crap.root' - if int(options.centreOfMassEnergy) == 13: - # file_name = fileNames['13TeV'][options.sample] - file_name = 
getFileName('13TeV', options.sample, measurement_config) - # if options.generatorWeight >= 0: - # file_name = 'localInputFile.root' + if int(args.centreOfMassEnergy) == 13: + file_name = getFileName('13TeV', args.sample, measurement_config) else: print "Error: Unrecognised centre of mass energy." - pdfWeight = options.pdfWeight - muFmuRWeight = options.muFmuRWeight + pdfWeight = args.pdfWeight + muFmuRWeight = args.muFmuRWeight + alphaSWeight = args.alphaSWeight + matchingWeight = args.matchingWeight # Output file name outputFileName = 'crap.root' - outputFileDir = 'unfolding/%sTeV/' % options.centreOfMassEnergy + outputFileDir = 'unfolding/%sTeV/' % args.centreOfMassEnergy make_folder_if_not_exists(outputFileDir) - energySuffix = '%sTeV' % ( options.centreOfMassEnergy ) + energySuffix = '%sTeV' % ( args.centreOfMassEnergy ) - if options.applyTopEtaReweighting != 0: - if options.applyTopEtaReweighting == 1: + if args.applyTopEtaReweighting != 0: + if args.applyTopEtaReweighting == 1: outputFileName = outputFileDir+'/unfolding_TTJets_%s_asymmetric_withTopEtaReweighting_up.root' % energySuffix - elif options.applyTopEtaReweighting == -1: + elif args.applyTopEtaReweighting == -1: outputFileName = outputFileDir+'/unfolding_TTJets_%s_asymmetric_withTopEtaReweighting_down.root' % energySuffix - elif options.applyTopPtReweighting != 0: - if options.applyTopPtReweighting == 1: - outputFileName = outputFileDir+'/unfolding_TTJets_%s_asymmetric_withTopPtReweighting_up.root' % energySuffix - elif options.applyTopPtReweighting == -1: - outputFileName = outputFileDir+'/unfolding_TTJets_%s_asymmetric_withTopPtReweighting_down.root' % energySuffix + elif args.applyTopPtReweighting: + outputFileName = outputFileDir+'/unfolding_TTJets_%s_asymmetric_withTopPtReweighting.root' % energySuffix elif muFmuRWeight == 1: outputFileName = outputFileDir+'/unfolding_TTJets_%s_asymmetric_1muR2muF.root' % ( energySuffix ) elif muFmuRWeight == 2: @@ -158,11 +232,23 @@ def main(): 
outputFileName = outputFileDir+'/unfolding_TTJets_%s_asymmetric_05muR1muF.root' % ( energySuffix ) elif muFmuRWeight == 8: outputFileName = outputFileDir+'/unfolding_TTJets_%s_asymmetric_05muR05muF.root' % ( energySuffix ) + + elif matchingWeight == 9: + outputFileName = outputFileDir+'/unfolding_TTJets_%s_asymmetric_matching_down.root' % ( energySuffix ) + elif matchingWeight == 18: + outputFileName = outputFileDir+'/unfolding_TTJets_%s_asymmetric_matching_up.root' % ( energySuffix ) + elif matchingWeight >= 0: + outputFileName = outputFileDir+'/unfolding_TTJets_%s_asymmetric_matchingWeight_%i.root' % ( energySuffix, matchingWeight ) + + elif alphaSWeight == 0: + outputFileName = outputFileDir+'/unfolding_TTJets_%s_asymmetric_alphaS_down.root' % ( energySuffix ) + elif alphaSWeight == 1: + outputFileName = outputFileDir+'/unfolding_TTJets_%s_asymmetric_alphaS_up.root' % ( energySuffix ) elif pdfWeight >= 0 and pdfWeight <= 99: outputFileName = outputFileDir+'/unfolding_TTJets_%s_asymmetric_pdfWeight_%i.root' % ( energySuffix, pdfWeight ) - elif options.sample != 'central': - outputFileName = outputFileDir+'/unfolding_TTJets_%s_%s_asymmetric.root' % ( energySuffix, options.sample ) - elif options.fineBinned : + elif args.sample != 'central': + outputFileName = outputFileDir+'/unfolding_TTJets_%s_%s_asymmetric.root' % ( energySuffix, args.sample ) + elif args.fineBinned : outputFileName = outputFileDir+'/unfolding_TTJets_%s.root' % ( energySuffix ) else: outputFileName = outputFileDir+'/unfolding_TTJets_%s_asymmetric.root' % energySuffix @@ -171,31 +257,24 @@ def main(): # Get the tree treeName = "TTbar_plus_X_analysis/Unfolding/Unfolding" - if options.sample == "jesup": + if args.sample == "jesup": treeName += "_JESUp" - elif options.sample == "jesdown": + elif args.sample == "jesdown": treeName += "_JESDown" - elif options.sample == "jerup": + elif args.sample == "jerup": treeName += "_JERUp" - elif options.sample == "jerdown": + elif args.sample == "jerdown": 
treeName += "_JERDown" tree = f.Get(treeName) nEntries = tree.GetEntries() - # weightTree = f.Get('TTbar_plus_X_analysis/Unfolding/GeneratorSystematicWeights') - # if meWeight >= 0 : - # tree.AddFriend('TTbar_plus_X_analysis/Unfolding/GeneratorSystematicWeights') - # tree.SetBranchStatus('genWeight_*',1) - # tree.SetBranchStatus('genWeight_%i' % meWeight, 1) # For variables where you want bins to be symmetric about 0, use abs(variable) (but also make plots for signed variable) allVariablesBins = bin_edges_vis.copy() for variable in bin_edges_vis: - if 'Rap' in variable: allVariablesBins['abs_%s' % variable] = [0,bin_edges_vis[variable][-1]] - recoVariableNames = {} genVariable_particle_names = {} genVariable_parton_names = {} @@ -203,9 +282,9 @@ def main(): outputDirs = {} for variable in allVariablesBins: - if options.debug and variable != 'HT' : continue - - if options.sample in measurement_config.met_systematics and variable not in ['MET', 'ST', 'WPT']: + if args.debug and variable != 'HT' : continue + if args.sample in measurement_config.met_specific_systematics \ + and variable in measurement_config.variables_no_met: continue outputDirs[variable] = {} @@ -217,21 +296,22 @@ def main(): recoVariableName = branchNames[variable] sysIndex = None if variable in ['MET', 'ST', 'WPT']: - if options.sample == "jesup": - recoVariableName += '_METUncertainties' - sysIndex = 2 - elif options.sample == "jesdown": - recoVariableName += '_METUncertainties' - sysIndex = 3 - elif options.sample == "jerup": + if args.sample == "jerup": recoVariableName += '_METUncertainties' sysIndex = 0 - elif options.sample == "jerdown": + elif args.sample == "jerdown": recoVariableName+= '_METUncertainties' sysIndex = 1 - elif options.sample in measurement_config.met_systematics: + elif args.sample == "jesup": recoVariableName += '_METUncertainties' - sysIndex = measurement_config.met_systematics[options.sample] + sysIndex = 2 + elif args.sample == "jesdown": + recoVariableName += 
'_METUncertainties' + sysIndex = 3 + # Dont need this? + elif args.sample in measurement_config.met_systematics: + recoVariableName += '_METUncertainties' + sysIndex = measurement_config.met_systematics[args.sample] genVariable_particle_name = None genVariable_parton_name = None @@ -272,7 +352,7 @@ def main(): h['response_parton'] = Hist2D( reco_bin_edges_vis[variable], allVariablesBins[variable], name='response_parton') h['response_without_fakes_parton'] = Hist2D( reco_bin_edges_vis[variable], allVariablesBins[variable], name='response_without_fakes_parton') - if options.fineBinned: + if args.fineBinned: minVar = trunc( allVariablesBins[variable][0] ) maxVar = trunc( max( tree.GetMaximum(genVariable_particle_names[variable]), tree.GetMaximum( recoVariableNames[variable] ) ) * 1.2 ) nBins = int(maxVar - minVar) @@ -319,12 +399,12 @@ def main(): # Counters for studying phase space - nVis = {c.channelName : 0 for c in channels} - nVisNotOffline = {c.channelName : 0 for c in channels} - nOffline = {c.channelName : 0 for c in channels} - nOfflineNotVis = {c.channelName : 0 for c in channels} - nFull = {c.channelName : 0 for c in channels} - nOfflineSL = {c.channelName : 0 for c in channels} + nVis = {c.channelName : 0 for c in channels} + nVisNotOffline = {c.channelName : 0 for c in channels} + nOffline = {c.channelName : 0 for c in channels} + nOfflineNotVis = {c.channelName : 0 for c in channels} + nFull = {c.channelName : 0 for c in channels} + nOfflineSL = {c.channelName : 0 for c in channels} n=0 # Event Loop @@ -342,41 +422,39 @@ def main(): # Don't apply if calculating systematic pileupWeight = event.PUWeight # print event.PUWeight,event.PUWeight_up,event.PUWeight_down - if options.sample == "pileupUp": + if args.sample == "pileupUp": pileupWeight = event.PUWeight_up - elif options.sample == "pileupDown": + elif args.sample == "pileupDown": pileupWeight = event.PUWeight_down # Generator level weight genWeight = event.EventWeight * 
measurement_config.luminosity_scale - # Offline level weights - offlineWeight = pileupWeight - # Lepton weight leptonWeight = event.LeptonEfficiencyCorrection - if options.sample == 'leptonup': + if args.sample == 'leptonup': leptonWeight = event.LeptonEfficiencyCorrectionUp - elif options.sample == 'leptondown': - leptonWeight == event.LeptonEfficiencyCorrectionDown + elif args.sample == 'leptondown': + leptonWeight = event.LeptonEfficiencyCorrectionDown # B Jet Weight bjetWeight = event.BJetWeight - if options.sample == "bjetup": + if args.sample == "bjetup": bjetWeight = event.BJetUpWeight - elif options.sample == "bjetdown": + elif args.sample == "bjetdown": bjetWeight = event.BJetDownWeight - elif options.sample == "lightjetup": + elif args.sample == "lightjetup": bjetWeight = event.LightJetUpWeight - elif options.sample == "lightjetdown": + elif args.sample == "lightjetdown": bjetWeight = event.LightJetDownWeight # Top pt systematic weight topPtSystematicWeight = 1 - if options.sample == 'topPtSystematic': + if args.sample == 'topPtSystematic': topPtSystematicWeight = calculateTopPtSystematicWeight( branch('lepTopPt_parton'), branch('hadTopPt_parton')) + # Offline level weights offlineWeight = event.EventWeight * measurement_config.luminosity_scale offlineWeight *= pileupWeight offlineWeight *= bjetWeight @@ -396,18 +474,23 @@ def main(): offlineWeight *= branch('muFmuRWeight_%i' % muFmuRWeight) pass - if options.applyTopPtReweighting != 0: - ptWeight = calculateTopPtWeight( branch('lepTopPt_parton'), branch('hadTopPt_parton'), options.applyTopPtReweighting) - offlineWeight *= ptWeight - genWeight *= ptWeight + if alphaSWeight == 0 or alphaSWeight == 1: + genWeight *= branch('alphaSWeight_%i' % alphaSWeight) + offlineWeight *= branch('alphaSWeight_%i' % alphaSWeight) + pass - if options.applyTopPtReweighting != 0: - ptWeight = calculateTopPtWeight( branch('lepTopPt_parton'), branch('hadTopPt_parton'), options.applyTopPtReweighting) + if matchingWeight >= 0: + 
genWeight *= branch('matchingWeight_%i' % matchingWeight) + offlineWeight *= branch('matchingWeight_%i' % matchingWeight) + pass + + if args.applyTopPtReweighting != 0: + ptWeight = calculateTopPtWeight( branch('lepTopPt_parton'), branch('hadTopPt_parton'), args.applyTopPtReweighting) offlineWeight *= ptWeight genWeight *= ptWeight - if options.applyTopEtaReweighting != 0: - etaWeight = calculateTopEtaWeight( branch('lepTopRap_parton'), branch('hadTopRap_parton'), options.applyTopEtaReweighting) + if args.applyTopEtaReweighting != 0: + etaWeight = calculateTopEtaWeight( branch('lepTopRap_parton'), branch('hadTopRap_parton'), args.applyTopEtaReweighting) offlineWeight *= etaWeight genWeight *= etaWeight @@ -449,7 +532,9 @@ def main(): nOfflineNotVis[channel.channelName] += offlineWeight for variable in allVariablesBins: - if options.sample in measurement_config.met_systematics and variable not in ['MET', 'ST', 'WPT']: + if args.debug and variable != 'HT' : continue + if args.sample in measurement_config.met_specific_systematics and \ + variable in measurement_config.variables_no_met: continue # # # @@ -467,7 +552,7 @@ def main(): # if recoVariable > allVariablesBins[variable][-1]: # print 'Big reco variable : ',recoVariable # print 'Setting to :',min( recoVariable, allVariablesBins[variable][-1] - 0.000001 ) - if not options.fineBinned: + if not args.fineBinned: recoVariable = min( recoVariable, allVariablesBins[variable][-1] - 0.000001 ) genVariable_particle = branch(genVariable_particle_names[variable]) if 'abs' in variable: @@ -476,7 +561,7 @@ def main(): # # Fill histograms # # histogramsToFill = histograms[variable][channel.channelName] - if not options.donothing: + if not args.donothing: if genSelection: histogramsToFill['truth'].Fill( genVariable_particle, genWeight) @@ -505,7 +590,7 @@ def main(): if fakeSelectionVis: histogramsToFill['fakeVis'].Fill( recoVariable, offlineWeight) - if options.extraHists: + if args.extraHists: if genSelection: 
histogramsToFill['eventWeightHist'].Fill(event.EventWeight) histogramsToFill['genWeightHist'].Fill(genWeight) @@ -515,19 +600,30 @@ def main(): # Output histgorams to file # for variable in allVariablesBins: - if options.sample in measurement_config.met_systematics and variable not in ['MET', 'ST', 'WPT']: + if args.debug and variable != 'HT' : continue + if args.sample in measurement_config.met_systematics and variable not in ['MET', 'ST', 'WPT']: continue for channel in channels: - # Fill phase space info - h = histograms[variable][channel.channelName]['phaseSpaceInfoHist'] - h.SetBinContent(1, nVisNotOffline[channel.channelName] / nVis[channel.channelName]) - h.SetBinContent(2, nOfflineNotVis[channel.channelName] / nOffline[channel.channelName]) - h.SetBinContent(3, nVis[channel.channelName] / nFull[channel.channelName]) - # Selection efficiency for SL ttbar - h.SetBinContent(4, nOfflineSL[channel.channelName] / nFull[channel.channelName]) - # Fraction of offline that are SL - h.SetBinContent(5, nOfflineSL[channel.channelName] / nOffline[channel.channelName]) + if nOffline[channel.channelName] != 0 : + # Fill phase space info + h = histograms[variable][channel.channelName]['phaseSpaceInfoHist'] + h.SetBinContent(1, nVisNotOffline[channel.channelName] / nVis[channel.channelName]) + # h.GetXaxis().SetBinLabel(1, "nVisNotOffline/nVis") + + h.SetBinContent(2, nOfflineNotVis[channel.channelName] / nOffline[channel.channelName]) + # h.GetXaxis().SetBinLabel(2, "nOfflineNotVis/nOffline") + + h.SetBinContent(3, nVis[channel.channelName] / nFull[channel.channelName]) + # h.GetXaxis().SetBinLabel(3, "nVis/nFull") + + # Selection efficiency for SL ttbar + h.SetBinContent(4, nOfflineSL[channel.channelName] / nFull[channel.channelName]) + # h.GetXaxis().SetBinLabel(4, "nOfflineSL/nFull") + + # Fraction of offline that are SL + h.SetBinContent(5, nOfflineSL[channel.channelName] / nOffline[channel.channelName]) + # h.GetXaxis().SetBinLabel(5, "nOfflineSL/nOffline") 
outputDirs[variable][channel.channelName].cd() for h in histograms[variable][channel.channelName]: diff --git a/dps/analysis/BLTUnfold/runJobsCrab.py b/dps/analysis/BLTUnfold/runJobsCrab.py index 8613dc62..08e87821 100755 --- a/dps/analysis/BLTUnfold/runJobsCrab.py +++ b/dps/analysis/BLTUnfold/runJobsCrab.py @@ -7,13 +7,30 @@ '--centreOfMassEnergy 13 -f', '--centreOfMassEnergy 13 -s central', + # '--centreOfMassEnergy 13 -s central --topPtReweighting 1', + # '--centreOfMassEnergy 13 -s central --topPtReweighting -1', + # '--centreOfMassEnergy 13 -s central --topEtaReweighting 1', + # '--centreOfMassEnergy 13 -s central --topEtaReweighting -1', # '--centreOfMassEnergy 13 -s amcatnlo', # '--centreOfMassEnergy 13 -s madgraph', '--centreOfMassEnergy 13 -s powhegherwigpp', # # '--centreOfMassEnergy 13 -s amcatnloherwigpp', - # # ME scale weights + # Top pt + '--centreOfMassEnergy 13 -s topPtSystematic', + + # Underlying event samples + '--centreOfMassEnergy 13 -s ueup', + '--centreOfMassEnergy 13 -s uedown', + + # isr/fsr variations + '--centreOfMassEnergy 13 -s isrup', + '--centreOfMassEnergy 13 -s isrdown', + '--centreOfMassEnergy 13 -s fsrup', + '--centreOfMassEnergy 13 -s fsrdown', + + # ME scale weights '--centreOfMassEnergy 13 --muFmuRWeight 1', '--centreOfMassEnergy 13 --muFmuRWeight 2', '--centreOfMassEnergy 13 --muFmuRWeight 3', @@ -21,7 +38,14 @@ '--centreOfMassEnergy 13 --muFmuRWeight 6', '--centreOfMassEnergy 13 --muFmuRWeight 8', - # # # Top mass + '--centreOfMassEnergy 13 --alphaSWeight 0', + '--centreOfMassEnergy 13 --alphaSWeight 1', + + # ME-PS matching weight + '--centreOfMassEnergy 13 --matchingWeight 9', + '--centreOfMassEnergy 13 --matchingWeight 18', + + # # Top mass '--centreOfMassEnergy 13 -s massup', '--centreOfMassEnergy 13 -s massdown', diff --git a/dps/analysis/BLTUnfold/submitBLTUnfold.description b/dps/analysis/BLTUnfold/submitBLTUnfold.description index ecde298e..1ed76567 100644 --- a/dps/analysis/BLTUnfold/submitBLTUnfold.description +++ 
b/dps/analysis/BLTUnfold/submitBLTUnfold.description @@ -15,4 +15,4 @@ request_memory=500 # use the ENV that is provided getenv = true -queue 138 +queue 142 diff --git a/dps/analysis/search/test.root b/dps/analysis/search/test.root deleted file mode 100644 index 51645386..00000000 Binary files a/dps/analysis/search/test.root and /dev/null differ diff --git a/dps/analysis/unfolding_tests/makeConfig.py b/dps/analysis/unfolding_tests/00_makeConfig.py similarity index 92% rename from dps/analysis/unfolding_tests/makeConfig.py rename to dps/analysis/unfolding_tests/00_makeConfig.py index 8f28c057..852cd3e7 100644 --- a/dps/analysis/unfolding_tests/makeConfig.py +++ b/dps/analysis/unfolding_tests/00_makeConfig.py @@ -3,11 +3,9 @@ from dps.utils.file_utilities import make_folder_if_not_exists com = 13 -fitVars = "M3_angle_bl" - config = XSectionConfig( com ) -make_folder_if_not_exists('config/unfolding/FullPS/') +# make_folder_if_not_exists('config/unfolding/FullPS/') make_folder_if_not_exists('config/unfolding/VisiblePS/') for channel in config.analysis_types.keys(): @@ -48,7 +46,7 @@ histogramTemplate = "%s_%s" % ( variable, channel ) outputJson = { "output_folder": "plots/unfolding/bestRegularisation/VisiblePS", - "output_format": ["png", "pdf"], + "output_format": ["pdf"], "centre-of-mass energy" : com, "channel": "%s" % channel, "variable": "%s" % variable, @@ -66,7 +64,7 @@ # "histogram": "%s/measuredVis" % ( histogramTemplate ), }, "data" : { - "file": "data/normalisation/background_subtraction/%sTeV/%s/VisiblePS/central/normalisation_%s_patType1CorrectedPFMet.txt" % ( com, variable, channel), + "file": "data/normalisation/background_subtraction/%sTeV/%s/VisiblePS/central/normalisation_%s.txt" % ( com, variable, channel), "histogram": "TTJet" }, } diff --git a/dps/analysis/unfolding_tests/getBestTau.py b/dps/analysis/unfolding_tests/01_getBestTau.py similarity index 70% rename from dps/analysis/unfolding_tests/getBestTau.py rename to 
dps/analysis/unfolding_tests/01_getBestTau.py index 305b14dc..216b4b87 100644 --- a/dps/analysis/unfolding_tests/getBestTau.py +++ b/dps/analysis/unfolding_tests/01_getBestTau.py @@ -17,8 +17,9 @@ usage: python getBestTau.py config.json # for 13 TeV in the visible phase space : - python dps/analysis/unfolding_tests/getBestTau.py config/unfolding/VisiblePS/*.json -n 100 --refold_plots --test + python dps/analysis/unfolding_tests/01_getBestTau.py config/unfolding/VisiblePS/*.json -n 100 -t 0.005 --refold_plots --test -n = number of tau points + -t = specific tau value --refold_plots = output some comparison plots for every tau (suggest few tau) --test = runs the measured distribution as data. Should return P(Chi2|NDF) of 0 i.e. exact ''' @@ -36,12 +37,13 @@ from dps.utils.plotting import Histogram_properties from dps.config import CMS from dps.config.latex_labels import variables_latex -from ROOT import TUnfoldDensity, TUnfold, TCanvas, TPad, TMath, gROOT, TRandom3 +from ROOT import TUnfoldDensity, TUnfold, TCanvas, TPad, TLegend, TMath, gROOT, TRandom3 from dps.config.variable_binning import reco_bin_edges_vis # , gen_bin_edges_vis from dps.utils.Unfolding import Unfolding, get_unfold_histogram_tuple, removeFakes from dps.utils.file_utilities import read_data_from_JSON, make_folder_if_not_exists +from dps.utils.pandas_utilities import read_tuple_from_file from dps.utils.hist_utilities import hist_to_value_error_tuplelist, value_error_tuplelist_to_hist import pandas as pd @@ -84,10 +86,11 @@ def __init__( self, input_values ): def __set_unfolding_histograms__( self ): # at the moment only one file is supported for the unfolding input - files = set( [self.truth['file'], - self.gen_vs_reco['file'], - self.measured['file']] - ) + files = set( + [self.truth['file'], + self.gen_vs_reco['file'], + self.measured['file']] + ) if len( files ) > 1: print "Currently not supported to have different files for truth, gen_vs_reco and measured" sys.exit() @@ -121,7 +124,7 @@ def 
__set_unfolding_histograms__( self ): edges = [] edges = reco_bin_edges_vis[self.variable] - json_input = read_data_from_JSON(data_file) + json_input = read_tuple_from_file(data_file) if data_key == "": # JSON file == histogram self.h_data = value_error_tuplelist_to_hist(json_input, edges) @@ -141,51 +144,72 @@ def main(): clear_old_df('tables/taufinding/') for input_values, json_file in zip( input_values_sets, json_input_files ): - # print '\nProcessing', json_file + if 'combined' in json_file: continue + # Initialise the TauFinding class regularisation_settings = TauFinding( input_values ) - # Set additional elemtents - regularisation_settings.taus_to_test = get_tau_list(args.n_ticks_in_log) - variable = regularisation_settings.variable channel = regularisation_settings.channel com = regularisation_settings.centre_of_mass_energy - if 'muon' not in channel : continue + + # Specific channel or variable + if args.ch: + if args.ch not in channel: continue + if args.var: + if args.var not in variable: continue + + print 'Running for:' print 'Variable = {0}, channel = {1}, sqrt(s) = {2}'.format(variable, channel, com) + # Set additional elements + regularisation_settings.taus_to_test = get_tau_values(args.n_tau_in_log) + isTauCalculator = True + + # Specific unfolding tests go here + if args.specific_tau is not None: + regularisation_settings.taus_to_test = [args.specific_tau] + df_chi2_specific_tau = get_chi2(regularisation_settings, args) + isTauCalculator = False + if args.run_measured_as_data: regularisation_settings.taus_to_test = [0] regularisation_settings.h_data = regularisation_settings.h_measured - df_chi2 = get_chi2s_of_tau_range(regularisation_settings, args) - - if args.perform_varied_measured_unfolding_test: - h_data = hist_to_value_error_tuplelist(regularisation_settings.h_data) - h_data_varied = [(return_rnd_Poisson(val),return_rnd_Poisson(err)) for val, err in h_data ] - h_data_varied = value_error_tuplelist_to_hist(h_data_varied, 
reco_bin_edges_vis[variable]) - regularisation_settings.h_data = h_data_varied - df_chi2_smeared = get_chi2s_of_tau_range(regularisation_settings, args, unfold_test=True) - print df_chi2_smeared - # No point in trying to find best tau if it is given as 0... - sys.exit() + df_chi2_measured = get_chi2(regularisation_settings, args) + isTauCalculator = False + + if args.run_smeared_measured_as_data: + regularisation_settings.taus_to_test = [0] + regularisation_settings.h_data = regularisation_settings.h_measured + h_data = hist_to_value_error_tuplelist(regularisation_settings.h_data) + h_data_varied = [(return_rnd_Poisson(val),return_rnd_Poisson(err)) for val, err in h_data ] + h_data_varied = value_error_tuplelist_to_hist(h_data_varied, reco_bin_edges_vis[variable]) + regularisation_settings.h_data = h_data_varied + df_chi2_smeared = get_chi2(regularisation_settings, args, smearing_test=True) + isTauCalculator = False - # Find the corresponding Chi2 and write to file - df_chi2 = get_chi2s_of_tau_range(regularisation_settings, args) - print df_chi2 - - # Have the dataframes now - albeit read to a file - # Read in each one corresponding to their channel - # Find the best tau and print to screen - for channel in ['electron', 'muon', 'combined']: - chi2_cut = 0.005 - path = regularisation_settings.outpath+'tbl_'+channel+'_tauscan.txt' - df_chi2 = get_df_from_file(path) - if df_chi2 is None: continue - print '\n', "1 - P(Chi2|NDF)", '\n', df_chi2, '\n' - - # cutoff to be changed to 0.001 when able to - best_taus = interpolate_tau(chi2_cut, df_chi2) - chi2_to_plots(df_chi2, regularisation_settings, chi2_cut, channel) - print_results_to_screen(best_taus, channel) + # Dont need to calculate chi2 for given tau tests + if not isTauCalculator: continue + + # Find Chi2 for each tau and write to file + df_chi2 = get_chi2(regularisation_settings, args) + + # Dont need to calculate tau for given tests + if not isTauCalculator: sys.exit() + + # Have the dataframes now - albeit read 
to a file + # Read in each one corresponding to their channel + # Find the best tau and print to screen + for channel in ['electron', 'muon', 'combined']: + chi2_cut = 0.005 + path = regularisation_settings.outpath+'tbl_'+channel+'_tauscan.txt' + df_chi2 = get_df_from_file(path) + if df_chi2 is None: continue + print '\n', "1 - P(Chi2|NDF)", '\n', df_chi2, '\n' + + # cutoff to be changed to 0.001 when able to + best_taus = interpolate_tau(chi2_cut, df_chi2) + chi2_to_plots(args, df_chi2, regularisation_settings, chi2_cut, channel) + print_results_to_screen(best_taus, channel) return @@ -197,27 +221,42 @@ def parse_options(): parser.add_argument("in_files", nargs='*', help="List of the input files") - parser.add_argument( "-t", "--test", + parser.add_argument( "--measured_test", dest = "run_measured_as_data", action = "store_true", - help = "For debugging - run the measured distribution as data." ) - parser.add_argument( "-v", "--vary_measured_test", - dest = "perform_varied_measured_unfolding_test", + help = "For debugging. Run the measured distribution as data." ) + parser.add_argument( "--smeared_test", + dest = "run_smeared_measured_as_data", action = "store_true", - help = "Unfolding test. Vary measured vals by Poisson then find ChiSq" ) + help = "Test. Run (poisson) smeared measured distribution as data" ) parser.add_argument( "-p", "--refold_plots", - dest = "run_refold_plots", + dest = "create_refold_plots", action = "store_true", - help = "For debugging - output unfolded vs refolded for each tau" ) - parser.add_argument( "-n", "--n_ticks_in_log", - dest = "n_ticks_in_log", + help = "Plot. 
Produce unfolded vs refolded plot for each tau run" ) + parser.add_argument( "-n", "--n_tau_in_log", + dest = "n_tau_in_log", + default = 10, + type = int, + help = "How many taus in the range do you want" ) + parser.add_argument( "-t", "--tau", + dest = "specific_tau", + default = None, + type = float, + help = "Run the scan for a single specific tau value" ) parser.add_argument( "-u", "--unfolded_binning", dest = "unfolded_binning", action = "store_true", help = "Run the tau scans for unfolded (gen) binning" ) + parser.add_argument( "-c", "--channel", + dest = "ch", + default = None, + type = str, + help = "Which channel to run over" ) + parser.add_argument( "-v", "--variable", + dest = "var", + default = None, + type = str, + help = "Which variable to run over" ) args = parser.parse_args() if args.unfolded_binning: @@ -238,37 +277,36 @@ def clear_old_df(path): ''' Delete any previous dataframe. (Code would append a new dataframe to file instead of replace) ''' - for root, dirs, files in os.walk(path, topdown=False): for name in files: os.remove(os.path.join(root, name)) return -def get_tau_list(logSpacing, logMin = log10(pow(10,-16)), logMax = log10(1)): +def get_tau_values(logSpacing, logMin = log10(pow(10,-16)), logMax = log10(1)): ''' - Large scanning range from unity to 10^-8. Split into equal points based on log system + Large scanning range from 1 to 10^-16. Split into equal points based on log system given the number of tau points to scan over. 
''' - taus = [] r = int(logMax - logMin) - tau_test_range = [10**(logMax - i/float(logSpacing)) for i in range(r*logSpacing)] - return tau_test_range + tau_values = [10**(logMax - i/float(logSpacing)) for i in range(r*logSpacing)] + return tau_values -def get_chi2s_of_tau_range( regularisation_settings, args, unfold_test=False ): +def get_chi2( regularisation_settings, args, smearing_test=False ): ''' Takes each tau value, unfolds and refolds, calcs the chi2, the prob of chi2 given ndf (n_bins) and returns a dictionary of (1-P(Chi2|NDF)) for each tau For measured test where we only worry about tau=0 outputs tau variables to data frame (+smeared measured values) ''' h_truth, h_response, h_measured, h_data, h_fakes = regularisation_settings.get_histograms() - if not args.run_measured_as_data : + + # Dont remove any fakes if we are using the true mc distribution + if not args.run_measured_as_data and not args.run_smeared_measured_as_data: h_data = removeFakes( h_measured, h_fakes, h_data ) variable = regularisation_settings.variable taus = regularisation_settings.taus_to_test chi2_ndf = [] for tau in taus: - unfolding = Unfolding( h_data, h_truth,
(hist_to_value_error_tuplelist(regularisation_settings.h_refolded)) + regularisation_settings.h_refolded = unfolding.refolded_data + regularisation_settings.h_data = unfolding.data + if args.create_refold_plots: + plot_data_vs_refold(args, regularisation_settings, tau) + # Calculate the chi2 between refold and unfold chi2 = unfolding.getUnfoldRefoldChi2() + # Calculate the Prob chi2 given NDF prob = TMath.Prob( chi2, ndf ) + # 1-P(Chi2|NDF) chi2_ndf.append(1-prob) # print( tau, chi2, prob, 1-prob ) - # Create pandas dictionary + # Create tau and Chi2 dictionary d_chi2 = {variable : pd.Series( chi2_ndf )} d_taus = {'tau' : pd.Series( taus )} - if unfold_test: + if smearing_test: d_tau_vars = { variable : { 'Tau' : tau, @@ -383,8 +423,6 @@ def chi2_to_df(chi2, taus, regularisation_settings, appendage=''): # return the new df return df_new - - def get_df_from_file(p): ''' Get the dataframe from the file @@ -399,11 +437,10 @@ def get_df_from_file(p): print "Cannot find path : ", p return df -def chi2_to_plots(df_chi2, regularisation_settings, chi2_cut, channel): +def chi2_to_plots(args,df_chi2, regularisation_settings, chi2_cut, channel): ''' Plot chi2 figures ''' - # variable = regularisation_settings.variable plot_outpath = regularisation_settings.outpath.replace('tables/', 'plots/') + 'tauscan/' make_folder_if_not_exists(plot_outpath) @@ -414,107 +451,89 @@ def chi2_to_plots(df_chi2, regularisation_settings, chi2_cut, channel): for var in df_chi2.columns: if var == 'tau': continue + # Plot tau distributions for each variable plt.loglog( df_chi2['tau'], df_chi2[var], label = variables_latex[var], ) + # Plot current chi2 cutoff value plt.axhline(y=chi2_cut, color='black', linestyle='dashed') + + # Plot legend handles, labels = ax1.get_legend_handles_labels() ax1.legend(handles, labels, loc=4) + + # Plot axis titles ax1.set_xlabel('Regularisation Parameter \ensuremath{\\tau}') ax1.set_ylabel('\ensuremath{1-P(\\chi^{2}|NDF)}') + # Save plot pltName = 
os.path.join(plot_outpath,'{channel}_all_tauscan.pdf'.format(channel = channel)) + if args.unfolded_binning: + pltName = pltName.replace('.pdf', '_unf_binning.pdf') fig1.savefig(pltName) - print "Written plots to {plot_outpath}{channel}_all_tauscan.pdf".format(plot_outpath = plot_outpath, channel = channel) - + print "Written plots to {plot_outpath}{pltName}".format(plot_outpath = plot_outpath, pltName = pltName) return -def interpolate_tau(cutoff, df_chi2): - ''' - Interpolate to get best tau from tau scan - 1e-8 < tau < 1 - n < i < 0 - - chisq_lo chisq cutoff chisq_hi - |------------|-------------------------| - a b - Find ratio a/(a+b) - Interpolate to find best tau - tau = tau_lo + ratio * (tau_hi - tau_lo) - - | - \|/ - |--------------------------------------| - tau_lo best tau tau_hi - ''' - best_tau = {} - for variable in df_chi2.columns: - if variable == 'tau': continue - - i=0 - for chisq in df_chi2[variable]: - if chisq > cutoff: - i+=1 - continue - else: - break - if chisq > cutoff: - print "{var} exceeds required cut".format(var=variable) - # last i becomes out of range - best_tau[variable] = df_chi2['tau'][i-1] - else: - chisq_lo = df_chi2[variable][i+1] - chisq_hi = df_chi2[variable][i] - ratio = (cutoff - chisq_lo) / (chisq_hi - chisq_lo) - tau_lo = df_chi2['tau'][i+1] - tau_hi = df_chi2['tau'][i] - tau = tau_lo + ratio*(tau_hi - tau_lo) - best_tau[variable] = tau - return best_tau - - def plot_data_vs_refold(args, regularisation_settings, tau): ''' Plot the differences between the unfolded and refolded distributions TODO Include also with best tau - redo unfolding with best tau then come here ''' - tau = str(tau).replace('.', 'p') - # data = hist_to_value_error_tuplelist(regularisation_settings.h_data) - # measured = hist_to_value_error_tuplelist(regularisation_settings.h_measured) + from ROOT import gStyle + variable = regularisation_settings.variable channel = regularisation_settings.channel - plot_outpath = 
regularisation_settings.outpath.replace('tables/', 'plots/')+variable+'/' + plot_outpath = regularisation_settings.outpath.replace('tables/', 'plots/')+'tauscan/taus/' make_folder_if_not_exists(plot_outpath) - outfile = plot_outpath+channel+'_unfold_refold_test_tau_'+tau+'.pdf' + + # tau as string name for output + tau = str(tau).replace('.', 'p') + + outfile = plot_outpath+'data_vs_refold_'+channel+'_'+variable+'_tau_'+tau+'.pdf' if args.run_measured_as_data: - outfile = plot_outpath+channel+'_run_measured_as_data_tau_'+tau+'.pdf' + outfile = plot_outpath+'measured_vs_refold_'+channel+'_'+variable+'_tau_'+tau+'.pdf' + if args.run_smeared_measured_as_data: + outfile = plot_outpath+'smeared_vs_refold_'+channel+'_'+variable+'_tau_'+tau+'.pdf' + if args.unfolded_binning: + outfile = outfile.replace('.pdf', '_unf_binning.pdf') + + c = TCanvas('c1','c1',1000,800) + gStyle.SetOptStat(0) - c = TCanvas('c1','c1',600,400) - c.SetFillColor(2); p1 = TPad("pad1", "p1",0.0,0.2,1.0,1.0,21) - p2 = TPad("pad2", "p2",0.0,0.0,1.0,0.2,22) p1.SetFillColor(0); - p2.SetFillColor(0); p1.Draw() + p2 = TPad("pad2", "p2",0.0,0.0,1.0,0.2,22) + p2.SetFillColor(0); p2.Draw() + p1.cd() - regularisation_settings.h_refolded.SetMarkerStyle(10); - regularisation_settings.h_refolded.SetMarkerColor(4); - # regularisation_settings.h_refolded.SetMarkerSize(10); - regularisation_settings.h_refolded.Draw() - regularisation_settings.h_data.SetFillColor(3); - regularisation_settings.h_data.Draw("hist same"); + regularisation_settings.h_data.SetTitle("Data vs Refolded Data;;NEvents") + regularisation_settings.h_data.Draw() + + regularisation_settings.h_refolded.SetLineColor(2) + regularisation_settings.h_refolded.Draw("same") + + leg1 = TLegend(0.7, 0.8, 0.9, 0.9) + leg1.SetLineColor(0) + leg1.SetFillColor(0) + leg1.AddEntry(regularisation_settings.h_data, "Data") + leg1.AddEntry(regularisation_settings.h_refolded, "Refolded Data") + leg1.Draw() p2.cd() h_ratio = regularisation_settings.h_data.Clone() 
h_ratio.Divide(regularisation_settings.h_refolded) - h_ratio.SetMarkerSize(0.1); + h_ratio.SetTitle(";"+variable+";") + h_ratio.SetLineColor(1); h_ratio.Draw() + c.SaveAs(outfile) c.Delete() + print "Written plots to {outfile}".format(outfile = outfile) return def print_results_to_screen(best_taus, channel): @@ -527,7 +546,6 @@ def print_results_to_screen(best_taus, channel): print '"{0}" : {1},'.format(variable, tau) return - def return_rnd_Poisson(mu): ''' Returning a random poisson number @@ -541,10 +559,53 @@ def return_rnd_Poisson(mu): gRandom = TRandom3() gRandom.SetSeed(0) # Cache for quicker running - landau = gRandom.Landau poisson = gRandom.Poisson rnd_po = poisson( mu ) return rnd_po + +def interpolate_tau(cutoff, df_chi2): + ''' + Interpolate to get best tau from tau scan + 1e-8 < tau < 1 + n < i < 0 + + chisq_lo chisq cutoff chisq_hi + |------------|-------------------------| + a b + Find ratio a/(a+b) + Interpolate to find best tau + tau = tau_lo + ratio * (tau_hi - tau_lo) + + | + \|/ + |--------------------------------------| + tau_lo best tau tau_hi + ''' + best_tau = {} + for variable in df_chi2.columns: + if variable == 'tau': continue + + i=0 + for chisq in df_chi2[variable]: + if chisq > cutoff: + i+=1 + continue + else: + break + if chisq > cutoff: + print "{var} exceeds required cut".format(var=variable) + # last i becomes out of range + best_tau[variable] = df_chi2['tau'][i-1] + else: + chisq_lo = df_chi2[variable][i+1] + chisq_hi = df_chi2[variable][i] + ratio = (cutoff - chisq_lo) / (chisq_hi - chisq_lo) + tau_lo = df_chi2['tau'][i+1] + tau_hi = df_chi2['tau'][i] + tau = tau_lo + ratio*(tau_hi - tau_lo) + best_tau[variable] = tau + return best_tau + if __name__ == '__main__': set_root_defaults( set_batch = True, msg_ignore_level = 3001 ) main() diff --git a/dps/analysis/unfolding_tests/get_best_regularisation_TUnfold.py b/dps/analysis/unfolding_tests/01_get_best_regularisation_TUnfold.py similarity index 98% rename from 
dps/analysis/unfolding_tests/get_best_regularisation_TUnfold.py rename to dps/analysis/unfolding_tests/01_get_best_regularisation_TUnfold.py index 0e92dc30..9502c74a 100644 --- a/dps/analysis/unfolding_tests/get_best_regularisation_TUnfold.py +++ b/dps/analysis/unfolding_tests/01_get_best_regularisation_TUnfold.py @@ -36,6 +36,7 @@ from dps.config import CMS from ROOT import TGraph, TSpline3, Double, TUnfoldDensity, TUnfold, TDecompSVD, TMatrixD, TCanvas, gROOT from rootpy import asrootpy +from dps.utils.pandas_utilities import read_tuple_from_file rc('font',**CMS.font) rc( 'text', usetex = True ) @@ -107,7 +108,7 @@ def __set_unfolding_histograms__( self ): edges = [] edges = reco_bin_edges_vis[self.variable] - json_input = read_data_from_JSON(data_file) + json_input = read_tuple_from_file(data_file) if data_key == "": # JSON file == histogram self.h_data = value_error_tuplelist_to_hist(json_input, edges) @@ -124,6 +125,7 @@ def main(): results = {} for input_values, json_file in zip( input_values_sets, json_input_files ): print 'Processing', json_file + if 'combined' in json_file: continue regularisation_settings = RegularisationSettings( input_values ) variable = regularisation_settings.variable channel = regularisation_settings.channel @@ -142,7 +144,6 @@ def main(): h_response, fakes = None, method = 'TUnfold', - k_value = -1, tau = 0. 
) @@ -266,7 +267,6 @@ def get_best_tau( regularisation_settings ): h_response, fakes = None, method = 'TUnfold', - k_value = -1, tau = -1 ) diff --git a/dps/analysis/unfolding_tests/compare_reweighting.py b/dps/analysis/unfolding_tests/02_compare_reweighting.py similarity index 63% rename from dps/analysis/unfolding_tests/compare_reweighting.py rename to dps/analysis/unfolding_tests/02_compare_reweighting.py index 94b10812..1eb0fdcc 100644 --- a/dps/analysis/unfolding_tests/compare_reweighting.py +++ b/dps/analysis/unfolding_tests/02_compare_reweighting.py @@ -9,20 +9,15 @@ from dps.utils.plotting import compare_measurements, Histogram_properties from dps.config import latex_labels - - def main(): config = XSectionConfig(13) - file_for_powhegPythia = File(config.unfolding_central, 'read') - file_for_ptReweight_up = File(config.unfolding_ptreweight_up, 'read') - file_for_ptReweight_down = File(config.unfolding_ptreweight_down, 'read') - file_for_etaReweight_up = File(config.unfolding_etareweight_up, 'read') - file_for_etaReweight_down = File(config.unfolding_etareweight_down, 'read') - file_for_data_template = 'data/normalisation/background_subtraction/13TeV/{variable}/VisiblePS/central/normalisation_combined_patType1CorrectedPFMet.txt' - - + file_for_powhegPythia = File(config.unfolding_central, 'read') + file_for_ptReweight = File(config.unfolding_ptreweight, 'read') + file_for_etaReweight_up = File(config.unfolding_etareweight_up, 'read') + file_for_etaReweight_down = File(config.unfolding_etareweight_down, 'read') + file_for_data_template = 'data/normalisation/background_subtraction/13TeV/{variable}/VisiblePS/central/normalisation_combined.txt' for channel in ['combined']: for variable in config.variables: @@ -43,8 +38,8 @@ def main(): # Get the reweighted powheg pythia distributions - _, _, response_pt_reweighted_up, _ = get_unfold_histogram_tuple( - inputfile=file_for_ptReweight_up, + _, _, response_pt_reweighted, _ = get_unfold_histogram_tuple( + 
inputfile=file_for_ptReweight, variable=variable, channel=channel, centre_of_mass=13, @@ -52,21 +47,9 @@ visiblePS=True ) - measured_pt_reweighted_up = asrootpy(response_pt_reweighted_up.ProjectionX('px',1)) - truth_pt_reweighted_up = asrootpy(response_pt_reweighted_up.ProjectionY()) + measured_pt_reweighted = asrootpy(response_pt_reweighted.ProjectionX('px',1)) + truth_pt_reweighted = asrootpy(response_pt_reweighted.ProjectionY()) - _, _, response_pt_reweighted_down, _ = get_unfold_histogram_tuple( - inputfile=file_for_ptReweight_down, - variable=variable, - channel=channel, - centre_of_mass=13, - load_fakes=False, - visiblePS=True - ) - - measured_pt_reweighted_down = asrootpy(response_pt_reweighted_down.ProjectionX('px',1)) - truth_pt_reweighted_down = asrootpy(response_pt_reweighted_down.ProjectionY()) - _, _, response_eta_reweighted_up, _ = get_unfold_histogram_tuple( inputfile=file_for_etaReweight_up, variable=variable, @@ -101,9 +84,9 @@ hp = Histogram_properties() hp.name = 'Reweighting_check_{channel}_{variable}_at_{com}TeV'.format( - channel=channel, - variable=variable, - com='13', + channel=channel, + variable=variable, + com='13', ) v_latex = latex_labels.variables_latex[variable] @@ -115,28 +98,26 @@ hp.title = 'Reweighting check for {variable}'.format(variable=v_latex) measured_central.Rebin(2) - measured_pt_reweighted_up.Rebin(2) - measured_pt_reweighted_down.Rebin(2) + measured_pt_reweighted.Rebin(2) measured_eta_reweighted_up.Rebin(2) measured_eta_reweighted_down.Rebin(2) data.Rebin(2) measured_central.Scale( 1 / measured_central.Integral() ) - measured_pt_reweighted_up.Scale( 1 / measured_pt_reweighted_up.Integral() ) - measured_pt_reweighted_down.Scale( 1 / measured_pt_reweighted_down.Integral() ) + measured_pt_reweighted.Scale( 1 / measured_pt_reweighted.Integral() ) measured_eta_reweighted_up.Scale( 1 / measured_eta_reweighted_up.Integral() ) measured_eta_reweighted_down.Scale( 1/ 
measured_eta_reweighted_down.Integral() ) data.Scale( 1 / data.Integral() ) compare_measurements( - models = {'Central' : measured_central, 'PtReweighted Up' : measured_pt_reweighted_up, 'PtReweighted Down' : measured_pt_reweighted_down, 'EtaReweighted Up' : measured_eta_reweighted_up, 'EtaReweighted Down' : measured_eta_reweighted_down}, - measurements = {'Data' : data}, - show_measurement_errors=True, - histogram_properties=hp, - save_folder='plots/unfolding/reweighting_check', - save_as=['pdf'] - ) + models = {'Central' : measured_central, 'PtReweighted' : measured_pt_reweighted, 'EtaReweighted Up' : measured_eta_reweighted_up, 'EtaReweighted Down' : measured_eta_reweighted_down}, + measurements = {'Data' : data}, + show_measurement_errors=True, + histogram_properties=hp, + save_folder='plots/unfolding/reweighting_check', + save_as=['pdf'] + ) if __name__ == '__main__': diff --git a/dps/analysis/unfolding_tests/closure_test.py b/dps/analysis/unfolding_tests/03_closure_test.py similarity index 100% rename from dps/analysis/unfolding_tests/closure_test.py rename to dps/analysis/unfolding_tests/03_closure_test.py diff --git a/dps/analysis/unfolding_tests/create_toy_mc.py b/dps/analysis/unfolding_tests/04_create_toy_mc.py similarity index 100% rename from dps/analysis/unfolding_tests/create_toy_mc.py rename to dps/analysis/unfolding_tests/04_create_toy_mc.py diff --git a/dps/analysis/unfolding_tests/create_unfolding_pull_data.py b/dps/analysis/unfolding_tests/05_create_unfolding_pull_data.py similarity index 100% rename from dps/analysis/unfolding_tests/create_unfolding_pull_data.py rename to dps/analysis/unfolding_tests/05_create_unfolding_pull_data.py diff --git a/dps/analysis/unfolding_tests/make_unfolding_pull_plots.py b/dps/analysis/unfolding_tests/06_make_unfolding_pull_plots.py similarity index 100% rename from dps/analysis/unfolding_tests/make_unfolding_pull_plots.py rename to dps/analysis/unfolding_tests/06_make_unfolding_pull_plots.py diff --git 
a/dps/analysis/unfolding_tests/README.md b/dps/analysis/unfolding_tests/README.md index ea65c67e..beb29753 100644 --- a/dps/analysis/unfolding_tests/README.md +++ b/dps/analysis/unfolding_tests/README.md @@ -14,15 +14,15 @@ Summary of what to run: Make the configs. These store where the input unfolding files and input data (ttbar normalisation) are. Check that you pick up the correct files - typically they are in your local dps directory, or on hdfs. ```shell -python src/unfolding_tests/makeConfig.py +python src/unfolding_tests/00_makeConfig.py ``` You can get the best regularisation for one variable/phase space/channel (i.e. one config file), example: ```shell -python src/unfolding_tests/get_best_regularisation_TUnfold.py config/unfolding/VisiblePS/abs_lepton_eta_13TeV_combined_channel.json +python src/unfolding_tests/01_get_best_regularisation_TUnfold.py config/unfolding/VisiblePS/abs_lepton_eta_13TeV_combined_channel.json ``` or run on several using wildcards. To run on all 13TeV variables, combined channel, in the visible phase: ```shell -python src/unfolding_tests/get_best_regularisation_TUnfold.py config/unfolding/VisiblePS/*_13TeV_combined_channel.json +python src/unfolding_tests/01_get_best_regularisation_TUnfold.py config/unfolding/VisiblePS/*_13TeV_combined_channel.json ``` ## Reweighting check @@ -35,7 +35,7 @@ and the underlying true distribution in data, should then be smaller than (or si the bias seen in the unfolded distributions for the reweighted samples. ```shell -python src/unfolding_tests/compare_reweighting.py +python src/unfolding_tests/02_compare_reweighting.py ``` @@ -49,21 +49,21 @@ For the bias plots, the central case is plotted as points, and all other MC samp Currently, the central sample (Powheg Pythia) is plotted, along with two reweighted MC samples. The reweighting is performed on the top pt. 
```shell -python src/unfolding_tests/closure_test.py +python src/unfolding_tests/03_closure_test.py ``` ## Creating toy MC First we need to create a set of toy MC. Run ```shell -python src/unfolding_tests/create_toy_mc.py -s powhegPythia +python src/unfolding_tests/04_create_toy_mc.py -s powhegPythia ``` This will create 300 toy mc (300 is the default amount, probably need more for a full study) based on the powheg pythia sample. Other possible options for -s are currently "madgraph" and "amcatnlo" For more information about available parameters, do ```shell -python src/unfolding_tests/create_toy_mc.py -h +python src/unfolding_tests/04_create_toy_mc.py -h ``` This will create a root file in data/toy_mc named toy_mc_powhegPythia_N_300_13TeV.root (generally toy_mc__N__TeV.root). @@ -71,7 +71,7 @@ This file can be used in the next step. ## Creating pull distributions ```shell -python src/unfolding_tests/create_unfolding_pull_data.py -f data/toy_mc/toy_mc_powhegPythia_N_300_13TeV.root -c combined -n 10 -v HT -s powhegPythia --tau 0.001 +python src/unfolding_tests/05_create_unfolding_pull_data.py -f data/toy_mc/toy_mc_powhegPythia_N_300_13TeV.root -c combined -n 10 -v HT -s powhegPythia --tau 0.001 ``` This will consider the toy mc file, for HT in the combined channel. It will take the first 10 toy mc in that file, and unfold with a tau value of 0.001. 
Output will be placed in: @@ -95,9 +95,9 @@ Passing --scan_tau will tell the script to submit jobs for a range of tau values ## Analysing pull data Making the plots (just pass a file created by the previous step): ```shell -python src/unfolding_tests/make_unfolding_pull_plots.py data/pull_data/13TeV/HT/powhegPythia/Pull_data_TUnfold_combined_0.001905.txt +python src/unfolding_tests/06_make_unfolding_pull_plots.py data/pull_data/13TeV/HT/powhegPythia/Pull_data_TUnfold_combined_0.001905.txt ``` for more information on which plots are going to be produce please consult ```shell -python src/unfolding_tests/make_unfolding_pull_plots.py -h +python src/unfolding_tests/06_make_unfolding_pull_plots.py -h ``` diff --git a/dps/analysis/unfolding_tests/unfolding_sandbox.py b/dps/analysis/unfolding_tests/unfolding_sandbox.py index c695043a..2006dabc 100644 --- a/dps/analysis/unfolding_tests/unfolding_sandbox.py +++ b/dps/analysis/unfolding_tests/unfolding_sandbox.py @@ -20,9 +20,10 @@ def main(): for channel in ['combined', 'muon', 'electron']: + # for variable in config.variables: for variable in config.variables: # for variable in ['MET']: - + print variable # tau_value = get_tau_value(config, channel, variable) @@ -33,7 +34,6 @@ def main(): inputfile=file_for_unfolding, variable=variable, channel=channel, - met_type=config.met_type, centre_of_mass=config.centre_of_mass_energy, ttbar_xsection=config.ttbar_xsection, luminosity=config.luminosity, @@ -63,12 +63,12 @@ def main(): measured.SetBinContent(0,0) truth = asrootpy(h_response_ph.ProjectionY()) # print 'Truth from response :',list(truth.y()) - # print 'Truth underflow : ',truth.GetBinContent(0),truth.GetBinContent(truth.GetNbinsX()+1) + # print 'Truth underflow :',truth.GetBinContent(0),truth.GetBinContent(truth.GetNbinsX()+1) # Unfold unfolding = Unfolding( measured, truth, measured, h_response, None, - method=method, tau=tau_value) + method=method, k_value=-1, tau=tau_value) # unfolded_data = unfolding.closureTest() @@ 
-80,8 +80,10 @@ def main(): # print binx, biny,h_response.GetBinContent(binx,biny) # print bin,h_truth.GetBinContent(bin) # print 'Tau :',tau_value - unfolded_results = unfolding.unfold() + # print 'Unfolded :',list( unfolded_results.y() ) + # print unfolding.unfoldObject.GetTau() + # print 'Unfolded :',list( unfolded_results.y() ) refolded_results = unfolding.refold() refolded_results.rebin(2) @@ -96,7 +98,6 @@ def main(): print pValue,1-pValue # print unfolding.unfoldObject.GetTau() - def get_tau_value(config, channel, variable): if channel == 'electron': return config.tau_values_electron[variable] diff --git a/dps/analysis/xsection/00_pick_bins.py b/dps/analysis/xsection/00_pick_bins.py index d24549ef..5d36f236 100644 --- a/dps/analysis/xsection/00_pick_bins.py +++ b/dps/analysis/xsection/00_pick_bins.py @@ -45,7 +45,7 @@ from dps.utils.Calculation import calculate_purities, calculate_stabilities from dps.utils.hist_utilities import rebin_2d from dps.config.xsection import XSectionConfig -from optparse import OptionParser +from argparse import ArgumentParser from dps.config.variable_binning import bin_edges_full, minimum_bin_width from dps.utils.file_utilities import write_data_to_JSON from ROOT import TH1, TCanvas, TLine, gDirectory, TObjArray, TColor, TLegend @@ -59,28 +59,39 @@ def main(): Step 3: Check if it is true for all other histograms. 
If not back to step 2 Step 4: Repeat step 2 & 3 until no mo bins can be created ''' - - parser = OptionParser() - parser.add_option( '-v', dest = "visiblePhaseSpace", action = "store_true", - help = "Consider visible phase space or not" ) - parser.add_option( '-c', dest = "combined", action = "store_true", - help = "Combine channels" ) - parser.add_option( '-r', dest = "redo_resolution", action = "store_true", - help = "Recalculate the resolution plots" ) - ( options, _ ) = parser.parse_args() + parser = ArgumentParser() + parser.add_argument( '-v', + dest = "visiblePhaseSpace", + action = "store_true", + help = "Consider visible phase space or not" + ) + parser.add_argument( '-c', + dest = "combined", + action = "store_true", + help = "Combine channels" + ) + parser.add_argument( '-r', + dest = "redo_resolution", + action = "store_true", + help = "Recalculate the resolution plots" + ) + args = parser.parse_args() measurement_config = XSectionConfig(13) - p_min = 0.6 # 0.5 for MET + # Initialise binning parameters + bin_choices = {} + + # Min Purity and Stability + p_min = 0.6 s_min = 0.6 - # we also want the statistical error to be larger than 5% - # this translates (error -= 1/sqrt(N)) to (1/0.05)^2 = 400 + # 0.5 for MET + + # Min events in bin for appropriate stat unc + # error = 1/sqrt(N) [ unc=5% : (1/0.05)^2 = 400] n_min = 500 n_min_lepton = 500 -# n_min = 200 # N = 200 -> 7.1 % stat error - bin_choices = {} - # variables = bin_edges_full.keys() variables = measurement_config.variables for variable in variables: global var @@ -89,11 +100,13 @@ def main(): variableToUse = variable if 'Rap' in variable: variableToUse = 'abs_%s' % variable - histogram_information = get_histograms( variableToUse, options ) + histogram_information = get_histograms( measurement_config, variableToUse, args ) - if options.redo_resolution: + # Remake the resolution plots from the fine binned unfolding matrix + if args.redo_resolution: 
rs.generate_resolution_plots(histogram_information, variable) + # Claculate the best binning if variable == 'HT': best_binning, histogram_information = get_best_binning( histogram_information , p_min, s_min, n_min, minimum_bin_width[variable], x_min=100. ) elif variable == 'ST': @@ -106,9 +119,12 @@ def main(): best_binning, histogram_information = get_best_binning( histogram_information , p_min, s_min, n_min_lepton, minimum_bin_width[variable], x_min=23. ) elif variable == 'abs_lepton_eta': best_binning, histogram_information = get_best_binning( histogram_information , p_min, s_min, n_min_lepton, minimum_bin_width[variable] ) + elif variable == 'NJets': + best_binning, histogram_information = get_best_binning( histogram_information , p_min, s_min, n_min, minimum_bin_width[variable], is_NJet=True) else: best_binning, histogram_information = get_best_binning( histogram_information , p_min, s_min, n_min, minimum_bin_width[variable] ) + # Symmetric binning for lepton_eta if 'Rap' in variable: for b in list(best_binning): if b != 0.0: @@ -124,6 +140,7 @@ def main(): newLastBinWidth = penultimateBinWidth * 5 best_binning[-1] = best_binning[-2] + newLastBinWidth + # Smooth bin edges if variable == 'abs_lepton_eta': best_binning = [ round(i,2) for i in best_binning ] elif variable != 'NJets' : @@ -131,76 +148,84 @@ def main(): bin_choices[variable] = best_binning + # Print the best binning to screen and JSON print('The best binning for', variable, 'is:') print('bin edges =', best_binning) print('N_bins =', len( best_binning ) - 1) print('The corresponding purities and stabilities are:') for info in histogram_information: - # print_latex_table(info, variable, best_binning) outputInfo = {} outputInfo['p_i'] = info['p_i'] outputInfo['s_i'] = info['s_i'] - outputInfo['N'] = info['N'] + outputInfo['N'] = info['N'] outputInfo['res'] = info['res'] outputJsonFile = 'unfolding/13TeV/binningInfo_%s_%s_FullPS.txt' % ( variable, info['channel'] ) - if options.visiblePhaseSpace: + if 
args.visiblePhaseSpace: outputJsonFile = 'unfolding/13TeV/binningInfo_%s_%s_VisiblePS.txt' % ( variable, info['channel'] ) write_data_to_JSON( outputInfo, outputJsonFile ) + print_latex_table(info, variable, best_binning) for key in outputInfo: print (key,outputInfo[key]) print('-' * 120) + + # Final print of all binnings to screen print('=' * 120) print('For config/variable_binning.py') print('=' * 120) for variable in bin_choices: print('\''+variable+'\' : '+str(bin_choices[variable])+',') -def get_histograms( variable, options ): - config = XSectionConfig( 13 ) - - path_electron = '' - path_muon = '' - path_combined = '' - histogram_name = '' - if options.visiblePhaseSpace: +def get_histograms( config, variable, args ): + ''' + Return a dictionary of the unfolding histogram informations (inc. hist) + ''' + path_electron = '' + path_muon = '' + path_combined = '' + histogram_name = 'response_without_fakes' + if args.visiblePhaseSpace: histogram_name = 'responseVis_without_fakes' - else : - histogram_name = 'response_without_fakes' path_electron = '%s_electron/%s' % ( variable, histogram_name ) - path_muon = '%s_muon/%s' % ( variable, histogram_name ) + path_muon = '%s_muon/%s' % ( variable, histogram_name ) path_combined = '%s_combined/%s' % ( variable, histogram_name ) histogram_information = [ - {'file': config.unfolding_central_raw, - 'CoM': 13, - 'path':path_electron, - 'channel':'electron'}, - {'file':config.unfolding_central_raw, - 'CoM': 13, - 'path':path_muon, - 'channel':'muon'}, - ] + { + 'file' : config.unfolding_central_raw, + 'CoM' : 13, + 'path' : path_electron, + 'channel' :'electron' + }, + { + 'file' : config.unfolding_central_raw, + 'CoM' : 13, + 'path' : path_muon, + 'channel' :'muon' + }, + ] - if options.combined: + if args.combined: histogram_information = [ - {'file': config.unfolding_central_raw, - 'CoM': 13, - 'path': path_combined, - 'channel':'combined'}, - ] + { + 'file' : config.unfolding_central_raw, + 'CoM' : 13, + 'path' : 
path_combined, + 'channel' : 'combined' + }, + ] for histogram in histogram_information: - f = File( histogram['file'] ) - # scale to lumi - # nEvents = f.EventFilter.EventCounter.GetBinContent( 1 ) # number of processed events - # config = XSectionConfig( histogram['CoM'] ) - # lumiweight = config.ttbar_xsection * config.new_luminosity / nEvents - lumiweight = 1 - + f = File( histogram['file'] ) histogram['hist'] = f.Get( histogram['path'] ).Clone() + + # scale to current lumi + lumiweight = config.luminosity_scale + if round(lumiweight, 1) != 1.0: + print( "Scaling to {}".format(lumiweight) ) histogram['hist'].Scale( lumiweight ) + # change scope from file to memory histogram['hist'].SetDirectory( 0 ) f.close() @@ -208,77 +233,90 @@ def get_histograms( variable, options ): return histogram_information - -def get_best_binning( histogram_information, p_min, s_min, n_min, min_width, x_min = None ): +def get_best_binning( histogram_information, p_min, s_min, n_min, min_width, x_min = None, is_NJet=False ): ''' Step 1: Change the size of the first bin until it fulfils the minimal criteria - Step 3: Check if it is true for all other histograms. If not back to step 2 + Step 3: Check if it is true for other channel histograms. 
If not back to step 2 Step 4: Repeat step 2 & 3 until no more bins can be created ''' - histograms = [info['hist'] for info in histogram_information] - bin_edges = [] + histograms = [info['hist'] for info in histogram_information] + bin_edges = [] resolutions = [] - purities = {} + purities = {} stabilities = {} current_bin_start = 0 current_bin_end = 0 first_hist = histograms[0] - n_bins = first_hist.GetNbinsX() + n_bins = first_hist.GetNbinsX() + + # Start at minimum x instead of 0 if x_min: current_bin_start = first_hist.ProjectionX().FindBin(x_min) - 1 current_bin_end = current_bin_start + # Calculate the bin edges until no more bins can be iterated over while current_bin_end < n_bins: - # bin_End, p, s, N_reco - current_bin_end, _, _, _, r = get_next_end( histograms, current_bin_start, current_bin_end, p_min, s_min, n_min, min_width ) + # Return the next bin end + (p, s, N_reco, res) + current_bin_end, _, _, _, r = get_next_end( histograms, current_bin_start, current_bin_end, p_min, s_min, n_min, min_width, is_NJet=is_NJet ) resolutions.append(r) + + # Attach first bin low edge if not bin_edges: - # if empty bin_edges.append( first_hist.GetXaxis().GetBinLowEdge( current_bin_start + 1 ) ) + # Attachs the current bin end edge bin_edges.append( first_hist.GetXaxis().GetBinLowEdge( current_bin_end ) + first_hist.GetXaxis().GetBinWidth( current_bin_end ) ) current_bin_start = current_bin_end + # add the purity and stability values for the final binning - for info in histogram_information: - new_hist = rebin_2d( info['hist'], bin_edges, bin_edges ).Clone( info['channel'] + '_' + str( info['CoM'] ) ) - get_bin_content = new_hist.ProjectionX().GetBinContent - purities = calculate_purities( new_hist.Clone() ) - stabilities = calculate_stabilities( new_hist.Clone() ) - n_events = [int( get_bin_content( i ) ) for i in range( 1, len( bin_edges ) )] + for hist_info in histogram_information: + new_hist = rebin_2d( hist_info['hist'], bin_edges, bin_edges ).Clone( 
hist_info['channel'] + '_' + str( hist_info['CoM'] ) ) + get_bin_content = new_hist.ProjectionX().GetBinContent + purities = calculate_purities( new_hist.Clone() ) + stabilities = calculate_stabilities( new_hist.Clone() ) + n_events = [int( get_bin_content( i ) ) for i in range( 1, len( bin_edges ) )] + # Now check if the last bin also fulfils the requirements if ( purities[-1] < p_min or stabilities[-1] < s_min or n_events[-1] < n_min ) and len(purities) > 3: - # if not, merge last two bins - bin_edges[-2] = bin_edges[-1] - bin_edges = bin_edges[:-1] - new_hist = rebin_2d( info['hist'], bin_edges, bin_edges ).Clone() - get_bin_content = new_hist.ProjectionX().GetBinContent - purities = calculate_purities( new_hist.Clone() ) - stabilities = calculate_stabilities( new_hist.Clone() ) - n_events = [int( get_bin_content( i ) ) for i in range( 1, len( bin_edges ) )] - - info['p_i'] = purities - info['s_i'] = stabilities - info['N'] = n_events - info['res'] = resolutions + # Merge last two bins + bin_edges[-2] = bin_edges[-1] + bin_edges = bin_edges[:-1] + # Merge the resolutions in the last bins + resolutions[-2] = (resolutions[-2]+resolutions[-1]) / 2 + resolutions = resolutions[:-1] + # Recalculate purities and stabilites + new_hist = rebin_2d( hist_info['hist'], bin_edges, bin_edges ).Clone() + purities = calculate_purities( new_hist.Clone() ) + stabilities = calculate_stabilities( new_hist.Clone() ) + n_events = [int( get_bin_content( i ) ) for i in range( 1, len( bin_edges ) )] + + # Add purites, stabilities, n_events and resolutions to the hstogram information + hist_info['p_i'] = purities + hist_info['s_i'] = stabilities + hist_info['N'] = n_events + hist_info['res'] = resolutions return bin_edges, histogram_information -def get_next_end( histograms, bin_start, bin_end, p_min, s_min, n_min, min_width ): +def get_next_end( histograms, bin_start, bin_end, p_min, s_min, n_min, min_width, is_NJet=False ): + ''' + Getting the next bin end + ''' current_bin_start = 
bin_start current_bin_end = bin_end - p, s = 0, 0 + for gen_vs_reco_histogram in histograms: reco = asrootpy( gen_vs_reco_histogram.ProjectionX() ) - gen = asrootpy( gen_vs_reco_histogram.ProjectionY( 'py', 1 ) ) + gen = asrootpy( gen_vs_reco_histogram.ProjectionY( 'py', 1 ) ) reco_i = list( reco.y() ) - gen_i = list( gen.y() ) + gen_i = list( gen.y() ) # keep the start bin the same but roll the end bin for bin_i in range ( current_bin_end, len( reco_i ) + 1 ): x_high = reco.GetXaxis().GetBinLowEdge(bin_i) - x_mid = reco.GetXaxis().GetBinCenter(int( (current_bin_start+current_bin_end)/2 ) ) - x_low = reco.GetXaxis().GetBinUpEdge(current_bin_start) + x_mid = reco.GetXaxis().GetBinCenter(int( (current_bin_start+current_bin_end)/2 ) ) + x_low = reco.GetXaxis().GetBinUpEdge(current_bin_start) binWidth = x_high - x_low if binWidth < min_width: @@ -286,7 +324,7 @@ def get_next_end( histograms, bin_start, bin_end, p_min, s_min, n_min, min_width continue n_reco = sum( reco_i[current_bin_start:bin_i] ) - n_gen = sum( gen_i[current_bin_start:bin_i] ) + n_gen = sum( gen_i[current_bin_start:bin_i] ) n_gen_and_reco = 0 if bin_i < current_bin_start + 1: @@ -297,24 +335,18 @@ def get_next_end( histograms, bin_start, bin_end, p_min, s_min, n_min, min_width # the histogram and taking the diagonal elements (which is what we want) n_gen_and_reco = gen_vs_reco_histogram.Integral( current_bin_start + 1, bin_i , current_bin_start + 1, bin_i ) - p, s = 0, 0 + p, s, res = 0, 0, 99 if n_reco > 0: p = round( n_gen_and_reco / n_reco, 3 ) if n_gen > 0: s = round( n_gen_and_reco / n_gen, 3 ) - # find the bin range that matches - # print('New bin : ',current_bin_start,current_bin_end,p,s - if p >= p_min and s >= s_min and n_reco >= n_min: # Now that purity and stability are statisfied... What about the resolution? - # Find slices of X and Y between bin edges and fit them with a Gaussian. - # The StdDev of Gaussian = Resolution. 
- # If Resolution < Bin width then we are all good - # NJets is not great at the moment for fitting guassians - if (var=='NJets'): + # Dont use resolution information on NJets + if is_NJet: current_bin_end = bin_i break @@ -325,9 +357,9 @@ def get_next_end( histograms, bin_start, bin_end, p_min, s_min, n_min, min_width current_bin_end = bin_i break - # if it gets to the end, this is the best we can do current_bin_end = bin_i + # And now for the next channel starting with current_bin_end. return current_bin_end, p, s, n_reco, res def print_console(info, old_purities, old_stabilities, print_old = False): @@ -343,8 +375,8 @@ def print_console(info, old_purities, old_stabilities, print_old = False): def print_latex_table( info, variable, best_binning ): print('CoM =', info['CoM'], 'channel =', info['channel']) - header = """\%s bin (\GeV) & purity & stability & number of events\\\\ - \hline""" % variable.lower() + header = """\{var} bin (\GeV) & purity & stability & resolution & number of events\\\\ + \hline""".format(var=variable) print(header) firstBin = 0 lastBin = len( best_binning ) - 1 @@ -356,8 +388,10 @@ def print_latex_table( info, variable, best_binning ): if i == len( best_binning ) - 2: bin_range = '$\geq %d$' % best_binning[i] else: - bin_range = '%d - %d' % ( best_binning[i], best_binning[i + 1] ) - print('%s & %.3f & %.3f & %d\\\\' % (bin_range, info['p_i'][i], info['s_i'][i], info['N'][i])) + bin_range = '{start} - {end}'.format(start=best_binning[i],end=best_binning[i + 1] ) + if 'abs_lepton_eta' in variable: + bin_range = '{start} - {end}'.format(start=best_binning[i],end=best_binning[i + 1] ) + print('%s & %.3f & %.3f & %.3f & %d\\\\' % (bin_range, info['p_i'][i], info['s_i'][i], info['res'][i], info['N'][i])) print('\hline') if __name__ == '__main__': diff --git a/dps/analysis/xsection/01_get_ttjet_normalisation.py b/dps/analysis/xsection/01_get_ttjet_normalisation.py index 38685280..b58acf92 100644 --- 
a/dps/analysis/xsection/01_get_ttjet_normalisation.py +++ b/dps/analysis/xsection/01_get_ttjet_normalisation.py @@ -1,334 +1,87 @@ -''' - Takes AnalysisSoftware (https://github.com/BristolTopGroup/AnalysisSoftware) - output files and extracts the TTJet normalisation for each measured variable - by subtracting backgrounds from data. - - Usage: - python src/cross_section_measurement/01_get_ttjet_normalisation.py \ - -c -v -i \ - -p - - Example: - python src/cross_section_measurement/01_get_ttjet_normalisation.py \ - -c 8 -v MET -i config/measurements/background_subtraction/ - - TODO: In the end this and 01_get_fit_results.py should be merged. - All should come down to the function to extract the # events from TTJet -''' from __future__ import division -from optparse import OptionParser +from argparse import ArgumentParser from dps.utils.logger import log from dps.config.xsection import XSectionConfig -from dps.analysis.xsection.lib import closure_tests -from dps.utils.file_utilities import write_data_to_JSON -from dps.utils.hist_utilities import clean_control_region, \ - hist_to_value_error_tuplelist, fix_overflow - -import glob -from copy import deepcopy -from dps.utils.Calculation import combine_complex_results +from dps.utils.file_utilities import get_files_in_path, read_data_from_JSON from dps.utils.measurement import Measurement from dps.utils.ROOT_utils import set_root_defaults # define logger for this module mylog = log["01b_get_ttjet_normalisation"] - -class TTJetNormalisation(object): - +def main(): ''' - Determines the normalisation for top quark pair production based on - different methods. Unless stated otherwise all templates and - (initial) normalisations are taken from simulation, except for QCD - where the template is extracted from data. 
- - Supported methods: - BACKGROUND_SUBTRACTION: - Subtracts the known backgrounds from data to obtain TTJet template - and normalisation - SIMULTANEOUS_FIT: - Uses Minuit and several fit variables (quotation needed) to perform - a simultaneous fit (does not use statistical errors of templates). - FRACTION_FITTER: - Uses the TFractionFitter class to fit the TTJet normalisation + 1 - Read Config file for normalisation measurement + 2 - Run measurement + 3 - Combine measurement before unfolding ''' - - BACKGROUND_SUBTRACTION = 10 - SIMULTANEOUS_FIT = 20 - FRACTION_FITTER = 30 - - @mylog.trace() - def __init__(self, - config, - measurement, - method=BACKGROUND_SUBTRACTION, - phase_space='FullPS'): - self.config = config - self.variable = measurement.variable - self.category = measurement.name - self.channel = measurement.channel - self.method = method - self.phase_space = phase_space - self.measurement = measurement - self.measurement.read() - - self.met_type = measurement.met_type - self.fit_variables = ['M3'] - - self.normalisation = {} - self.initial_normalisation = {} - self.templates = {} - - self.have_normalisation = False - - for sample, hist in self.measurement.histograms.items(): - h = deepcopy(hist) - h_norm = h.integral() - if h_norm > 0: - h.Scale(1 / h.integral()) - self.templates[sample] = hist_to_value_error_tuplelist(h) - self.auxiliary_info = {} - self.auxiliary_info['norms'] = measurement.aux_info_norms - - @mylog.trace() - def calculate_normalisation(self): - ''' - 1. get file names - 2. get histograms from files - 3. ??? - 4. 
calculate normalisation based on self.method - ''' - if self.have_normalisation: - return - histograms = self.measurement.histograms - - for sample, hist in histograms.items(): - # TODO: this should be a list of bin-contents - hist = fix_overflow(hist) - histograms[sample] = hist - self.initial_normalisation[ - sample] = hist_to_value_error_tuplelist(hist) - if self.method == self.BACKGROUND_SUBTRACTION and sample != 'TTJet': - self.normalisation[sample] = self.initial_normalisation[sample] - - if self.method == self.BACKGROUND_SUBTRACTION: - self.background_subtraction(histograms) - if self.method == self.SIMULTANEOUS_FIT: - self.simultaneous_fit(histograms) - - # next, let's round all numbers (they are event numbers after all - for sample, values in self.normalisation.items(): - new_values = [(round(v, 1), round(e, 1)) for v, e in values] - self.normalisation[sample] = new_values - - self.have_normalisation = True - - def background_subtraction(self, histograms): - ttjet_hist = clean_control_region(histograms, - subtract=['QCD', 'V+Jets', 'SingleTop']) - self.normalisation[ - 'TTJet'] = hist_to_value_error_tuplelist(ttjet_hist) - - @mylog.trace() - def simultaneous_fit(self, histograms): - from dps.utils.Fitting import FitData, FitDataCollection, Minuit - print('not in production yet') - fitter = None - fit_data_collection = FitDataCollection() - for fit_variable in self.fit_variables: - mc_histograms = { - 'TTJet': histograms['TTJet'], - 'SingleTop': histograms['SingleTop'], - 'V+Jets': histograms['V+Jets'], - 'QCD': histograms['QCD'], - } - h_data = histograms['data'] - fit_data = FitData(h_data, mc_histograms, - fit_boundaries=self.config.fit_boundaries[fit_variable]) - fit_data_collection.add(fit_data, name=fit_variable) - fitter = Minuit(fit_data_collection) - fitter.fit() - fit_results = fitter.readResults() - - normalisation = fit_data_collection.mc_normalisation( - self.fit_variables[0]) - normalisation_errors = 
fit_data_collection.mc_normalisation_errors( - self.fit_variables[0]) - print normalisation, normalisation_errors - - @mylog.trace() - def save(self, output_path): - if not self.have_normalisation: - self.calculate_normalisation() - - folder_template = '{path}/normalisation/{method}/{CoM}TeV/{variable}/' - folder_template += '{phase_space}/{category}/' - inputs = { - 'path': output_path, - 'CoM': self.config.centre_of_mass_energy, - 'variable': self.variable, - 'category': self.category, - 'method': self.method_string(), - 'phase_space': self.phase_space, - } - output_folder = folder_template.format(**inputs) - - file_template = '{type}_{channel}_{met_type}.txt' - inputs = { - 'channel': self.channel, - 'met_type': self.met_type, - } - write_data_to_JSON(self.normalisation, - output_folder + file_template.format(type='normalisation', **inputs)) - write_data_to_JSON(self.initial_normalisation, - output_folder + file_template.format(type='initial_normalisation', **inputs)) - write_data_to_JSON(self.templates, - output_folder + file_template.format(type='templates', **inputs)) - write_data_to_JSON(self.auxiliary_info, - output_folder + file_template.format(type='auxiliary_info', **inputs)) - - return output_folder - - @mylog.trace() - def method_string(self): - if self.method == self.BACKGROUND_SUBTRACTION: - return 'background_subtraction' - if self.method == self.SIMULTANEOUS_FIT: - return 'simultaneous_fit_' + '_'.join(self.fit_variables) - if self.method == self.FRACTION_FITTER: - return 'fraction_fitter' - - return 'unknown_method' - - @mylog.trace() - def combine(self, other): - if not self.have_normalisation or not other.have_normalisation: - mylog.warn( - 'One of the TTJetNormalisations does not have a normalisation, aborting.') - return - - self.normalisation = combine_complex_results( - self.normalisation, other.normalisation) - self.initial_normalisation = combine_complex_results( - self.initial_normalisation, other.initial_normalisation) - self.templates = 
combine_complex_results( - self.templates, other.templates) - self.channel = 'combined' - - -def parse_options(): - parser = OptionParser(__doc__) - parser.add_option("-p", "--path", dest="path", default='data', - help="set output path for JSON files. Default is 'data'.") - parser.add_option("-i", "--input", dest="input", - default='config/measurements/background_subtraction/', - help="set output path for JSON files") - parser.add_option("-v", "--variable", dest="variable", default='MET', - help="set the variable to analyse (MET, HT, ST, MT, WPT). Default is MET.") - parser.add_option("-c", "--centre-of-mass-energy", dest="CoM", default=13, type=int, - help="set the centre of mass energy for analysis. Default = 13 [TeV]") - parser.add_option('-d', '--debug', dest="debug", action="store_true", - help="Print the debug information") - parser.add_option('--closure_test', dest="closure_test", action="store_true", - help="Perform fit on data == sum(MC) * scale factor (MC process)") - parser.add_option('--closure_test_type', dest="closure_test_type", default='simple', - help="Type of closure test (relative normalisation):" + '|'.join(closure_tests.keys())) - parser.add_option('--test', dest="test", action="store_true", - help="Just run the central measurement") - parser.add_option('--visiblePS', dest="visiblePS", action="store_true", - help="Unfold to visible phase space") - - (options, args) = parser.parse_args() - # fix some of the inputs - if not options.path.endswith('/'): - options.path = options.path + '/' - if not options.input.endswith('/'): - options.input = options.input + '/' - - return options, args - - -@mylog.trace() -def main(): - # construct categories from files: - input_template = options.input + '{energy}TeV/{channel}/{variable}/{phase_space}/*.json' - categories = ['QCD_shape'] - categories.extend(measurement_config.categories_and_prefixes.keys()) - categories.extend(measurement_config.rate_changing_systematics_names) - 
categories.extend([measurement_config.vjets_theory_systematic_prefix + - systematic for systematic in measurement_config.generator_systematics]) - - phase_space = 'FullPS' - if options.visiblePS: - phase_space = 'VisiblePS' results = {} - for channel in ['electron', 'muon']: - inputs = { - 'energy': options.CoM, - 'channel': channel, - 'variable': variable, - 'phase_space': phase_space, - } - measurement_files = glob.glob(input_template.format(**inputs)) - for f in sorted(measurement_files): - if options.test and not 'central' in f : continue - print('Processing file ' + f) - measurement = Measurement.fromJSON(f) - # for each measurement - norm = TTJetNormalisation( - config=measurement_config, - measurement=measurement, - method=TTJetNormalisation.BACKGROUND_SUBTRACTION, - phase_space=phase_space, - ) - norm.calculate_normalisation() - mylog.info('Saving results to {0}'.format(output_path)) - norm.save(output_path) - # store results for later combination - r_name = f.replace(channel, '') - if not results.has_key(r_name): - results[r_name] = [norm] - else: - results[r_name].append(norm) - for f, r_list in results.items(): - if not len(r_list) == 2: - msg = 'Only found results ({0}) for one channel, not combining.' 
- mylog.warn(msg.format(f)) - continue - n1, n2 = r_list - n1.combine(n2) - n1.save(output_path) + # config file template + input_template = 'config/measurements/background_subtraction/{com}TeV/{ch}/{var}/{ps}/' + ps = 'FullPS' + if args.visiblePS: + ps = 'VisiblePS' -def get_category_from_file(json_file): - filename = json_file.split('/')[-1] - # remove type string - category = filename.replace('_shape_systematic', '') - category = category.replace('_rate_systematic', '') - # remove file ending - category = category.replace('.json', '') + for ch in ['electron', 'muon']: + for var in measurement_config.variables: + if args.variable not in var: continue - return category + # Create measurement_filepath + measurement_filepath = input_template.format( + com = args.CoM, + ch = ch, + var = var, + ps = ps, + ) + + # Get all config files in measurement_filepath + measurement_files = get_files_in_path(measurement_filepath, file_ending='.json') + + for f in sorted(measurement_files): + if args.test: + if 'central' not in f: continue + print('Processing file ' + f) + # Read in Measurement JSON + config = read_data_from_JSON(f) + + if 'electron' in ch: + # Create Measurement Class using JSON + electron_measurement = Measurement(config) + electron_measurement.calculate_normalisation() + electron_measurement.save(ps) + elif 'muon' in ch: + # Create Measurement Class using JSON + muon_measurement = Measurement(config) + muon_measurement.calculate_normalisation() + muon_measurement.save(ps) + # break + + # Combining the channels before unfolding + combined_measurement = electron_measurement + combined_measurement.combine(muon_measurement) + combined_measurement.save(ps) + return + +def parse_arguments(): + parser = ArgumentParser(__doc__) + parser.add_argument("-v", "--variable", dest="variable", default='HT', + help="set the variable to analyse (MET, HT, ST, MT, WPT). 
Default is MET.") + parser.add_argument("-c", "--centre-of-mass-energy", dest="CoM", default=13, type=int, + help="set the centre of mass energy for analysis. Default = 13 [TeV]") + parser.add_argument('--visiblePS', dest="visiblePS", action="store_true", + help="Unfold to visible phase space") + parser.add_argument('--test', dest="test", action="store_true", + help="Unfold to visible phase space") + args = parser.parse_args() + return args if __name__ == '__main__': set_root_defaults() + args = parse_arguments() + measurement_config = XSectionConfig(args.CoM) + main() - options, args = parse_options() - - # set global variables - debug = options.debug - if debug: - log.setLevel(log.DEBUG) - - measurement_config = XSectionConfig(options.CoM) - # caching of variables for shorter access - translate_options = measurement_config.translate_options - variable = options.variable - output_path = options.path - if options.closure_test: - output_path += '/closure_test/' - output_path += options.closure_test_type + '/' - main() diff --git a/dps/analysis/xsection/02_unfold_and_measure.py b/dps/analysis/xsection/02_unfold_and_measure.py index 80dc7430..7cc5df44 100644 --- a/dps/analysis/xsection/02_unfold_and_measure.py +++ b/dps/analysis/xsection/02_unfold_and_measure.py @@ -1,7 +1,6 @@ # general from __future__ import division -from optparse import OptionParser -# from array import array +from argparse import ArgumentParser # rootpy from rootpy.io import File from rootpy.plotting import Hist2D @@ -14,753 +13,758 @@ from dps.utils.hist_utilities import hist_to_value_error_tuplelist, \ value_error_tuplelist_to_hist from dps.utils.Unfolding import Unfolding, get_unfold_histogram_tuple, removeFakes -from dps.utils.file_utilities import read_data_from_JSON, write_data_to_JSON -from copy import deepcopy from dps.utils.ROOT_utils import set_root_defaults -# from ROOT import TGraph, TSpline3, TUnfoldDensity +from dps.utils.pandas_utilities import read_tuple_from_file, 
write_tuple_to_df, combine_complex_df + +from copy import deepcopy + +def get_unfolding_files(measurement_config): + ''' + Return the set of unfolding files to use + ''' + unfolding_files = {} + + unfolding_files['file_for_unfolding'] = File( measurement_config.unfolding_central, 'read' ) + + unfolding_files['files_for_pdfs'] = { + 'PDFWeights_%d' % (index) : File ( measurement_config.unfolding_pdfweights[index] ) for index in range( 0, 100 ) + } + + unfolding_files['file_for_renormalisationdown'] = File( measurement_config.unfolding_renormalisation_down, 'read' ) + unfolding_files['file_for_renormalisationup'] = File( measurement_config.unfolding_renormalisation_up, 'read' ) + unfolding_files['file_for_factorisationdown'] = File( measurement_config.unfolding_factorisation_down, 'read' ) + unfolding_files['file_for_factorisationup'] = File( measurement_config.unfolding_factorisation_up, 'read' ) + unfolding_files['file_for_combineddown'] = File( measurement_config.unfolding_combined_down, 'read' ) + unfolding_files['file_for_combinedup'] = File( measurement_config.unfolding_combined_up, 'read' ) + unfolding_files['file_for_alphaSdown'] = File( measurement_config.unfolding_alphaS_down, 'read' ) + unfolding_files['file_for_alphaSup'] = File( measurement_config.unfolding_alphaS_up, 'read' ) + + unfolding_files['file_for_matchingdown'] = File( measurement_config.unfolding_matching_down, 'read' ) + unfolding_files['file_for_matchingup'] = File( measurement_config.unfolding_matching_up, 'read' ) + + unfolding_files['file_for_isrdown'] = File( measurement_config.unfolding_isr_down, 'read' ) + unfolding_files['file_for_isrup'] = File( measurement_config.unfolding_isr_up, 'read' ) + unfolding_files['file_for_fsrdown'] = File( measurement_config.unfolding_fsr_down, 'read' ) + unfolding_files['file_for_fsrup'] = File( measurement_config.unfolding_fsr_up, 'read' ) + unfolding_files['file_for_uedown'] = File( measurement_config.unfolding_ue_down, 'read' ) + 
unfolding_files['file_for_ueup'] = File( measurement_config.unfolding_ue_up, 'read' ) + + unfolding_files['file_for_topPtSystematic'] = File( measurement_config.unfolding_topPtSystematic, 'read' ) + + unfolding_files['file_for_massdown'] = File( measurement_config.unfolding_mass_down, 'read' ) + unfolding_files['file_for_massup'] = File( measurement_config.unfolding_mass_up, 'read' ) + + unfolding_files['file_for_jesdown'] = File( measurement_config.unfolding_jes_down, 'read' ) + unfolding_files['file_for_jesup'] = File( measurement_config.unfolding_jes_up, 'read' ) + unfolding_files['file_for_jerdown'] = File( measurement_config.unfolding_jer_down, 'read' ) + unfolding_files['file_for_jerup'] = File( measurement_config.unfolding_jer_up, 'read' ) + + unfolding_files['file_for_bjetdown'] = File( measurement_config.unfolding_bjet_down, 'read' ) + unfolding_files['file_for_bjetup'] = File( measurement_config.unfolding_bjet_up, 'read' ) + unfolding_files['file_for_lightjetdown'] = File( measurement_config.unfolding_lightjet_down, 'read' ) + unfolding_files['file_for_lightjetup'] = File( measurement_config.unfolding_lightjet_up, 'read' ) + + unfolding_files['file_for_LeptonDown'] = File( measurement_config.unfolding_Lepton_down, 'read' ) + unfolding_files['file_for_LeptonUp'] = File( measurement_config.unfolding_Lepton_up, 'read' ) + + unfolding_files['file_for_ElectronEnDown'] = File( measurement_config.unfolding_ElectronEn_down, 'read' ) + unfolding_files['file_for_ElectronEnUp'] = File( measurement_config.unfolding_ElectronEn_up, 'read' ) + unfolding_files['file_for_MuonEnDown'] = File( measurement_config.unfolding_MuonEn_down, 'read' ) + unfolding_files['file_for_MuonEnUp'] = File( measurement_config.unfolding_MuonEn_up, 'read' ) + unfolding_files['file_for_TauEnDown'] = File( measurement_config.unfolding_TauEn_down, 'read' ) + unfolding_files['file_for_TauEnUp'] = File( measurement_config.unfolding_TauEn_up, 'read' ) + unfolding_files['file_for_UnclusteredEnDown'] 
= File( measurement_config.unfolding_UnclusteredEn_down, 'read' ) + unfolding_files['file_for_UnclusteredEnUp'] = File( measurement_config.unfolding_UnclusteredEn_up, 'read' ) + + unfolding_files['file_for_PUUp'] = File( measurement_config.unfolding_PUSystematic_up, 'read') + unfolding_files['file_for_PUDown'] = File( measurement_config.unfolding_PUSystematic_down, 'read') + + unfolding_files['file_for_ptreweight'] = File( measurement_config.unfolding_ptreweight, 'read' ) + + unfolding_files['file_for_powhegPythia8'] = File( measurement_config.unfolding_powheg_pythia8, 'read') + # unfolding_files['file_for_amcatnlo'] = File( measurement_config.unfolding_amcatnlo, 'read') + # unfolding_files['file_for_amcatnlo_herwig'] = File( measurement_config.unfolding_amcatnlo_herwig, 'read') + # unfolding_files['file_for_madgraphMLM'] = File( measurement_config.unfolding_madgraphMLM, 'read') + unfolding_files['file_for_powheg_herwig'] = File( measurement_config.unfolding_powheg_herwig, 'read' ) + return unfolding_files + def unfold_results( results, category, channel, tau_value, h_truth, h_measured, h_response, h_fakes, method, visiblePS ): - global variable, path_to_JSON, options + global variable, path_to_DF, args + edges = reco_bin_edges_full[variable] if visiblePS: edges = reco_bin_edges_vis[variable] + h_data = value_error_tuplelist_to_hist( results, edges ) + # Rebin original TTJet_Measured in terms of final binning (h_data is later replaced with h_data_no_fakes) + h_data_rebinned = h_data.rebinned(2) + # Remove fakes before unfolding - h_data = removeFakes( h_measured, h_fakes, h_data ) + h_data_no_fakes = removeFakes( h_measured, h_fakes, h_data ) - unfolding = Unfolding( h_data, h_truth, h_measured, h_response, h_fakes, method = method, tau = tau_value ) + # unfold + unfolding = Unfolding( h_data_no_fakes, h_truth, h_measured, h_response, h_fakes, method = method, tau = tau_value ) # turning off the unfolding errors for systematic samples if not category == 'central': 
unfoldCfg.error_treatment = 0 else: - unfoldCfg.error_treatment = options.error_treatment + unfoldCfg.error_treatment = args.error_treatment h_unfolded_data = unfolding.unfold() + # print "h_response bin edges : ", h_response + # print "h_unfolded_data bin edges : ", h_unfolded_data + h_data_no_fakes = h_data_no_fakes.rebinned(2) + del unfolding - return hist_to_value_error_tuplelist( h_unfolded_data ), hist_to_value_error_tuplelist( h_data ) - -def data_covariance_matrix( data ): - values = list( data ) - get_bin_error = data.GetBinError - cov_matrix = Hist2D( len( values ), -10, 10, len( values ), -10, 10, type = 'D' ) - for bin_i in range( len( values ) ): - error = get_bin_error( bin_i + 1 ) - cov_matrix.SetBinContent( bin_i + 1, bin_i + 1, error * error ) - return cov_matrix - -def get_unfolded_normalisation( TTJet_fit_results, category, channel, tau_value, visiblePS ): - global centre_of_mass, luminosity, ttbar_xsection, method - global variable, met_type, path_to_JSON, file_for_unfolding, file_for_powheg_pythia, file_for_powheg_herwig, file_for_ptreweight, files_for_pdfs - global file_for_powhegPythia8, file_for_madgraphMLM, file_for_amcatnlo, file_for_amcatnlo_herwig - # global file_for_matchingdown, file_for_matchingup - global file_for_fsrdown, file_for_fsrup - global file_for_isrdown, file_for_isrup - global file_for_uedown, file_for_ueup - global file_for_massdown, file_for_massup - global ttbar_generator_systematics, ttbar_theory_systematics, pdf_uncertainties - global use_ptreweight + return hist_to_value_error_tuplelist( h_data_rebinned ), hist_to_value_error_tuplelist( h_unfolded_data ), hist_to_value_error_tuplelist( h_data_no_fakes ) + + +def get_unfolded_normalisation( TTJet_normalisation_results, category, channel, tau_value, visiblePS ): + global com, luminosity, ttbar_xsection, method, variable, path_to_DF + global unfolding_files files_for_systematics = { - ttbar_theory_systematic_prefix + 'fsrdown' : file_for_fsrdown, - 
ttbar_theory_systematic_prefix + 'fsrup' : file_for_fsrup, - ttbar_theory_systematic_prefix + 'isrdown' : file_for_isrdown, - ttbar_theory_systematic_prefix + 'isrup' : file_for_isrup, - ttbar_theory_systematic_prefix + 'uedown' : file_for_uedown, - ttbar_theory_systematic_prefix + 'ueup' : file_for_ueup, - ttbar_theory_systematic_prefix + 'massdown' : file_for_massdown, - ttbar_theory_systematic_prefix + 'massup' : file_for_massup, - - ttbar_theory_systematic_prefix + 'factorisationdown' : file_for_factorisationdown, - ttbar_theory_systematic_prefix + 'factorisationup' : file_for_factorisationup, - ttbar_theory_systematic_prefix + 'renormalisationdown' : file_for_renormalisationdown, - ttbar_theory_systematic_prefix + 'renormalisationup' : file_for_renormalisationup, - ttbar_theory_systematic_prefix + 'combineddown' : file_for_combineddown, - ttbar_theory_systematic_prefix + 'combinedup' : file_for_combinedup, - # ttbar_theory_systematic_prefix + 'alphaSdown' : file_for_alphaSdown, - # ttbar_theory_systematic_prefix + 'alphaSup' : file_for_alphaSup, - - 'JES_down' : file_for_jesdown, - 'JES_up' : file_for_jesup, - - 'JER_down' : file_for_jerdown, - 'JER_up' : file_for_jerup, - - 'BJet_up' : file_for_bjetup, - 'BJet_down' : file_for_bjetdown, - - 'LightJet_up' : file_for_lightjetup, - 'LightJet_down' : file_for_lightjetdown, - - ttbar_theory_systematic_prefix + 'hadronisation' : file_for_powheg_herwig, - # ttbar_theory_systematic_prefix + 'NLOgenerator' : file_for_amcatnlo, - - 'ElectronEnUp' : file_for_ElectronEnUp, - 'ElectronEnDown' : file_for_ElectronEnDown, - 'MuonEnUp' : file_for_MuonEnUp, - 'MuonEnDown' : file_for_MuonEnDown, - 'TauEnUp' : file_for_TauEnUp, - 'TauEnDown' : file_for_TauEnDown, - 'UnclusteredEnUp' : file_for_UnclusteredEnUp, - 'UnclusteredEnDown' : file_for_UnclusteredEnDown, - - 'Muon_up' : file_for_LeptonUp, - 'Muon_down' : file_for_LeptonDown, - 'Electron_up' : file_for_LeptonUp, - 'Electron_down' : file_for_LeptonDown, - - 'PileUp_up' : 
file_for_PUUp, - 'PileUp_down' : file_for_PUDown, - } + + 'TTJets_massdown' : unfolding_files['file_for_massdown'], + 'TTJets_massup' : unfolding_files['file_for_massup'], + + 'TTJets_factorisationdown' : unfolding_files['file_for_factorisationdown'], + 'TTJets_factorisationup' : unfolding_files['file_for_factorisationup'], + 'TTJets_renormalisationdown' : unfolding_files['file_for_renormalisationdown'], + 'TTJets_renormalisationup' : unfolding_files['file_for_renormalisationup'], + 'TTJets_combineddown' : unfolding_files['file_for_combineddown'], + 'TTJets_combinedup' : unfolding_files['file_for_combinedup'], + 'TTJets_alphaSdown' : unfolding_files['file_for_alphaSdown'], + 'TTJets_alphaSup' : unfolding_files['file_for_alphaSup'], + + 'TTJets_matchingdown' : unfolding_files['file_for_matchingdown'], + 'TTJets_matchingup' : unfolding_files['file_for_matchingup'], + + 'TTJets_isrdown' : unfolding_files['file_for_isrdown'], + 'TTJets_isrup' : unfolding_files['file_for_isrup'], + # 'TTJets_fsrdown' : unfolding_files['file_for_fsrdown'], + 'TTJets_fsrup' : unfolding_files['file_for_fsrup'], + 'TTJets_uedown' : unfolding_files['file_for_uedown'], + 'TTJets_ueup' : unfolding_files['file_for_ueup'], + + 'TTJets_topPt' : unfolding_files['file_for_topPtSystematic'], + + 'JES_down' : unfolding_files['file_for_jesdown'], + 'JES_up' : unfolding_files['file_for_jesup'], + + 'JER_down' : unfolding_files['file_for_jerdown'], + 'JER_up' : unfolding_files['file_for_jerup'], + + 'BJet_up' : unfolding_files['file_for_bjetup'], + 'BJet_down' : unfolding_files['file_for_bjetdown'], + + 'LightJet_up' : unfolding_files['file_for_lightjetup'], + 'LightJet_down' : unfolding_files['file_for_lightjetdown'], + + 'TTJets_hadronisation' : unfolding_files['file_for_powheg_herwig'], + + 'ElectronEnUp' : unfolding_files['file_for_ElectronEnUp'], + 'ElectronEnDown' : unfolding_files['file_for_ElectronEnDown'], + 'MuonEnUp' : unfolding_files['file_for_MuonEnUp'], + 'MuonEnDown' : 
unfolding_files['file_for_MuonEnDown'], + 'TauEnUp' : unfolding_files['file_for_TauEnUp'], + 'TauEnDown' : unfolding_files['file_for_TauEnDown'], + 'UnclusteredEnUp' : unfolding_files['file_for_UnclusteredEnUp'], + 'UnclusteredEnDown' : unfolding_files['file_for_UnclusteredEnDown'], + + 'Muon_up' : unfolding_files['file_for_LeptonUp'], + 'Muon_down' : unfolding_files['file_for_LeptonDown'], + 'Electron_up' : unfolding_files['file_for_LeptonUp'], + 'Electron_down' : unfolding_files['file_for_LeptonDown'], + + 'PileUp_up' : unfolding_files['file_for_PUUp'], + 'PileUp_down' : unfolding_files['file_for_PUDown'], + + 'Top_Pt_reweight' : unfolding_files['file_for_ptreweight'], + + } h_truth, h_measured, h_response, h_fakes = None, None, None, None - # Systematics where you change the response matrix + + # Uncertainties by changing the response matrix if category in files_for_systematics : print 'Doing category',category,'by changing response matrix' - h_truth, h_measured, h_response, h_fakes = get_unfold_histogram_tuple( inputfile = files_for_systematics[category], - variable = variable, - channel = channel, - met_type = met_type, - centre_of_mass = centre_of_mass, - ttbar_xsection = ttbar_xsection, - luminosity = luminosity, - load_fakes = True, - visiblePS = visiblePS, - ) + h_truth, h_measured, h_response, h_fakes = get_unfold_histogram_tuple( + inputfile = files_for_systematics[category], + variable = variable, + channel = channel, + centre_of_mass = com, + ttbar_xsection = ttbar_xsection, + luminosity = luminosity, + load_fakes = True, + visiblePS = visiblePS, + ) + # PDF Uncertainties elif category in pdf_uncertainties: - h_truth, h_measured, h_response, h_fakes = get_unfold_histogram_tuple( inputfile = files_for_pdfs[category], - variable = variable, - channel = channel, - met_type = met_type, - centre_of_mass = centre_of_mass, - ttbar_xsection = ttbar_xsection, - luminosity = luminosity, - load_fakes = True, - visiblePS = visiblePS, - ) + print 'Doing 
category',category,'by changing response matrix' + h_truth, h_measured, h_response, h_fakes = get_unfold_histogram_tuple( + inputfile = unfolding_files['files_for_pdfs'][category], + variable = variable, + channel = channel, + centre_of_mass = com, + ttbar_xsection = ttbar_xsection, + luminosity = luminosity, + load_fakes = True, + visiblePS = visiblePS, + ) # Central and systematics where you just change input MC else: - h_truth, h_measured, h_response, h_fakes = get_unfold_histogram_tuple( inputfile = file_for_unfolding, - variable = variable, - channel = channel, - met_type = met_type, - centre_of_mass = centre_of_mass, - ttbar_xsection = ttbar_xsection, - luminosity = luminosity, - load_fakes = True, - visiblePS = visiblePS, - ) - -# central_results = hist_to_value_error_tuplelist( h_truth ) - TTJet_fit_results_unfolded, TTJet_fit_results_withoutFakes = unfold_results( TTJet_fit_results, - category, - channel, - tau_value, - h_truth, - h_measured, - h_response, - h_fakes, - method, - visiblePS, - ) - normalisation_unfolded = { - 'TTJet_measured' : TTJet_fit_results, - 'TTJet_measured_withoutFakes' : TTJet_fit_results_withoutFakes, - 'TTJet_unfolded' : TTJet_fit_results_unfolded - } + h_truth, h_measured, h_response, h_fakes = get_unfold_histogram_tuple( + inputfile = unfolding_files['file_for_unfolding'], + variable = variable, + channel = channel, + centre_of_mass = com, + ttbar_xsection = ttbar_xsection, + luminosity = luminosity, + load_fakes = True, + visiblePS = visiblePS, + ) - # - # THESE ARE FOR GETTING THE HISTOGRAMS FOR COMPARING WITH UNFOLDED DATA - # + # Unfold current normalisation measurements + TTJet_normalisation_results, TTJet_normalisation_results_unfolded, TTJet_normalisation_results_withoutFakes = unfold_results( + TTJet_normalisation_results, + category, + channel, + tau_value, + h_truth, + h_measured, + h_response, + h_fakes, + method, + visiblePS, + ) + + # Store TTJet yields after background subtraction, after background subtraction 
without fakes and after Unfolding + normalisation_unfolded = { + 'TTJet_measured' : TTJet_normalisation_results, + 'TTJet_measured_withoutFakes' : TTJet_normalisation_results_withoutFakes, + 'TTJet_unfolded' : TTJet_normalisation_results_unfolded, + } + # Return truth of different generators for comparison to data in 04 if category == 'central': - h_truth_fsrdown, _, _, _ = get_unfold_histogram_tuple( inputfile = file_for_fsrdown, - variable = variable, - channel = channel, - met_type = met_type, - centre_of_mass = centre_of_mass, - ttbar_xsection = ttbar_xsection, - luminosity = luminosity, - load_fakes = True, - visiblePS = visiblePS, - ) - h_truth_fsrup, _, _, _ = get_unfold_histogram_tuple( inputfile = file_for_fsrup, - variable = variable, - channel = channel, - met_type = met_type, - centre_of_mass = centre_of_mass, - ttbar_xsection = ttbar_xsection, - luminosity = luminosity, - load_fakes = True, - visiblePS = visiblePS, - ) - - h_truth_isrdown, _, _, _ = get_unfold_histogram_tuple( inputfile = file_for_isrdown, - variable = variable, - channel = channel, - met_type = met_type, - centre_of_mass = centre_of_mass, - ttbar_xsection = ttbar_xsection, - luminosity = luminosity, - load_fakes = True, - visiblePS = visiblePS, - ) - h_truth_isrup, _, _, _ = get_unfold_histogram_tuple( inputfile = file_for_isrup, - variable = variable, - channel = channel, - met_type = met_type, - centre_of_mass = centre_of_mass, - ttbar_xsection = ttbar_xsection, - luminosity = luminosity, - load_fakes = True, - visiblePS = visiblePS, - ) - - h_truth_uedown, _, _, _ = get_unfold_histogram_tuple( inputfile = file_for_uedown, - variable = variable, - channel = channel, - met_type = met_type, - centre_of_mass = centre_of_mass, - ttbar_xsection = ttbar_xsection, - luminosity = luminosity, - load_fakes = True, - visiblePS = visiblePS, - ) - h_truth_ueup, _, _, _ = get_unfold_histogram_tuple( inputfile = file_for_ueup, - variable = variable, - channel = channel, - met_type = met_type, - 
centre_of_mass = centre_of_mass, - ttbar_xsection = ttbar_xsection, - luminosity = luminosity, - load_fakes = True, - visiblePS = visiblePS, - ) - - h_truth_massdown, _, _, _ = get_unfold_histogram_tuple( inputfile = file_for_massdown, - variable = variable, - channel = channel, - met_type = met_type, - centre_of_mass = centre_of_mass, - ttbar_xsection = ttbar_xsection, - luminosity = luminosity, - load_fakes = True, - visiblePS = visiblePS, - ) - h_truth_massup, _, _, _ = get_unfold_histogram_tuple( inputfile = file_for_massup, - variable = variable, - channel = channel, - met_type = met_type, - centre_of_mass = centre_of_mass, - ttbar_xsection = ttbar_xsection, - luminosity = luminosity, - load_fakes = True, - visiblePS = visiblePS, - ) - - h_truth_powhegPythia8, _, _, _ = get_unfold_histogram_tuple( inputfile = file_for_powhegPythia8, - variable = variable, - channel = channel, - met_type = met_type, - centre_of_mass = centre_of_mass, - ttbar_xsection = ttbar_xsection, - luminosity = luminosity, - load_fakes = True, - visiblePS = visiblePS, - ) - - # h_truth_amcatnlo, _, _, _ = get_unfold_histogram_tuple( inputfile = file_for_amcatnlo, - # variable = variable, - # channel = channel, - # met_type = met_type, - # centre_of_mass = centre_of_mass, - # ttbar_xsection = ttbar_xsection, - # luminosity = luminosity, - # load_fakes = True, - # visiblePS = visiblePS, - # ) - - # h_truth_madgraphMLM, _, _, _ = get_unfold_histogram_tuple( inputfile = file_for_madgraphMLM, - # variable = variable, - # channel = channel, - # met_type = met_type, - # centre_of_mass = centre_of_mass, - # ttbar_xsection = ttbar_xsection, - # luminosity = luminosity, - # load_fakes = True, - # visiblePS = visiblePS, - # ) - - h_truth_powheg_herwig, _, _, _ = get_unfold_histogram_tuple( inputfile = file_for_powheg_herwig, - variable = variable, - channel = channel, - met_type = met_type, - centre_of_mass = centre_of_mass, - ttbar_xsection = ttbar_xsection, - luminosity = luminosity, - load_fakes = 
True, - visiblePS = visiblePS, - ) - - # h_truth_amcatnlo_herwig, _, _, _ = get_unfold_histogram_tuple( inputfile = file_for_amcatnlo_herwig, - # variable = variable, - # channel = channel, - # met_type = met_type, - # centre_of_mass = centre_of_mass, - # ttbar_xsection = ttbar_xsection, - # luminosity = luminosity, - # load_fakes = True, - # visiblePS = visiblePS, - # ) - - # MADGRAPH_ptreweight_results = hist_to_value_error_tuplelist( h_truth_ptreweight ) - # POWHEG_PYTHIA_results = hist_to_value_error_tuplelist( h_truth_POWHEG_PYTHIA ) - # MCATNLO_results = None - powhegPythia8_results = hist_to_value_error_tuplelist( h_truth_powhegPythia8 ) - # madgraphMLM_results = hist_to_value_error_tuplelist( h_truth_madgraphMLM ) - # amcatnloPythia8_results = hist_to_value_error_tuplelist( h_truth_amcatnlo ) - powheg_herwig_results = hist_to_value_error_tuplelist( h_truth_powheg_herwig ) - # amcatnlo_herwig_results = hist_to_value_error_tuplelist( h_truth_amcatnlo_herwig ) - - fsrdown_results = hist_to_value_error_tuplelist( h_truth_fsrdown ) - fsrup_results = hist_to_value_error_tuplelist( h_truth_fsrup ) - isrdown_results = hist_to_value_error_tuplelist( h_truth_isrdown ) - isrup_results = hist_to_value_error_tuplelist( h_truth_isrup ) - uedown_results = hist_to_value_error_tuplelist( h_truth_uedown ) - ueup_results = hist_to_value_error_tuplelist( h_truth_ueup ) - - massdown_results = hist_to_value_error_tuplelist( h_truth_massdown ) - massup_results = hist_to_value_error_tuplelist( h_truth_massup ) - - normalisation_unfolded['powhegPythia8'] = powhegPythia8_results - # normalisation_unfolded['amcatnlo'] = amcatnloPythia8_results - # normalisation_unfolded['madgraphMLM'] = madgraphMLM_results - normalisation_unfolded['powhegHerwig'] = powheg_herwig_results - # normalisation_unfolded['amcatnloHerwig'] = amcatnlo_herwig_results - - normalisation_unfolded['fsrdown'] = fsrdown_results - normalisation_unfolded['fsrup'] = fsrup_results - normalisation_unfolded['isrdown'] = 
isrdown_results - normalisation_unfolded['isrup'] = isrup_results - normalisation_unfolded['uedown'] = uedown_results - normalisation_unfolded['ueup'] = ueup_results - normalisation_unfolded['massdown'] = massdown_results - normalisation_unfolded['massup'] = massup_results + h_truth_massdown, _, _, _ = get_unfold_histogram_tuple( + inputfile = unfolding_files['file_for_massdown'], + variable = variable, + channel = channel, + centre_of_mass = com, + ttbar_xsection = ttbar_xsection, + luminosity = luminosity, + load_fakes = True, + visiblePS = visiblePS, + ) + h_truth_massup, _, _, _ = get_unfold_histogram_tuple( + inputfile = unfolding_files['file_for_massup'], + variable = variable, + channel = channel, + centre_of_mass = com, + ttbar_xsection = ttbar_xsection, + luminosity = luminosity, + load_fakes = True, + visiblePS = visiblePS, + ) + # h_truth_fsrdown, _, _, _ = get_unfold_histogram_tuple( + # inputfile = unfolding_files['file_for_fsrdown'], + # variable = variable, + # channel = channel, + # centre_of_mass = com, + # ttbar_xsection = ttbar_xsection, + # luminosity = luminosity, + # load_fakes = True, + # visiblePS = visiblePS, + # ) + h_truth_fsrup, _, _, _ = get_unfold_histogram_tuple( + inputfile = unfolding_files['file_for_fsrup'], + variable = variable, + channel = channel, + centre_of_mass = com, + ttbar_xsection = ttbar_xsection, + luminosity = luminosity, + load_fakes = True, + visiblePS = visiblePS, + ) + h_truth_isrdown, _, _, _ = get_unfold_histogram_tuple( + inputfile = unfolding_files['file_for_isrdown'], + variable = variable, + channel = channel, + centre_of_mass = com, + ttbar_xsection = ttbar_xsection, + luminosity = luminosity, + load_fakes = True, + visiblePS = visiblePS, + ) + h_truth_isrup, _, _, _ = get_unfold_histogram_tuple( + inputfile = unfolding_files['file_for_isrup'], + variable = variable, + channel = channel, + centre_of_mass = com, + ttbar_xsection = ttbar_xsection, + luminosity = luminosity, + load_fakes = True, + visiblePS = 
visiblePS, + ) + h_truth_uedown, _, _, _ = get_unfold_histogram_tuple( + inputfile = unfolding_files['file_for_uedown'], + variable = variable, + channel = channel, + centre_of_mass = com, + ttbar_xsection = ttbar_xsection, + luminosity = luminosity, + load_fakes = True, + visiblePS = visiblePS, + ) + h_truth_ueup, _, _, _ = get_unfold_histogram_tuple( + inputfile = unfolding_files['file_for_ueup'], + variable = variable, + channel = channel, + centre_of_mass = com, + ttbar_xsection = ttbar_xsection, + luminosity = luminosity, + load_fakes = True, + visiblePS = visiblePS, + ) + h_truth_powhegPythia8, _, _, _ = get_unfold_histogram_tuple( + inputfile = unfolding_files['file_for_powhegPythia8'], + variable = variable, + channel = channel, + centre_of_mass = com, + ttbar_xsection = ttbar_xsection, + luminosity = luminosity, + load_fakes = True, + visiblePS = visiblePS, + ) + # h_truth_amcatnlo, _, _, _ = get_unfold_histogram_tuple( + # inputfile = unfolding_files['file_for_amcatnlo'], + # variable = variable, + # channel = channel, + # centre_of_mass = com, + # ttbar_xsection = ttbar_xsection, + # luminosity = luminosity, + # load_fakes = True, + # visiblePS = visiblePS, + # ) + # h_truth_madgraphMLM, _, _, _ = get_unfold_histogram_tuple( + # inputfile = unfolding_files['file_for_madgraphMLM'], + # variable = variable, + # channel = channel, + # centre_of_mass = com, + # ttbar_xsection = ttbar_xsection, + # luminosity = luminosity, + # load_fakes = True, + # visiblePS = visiblePS, + # ) + h_truth_powheg_herwig, _, _, _ = get_unfold_histogram_tuple( + inputfile = unfolding_files['file_for_powheg_herwig'], + variable = variable, + channel = channel, + centre_of_mass = com, + ttbar_xsection = ttbar_xsection, + luminosity = luminosity, + load_fakes = True, + visiblePS = visiblePS, + ) + + normalisation_unfolded['powhegPythia8'] = hist_to_value_error_tuplelist( h_truth_powhegPythia8 ) + # normalisation_unfolded['amcatnlo'] = hist_to_value_error_tuplelist( 
h_truth_amcatnlo ) + # normalisation_unfolded['madgraphMLM'] = hist_to_value_error_tuplelist( h_truth_madgraphMLM ) + normalisation_unfolded['powhegHerwig'] = hist_to_value_error_tuplelist( h_truth_powheg_herwig ) + + normalisation_unfolded['massdown'] = hist_to_value_error_tuplelist( h_truth_massdown ) + normalisation_unfolded['massup'] = hist_to_value_error_tuplelist( h_truth_massup ) + normalisation_unfolded['isrdown'] = hist_to_value_error_tuplelist( h_truth_isrdown ) + normalisation_unfolded['isrup'] = hist_to_value_error_tuplelist( h_truth_isrup ) + # normalisation_unfolded['fsrdown'] = hist_to_value_error_tuplelist( h_truth_fsrdown ) + normalisation_unfolded['fsrup'] = hist_to_value_error_tuplelist( h_truth_fsrup ) + normalisation_unfolded['uedown'] = hist_to_value_error_tuplelist( h_truth_uedown ) + normalisation_unfolded['ueup'] = hist_to_value_error_tuplelist( h_truth_ueup ) + + # Write all normalisations in unfolded binning scheme to dataframes + file_template = '{path_to_DF}/{category}/unfolded_normalisation_{channel}_{method}.txt' + write_02(normalisation_unfolded, file_template, path_to_DF, category, channel, method) return normalisation_unfolded + + def calculate_xsections( normalisation, category, channel ): - global variable, met_type, path_to_JSON + ''' + Calculate the xsection + ''' + global variable, path_to_DF # calculate the x-sections branching_ratio = 0.15 - if channel == 'combined': + if 'combined' in channel: branching_ratio = branching_ratio * 2 - TTJet_xsection = calculate_xsection( normalisation['TTJet_measured'], luminosity, branching_ratio ) # L in pb1 - TTJet_withoutFakes_xsection = calculate_xsection( normalisation['TTJet_measured_withoutFakes'], luminosity, branching_ratio ) # L in pb1 - TTJet_xsection_unfolded = calculate_xsection( normalisation['TTJet_unfolded'], luminosity, branching_ratio ) # L in pb1 - xsection_unfolded = {'TTJet_measured' : TTJet_xsection, - 'TTJet_measured_withoutFakes' : TTJet_withoutFakes_xsection, - 
'TTJet_unfolded' : TTJet_xsection_unfolded, - } + xsection_unfolded = {} + xsection_unfolded['TTJet_measured'] = calculate_xsection( + normalisation['TTJet_measured'], + luminosity, # L in pb1 + branching_ratio + ) + xsection_unfolded['TTJet_measured_withoutFakes'] = calculate_xsection( + normalisation['TTJet_measured_withoutFakes'], + luminosity, + branching_ratio + ) + xsection_unfolded['TTJet_unfolded'] = calculate_xsection( + normalisation['TTJet_unfolded'], + luminosity, + branching_ratio + ) if category == 'central': - powhegPythia8_xsection = calculate_xsection( normalisation['powhegPythia8'], luminosity, branching_ratio ) # L in pb1 - # amcatnlo_xsection = calculate_xsection( normalisation['amcatnlo'], luminosity, branching_ratio ) # L in pb1 - powhegHerwig_xsection = calculate_xsection( normalisation['powhegHerwig'], luminosity, branching_ratio ) # L in pb1 - # amcatnloHerwig_xsection = calculate_xsection( normalisation['amcatnloHerwig'], luminosity, branching_ratio ) # L in pb1 - # madgraphMLM_xsection = calculate_xsection( normalisation['madgraphMLM'], luminosity, branching_ratio ) - - fsrdown_xsection = calculate_xsection( normalisation['fsrdown'], luminosity, branching_ratio ) # L in pb1 - fsrup_xsection = calculate_xsection( normalisation['fsrup'], luminosity, branching_ratio ) # L in pb1 - isrdown_xsection = calculate_xsection( normalisation['isrdown'], luminosity, branching_ratio ) # L in pb1 - isrup_xsection = calculate_xsection( normalisation['isrup'], luminosity, branching_ratio ) # L in pb1 - uedown_xsection = calculate_xsection( normalisation['uedown'], luminosity, branching_ratio ) # L in pb1 - ueup_xsection = calculate_xsection( normalisation['ueup'], luminosity, branching_ratio ) # L in pb1 - massdown_xsection = calculate_xsection( normalisation['massdown'], luminosity, branching_ratio ) # L in pb1 - massup_xsection = calculate_xsection( normalisation['massup'], luminosity, branching_ratio ) # L in pb1 - - xsection_unfolded['powhegPythia8'] 
= powhegPythia8_xsection - # xsection_unfolded['amcatnlo'] = amcatnlo_xsection - # xsection_unfolded['madgraphMLM'] = madgraphMLM_xsection - xsection_unfolded['powhegHerwig'] = powhegHerwig_xsection - # xsection_unfolded['amcatnloHerwig'] = amcatnloHerwig_xsection - - xsection_unfolded['fsrdown'] = fsrdown_xsection - xsection_unfolded['fsrup'] = fsrup_xsection - xsection_unfolded['isrdown'] = isrdown_xsection - xsection_unfolded['isrup'] = isrup_xsection - xsection_unfolded['uedown'] = uedown_xsection - xsection_unfolded['ueup'] = ueup_xsection - xsection_unfolded['massdown'] = massdown_xsection - xsection_unfolded['massup'] = massup_xsection - file_template = '{path_to_JSON}/{category}/xsection_{channel}_{method}.txt' - filename = file_template.format( - path_to_JSON = path_to_JSON, - category = category, - channel = channel, - method = method, - ) - - write_data_to_JSON( xsection_unfolded, filename ) + xsection_unfolded['powhegPythia8'] = calculate_xsection( + normalisation['powhegPythia8'], + luminosity, + branching_ratio + ) + # xsection_unfolded['amcatnlo'] = calculate_xsection( + # normalisation['amcatnlo'], + # luminosity, + # branching_ratio + # ) + xsection_unfolded['powhegHerwig'] = calculate_xsection( + normalisation['powhegHerwig'], + luminosity, + branching_ratio + ) + # xsection_unfolded['madgraphMLM'] = calculate_xsection( + # normalisation['madgraphMLM'], + # luminosity, + # branching_ratio + # ) + + xsection_unfolded['massdown'] = calculate_xsection( + normalisation['massdown'], + luminosity, + branching_ratio + ) + xsection_unfolded['massup'] = calculate_xsection( + normalisation['massup'], + luminosity, + branching_ratio + ) + xsection_unfolded['isrdown'] = calculate_xsection( + normalisation['isrdown'], + luminosity, + branching_ratio + ) + xsection_unfolded['isrup'] = calculate_xsection( + normalisation['isrup'], + luminosity, + branching_ratio + ) + # xsection_unfolded['fsrdown'] = calculate_xsection( + # normalisation['fsrdown'], + # 
luminosity, + # branching_ratio + # ) + xsection_unfolded['fsrup'] = calculate_xsection( + normalisation['fsrup'], + luminosity, + branching_ratio + ) + xsection_unfolded['uedown'] = calculate_xsection( + normalisation['uedown'], + luminosity, + branching_ratio + ) + xsection_unfolded['ueup'] = calculate_xsection( + normalisation['ueup'], + luminosity, + branching_ratio + ) + + + file_template = '{path_to_DF}/{category}/xsection_{channel}_{method}.txt' + write_02(xsection_unfolded, file_template, path_to_DF, category, channel, method) + return def calculate_normalised_xsections( normalisation, category, channel, normalise_to_one = False ): - global variable, met_type, path_to_JSON, phase_space + ''' + Calculate the normalised cross sections + ''' + global variable, path_to_DF, phase_space binWidths = None if phase_space == 'VisiblePS': binWidths = bin_widths_visiblePS elif phase_space == 'FullPS': binWidths = bin_widths - - TTJet_normalised_xsection = calculate_normalised_xsection( normalisation['TTJet_measured'], binWidths[variable], normalise_to_one ) - TTJet_withoutFakes_normalised_xsection = calculate_normalised_xsection( normalisation['TTJet_measured_withoutFakes'], binWidths[variable], normalise_to_one ) - TTJet_normalised_xsection_unfolded = calculate_normalised_xsection( normalisation['TTJet_unfolded'], binWidths[variable], normalise_to_one ) - - normalised_xsection = {'TTJet_measured' : TTJet_normalised_xsection, - 'TTJet_measured_withoutFakes' : TTJet_withoutFakes_normalised_xsection, - 'TTJet_unfolded' : TTJet_normalised_xsection_unfolded - } + + normalised_xsection = {} + normalised_xsection['TTJet_measured'] = calculate_normalised_xsection( + normalisation['TTJet_measured'], + binWidths[variable], + normalise_to_one + ) + normalised_xsection['TTJet_measured_withoutFakes'] = calculate_normalised_xsection( + normalisation['TTJet_measured_withoutFakes'], + binWidths[variable], + normalise_to_one + ) + normalised_xsection['TTJet_unfolded'] = 
calculate_normalised_xsection( + normalisation['TTJet_unfolded'], + binWidths[variable], + normalise_to_one + ) if category == 'central': - powhegPythia8_normalised_xsection = calculate_normalised_xsection( normalisation['powhegPythia8'], binWidths[variable], normalise_to_one ) - # amcatnlo_normalised_xsection = calculate_normalised_xsection( normalisation['amcatnlo'], binWidths[variable], normalise_to_one ) - powhegHerwig_normalised_xsection = calculate_normalised_xsection( normalisation['powhegHerwig'], binWidths[variable], normalise_to_one ) - # amcatnloHerwig_normalised_xsection = calculate_normalised_xsection( normalisation['amcatnloHerwig'], binWidths[variable], normalise_to_one ) - # madgraphMLM_normalised_xsection = calculate_normalised_xsection( normalisation['madgraphMLM'], binWidths[variable], normalise_to_one ) - - fsrdown_normalised_xsection = calculate_normalised_xsection( normalisation['fsrdown'], binWidths[variable], normalise_to_one ) - fsrup_normalised_xsection = calculate_normalised_xsection( normalisation['fsrup'], binWidths[variable], normalise_to_one ) - isrdown_normalised_xsection = calculate_normalised_xsection( normalisation['isrdown'], binWidths[variable], normalise_to_one ) - isrup_normalised_xsection = calculate_normalised_xsection( normalisation['isrup'], binWidths[variable], normalise_to_one ) - uedown_normalised_xsection = calculate_normalised_xsection( normalisation['uedown'], binWidths[variable], normalise_to_one ) - ueup_normalised_xsection = calculate_normalised_xsection( normalisation['ueup'], binWidths[variable], normalise_to_one ) - - massdown_normalised_xsection = calculate_normalised_xsection( normalisation['massdown'], binWidths[variable], normalise_to_one ) - massup_normalised_xsection = calculate_normalised_xsection( normalisation['massup'], binWidths[variable], normalise_to_one ) - - normalised_xsection['powhegPythia8'] = powhegPythia8_normalised_xsection - # normalised_xsection['amcatnlo'] = amcatnlo_normalised_xsection 
- # normalised_xsection['madgraphMLM' ] = madgraphMLM_normalised_xsection - normalised_xsection['powhegHerwig'] = powhegHerwig_normalised_xsection - # normalised_xsection['amcatnloHerwig'] = amcatnloHerwig_normalised_xsection - - normalised_xsection['fsrdown'] = fsrdown_normalised_xsection - normalised_xsection['fsrup'] = fsrup_normalised_xsection - normalised_xsection['isrdown'] = isrdown_normalised_xsection - normalised_xsection['isrup'] = isrup_normalised_xsection - normalised_xsection['uedown'] = uedown_normalised_xsection - normalised_xsection['ueup'] = ueup_normalised_xsection - normalised_xsection['massdown'] = massdown_normalised_xsection - normalised_xsection['massup'] = massup_normalised_xsection - - file_template = '{path_to_JSON}/{category}/normalised_xsection_{channel}_{method}.txt' - filename = file_template.format( - path_to_JSON = path_to_JSON, - category = category, - channel = channel, - method = method, - ) + normalised_xsection['powhegPythia8'] = calculate_normalised_xsection( + normalisation['powhegPythia8'], + binWidths[variable], + normalise_to_one, + ) + # normalised_xsection['amcatnlo'] = calculate_normalised_xsection( + # normalisation['amcatnlo'], + # binWidths[variable], + # normalise_to_one, + # ) + normalised_xsection['powhegHerwig'] = calculate_normalised_xsection( + normalisation['powhegHerwig'], + binWidths[variable], + normalise_to_one, + ) + # normalised_xsection['madgraphMLM'] = calculate_normalised_xsection( + # normalisation['madgraphMLM'], + # binWidths[variable], + # normalise_to_one, + # ) + + normalised_xsection['massdown'] = calculate_normalised_xsection( + normalisation['massdown'], + binWidths[variable], + normalise_to_one, + ) + normalised_xsection['massup'] = calculate_normalised_xsection( + normalisation['massup'], + binWidths[variable], + normalise_to_one, + ) + normalised_xsection['isrdown'] = calculate_normalised_xsection( + normalisation['isrdown'], + binWidths[variable], + normalise_to_one, + ) + 
normalised_xsection['isrup'] = calculate_normalised_xsection( + normalisation['isrup'], + binWidths[variable], + normalise_to_one, + ) + # normalised_xsection['fsrdown'] = calculate_normalised_xsection( + # normalisation['fsrdown'], + # binWidths[variable], + # normalise_to_one, + # ) + normalised_xsection['fsrup'] = calculate_normalised_xsection( + normalisation['fsrup'], + binWidths[variable], + normalise_to_one, + ) + normalised_xsection['uedown'] = calculate_normalised_xsection( + normalisation['uedown'], + binWidths[variable], + normalise_to_one, + ) + normalised_xsection['ueup'] = calculate_normalised_xsection( + normalisation['ueup'], + binWidths[variable], + normalise_to_one, + ) + file_template = '{path_to_DF}/{category}/xsection_normalised_{channel}_{method}.txt' if normalise_to_one: - filename = filename.replace( 'normalised_xsection', 'normalised_to_one_xsection' ) - write_data_to_JSON( normalised_xsection, filename ) - -if __name__ == '__main__': - set_root_defaults( msg_ignore_level = 3001 ) - # setup - parser = OptionParser() - parser.add_option( "-p", "--path", dest = "path", default = 'data/normalisation/', + file_template = file_template.replace( 'xsection_normalised', 'xsection_normalised_to_one' ) + write_02(normalised_xsection, file_template, path_to_DF, category, channel, method) + +def write_02(tuple_out, f_temp, path_to_DF, category, channel, method): + f = f_temp.format( + path_to_DF = path_to_DF, + category = category, + channel = channel, + method = method, + ) + write_tuple_to_df( tuple_out, f ) + return f + +def parse_arguments(): + parser = ArgumentParser(__doc__) + parser.add_argument( "-p", "--path", dest = "path", default = 'data/normalisation/background_subtraction/', help = "set path to JSON files" ) - parser.add_option( "-v", "--variable", dest = "variable", default = 'MET', + parser.add_argument( "-v", "--variable", dest = "variable", default = 'MET', help = "set the variable to analyse (MET, HT, ST, MT)" ) - parser.add_option( 
"-b", "--bjetbin", dest = "bjetbin", default = '2m', - help = "set b-jet multiplicity for analysis. Options: exclusive: 0-3, inclusive (N or more): 0m, 1m, 2m, 3m, 4m" ) - parser.add_option( "-m", "--metType", dest = "metType", default = 'type1', - help = "set MET type for analysis of MET, ST or MT" ) - parser.add_option( "-u", "--unfolding_method", dest = "unfolding_method", default = 'TUnfold', - help = "Unfolding method: RooUnfoldSvd (default), TSVDUnfold, RooUnfoldTUnfold, RooUnfoldInvert, RooUnfoldBinByBin, RooUnfoldBayes" ) - parser.add_option( "-e", "--error_treatment", type = 'int', + parser.add_argument( "-u", "--unfolding_method", dest = "unfolding_method", default = 'TUnfold', + help = "Unfolding method: TUnfold" ) + parser.add_argument( "-e", "--error_treatment", type = int, dest = "error_treatment", default = unfoldCfg.error_treatment, - help = "parameter for error treatment in RooUnfold" ) - parser.add_option( "-c", "--centre-of-mass-energy", dest = "CoM", default = 13, + help = "parameter for error treatment in RooUnfold") + parser.add_argument( "-c", "--centre-of-mass-energy", dest = "com", default = 13, help = "set the centre of mass energy for analysis. 
Default = 13 [TeV]", type = int ) - parser.add_option( "-C", "--combine-before-unfolding", dest = "combine_before_unfolding", action = "store_true", + parser.add_argument( "-C", "--combine-before-unfolding", dest = "combine_before_unfolding", action = "store_true", help = "Perform combination of channels before unfolding" ) - parser.add_option( "-w", "--write-unfolding-objects", dest = "write_unfolding_objects", action = "store_true", - help = "Write out the unfolding objects (D, SV)" ) - parser.add_option( '--test', dest = "test", action = "store_true", + parser.add_argument( '--test', dest = "test", action = "store_true", help = "Just run the central measurement" ) - parser.add_option( '--ptreweight', dest = "ptreweight", action = "store_true", + parser.add_argument( '--ptreweight', dest = "ptreweight", action = "store_true", help = "Use pt-reweighted MadGraph for the measurement" ) - parser.add_option( '--visiblePS', dest = "visiblePS", action = "store_true", + parser.add_argument( '--visiblePS', dest = "visiblePS", action = "store_true", help = "Unfold to visible phase space" ) + args = parser.parse_args() + return args + +if __name__ == '__main__': + set_root_defaults( msg_ignore_level = 3001 ) + # setup + args = parse_arguments() + + # Cache arguments + run_just_central = args.test + use_ptreweight = args.ptreweight + variable = args.variable + com = args.com + unfoldCfg.error_treatment = args.error_treatment + method = args.unfolding_method + combine_before_unfolding = args.combine_before_unfolding + visiblePS = args.visiblePS + + # Cache arguments from xsection config + measurement_config = XSectionConfig( com ) + luminosity = measurement_config.luminosity * measurement_config.luminosity_scale + ttbar_xsection = measurement_config.ttbar_xsection + tau_value_electron = measurement_config.tau_values_electron[variable] + tau_value_muon = measurement_config.tau_values_muon[variable] + tau_value_combined = measurement_config.tau_values_combined[variable] - ( 
options, args ) = parser.parse_args() - measurement_config = XSectionConfig( options.CoM ) - run_just_central = options.test - use_ptreweight = options.ptreweight - # caching of variables for faster access - translate_options = measurement_config.translate_options - ttbar_theory_systematic_prefix = measurement_config.ttbar_theory_systematic_prefix - vjets_theory_systematic_prefix = measurement_config.vjets_theory_systematic_prefix - met_systematics = measurement_config.met_systematics - - centre_of_mass = options.CoM - luminosity = measurement_config.luminosity * measurement_config.luminosity_scale - ttbar_xsection = measurement_config.ttbar_xsection - path_to_files = measurement_config.path_to_files - file_for_unfolding = File( measurement_config.unfolding_central, 'read' ) - - # Not unfolding with other files at the moment - ### - ### # file_for_powheg_pythia = File( measurement_config.unfolding_powheg_pythia, 'read' ) - ### # file_for_mcatnlo = None - ### # if centre_of_mass == 8: - ### # file_for_mcatnlo = File( measurement_config.unfolding_mcatnlo, 'read' ) - ### # file_for_ptreweight = File ( measurement_config.unfolding_ptreweight, 'read' ) - files_for_pdfs = { 'PDFWeights_%d' % (index) : File ( measurement_config.unfolding_pdfweights[index] ) for index in range( 0, 100 ) } - - ### - # file_for_fsrdown = File( measurement_config.unfolding_fsr_down, 'read' ) - print 'WARNING - using fsr up file for non existent fsr down file' - file_for_fsrdown = File( measurement_config.unfolding_fsr_up, 'read' ) - file_for_fsrup = File( measurement_config.unfolding_fsr_up, 'read' ) - file_for_isrdown = File( measurement_config.unfolding_isr_down, 'read' ) - file_for_isrup = File( measurement_config.unfolding_isr_up, 'read' ) - file_for_uedown = File( measurement_config.unfolding_ue_down, 'read' ) - file_for_ueup = File( measurement_config.unfolding_ue_up, 'read' ) - - ### - file_for_renormalisationdown = File( measurement_config.unfolding_renormalisation_down, 'read' ) - 
file_for_renormalisationup = File( measurement_config.unfolding_renormalisation_up, 'read' ) - file_for_factorisationdown = File( measurement_config.unfolding_factorisation_down, 'read' ) - file_for_factorisationup = File( measurement_config.unfolding_factorisation_up, 'read' ) - file_for_combineddown = File( measurement_config.unfolding_combined_down, 'read' ) - file_for_combinedup = File( measurement_config.unfolding_combined_up, 'read' ) - # file_for_alphaSdown = File( measurement_config.unfolding_alphaS_down, 'read' ) - # file_for_alphaSup = File( measurement_config.unfolding_alphaS_up, 'read' ) - ### - file_for_massdown = File( measurement_config.unfolding_mass_down, 'read' ) - file_for_massup = File( measurement_config.unfolding_mass_up, 'read' ) - file_for_jesdown = File( measurement_config.unfolding_jes_down, 'read' ) - file_for_jesup = File( measurement_config.unfolding_jes_up, 'read' ) - ### - file_for_jerdown = File( measurement_config.unfolding_jer_down, 'read' ) - file_for_jerup = File( measurement_config.unfolding_jer_up, 'read' ) - ### - file_for_bjetdown = File( measurement_config.unfolding_bjet_down, 'read' ) - file_for_bjetup = File( measurement_config.unfolding_bjet_up, 'read' ) - ### - file_for_lightjetdown = File( measurement_config.unfolding_lightjet_down, 'read' ) - file_for_lightjetup = File( measurement_config.unfolding_lightjet_up, 'read' ) - ### - file_for_LeptonDown = File( measurement_config.unfolding_Lepton_down, 'read' ) - file_for_LeptonUp = File( measurement_config.unfolding_Lepton_up, 'read' ) - ### - file_for_ElectronEnDown = File( measurement_config.unfolding_ElectronEn_down, 'read' ) - file_for_ElectronEnUp = File( measurement_config.unfolding_ElectronEn_up, 'read' ) - ### - file_for_MuonEnDown = File( measurement_config.unfolding_MuonEn_down, 'read' ) - file_for_MuonEnUp = File( measurement_config.unfolding_MuonEn_up, 'read' ) - ### - file_for_TauEnDown = File( measurement_config.unfolding_TauEn_down, 'read' ) - 
file_for_TauEnUp = File( measurement_config.unfolding_TauEn_up, 'read' ) - ### - file_for_UnclusteredEnDown = File( measurement_config.unfolding_UnclusteredEn_down, 'read' ) - file_for_UnclusteredEnUp = File( measurement_config.unfolding_UnclusteredEn_up, 'read' ) - ### - file_for_PUUp = File( measurement_config.unfolding_PUSystematic_up, 'read') - file_for_PUDown = File( measurement_config.unfolding_PUSystematic_down, 'read') - - file_for_powhegPythia8 = File( measurement_config.unfolding_powheg_pythia8, 'read') - # file_for_amcatnlo = File( measurement_config.unfolding_amcatnlo, 'read') - # file_for_amcatnlo_herwig = File( measurement_config.unfolding_amcatnlo_herwig, 'read') - # file_for_madgraphMLM = File( measurement_config.unfolding_madgraphMLM, 'read') - file_for_powheg_herwig = File( measurement_config.unfolding_powheg_herwig, 'read' ) - - variable = options.variable - - tau_value_electron = measurement_config.tau_values_electron[variable] - tau_value_muon = measurement_config.tau_values_muon[variable] - tau_value_combined = measurement_config.tau_values_combined[variable] - - visiblePS = options.visiblePS phase_space = 'FullPS' if visiblePS: phase_space = "VisiblePS" - unfoldCfg.error_treatment = options.error_treatment - method = options.unfolding_method - combine_before_unfolding = options.combine_before_unfolding - met_type = translate_options[options.metType] - b_tag_bin = translate_options[options.bjetbin] - path_to_JSON = '{path}/{com}TeV/{variable}/{phase_space}/'.format( - path = options.path, - com = measurement_config.centre_of_mass_energy, - variable = variable, - phase_space = phase_space, - ) - - categories = deepcopy( measurement_config.categories_and_prefixes.keys() ) - # No generator or theory systematics yet - ttbar_generator_systematics = [ttbar_theory_systematic_prefix + systematic for systematic in measurement_config.generator_systematics] - ### vjets_generator_systematics = [vjets_theory_systematic_prefix + systematic for systematic in 
measurement_config.generator_systematics] - # categories.extend( ttbar_generator_systematics ) - ### categories.extend( vjets_generator_systematics ) - - # ### ttbar theory systematics, including pt reweightingnsystematic - # ttbar_theory_systematics = [] #[ ttbar_theory_systematic_prefix + 'ptreweight' ] - # categories.extend( ttbar_theory_systematics ) - - pdf_uncertainties = ['PDFWeights_%d' % index for index in range( measurement_config.pdfWeightMin, measurement_config.pdfWeightMax )] - rate_changing_systematics = [systematic for systematic in measurement_config.rate_changing_systematics_names] - # all MET uncertainties except JES as this is already included - met_uncertainties = [suffix for suffix in measurement_config.met_systematics_suffixes if not 'JetEn' in suffix and not 'JetRes' in suffix] - - all_measurements = deepcopy( categories ) - all_measurements.extend( ttbar_generator_systematics ) + unfolding_files = get_unfolding_files(measurement_config) + path_to_DF = '{path}/{com}TeV/{variable}/{phase_space}/'.format( + path = args.path, + com = com, + variable = variable, + phase_space = phase_space, + ) + + # Core Systematics + all_measurements = deepcopy( measurement_config.measurements ) + # Adding PDF Systematics + pdf_uncertainties = ['PDFWeights_%d' % index for index in range(measurement_config.pdfWeightMin, measurement_config.pdfWeightMax )] all_measurements.extend( pdf_uncertainties ) - all_measurements.extend( ['QCD_shape'] ) - all_measurements.extend( rate_changing_systematics ) + # # TTBar Reweighting Systematics + # ttbar_theory_systematics = [ 'TTJets_ptreweight', 'TTJets_etareweight' ] + # all_measurements.extend( ttbar_theory_systematics ) print 'Performing unfolding for variable', variable for category in all_measurements: - if run_just_central and not category == 'central': + if run_just_central and not category == 'central': continue - # Don't need to consider MET uncertainties for HT - if ( variable in measurement_config.variables_no_met 
) and (category in measurement_config.met_systematics_suffixes and not category in ['JES_up', 'JES_down', 'JER_up', 'JER_down']): + if ( variable in measurement_config.variables_no_met ) and (category in measurement_config.met_specific_systematics): continue - print 'Doing category ', category - print 'Unfolding category "%s"' % category - # Setting up systematic MET for JES up/down samples - met_type = translate_options[options.metType] - - if category == 'JES_up': - met_type += 'JetEnUp' - elif category == 'JES_down': - met_type += 'JetEnDown' - elif category == 'JER_up': - met_type += 'JetResUp' - elif category == 'JER_down': - met_type += 'JetResDown' - if category in met_uncertainties and not 'JES' in category and not 'JER' in category: - met_type += category - - # read fit results from JSON - electron_file = path_to_JSON + '/' + category + '/normalisation_electron_' + met_type + '.txt' - muon_file = path_to_JSON + '/' + category + '/normalisation_muon_' + met_type + '.txt' - - # don't change fit input for ttbar generator/theory systematics and PDF weights - if category in ttbar_generator_systematics or category in pdf_uncertainties: - # or category in ttbar_mass_systematics - electron_file = path_to_JSON + '/central/normalisation_electron_' + met_type + '.txt' - muon_file = path_to_JSON + '/central/normalisation_muon_' + met_type + '.txt' - # combined_file = path_to_JSON + '/central/normalisation_combined_' + met_type + '.txt' - elif category in rate_changing_systematics or category == 'QCD_shape': - electron_file = path_to_JSON + '/' + category + '/normalisation_electron_' + met_type + '.txt' - muon_file = path_to_JSON + '/' + category + '/normalisation_muon_' + met_type + '.txt' - elif category == 'central_TTJet': - electron_file = path_to_JSON + '/central/initial_normalisation_electron_' + met_type + '.txt' - muon_file = path_to_JSON + '/central/initial_normalisation_muon_' + met_type + '.txt' - # elif category in met_uncertainties and not 'JES' in 
category and not 'JER' in category: - # electron_file = path_to_JSON + '/'+category+'/initial_normalisation_electron_' + met_type + '.txt' - # muon_file = path_to_JSON + '/'+category+'/initial_normalisation_muon_' + met_type + '.txt' - elif category != 'central': - electron_file = path_to_JSON + '/' + category + '/normalisation_electron_' + met_type + '.txt' - muon_file = path_to_JSON + '/' + category + '/normalisation_muon_' + met_type + '.txt' - - fit_results_electron = None - fit_results_muon = None - + print 'Unfolding category {}'.format(category) + + # read normalisation results from JSON + electron_file = path_to_DF + '/' + category + '/normalisation_electron.txt' + muon_file = path_to_DF + '/' + category + '/normalisation_muon.txt' + combined_file = path_to_DF + '/' + category + '/normalisation_combined.txt' + + # don't change normalisation input for ttbar generator/theory systematics and PDF weights + # For systematics not run in 01 [PDF and TTJet_] then use the central normalisations + if category not in measurement_config.normalisation_systematics: + electron_file = path_to_DF + '/central/normalisation_electron.txt' + muon_file = path_to_DF + '/central/normalisation_muon.txt' + combined_file = path_to_DF + '/central/normalisation_combined.txt' + + # Read the normalisations + normalisation_results_electron = None + normalisation_results_muon = None + normalisation_results_combined = None + + # Read the normalisation files + # For LeptonUp/Down return other lepton type to central normailsation + # THINK HOW TO READ MUON:ELECTRON/UP:DOWN WITH COMBINEDBEFOREUNFOLDING if category == 'Muon_up' or category == 'Muon_down': - # fit_results_electron = read_data_from_JSON( path_to_JSON + '/central/initial_normalisation_electron_' + met_type + '.txt' ) - fit_results_electron = read_data_from_JSON( path_to_JSON + '/central/normalisation_electron_' + met_type + '.txt' ) - fit_results_muon = read_data_from_JSON( muon_file ) + normalisation_results_electron = 
read_tuple_from_file( path_to_DF + '/central/normalisation_electron.txt' ) + normalisation_results_muon = read_tuple_from_file( muon_file ) elif category == 'Electron_up' or category == 'Electron_down': - fit_results_electron = read_data_from_JSON( electron_file ) - # fit_results_muon = read_data_from_JSON( path_to_JSON + '/central/initial_normalisation_muon_' + met_type + '.txt' ) - fit_results_muon = read_data_from_JSON( path_to_JSON + '/central/normalisation_muon_' + met_type + '.txt' ) + normalisation_results_electron = read_tuple_from_file( electron_file ) + normalisation_results_muon = read_tuple_from_file( path_to_DF + '/central/normalisation_muon.txt' ) else: - fit_results_electron = read_data_from_JSON( electron_file ) - fit_results_muon = read_data_from_JSON( muon_file ) - fit_results_combined = combine_complex_results(fit_results_electron, fit_results_muon) - TTJet_fit_results_electron = fit_results_electron['TTJet'] - TTJet_fit_results_muon = fit_results_muon['TTJet'] - TTJet_fit_results_combined = fit_results_combined['TTJet'] - - # # change back to original MET type for the unfolding - met_type = translate_options[options.metType] - # # ad-hoc switch for PFMET -> patMETsPFlow - # if met_type == 'PFMET': - # met_type = 'patMETsPFlow' - - file_template = '{path_to_JSON}/{category}/unfolded_normalisation_{channel}_{method}.txt' - filename = '' - - # # get unfolded normalisation - unfolded_normalisation_electron = {} - unfolded_normalisation_muon = {} + normalisation_results_electron = read_tuple_from_file( electron_file ) + normalisation_results_muon = read_tuple_from_file( muon_file ) + + # Combine the normalisations (beforeUnfolding) + normalisation_results_combined = combine_complex_df(normalisation_results_electron, normalisation_results_muon) + TTJet_normalisation_results_electron = normalisation_results_electron['TTJet'] + TTJet_normalisation_results_muon = normalisation_results_muon['TTJet'] + TTJet_normalisation_results_combined = 
normalisation_results_combined['TTJet'] + + # # get unfolded normalisations and xsections + unfolded_normalisation_electron = {} + unfolded_normalisation_muon = {} + unfolded_normalisation_combined = {} + unfolded_normalisation_combinedBeforeUnfolding = {} + # Electron channel - unfolded_normalisation_electron = get_unfolded_normalisation( TTJet_fit_results_electron, category, 'electron', tau_value_electron, visiblePS = visiblePS ) - filename = file_template.format( - path_to_JSON = path_to_JSON, - category = category, - channel = 'electron', - method = method, - ) - write_data_to_JSON( unfolded_normalisation_electron, filename ) + channel = 'electron' + unfolded_normalisation_electron = get_unfolded_normalisation( + TTJet_normalisation_results_electron, + category, + channel, + tau_value_electron, + visiblePS = visiblePS + ) # measure xsection - calculate_xsections( unfolded_normalisation_electron, category, 'electron' ) - calculate_normalised_xsections( unfolded_normalisation_electron, category, 'electron' ) - calculate_normalised_xsections( unfolded_normalisation_electron, category, 'electron' , True ) + calculate_xsections( unfolded_normalisation_electron, category, channel ) + calculate_normalised_xsections( unfolded_normalisation_electron, category, channel ) + calculate_normalised_xsections( unfolded_normalisation_electron, category, channel , True ) # Muon channel - unfolded_normalisation_muon = get_unfolded_normalisation( TTJet_fit_results_muon, category, 'muon', tau_value_muon, visiblePS = visiblePS ) - filename = file_template.format( - path_to_JSON = path_to_JSON, - category = category, - channel = 'muon', - method = method, - ) - write_data_to_JSON( unfolded_normalisation_muon, filename ) + channel = 'muon' + unfolded_normalisation_muon = get_unfolded_normalisation( + TTJet_normalisation_results_muon, + category, + channel, + tau_value_muon, + visiblePS = visiblePS + ) + # measure xsection + calculate_xsections( unfolded_normalisation_muon, category, 
channel ) + calculate_normalised_xsections( unfolded_normalisation_muon, category, channel ) + calculate_normalised_xsections( unfolded_normalisation_muon, category, channel , True ) + + # Results where the channels are combined before unfolding (the 'combined in the response matrix') + channel = 'combinedBeforeUnfolding' + unfolded_normalisation_combinedBeforeUnfolding = get_unfolded_normalisation( + TTJet_normalisation_results_combined, + category, + 'combined', + tau_value=tau_value_combined, + visiblePS=visiblePS, + ) # measure xsection - calculate_xsections( unfolded_normalisation_muon, category, 'muon' ) - calculate_normalised_xsections( unfolded_normalisation_muon, category, 'muon' ) - calculate_normalised_xsections( unfolded_normalisation_muon, category, 'muon' , True ) + calculate_xsections( unfolded_normalisation_combinedBeforeUnfolding, category, channel ) + calculate_normalised_xsections( unfolded_normalisation_combinedBeforeUnfolding, category, channel ) + calculate_normalised_xsections( unfolded_normalisation_combinedBeforeUnfolding, category, channel , True ) # Results where the channels are combined after unfolding - unfolded_normalisation_combined = combine_complex_results( unfolded_normalisation_electron, unfolded_normalisation_muon ) channel = 'combined' - filename = file_template.format( - path_to_JSON = path_to_JSON, - category = category, - channel = channel, - method = method, - ) - write_data_to_JSON( unfolded_normalisation_combined, filename ) + unfolded_normalisation_combined = combine_complex_results( unfolded_normalisation_electron, unfolded_normalisation_muon ) + # measure xsection calculate_xsections( unfolded_normalisation_combined, category, channel ) calculate_normalised_xsections( unfolded_normalisation_combined, category, channel ) calculate_normalised_xsections( unfolded_normalisation_combined, category, channel , True ) - # Results where the channels are combined before unfolding - unfolded_normalisation_combinedBeforeUnfolding 
= get_unfolded_normalisation( - TTJet_fit_results_combined, - category,'combined', tau_value=tau_value_combined, - visiblePS=visiblePS, - ) - channel = 'combinedBeforeUnfolding' - filename = file_template.format( - path_to_JSON = path_to_JSON, - category = category, - channel = channel, - method = method, - ) - write_data_to_JSON( unfolded_normalisation_combinedBeforeUnfolding, filename ) - calculate_xsections( unfolded_normalisation_combinedBeforeUnfolding, category, channel ) - calculate_normalised_xsections( unfolded_normalisation_combinedBeforeUnfolding, category, channel ) - calculate_normalised_xsections( unfolded_normalisation_combinedBeforeUnfolding, category, channel , True ) + + diff --git a/dps/analysis/xsection/03_calculate_systematics.py b/dps/analysis/xsection/03_calculate_systematics.py index 075f5697..98dbd30b 100644 --- a/dps/analysis/xsection/03_calculate_systematics.py +++ b/dps/analysis/xsection/03_calculate_systematics.py @@ -15,10 +15,9 @@ 2) can be used to compare systematics (both in tables and plots) 3) + 4) for more fine-grained analysis ''' -from optparse import OptionParser +from argparse import ArgumentParser from dps.config.xsection import XSectionConfig from dps.config.variable_binning import bin_edges_vis -from dps.utils.file_utilities import make_folder_if_not_exists from dps.utils.systematic import append_PDF_uncertainties, print_dictionary,\ get_normalised_cross_sections, get_symmetrised_systematic_uncertainty,\ generate_covariance_matrices,\ @@ -26,6 +25,34 @@ write_normalised_xsection_measurement,\ write_systematic_xsection_measurement +def parse_arguments(): + parser = ArgumentParser() + parser.add_argument( "-p", "--path", + dest = "path", + default = 'data/normalisation/background_subtraction/', + help = "set path to JSON files" ) + parser.add_argument( "-v", "--variable", + dest = "variable", + default = 'MET', + help = "set variable to plot (MET, HT, ST, MT)" ) + parser.add_argument( "-c", "--centre-of-mass-energy", + dest 
= "CoM", + default = 13, type = int, + help = "set the centre of mass energy for analysis. Default = 13 [TeV]" ) + parser.add_argument( "-s", "--symmetrise_errors", + action = "store_true", + dest = "symmetrise_errors", + help = "Makes the errors symmetric" ) + parser.add_argument( '--visiblePS', + dest = "visiblePS", + action = "store_true", + help = "Unfold to visible phase space" ) + parser.add_argument( "-u", "--unfolding_method", + dest = "unfolding_method", + default = 'TUnfold', + help = "Unfolding method: TUnfold (default)" ) + args = parser.parse_args() + return args if __name__ == '__main__': ''' @@ -33,63 +60,44 @@ 2) calculate the difference to central measurement 3) ''' - parser = OptionParser() - parser.add_option( "-p", "--path", dest = "path", default = 'data/M3_angle_bl/', - help = "set path to JSON files" ) - parser.add_option( "-v", "--variable", dest = "variable", default = 'MET', - help = "set variable to plot (MET, HT, ST, MT)" ) - parser.add_option( "-m", "--metType", dest = "metType", default = 'type1', - help = "set MET type used in the analysis of MET, ST or MT" ) - parser.add_option( "-b", "--bjetbin", dest = "bjetbin", default = '2m', - help = "set b-jet multiplicity for analysis. Options: exclusive: 0-3, inclusive (N or more): 0m, 1m, 2m, 3m, 4m" ) - parser.add_option( "-c", "--centre-of-mass-energy", dest = "CoM", default = 13, type = int, - help = "set the centre of mass energy for analysis. 
Default = 13 [TeV]" ) - parser.add_option( "-s", "--symmetrise_errors", action = "store_true", dest = "symmetrise_errors", - help = "Makes the errors symmetric" ) - parser.add_option( '--visiblePS', dest = "visiblePS", action = "store_true", - help = "Unfold to visible phase space" ) - parser.add_option( "-u", "--unfolding_method", dest = "unfolding_method", default = 'TUnfold', - help = "Unfolding method: TUnfold (default)" ) - - ( options, args ) = parser.parse_args() - measurement_config = XSectionConfig( options.CoM ) + args = parse_arguments() + measurement_config = XSectionConfig( args.CoM ) # caching of variables for shorter access - translate_options = measurement_config.translate_options - met_specific_systematics = measurement_config.met_specific_systematics - met_type = translate_options[options.metType] - variables_no_met = measurement_config.variables_no_met - method = options.unfolding_method - symmetrise_errors = options.symmetrise_errors - variable = options.variable - topMasses = measurement_config.topMasses - topMassUncertainty = measurement_config.topMassUncertainty - visiblePS = options.visiblePS + method = args.unfolding_method + symmetrise_errors = args.symmetrise_errors + variable = args.variable + visiblePS = args.visiblePS + met_specific_systematics = measurement_config.met_specific_systematics + variables_no_met = measurement_config.variables_no_met + topMasses = measurement_config.topMasses + topMassUncertainty = measurement_config.topMassUncertainty + phase_space = 'VisiblePS' if not visiblePS: phase_space = 'FullPS' - path_to_JSON = '{path}/{com}TeV/{variable}/{phase_space}' - path_to_JSON = path_to_JSON.format( - path = options.path, - com = options.CoM, + path_to_DF = '{path}/{com}TeV/{variable}/{phase_space}' + path_to_DF = path_to_DF.format( + path = args.path, + com = args.CoM, variable = variable, phase_space = phase_space, - ) + ) number_of_bins=len(bin_edges_vis[variable])-1 - # List of options to pass to systematic functions - 
opts={ - 'met_specific_systematics' : met_specific_systematics, - 'met_type' : met_type, - 'variables_no_met' : variables_no_met, - 'symmetrise_errors' : symmetrise_errors, - 'path_to_JSON' : path_to_JSON, - 'method' : method, - 'variable' : variable, - 'number_of_bins' : number_of_bins, - 'topMasses' : topMasses, - 'topMassUncertainty' : topMassUncertainty + # List of args to pass to systematic functions + args={ + 'met_specific_systematics' : met_specific_systematics, + 'variables_no_met' : variables_no_met, + 'symmetrise_errors' : symmetrise_errors, + 'path_to_DF' : path_to_DF, + 'method' : method, + 'variable' : variable, + 'number_of_bins' : number_of_bins, + 'topMasses' : topMasses, + 'topMassUncertainty' : topMassUncertainty, + 'phase_space' : phase_space } # Get list of all systematics @@ -100,47 +108,73 @@ list_of_systematics = all_systematics # If you want different lists of systematics can just do some manipulation here - for channel in ['electron', 'muon', 'combined', 'combinedBeforeUnfolding']: - # for channel in ['muon']: - print("Channel in use is {0} : ".format(channel)) - - # Output folder of covariance matrices - covariance_matrix_output_path = 'plots/covariance_matrices/{phase_space}/{channel}/{variable}/' - covariance_matrix_output_path = covariance_matrix_output_path.format( - variable = variable, - channel = channel, - phase_space = phase_space, - ) - make_folder_if_not_exists(covariance_matrix_output_path) + channel = [ + 'electron', + 'muon', + 'combined', + # 'combinedBeforeUnfolding', + ] + for ch in channel: + print("Calculating {0} channel systematic uncertainties : ".format(ch)) - # Add channel specific options to list of options - opts['channel'] = channel - opts['covariance_matrix_output_path'] = covariance_matrix_output_path + # Add channel specific args to list of args + args['channel'] = ch # Retreive the normalised cross sections, for all groups in list_of_systematics. 
- systematic_normalised_uncertainty, unfolded_systematic_normalised_uncertainty = get_normalised_cross_sections(opts, list_of_systematics) + systematic_normalised_uncertainty, unfolded_systematic_normalised_uncertainty = get_normalised_cross_sections( + args, + list_of_systematics + ) # print_dictionary("Normalised cross sections of the systematics in use", systematic_normalised_uncertainty) # print_dictionary("Unfolded normalised cross sections of the systematics in use", unfolded_systematic_normalised_uncertainty) # Get and symmetrise the uncertainties - x_sec_with_symmetrised_systematics = get_symmetrised_systematic_uncertainty(systematic_normalised_uncertainty, opts) - unfolded_x_sec_with_symmetrised_systematics = get_symmetrised_systematic_uncertainty(unfolded_systematic_normalised_uncertainty, opts) + x_sec_with_symmetrised_systematics = get_symmetrised_systematic_uncertainty( + args, + systematic_normalised_uncertainty, + ) + unfolded_x_sec_with_symmetrised_systematics = get_symmetrised_systematic_uncertainty( + args, + unfolded_systematic_normalised_uncertainty + ) # print_dictionary("Normalised cross sections of the systematics with symmetrised uncertainties", x_sec_with_symmetrised_systematics) # print_dictionary("Unfolded normalised cross sections of the systematics with symmetrised uncertainties", unfolded_x_sec_with_symmetrised_systematics) # Create covariance matrices - generate_covariance_matrices(opts, x_sec_with_symmetrised_systematics) - generate_covariance_matrices(opts, unfolded_x_sec_with_symmetrised_systematics) + generate_covariance_matrices( + args, + x_sec_with_symmetrised_systematics + ) + generate_covariance_matrices( + args, + unfolded_x_sec_with_symmetrised_systematics + ) # Combine all systematic uncertainties for each of the groups of systematics # Currently returns (Value, SysUp, SysDown) - Need to include stat? 
- full_measurement = get_measurement_with_total_systematic_uncertainty(opts, x_sec_with_symmetrised_systematics) - full_unfolded_measurement = get_measurement_with_total_systematic_uncertainty(opts, unfolded_x_sec_with_symmetrised_systematics) + full_measurement = get_measurement_with_total_systematic_uncertainty( + args, + x_sec_with_symmetrised_systematics + ) + full_unfolded_measurement = get_measurement_with_total_systematic_uncertainty( + args, + unfolded_x_sec_with_symmetrised_systematics + ) # print_dictionary("Measurement with total systematic error for each systematic group", full_measurement) # print_dictionary("Unfolded measurement with total systematic error for each systematic group", full_unfolded_measurement) # Write central +- error to JSON. Group of systematics in question is included in outputfile name. # Summary if you want to specify specific list. e.g. GeneratorOnly etc - write_normalised_xsection_measurement(opts, full_measurement, full_unfolded_measurement, summary = '' ) - write_systematic_xsection_measurement(opts, unfolded_x_sec_with_symmetrised_systematics, full_unfolded_measurement, summary = '' ) + write_normalised_xsection_measurement( + args, + full_measurement, + full_unfolded_measurement, + summary = '' + ) + write_systematic_xsection_measurement( + args, + unfolded_x_sec_with_symmetrised_systematics, + full_unfolded_measurement, + summary = '' + ) diff --git a/dps/analysis/xsection/04_make_plots_matplotlib.py b/dps/analysis/xsection/04_make_plots_matplotlib.py index fe59cbd1..30efc8d4 100644 --- a/dps/analysis/xsection/04_make_plots_matplotlib.py +++ b/dps/analysis/xsection/04_make_plots_matplotlib.py @@ -1,22 +1,24 @@ # the result of the division will be always a float from __future__ import division, print_function -from optparse import OptionParser +from argparse import ArgumentParser import os, gc +import sys from copy import deepcopy -from dps.config.latex_labels import variables_latex, measurements_latex, fit_variables_latex 
-from dps.config.variable_binning import bin_edges_full, variable_bins_ROOT, variable_bins_visiblePS_ROOT, fit_variable_bin_edges,\ - bin_edges_vis +from dps.config.latex_labels import variables_latex, measurements_latex +from dps.config.variable_binning import bin_edges_full, bin_edges_vis from dps.config.xsection import XSectionConfig -from dps.utils.file_utilities import read_data_from_JSON, make_folder_if_not_exists +from dps.utils.file_utilities import make_folder_if_not_exists +from dps.utils.pandas_utilities import read_tuple_from_file, file_to_df, tupleise_cols from dps.utils.hist_utilities import value_error_tuplelist_to_hist, \ -value_tuplelist_to_hist, value_errors_tuplelist_to_graph, graph_to_value_errors_tuplelist -from math import sqrt +value_errors_tuplelist_to_graph, graph_to_value_errors_tuplelist + # rootpy & matplotlib -from ROOT import kRed, kGreen, kMagenta, kBlue, kBlack +from ROOT import kBlue from dps.utils.ROOT_utils import set_root_defaults import matplotlib as mpl -from dps.utils.plotting import get_best_max_y +from matplotlib import rc + mpl.use( 'agg' ) import rootpy.plotting.root2matplotlib as rplt import matplotlib.pyplot as plt @@ -34,298 +36,100 @@ @xsec_04_log.trace() def read_xsection_measurement_results( category, channel ): - global path_to_JSON, variable, met_type, phase_space, method + ''' + Reading the unfolded xsection results from DFs into graphs + ''' + global path_to_DF, variable, phase_space, method file_template = '{path}/{category}/{name}_{channel}_{method}{suffix}.txt' filename = file_template.format( - path = path_to_JSON, - category = category, - name = 'normalised_xsection', - channel = channel, - method = method, - suffix = '', - ) + path = path_to_DF, + category = category, + name = 'xsection_normalised', + channel = channel, + method = method, + suffix = '', + ) xsec_04_log.debug('Reading file {0}'.format(filename)) - normalised_xsection_unfolded = read_data_from_JSON( filename ) + edges = 
bin_edges_full[variable] if phase_space == 'VisiblePS': edges = bin_edges_vis[variable] - h_normalised_xsection = value_error_tuplelist_to_hist( normalised_xsection_unfolded['TTJet_measured'], edges ) - h_normalised_xsection_unfolded = value_error_tuplelist_to_hist( normalised_xsection_unfolded['TTJet_unfolded'], edges ) - - histograms_normalised_xsection_different_generators = {'measured':h_normalised_xsection, - 'unfolded':h_normalised_xsection_unfolded} + # Collect the cross section measured/unfolded results from dataframes + normalised_xsection_unfolded = read_tuple_from_file( filename ) + h_normalised_xsection = value_error_tuplelist_to_hist( normalised_xsection_unfolded['TTJet_measured'], edges ) + h_normalised_xsection_unfolded = value_error_tuplelist_to_hist( normalised_xsection_unfolded['TTJet_unfolded'], edges ) - histograms_normalised_xsection_systematics_shifts = {'measured':h_normalised_xsection, - 'unfolded':h_normalised_xsection_unfolded} + histograms_normalised_xsection_different_generators = { + 'measured':h_normalised_xsection, + 'unfolded':h_normalised_xsection_unfolded, + } + histograms_normalised_xsection_systematics_shifts = deepcopy( histograms_normalised_xsection_different_generators ) if category == 'central': - # true distributions - h_normalised_xsection_powhegPythia8 = value_error_tuplelist_to_hist( normalised_xsection_unfolded['powhegPythia8'], edges ) - # h_normalised_xsection_amcatnlo = value_error_tuplelist_to_hist( normalised_xsection_unfolded['amcatnlo'], edges ) - # h_normalised_xsection_madgraphMLM = value_error_tuplelist_to_hist( normalised_xsection_unfolded['madgraphMLM'], edges ) - h_normalised_xsection_powhegHerwigpp = value_error_tuplelist_to_hist( normalised_xsection_unfolded['powhegHerwig'], edges ) - # h_normalised_xsection_amcatnloHerwigpp = value_error_tuplelist_to_hist( normalised_xsection_unfolded['amcatnloHerwig'], edges ) - - # h_normalised_xsection_scaleup = value_error_tuplelist_to_hist( 
normalised_xsection_unfolded['scaleup'], edges ) - # h_normalised_xsection_scaledown = value_error_tuplelist_to_hist( normalised_xsection_unfolded['scaledown'], edges ) - h_normalised_xsection_massup = value_error_tuplelist_to_hist( normalised_xsection_unfolded['massup'], edges ) - h_normalised_xsection_massdown = value_error_tuplelist_to_hist( normalised_xsection_unfolded['massdown'], edges ) - - histograms_normalised_xsection_different_generators.update( { - 'powhegPythia8':h_normalised_xsection_powhegPythia8, - # 'amcatnloPythia8':h_normalised_xsection_amcatnlo, - # 'madgraphMLM':h_normalised_xsection_madgraphMLM, - 'powhegHerwig':h_normalised_xsection_powhegHerwigpp, - # 'amcatnloHerwig':h_normalised_xsection_amcatnloHerwigpp, - }) - - histograms_normalised_xsection_systematics_shifts.update( {'powhegPythia8':h_normalised_xsection_powhegPythia8, - # 'scaledown': h_normalised_xsection_scaledown, - # 'scaleup': h_normalised_xsection_scaleup, - 'massdown': h_normalised_xsection_massdown, - 'massup': h_normalised_xsection_massup - }) + + # Add in distributions for the different MC to be shown + h_normalised_xsection_powhegPythia8 = value_error_tuplelist_to_hist( normalised_xsection_unfolded['powhegPythia8'], edges ) + # h_normalised_xsection_amcatnlo = value_error_tuplelist_to_hist( normalised_xsection_unfolded['amcatnlo'], edges ) + # h_normalised_xsection_madgraphMLM = value_error_tuplelist_to_hist( normalised_xsection_unfolded['madgraphMLM'], edges ) + h_normalised_xsection_powhegHerwigpp = value_error_tuplelist_to_hist( normalised_xsection_unfolded['powhegHerwig'], edges ) + + h_normalised_xsection_massup = value_error_tuplelist_to_hist( normalised_xsection_unfolded['massup'], edges ) + h_normalised_xsection_massdown = value_error_tuplelist_to_hist( normalised_xsection_unfolded['massdown'], edges ) + + # And update + histograms_normalised_xsection_different_generators.update( + { + 'powhegPythia8' : h_normalised_xsection_powhegPythia8, + # 'amcatnloPythia8' : 
h_normalised_xsection_amcatnlo, + # 'madgraphMLM' : h_normalised_xsection_madgraphMLM, + 'powhegHerwig' : h_normalised_xsection_powhegHerwigpp, + } + ) + histograms_normalised_xsection_systematics_shifts.update( + { + 'powhegPythia8' : h_normalised_xsection_powhegPythia8, + 'massdown' : h_normalised_xsection_massdown, + 'massup' : h_normalised_xsection_massup + } + ) filename = file_template.format( - path = path_to_JSON, - category = category, - name = 'normalised_xsection', - channel = channel, - method = method, - suffix = '_with_errors', - ) - - normalised_xsection_unfolded_with_errors = read_data_from_JSON( filename ) + path = path_to_DF, + category = category, + name = 'xsection_normalised', + channel = channel, + method = method, + suffix = '_summary_absolute', + ) + + # Now for the systematic uncertainties + normalised_xsection_unfolded_with_errors = file_to_df( filename ) + normalised_xsection_unfolded_with_errors['TTJet_unfolded'] = tupleise_cols( + normalised_xsection_unfolded_with_errors['central'], + normalised_xsection_unfolded_with_errors['systematic'], + ) + + xsec_04_log.debug('Reading file {0}'.format(filename)) -# filename = file_template.format( -# path = path_to_JSON, -# category = category, -# name = 'normalised_xsection', -# channel = channel, -# method = method, -# suffix = '_with_systematics_but_without_generator_errors', -# ) - ### normalised_xsection_unfolded_with_errors_with_systematics_but_without_ttbar_theory = read_data_from_JSON( file_template + '_with_systematics_but_without_ttbar_theory_errors.txt' ) -# normalised_xsection_unfolded_with_errors_with_systematics_but_without_generator = normalised_xsection_unfolded_with_errors - - # a rootpy.Graph with asymmetric errors! 
- ### h_normalised_xsection_with_systematics_but_without_ttbar_theory = value_errors_tuplelist_to_graph( - ### normalised_xsection_unfolded_with_errors_with_systematics_but_without_ttbar_theory['TTJet_measured'], - ### edges ) - ### h_normalised_xsection_with_systematics_but_without_ttbar_theory_unfolded = value_errors_tuplelist_to_graph( - ### normalised_xsection_unfolded_with_errors_with_systematics_but_without_ttbar_theory['TTJet_unfolded'], - ### edges ) - - h_normalised_xsection_unfolded_with_errors = value_errors_tuplelist_to_graph( - normalised_xsection_unfolded_with_errors['TTJet_measured'], - edges ) - h_normalised_xsection_unfolded_with_errors_unfolded = value_errors_tuplelist_to_graph( - normalised_xsection_unfolded_with_errors['TTJet_unfolded'], - edges ) + # Transform unfolded data into graph form + h_normalised_xsection_unfolded_with_errors_unfolded = value_errors_tuplelist_to_graph( + normalised_xsection_unfolded_with_errors['TTJet_unfolded'], + edges, + is_symmetric_errors=True + ) - # histograms_normalised_xsection_different_generators['measured_with_systematics'] = h_normalised_xsection_with_systematics_but_without_ttbar_theory - # histograms_normalised_xsection_different_generators['unfolded_with_systematics'] = h_normalised_xsection_with_systematics_but_without_ttbar_theory_unfolded - histograms_normalised_xsection_different_generators['measured_with_systematics'] = h_normalised_xsection_unfolded_with_errors + # Add to list of histograms histograms_normalised_xsection_different_generators['unfolded_with_systematics'] = h_normalised_xsection_unfolded_with_errors_unfolded - - histograms_normalised_xsection_systematics_shifts['measured_with_systematics'] = h_normalised_xsection_unfolded_with_errors histograms_normalised_xsection_systematics_shifts['unfolded_with_systematics'] = h_normalised_xsection_unfolded_with_errors_unfolded return histograms_normalised_xsection_different_generators, histograms_normalised_xsection_systematics_shifts 
-@xsec_04_log.trace() -def read_fit_templates_and_results_as_histograms( category, channel ): - global path_to_JSON, variable, met_type, phase_space - templates = read_data_from_JSON( path_to_JSON + '/fit_results/' + category + '/templates_' + channel + '_' + met_type + '.txt' ) - - data_values = read_data_from_JSON( path_to_JSON + '/fit_results/' + category + '/initial_values_' + channel + '_' + met_type + '.txt' )['data'] - fit_results = read_data_from_JSON( path_to_JSON + '/fit_results/' + category + '/fit_results_' + channel + '_' + met_type + '.txt' ) - fit_variables = templates.keys() - template_histograms = {fit_variable: {} for fit_variable in fit_variables} - fit_results_histograms = {fit_variable: {} for fit_variable in fit_variables} - - variableBins = None - if phase_space == 'VisiblePS': - variableBins = variable_bins_visiblePS_ROOT - elif phase_space == 'FullPS': - variableBins = variable_bins_ROOT - - for bin_i, variable_bin in enumerate( variableBins[variable] ): - for fit_variable in fit_variables: - h_template_data = value_tuplelist_to_hist( templates[fit_variable]['data'][bin_i], fit_variable_bin_edges[fit_variable] ) - h_template_ttjet = value_tuplelist_to_hist( templates[fit_variable]['TTJet'][bin_i], fit_variable_bin_edges[fit_variable] ) - h_template_singletop = value_tuplelist_to_hist( templates[fit_variable]['SingleTop'][bin_i], fit_variable_bin_edges[fit_variable] ) - h_template_VJets = value_tuplelist_to_hist( templates[fit_variable]['V+Jets'][bin_i], fit_variable_bin_edges[fit_variable] ) - h_template_QCD = value_tuplelist_to_hist( templates[fit_variable]['QCD'][bin_i], fit_variable_bin_edges[fit_variable] ) - template_histograms[fit_variable][variable_bin] = { - 'TTJet' : h_template_ttjet, - 'SingleTop' : h_template_singletop, - 'V+Jets':h_template_VJets, - 'QCD':h_template_QCD - } - h_data = h_template_data.Clone() - h_ttjet = h_template_ttjet.Clone() - h_singletop = h_template_singletop.Clone() - h_VJets = h_template_VJets.Clone() - 
h_QCD = h_template_QCD.Clone() - - data_normalisation = data_values[bin_i][0] - n_ttjet = fit_results['TTJet'][bin_i][0] - n_singletop = fit_results['SingleTop'][bin_i][0] - VJets_normalisation = fit_results['V+Jets'][bin_i][0] - QCD_normalisation = fit_results['QCD'][bin_i][0] - - h_data.Scale( data_normalisation ) - h_ttjet.Scale( n_ttjet ) - h_singletop.Scale( n_singletop ) - h_VJets.Scale( VJets_normalisation ) - h_QCD.Scale( QCD_normalisation ) - h_background = h_VJets + h_QCD + h_singletop - - for bin_i_data in range( len( h_data ) ): - h_data.SetBinError( bin_i_data + 1, sqrt( h_data.GetBinContent( bin_i_data + 1 ) ) ) - - fit_results_histograms[fit_variable][variable_bin] = { - 'data' : h_data, - 'signal' : h_ttjet, - 'background' : h_background - } - - return template_histograms, fit_results_histograms - -@xsec_04_log.trace() -def make_template_plots( histograms, category, channel ): - global variable, output_folder, phase_space - fit_variables = histograms.keys() - - variableBins = None - if phase_space == 'VisiblePS': - variableBins = variable_bins_visiblePS_ROOT - elif phase_space == 'FullPS': - variableBins = variable_bins_ROOT - - for variable_bin in variableBins[variable]: - path = output_folder + str( measurement_config.centre_of_mass_energy ) + 'TeV/' + variable + '/' + category + '/fit_templates/' - make_folder_if_not_exists( path ) - for fit_variable in fit_variables: - plotname = path + channel + '_' + fit_variable + '_template_bin_' + variable_bin - - # check if template plots exist already - for output_format in output_formats: - if os.path.isfile( plotname + '.' 
+ output_format ): - continue - - # plot with matplotlib - h_ttjet = histograms[fit_variable][variable_bin]['TTJet'] - h_single_top = histograms[fit_variable][variable_bin]['SingleTop'] - h_VJets = histograms[fit_variable][variable_bin]['V+Jets'] - h_QCD = histograms[fit_variable][variable_bin]['QCD'] - - h_ttjet.linecolor = 'red' - h_single_top.linecolor = 'magenta' - h_VJets.linecolor = 'green' - h_QCD.linecolor = 'gray' - h_VJets.linestyle = 'dashed' - h_QCD.linestyle = 'dotted' # currently not working - # bug report: http://trac.sagemath.org/sage_trac/ticket/13834 - - h_ttjet.linewidth = 5 - h_single_top.linewidth = 5 - h_VJets.linewidth = 5 - h_QCD.linewidth = 5 - - plt.figure( figsize = ( 16, 16 ), dpi = 200, facecolor = 'white' ) - axes = plt.axes() - if not variable in ['NJets']: - axes.minorticks_on() - - plt.xlabel( fit_variables_latex[fit_variable], CMS.x_axis_title ) - plt.ylabel( 'normalised to unit area/(%s)' % get_unit_string(fit_variable), CMS.y_axis_title ) - plt.tick_params( **CMS.axis_label_major ) - if not variable in ['NJets']: - plt.tick_params( **CMS.axis_label_minor ) - - rplt.hist( h_ttjet, axes = axes, label = 'signal' ) - rplt.hist( h_single_top, axes = axes, label = 'Single Top' ) - - if ( h_VJets.Integral() != 0 ): - rplt.hist( h_VJets, axes = axes, label = 'V+Jets' ) - else: - print("WARNING: in %s bin %s, %s category, %s channel, V+Jets template is empty: not plotting." % ( variable, variable_bin, category, channel )) - if ( h_QCD.Integral() != 0 ): - rplt.hist( h_QCD, axes = axes, label = 'QCD' ) - else: - print("WARNING: in %s bin %s, %s category, %s channel, QCD template is empty: not plotting." 
% ( variable, variable_bin, category, channel )) - y_max = get_best_max_y([h_ttjet, h_single_top, h_VJets, h_QCD]) - axes.set_ylim( [0, y_max * 1.1] ) - axes.set_xlim( measurement_config.fit_boundaries[fit_variable] ) - - plt.legend( numpoints = 1, loc = 'upper right', prop = CMS.legend_properties ) - label, channel_label = get_cms_labels( channel ) - plt.title( label, CMS.title ) - # CMS text - # note: fontweight/weight does not change anything as we use Latex text!!! - plt.text(0.95, 0.95, r"\textbf{CMS}", transform=axes.transAxes, fontsize=42, - verticalalignment='top',horizontalalignment='right') - # channel text - axes.text(0.95, 0.95, r"\emph{%s}" %channel_label, transform=axes.transAxes, fontsize=40, - verticalalignment='top',horizontalalignment='right') - - plt.tight_layout() - - for output_format in output_formats: - plt.savefig( plotname + '.' + output_format ) - - plt.close() - gc.collect() - -@xsec_04_log.trace() -def plot_fit_results( histograms, category, channel ): - global variable, b_tag_bin, output_folder, phase_space - from dps.utils.plotting import Histogram_properties, make_data_mc_comparison_plot - fit_variables = histograms.keys() - - variableBins = None - if phase_space == 'VisiblePS': - variableBins = variable_bins_visiblePS_ROOT - elif phase_space == 'FullPS': - variableBins = variable_bins_ROOT - - for variable_bin in variableBins[variable]: - path = output_folder + str( measurement_config.centre_of_mass_energy ) + 'TeV/' + variable + '/' + category + '/fit_results/' - make_folder_if_not_exists( path ) - for fit_variable in fit_variables: - plotname = channel + '_' + fit_variable + '_bin_' + variable_bin - # check if template plots exist already - for output_format in output_formats: - if os.path.isfile( plotname + '.' 
+ output_format ): - continue - - # plot with matplotlib - h_data = histograms[fit_variable][variable_bin]['data'] - h_signal = histograms[fit_variable][variable_bin]['signal'] - h_background = histograms[fit_variable][variable_bin]['background'] - - histogram_properties = Histogram_properties() - histogram_properties.name = plotname - histogram_properties.x_axis_title = fit_variables_latex[fit_variable] - histogram_properties.y_axis_title = 'Events/(%s)' % get_unit_string(fit_variable) - label, _ = get_cms_labels( channel ) - histogram_properties.title = label - histogram_properties.x_limits = measurement_config.fit_boundaries[fit_variable] - - make_data_mc_comparison_plot( [h_data, h_background, h_signal], - ['data', 'background', 'signal'], - ['black', 'green', 'red'], histogram_properties, - save_folder = path, save_as = output_formats ) - @xsec_04_log.trace() def get_cms_labels( channel ): - global b_tag_bin lepton = 'e' if channel == 'electron': lepton = 'e + jets' @@ -333,17 +137,20 @@ def get_cms_labels( channel ): lepton = '$\mu$ + jets' else: lepton = 'e, $\mu$ + jets combined' -# channel_label = '%s, $\geq$ 4 jets, %s' % ( lepton, b_tag_bins_latex[b_tag_bin] ) channel_label = lepton template = '%.1f fb$^{-1}$ (%d TeV)' label = template % ( measurement_config.new_luminosity/1000, measurement_config.centre_of_mass_energy) return label, channel_label @xsec_04_log.trace() -def make_plots( histograms, category, output_folder, histname, show_ratio = True, show_generator_ratio = False, show_before_unfolding = False ): +def make_plots( histograms, category, output_folder, histname, show_ratio = False, show_generator_ratio = False, show_before_unfolding = False ): global variable, phase_space - channel = 'electron' + if show_generator_ratio and not show_ratio: + print("Cannot be done, Use both show_ratio and show_generator_ratio") + sys.exit() + + channel = '' if 'electron' in histname: channel = 'electron' elif 'muon' in histname: @@ -351,106 +158,126 @@ def 
make_plots( histograms, category, output_folder, histname, show_ratio = True else: channel = 'combined' - # plot with matplotlib + # Initailise data histograms hist_data = histograms['unfolded'] - if category == 'central': - hist_data_with_systematics = histograms['unfolded_with_systematics'] - hist_measured = histograms['measured'] - hist_data.markersize = 2 hist_data.marker = 'o' - if category == 'central': + hist_data_with_systematics = histograms['unfolded_with_systematics'] hist_data_with_systematics.markersize = 2 hist_data_with_systematics.marker = 'o' - hist_measured.markersize = 2 - hist_measured.marker = 'o' - hist_measured.color = 'red' - + # Create base figure to be plotted plt.figure( figsize = CMS.figsize, dpi = CMS.dpi, facecolor = CMS.facecolor ) + # Split into 3 for MC/Data ratio and generator ratio and plot if show_ratio and show_generator_ratio: gs = gridspec.GridSpec( 3, 1, height_ratios = [5, 1, 1] ) axes = plt.subplot( gs[0] ) + # Split into 2 for MC/Data ratio or generator Ratio and plot elif show_ratio or show_generator_ratio: gs = gridspec.GridSpec( 2, 1, height_ratios = [5, 1] ) axes = plt.subplot( gs[0] ) + # Just 1 for plot and setup x axis labels else: axes = plt.axes() - if variable in ['NJets', 'abs_lepton_eta', 'lepton_eta']: - plt.xlabel( '$%s$' % variables_latex[variable], CMS.x_axis_title ) - else: - plt.xlabel( '$%s$ [GeV]' % variables_latex[variable], CMS.x_axis_title ) - - if not variable in ['NJets']: - axes.minorticks_on() - if variable in ['NJets', 'abs_lepton_eta', 'lepton_eta']: - plt.ylabel( r'$\frac{1}{\sigma} \frac{d\sigma}{d' + variables_latex[variable] + '}$', CMS.y_axis_title ) - else: - plt.ylabel( r'$\frac{1}{\sigma} \frac{d\sigma}{d' + variables_latex[variable] + '} \left[\mathrm{GeV}^{-1}\\right]$', CMS.y_axis_title ) + x_label = '${}$'.format(variables_latex[variable]) + if variable in ['HT', 'ST', 'MET', 'WPT']: + x_label += ' [GeV]' + plt.xlabel( x_label, CMS.x_axis_title ) + + # set y axis x-section labels + 
y_label = r'$\frac{1}{\sigma} \frac{d\sigma}{d' + variables_latex[variable] + '}$' + if variable in ['HT', 'ST', 'MET', 'WPT']: + y_label.replace('}$', ' \left[\mathrm{GeV}^{-1}\\right]$') + plt.ylabel( y_label, CMS.y_axis_title ) + + # Set up ticks on axis. Minor ticks on axis for non NJet variables plt.tick_params( **CMS.axis_label_major ) if not variable in ['NJets']: + axes.minorticks_on() plt.tick_params( **CMS.axis_label_minor ) + # Set raw unfolded data with stat+unfolding uncertianty to be visible hist_data.visible = True + # Set raw unfolded data with systematic uncertianty to be visible + # label = 'do_not_show' = do not show in legend if category == 'central': hist_data_with_systematics.visible = True - rplt.errorbar( hist_data_with_systematics, axes = axes, label = 'do_not_show', xerr = None, capsize = 0, elinewidth = 2, zorder = len( histograms ) + 1 ) - rplt.errorbar( hist_data, axes = axes, label = 'do_not_show', xerr = None, capsize = 15, capthick = 3, elinewidth = 2, zorder = len( histograms ) + 2 ) - rplt.errorbar( hist_data, axes = axes, label = 'data', xerr = None, yerr = False, zorder = len( histograms ) + 3 ) # this makes a nicer legend entry - - if show_before_unfolding: - rplt.errorbar( hist_measured, axes = axes, label = 'data (before unfolding)', xerr = None, zorder = len( histograms ) ) + rplt.errorbar( + hist_data_with_systematics, + axes = axes, + label = 'do_not_show', + xerr = None, + capsize = 0, + elinewidth = 2, + zorder = len( histograms ) + 1 + ) + + # Show stat+unf uncertainty on plot + rplt.errorbar( hist_data, + axes = axes, + label = 'do_not_show', + xerr = None, + capsize = 15, + capthick = 3, + elinewidth = 2, + zorder = len( histograms ) + 2 + ) + # And one for a nice legend entry + rplt.errorbar( hist_data, + axes = axes, + label = 'data', + xerr = None, + yerr = False, + zorder = len( histograms ) + 3 + ) dashes = {} for key, hist in sorted( histograms.items() ): zorder = sorted( histograms, reverse = False ).index( key 
) - print (key) + + # Ordering such that systematic uncertainties are plotted first then central powhegPythia then data if key == 'powhegPythia8' and zorder != len(histograms) - 3: zorder = len(histograms) - 3 elif key != 'powhegPythia8' and not 'unfolded' in key: while zorder >= len(histograms) - 3: zorder = zorder - 1 + # Colour and style of MC hists if not 'unfolded' in key and not 'measured' in key: hist.linewidth = 4 - # setting colours linestyle = None - if 'powhegHerwig' in key or 'massdown' in key: - hist.SetLineColor( kBlue ) - dashes[key] = [25,5,5,5,5,5,5,5] - elif 'madgraphMLM' in key or 'scaledown' in key: - hist.SetLineColor( 417 ) - dashes[key] = [5,5] - elif 'MADGRAPH_ptreweight' in key: - hist.SetLineColor( kBlack ) - elif 'powhegPythia8' in key: + + if 'powhegPythia8' in key: linestyle = 'solid' dashes[key] = None hist.SetLineColor( 633 ) + elif 'powhegHerwig' in key or 'massdown' in key: + hist.SetLineColor( kBlue ) + dashes[key] = [25,5,5,5,5,5,5,5] elif 'amcatnloPythia8' in key or 'massup' in key: hist.SetLineColor( 807 ) dashes[key] = [20,5] - # elif 'amcatnloHerwig' in key: - # hist.SetLineColor( 734 ) - # dashes[key] = [15,5] - elif 'MCATNLO' in key or 'scaleup' in key: - hist.SetLineColor( 619 ) - dashes[key] = [5,5,10,5] + elif 'madgraphMLM' in key: + hist.SetLineColor( 417 ) + dashes[key] = [5,5] if linestyle != None: hist.linestyle = linestyle + # Add hist to plot line, h = rplt.hist( hist, axes = axes, label = measurements_latex[key], zorder = zorder ) + # Set the dashes and lines if dashes[key] != None: line.set_dashes(dashes[key]) h.set_dashes(dashes[key]) handles, labels = axes.get_legend_handles_labels() - # making data first in the list + + # Making data first in the legend data_label_index = labels.index( 'data' ) data_handle = handles[data_label_index] labels.remove( 'data' ) @@ -458,13 +285,13 @@ def make_plots( histograms, category, output_folder, histname, show_ratio = True labels.insert( 0, 'data' ) handles.insert( 0, 
data_handle ) + # Order the rest of the labels in the legend new_handles, new_labels = [], [] zipped = dict( zip( labels, handles ) ) labelOrder = ['data', measurements_latex['powhegPythia8'], measurements_latex['amcatnloPythia8'], measurements_latex['powhegHerwig'], - measurements_latex['amcatnloHerwig'], measurements_latex['madgraphMLM'], measurements_latex['scaleup'], measurements_latex['scaledown'], @@ -476,6 +303,7 @@ def make_plots( histograms, category, output_folder, histname, show_ratio = True new_handles.append(zipped[label]) new_labels.append(label) + # Location of the legend legend_location = (0.97, 0.82) if variable == 'MT': legend_location = (0.05, 0.82) @@ -485,13 +313,21 @@ def make_plots( histograms, category, output_folder, histname, show_ratio = True legend_location = (1.0, 0.84) elif variable == 'abs_lepton_eta': legend_location = (1.0, 0.94) - plt.legend( new_handles, new_labels, numpoints = 1, prop = CMS.legend_properties, frameon = False, bbox_to_anchor=legend_location, - bbox_transform=plt.gcf().transFigure ) + + # Add legend to plot + plt.legend( new_handles, new_labels, + numpoints = 1, + prop = CMS.legend_properties, + frameon = False, + bbox_to_anchor=legend_location, + bbox_transform=plt.gcf().transFigure + ) + + # Title and CMS labels + # note: fontweight/weight does not change anything as we use Latex text!!! label, channel_label = get_cms_labels( channel ) - # title plt.title( label,loc='right', **CMS.title ) - # CMS text - # note: fontweight/weight does not change anything as we use Latex text!!! 
+ # Locations of labels logo_location = (0.05, 0.98) prelim_location = (0.05, 0.92) channel_location = ( 0.05, 0.86) @@ -503,71 +339,93 @@ def make_plots( histograms, category, output_folder, histname, show_ratio = True logo_location = (0.03, 0.98) prelim_location = (0.03, 0.92) channel_location = (0.03, 0.86) - plt.text(logo_location[0], logo_location[1], r"\textbf{CMS}", transform=axes.transAxes, fontsize=42, - verticalalignment='top',horizontalalignment='left') + + # Add labels to plot + plt.text(logo_location[0], logo_location[1], + r"\textbf{CMS}", + transform=axes.transAxes, + fontsize=42, + verticalalignment='top', + horizontalalignment='left' + ) # preliminary - plt.text(prelim_location[0], prelim_location[1], r"\emph{Preliminary}", - transform=axes.transAxes, fontsize=42, - verticalalignment='top',horizontalalignment='left') + plt.text(prelim_location[0], prelim_location[1], + r"\emph{Preliminary}", + transform=axes.transAxes, + fontsize=42, + verticalalignment='top', + horizontalalignment='left' + ) # channel text - plt.text(channel_location[0], channel_location[1], r"\emph{%s}" %channel_label, transform=axes.transAxes, fontsize=40, - verticalalignment='top',horizontalalignment='left') + plt.text(channel_location[0], channel_location[1], + r"\emph{%s}"%channel_label, + transform=axes.transAxes, + fontsize=40, + verticalalignment='top', + horizontalalignment='left' + ) + + # Set y limits on plot ylim = axes.get_ylim() if ylim[0] < 0: axes.set_ylim( ymin = 0.) 
- if variable == 'WPT': - axes.set_ylim(ymax = ylim[1]*1.3) - elif variable == 'abs_lepton_eta': - axes.set_ylim(ymax = ylim[1]*1.3) - else : - axes.set_ylim(ymax = ylim[1]*1.2) - + axes.set_ylim(ymax = ylim[1]*1.3) - if show_ratio or show_generator_ratio: + # Now to show either of the ratio plots + if show_ratio: + # Set previous x axis ticks and labels to invisible plt.setp( axes.get_xticklabels(), visible = False ) + # Go to ratio subplot ax1 = plt.subplot( gs[1] ) - if not variable in ['NJets']: - ax1.minorticks_on() - #ax1.grid( True, 'major', linewidth = 1 ) + # setting the x_limits identical to the main plot x_limits = axes.get_xlim() ax1.set_xlim(x_limits) + + # Setting tick marks ax1.yaxis.set_major_locator( MultipleLocator( 0.5 ) ) + plt.tick_params( **CMS.axis_label_major ) if not variable in ['NJets']: + ax1.minorticks_on() ax1.yaxis.set_minor_locator( MultipleLocator( 0.1 ) ) + plt.tick_params( **CMS.axis_label_minor ) - if not show_ratio or not show_generator_ratio: - if variable in ['NJets', 'abs_lepton_eta', 'lepton_eta']: - plt.xlabel('$%s$' % variables_latex[variable], CMS.x_axis_title ) - else: - plt.xlabel( '$%s$ [GeV]' % variables_latex[variable], CMS.x_axis_title ) + # x axis labels as before + x_label = '${}$'.format(variables_latex[variable]) + if variable in ['HT', 'ST', 'MET', 'WPT']: + x_label += ' [GeV]' - plt.tick_params( **CMS.axis_label_major ) - if not variable in ['NJets']: - plt.tick_params( **CMS.axis_label_minor ) - plt.ylabel( '$\\frac{\\textrm{pred.}}{\\textrm{data}}$', CMS.y_axis_title ) + if not show_generator_ratio: + plt.xlabel( x_label, CMS.x_axis_title ) + + y_label = '$\\frac{\\textrm{pred.}}{\\textrm{data}}$' + plt.ylabel( y_label, CMS.y_axis_title ) ax1.yaxis.set_label_coords(-0.115, 0.8) - #draw a horizontal line at y=1 for data + + # Draw a horizontal line at y=1 for data plt.axhline(y = 1, color = 'black', linewidth = 2) + # Create ratios and plot to subplot for key, hist in sorted( histograms.iteritems() ): if not 
'unfolded' in key and not 'measured' in key: ratio = hist.Clone() - ratio.Divide( hist_data ) #divide by data + ratio.Divide( hist_data ) line, h = rplt.hist( ratio, axes = ax1, label = 'do_not_show' ) if dashes[key] != None: line.set_dashes(dashes[key]) h.set_dashes(dashes[key]) + # Now for the error bands stat_lower = hist_data.Clone() stat_upper = hist_data.Clone() syst_lower = hist_data.Clone() syst_upper = hist_data.Clone() - # plot error bands on data in the ratio plot + # Plot relative error bands on data in the ratio plot stat_errors = graph_to_value_errors_tuplelist(hist_data) if category == 'central': syst_errors = graph_to_value_errors_tuplelist(hist_data_with_systematics) + for bin_i in range( 1, hist_data.GetNbinsX() + 1 ): stat_value, stat_error, _ = stat_errors[bin_i-1] stat_rel_error = stat_error/stat_value @@ -579,30 +437,41 @@ def make_plots( histograms, category, output_folder, histname, show_ratio = True syst_rel_error_up = syst_error_up/syst_value syst_lower.SetBinContent( bin_i, 1 - syst_rel_error_down ) syst_upper.SetBinContent( bin_i, 1 + syst_rel_error_up ) + + # Colour if category == 'central': - rplt.fill_between( syst_lower, syst_upper, ax1, - color = 'yellow' ) - - rplt.fill_between( stat_upper, stat_lower, ax1, color = '0.75', - ) - + rplt.fill_between( + syst_lower, + syst_upper, + ax1, + color = 'yellow' + ) + rplt.fill_between( + stat_upper, + stat_lower, + ax1, + color = '0.75', + ) + + # Add legend loc = 'upper left' - # if variable in ['ST']: - # loc = 'upper right' # legend for ratio plot p_stat = mpatches.Patch(facecolor='0.75', label='Stat.', edgecolor='black' ) p_stat_and_syst = mpatches.Patch(facecolor='yellow', label=r'Stat. 
$\oplus$ Syst.', edgecolor='black' ) - l1 = ax1.legend(handles = [p_stat, p_stat_and_syst], loc = loc, - frameon = False, prop = {'size':26}, ncol = 2) - - # ax1.legend(handles = [p_stat_and_syst], loc = 'lower left', - # frameon = False, prop = {'size':30}) + l1 = ax1.legend( + handles = [p_stat, p_stat_and_syst], + loc = loc, + frameon = False, + prop = {'size':26}, + ncol = 2 + ) ax1.add_artist(l1) + # Setting y limits and tick parameters if variable == 'MET': ax1.set_ylim( ymin = 0.8, ymax = 1.2 ) ax1.yaxis.set_major_locator( MultipleLocator( 0.5 ) ) -# ax1.yaxis.set_minor_locator( MultipleLocator( 0.1 ) ) + ax1.yaxis.set_minor_locator( MultipleLocator( 0.1 ) ) if variable == 'MT': ax1.set_ylim( ymin = 0.8, ymax = 1.2 ) ax1.yaxis.set_major_locator( MultipleLocator( 0.2 ) ) @@ -631,41 +500,44 @@ def make_plots( histograms, category, output_folder, histname, show_ratio = True ax1.yaxis.set_minor_locator( MultipleLocator( 0.1 ) ) - if show_ratio and show_generator_ratio: + if show_generator_ratio: - plt.setp( axes.get_xticklabels(), visible = False ) #Remove DataMC Comparision Axis - plt.setp( ax1.get_xticklabels(), visible = False ) # Remove Ratio Axis + # Remove Data/MC Ratio Axis + plt.setp( ax1.get_xticklabels(), visible = False ) ax2 = plt.subplot( gs[2] ) - if not variable in ['NJets']: - ax2.minorticks_on() - #ax2.grid( True, 'major', linewidth = 1 ) + # setting the x_limits identical to the main plot x_limits = axes.get_xlim() ax2.set_xlim(x_limits) + # Setting ticks ax2.yaxis.set_major_locator( MultipleLocator( 0.5 ) ) + plt.tick_params( **CMS.axis_label_major ) if not variable in ['NJets']: + ax2.minorticks_on() ax2.yaxis.set_minor_locator( MultipleLocator( 0.1 ) ) + plt.tick_params( **CMS.axis_label_minor ) - if variable in ['NJets', 'abs_lepton_eta', 'lepton_eta']: - plt.xlabel('$%s$' % variables_latex[variable], CMS.x_axis_title ) - else: - plt.xlabel( '$%s$ [GeV]' % variables_latex[variable], CMS.x_axis_title ) + # x axis labels as before + x_label = 
'${}$'.format(variables_latex[variable]) + if variable in ['HT', 'ST', 'MET', 'WPT']: + x_label += ' [GeV]' + plt.xlabel( x_label, CMS.x_axis_title ) + + y_label = '$\\frac{\\textrm{generator}}{\\textrm{central}}$' + plt.ylabel( y_label, CMS.y_axis_title ) - plt.tick_params( **CMS.axis_label_major ) - if not variable in ['NJets']: - plt.tick_params( **CMS.axis_label_minor ) - plt.ylabel( '$\\frac{\\textrm{generator}}{\\textrm{central}}$', CMS.y_axis_title ) ax2.yaxis.set_label_coords(-0.115, 0.8) - #draw a horizontal line at y=1 for data + + #draw a horizontal line at y=1 for central MC plt.axhline(y = 1, color = 'black', linewidth = 2) central_mc = histograms['powhegPythia8'] for key, hist in sorted( histograms.iteritems() ): if not 'unfolded' in key and not 'measured' in key: ratio = hist.Clone() - ratio.Divide( central_mc ) #divide by data + ratio.Divide( central_mc ) #divide by central mc sample line, h = rplt.hist( ratio, axes = ax2, label = 'do_not_show' ) if dashes[key] != None: line.set_dashes(dashes[key]) @@ -674,7 +546,7 @@ def make_plots( histograms, category, output_folder, histname, show_ratio = True if variable == 'MET': ax2.set_ylim( ymin = 0.8, ymax = 1.2 ) ax2.yaxis.set_major_locator( MultipleLocator( 0.5 ) ) -# ax2.yaxis.set_minor_locator( MultipleLocator( 0.1 ) ) + ax2.yaxis.set_minor_locator( MultipleLocator( 0.1 ) ) if variable == 'MT': ax2.set_ylim( ymin = 0.8, ymax = 1.2 ) ax2.yaxis.set_major_locator( MultipleLocator( 0.2 ) ) @@ -703,91 +575,90 @@ def make_plots( histograms, category, output_folder, histname, show_ratio = True ax2.yaxis.set_minor_locator( MultipleLocator( 0.1 ) ) - if CMS.tight_layout: plt.tight_layout() + # Save the plots path = '{output_folder}/{centre_of_mass_energy}TeV/{phaseSpace}/{variable}/' path = path.format( - output_folder = output_folder, - centre_of_mass_energy = measurement_config.centre_of_mass_energy, - phaseSpace = phase_space, - variable = variable - ) + output_folder = output_folder, + centre_of_mass_energy 
= measurement_config.centre_of_mass_energy, + phaseSpace = phase_space, + variable = variable + ) make_folder_if_not_exists( path ) for output_format in output_formats: filename = path + '/' + histname + '.' + output_format plt.savefig( filename ) - del hist_data, hist_measured - plt.close() - gc.collect() - -@xsec_04_log.trace() -def plot_central_and_systematics( channel, systematics, exclude = [], suffix = 'altogether' ): - global variable, b_tag_bin, met_type - - plt.figure( figsize = ( 16, 16 ), dpi = 200, facecolor = 'white' ) - axes = plt.axes() - if not variable in ['NJets']: - axes.minorticks_on() - - hist_data_central = read_xsection_measurement_results( 'central', channel )[0]['unfolded_with_systematics'] - hist_data_central.markersize = 2 # points. Imagine, tangible units! - hist_data_central.marker = 'o' - - if variable in ['NJets', 'abs_lepton_eta', 'lepton_eta']: - plt.xlabel( '$%s$' % variables_latex[variable], CMS.x_axis_title ) - plt.ylabel( r'$\frac{1}{\sigma} \frac{d\sigma}{d' + variables_latex[variable] + '}$', CMS.y_axis_title ) - else: - plt.xlabel( '$%s$ [GeV]' % variables_latex[variable], CMS.x_axis_title ) - plt.ylabel( r'$\frac{1}{\sigma} \frac{d\sigma}{d' + variables_latex[variable] + '} \left[\mathrm{GeV}^{-1}\\right]$', CMS.y_axis_title ) - plt.tick_params( **CMS.axis_label_major ) - if not variable in ['NJets']: - plt.tick_params( **CMS.axis_label_minor ) - - rplt.errorbar( hist_data_central, axes = axes, label = 'data', xerr = True ) - - for systematic in sorted( systematics ): - if systematic in exclude or systematic == 'central': - continue - - hist_data_systematic = read_xsection_measurement_results( systematic, channel )[0]['unfolded'] - hist_data_systematic.markersize = 2 - hist_data_systematic.marker = 'o' - colour_number = systematics.index( systematic ) + 2 - if colour_number == 10: - colour_number = 42 - hist_data_systematic.SetMarkerColor( colour_number ) - if 'PDF' in systematic: - rplt.errorbar( hist_data_systematic, axes 
= axes, label = systematic.replace( 'Weights_', ' ' ), xerr = None ) - elif met_type in systematic: - rplt.errorbar( hist_data_systematic, axes = axes, label = measurements_latex[systematic.replace( met_type, '' )], xerr = None ) - else: - rplt.errorbar( hist_data_systematic, axes = axes, label = measurements_latex[systematic], xerr = None ) - - plt.legend( numpoints = 1, loc = 'center right', prop = {'size':25}, ncol = 2 ) - label, channel_label = get_cms_labels( channel ) - plt.title( label, CMS.title ) - # CMS text - # note: fontweight/weight does not change anything as we use Latex text!!! - plt.text(0.95, 0.95, r"\textbf{CMS}", transform=axes.transAxes, fontsize=42, - verticalalignment='top',horizontalalignment='right') - # channel text - axes.text(0.95, 0.90, r"\emph{%s}" %channel_label, transform=axes.transAxes, fontsize=40, - verticalalignment='top',horizontalalignment='right') - plt.tight_layout() - - - path = output_folder + str( measurement_config.centre_of_mass_energy ) + 'TeV/' + variable - make_folder_if_not_exists( path ) - for output_format in output_formats: - filename = path + '/normalised_xsection_' + channel + '_' + suffix + '.' + output_format - - plt.savefig( filename ) - + del hist_data + if 'central' in category: del hist_data_with_systematics plt.close() gc.collect() + return + +# @xsec_04_log.trace() +# def plot_central_and_systematics( channel, systematics, exclude = [], suffix = 'altogether' ): +# global variable + +# plt.figure( figsize = ( 16, 16 ), dpi = 200, facecolor = 'white' ) +# axes = plt.axes() +# if not variable in ['NJets']: +# axes.minorticks_on() + +# hist_data_central = read_xsection_measurement_results( 'central', channel )[0]['unfolded_with_systematics'] +# hist_data_central.markersize = 2 # points. Imagine, tangible units! 
+# hist_data_central.marker = 'o' + +# if variable in ['NJets', 'abs_lepton_eta', 'lepton_eta']: +# plt.xlabel( '$%s$' % variables_latex[variable], CMS.x_axis_title ) +# plt.ylabel( r'$\frac{1}{\sigma} \frac{d\sigma}{d' + variables_latex[variable] + '}$', CMS.y_axis_title ) +# else: +# plt.xlabel( '$%s$ [GeV]' % variables_latex[variable], CMS.x_axis_title ) +# plt.ylabel( r'$\frac{1}{\sigma} \frac{d\sigma}{d' + variables_latex[variable] + '} \left[\mathrm{GeV}^{-1}\\right]$', CMS.y_axis_title ) +# plt.tick_params( **CMS.axis_label_major ) +# if not variable in ['NJets']: +# plt.tick_params( **CMS.axis_label_minor ) + +# rplt.errorbar( hist_data_central, axes = axes, label = 'data', xerr = True ) + +# for systematic in sorted( systematics ): +# if systematic in exclude or systematic == 'central': +# continue + +# hist_data_systematic = read_xsection_measurement_results( systematic, channel )[0]['unfolded'] +# hist_data_systematic.markersize = 2 +# hist_data_systematic.marker = 'o' +# colour_number = systematics.index( systematic ) + 2 +# if colour_number == 10: +# colour_number = 42 +# hist_data_systematic.SetMarkerColor( colour_number ) +# if 'PDF' in systematic: +# rplt.errorbar( hist_data_systematic, axes = axes, label = systematic.replace( 'Weights_', ' ' ), xerr = None ) +# else: +# rplt.errorbar( hist_data_systematic, axes = axes, label = measurements_latex[systematic], xerr = None ) + +# plt.legend( numpoints = 1, loc = 'center right', prop = {'size':25}, ncol = 2 ) +# label, channel_label = get_cms_labels( channel ) +# plt.title( label, CMS.title ) +# # CMS text +# # note: fontweight/weight does not change anything as we use Latex text!!! 
+# plt.text(0.95, 0.95, r"\textbf{CMS}", transform=axes.transAxes, fontsize=42, +# verticalalignment='top',horizontalalignment='right') +# # channel text +# axes.text(0.95, 0.90, r"\emph{%s}" %channel_label, transform=axes.transAxes, fontsize=40, +# verticalalignment='top',horizontalalignment='right') +# plt.tight_layout() + + +# path = output_folder + str( measurement_config.centre_of_mass_energy ) + 'TeV/' + variable +# make_folder_if_not_exists( path ) +# for output_format in output_formats: +# filename = path + '/normalised_xsection_' + channel + '_' + suffix + '.' + output_format + +# plt.savefig( filename ) + +# plt.close() +# gc.collect() @xsec_04_log.trace() def get_unit_string(fit_variable): @@ -801,141 +672,144 @@ def get_unit_string(fit_variable): return unit_string + +def parse_arguments(): + parser = ArgumentParser() + parser.add_argument( "-p", "--path", + dest = "path", + default = 'data/normalisation/background_subtraction/', + help = "set path to files containing dataframes" + ) + parser.add_argument( "-o", "--output_folder", + dest = "output_folder", + default = 'plots/', + help = "set path to save plots" + ) + parser.add_argument( "-v", "--variable", + dest = "variable", + default = 'MET', + help = "set variable to plot (MET, HT, ST, WPT, NJets, lepton_pt, abs_lepton_eta )" + ) + parser.add_argument( "-c", "--centre-of-mass-energy", + dest = "CoM", + default = 13, + type = int, + help = "set the centre of mass energy for analysis. Default = 13 [TeV]" + ) + parser.add_argument( "-a", "--additional-plots", + action = "store_true", + dest = "additional_plots", + help = "Draws additional plots like the comparison of different systematics to the central result." 
+ ) + parser.add_argument( "-g", "--show-generator-ratio", + action = "store_true", + dest = "show_generator_ratio", + help = "Show the ratio of generators to central" + ) + parser.add_argument( "-d", "--debug", + action = "store_true", + dest = "debug", + help = "Enables debugging output" + ) + parser.add_argument( '--visiblePS', + dest = "visiblePS", + action = "store_true", + help = "Unfold to visible phase space" + ) + parser.add_argument( "-u", "--unfolding_method", + dest = "unfolding_method", + default = 'TUnfold', + help = "Unfolding method: TUnfold (default)" + ) + + args = parser.parse_args() + return args + if __name__ == '__main__': set_root_defaults() - parser = OptionParser() - parser.add_option( "-p", "--path", dest = "path", default = 'data/M3_angle_bl/', - help = "set path to JSON files" ) - parser.add_option( "-o", "--output_folder", dest = "output_folder", default = 'plots/', - help = "set path to save plots" ) - parser.add_option( "-v", "--variable", dest = "variable", default = 'MET', - help = "set variable to plot (MET, HT, ST, MT)" ) - parser.add_option( "-m", "--metType", dest = "metType", default = 'type1', - help = "set MET type used in the analysis of MET, ST or MT" ) - parser.add_option( "-b", "--bjetbin", dest = "bjetbin", default = '2m', - help = "set b-jet multiplicity for analysis. Options: exclusive: 0-3, inclusive (N or more): 0m, 1m, 2m, 3m, 4m" ) - parser.add_option( "-c", "--centre-of-mass-energy", dest = "CoM", default = 13, type = int, - help = "set the centre of mass energy for analysis. 
Default = 13 [TeV]" ) - parser.add_option( "-a", "--additional-plots", action = "store_true", dest = "additional_plots", - help = """Draws additional plots like the comparison of different - systematics to the central result.""" ) - parser.add_option( "-g", "--show-generator-ratio", action = "store_true", dest = "show_generator_ratio", - help = "Show the ratio of generators to central" ) - parser.add_option( "-d", "--debug", action = "store_true", dest = "debug", - help = """Enables debugging output""" ) - parser.add_option("--draw-systematics", action = "store_true", dest = "draw_systematics", - help = "creates a set of plots for each systematic (in addition to central result)." ) - parser.add_option( '--visiblePS', dest = "visiblePS", action = "store_true", - help = "Unfold to visible phase space" ) - parser.add_option( "-u", "--unfolding_method", dest = "unfolding_method", default = 'TUnfold', - help = "Unfolding method: TUnfold (default), RooUnfoldSvd, TSVDUnfold, RooUnfoldTUnfold, RooUnfoldInvert, RooUnfoldBinByBin, RooUnfoldBayes" ) - - output_formats = ['pdf'] - ( options, args ) = parser.parse_args() - if options.debug: + args = parse_arguments() + + if args.debug: log.setLevel(log.DEBUG) - measurement_config = XSectionConfig( options.CoM ) + output_formats = ['pdf'] + measurement_config = XSectionConfig( args.CoM ) + # caching of variables for shorter access - translate_options = measurement_config.translate_options - ttbar_theory_systematic_prefix = measurement_config.ttbar_theory_systematic_prefix - vjets_theory_systematic_prefix = measurement_config.vjets_theory_systematic_prefix - met_systematics = measurement_config.met_systematics - method = options.unfolding_method - - variable = options.variable - show_generator_ratio = options.show_generator_ratio - visiblePS = options.visiblePS + method = args.unfolding_method + variable = args.variable + show_generator_ratio = args.show_generator_ratio + visiblePS = args.visiblePS + output_folder = 
args.output_folder + + if not output_folder.endswith( '/' ): + output_folder += '/' + phase_space = 'FullPS' if visiblePS: phase_space = 'VisiblePS' - output_folder = options.output_folder - if not output_folder.endswith( '/' ): - output_folder += '/' - met_type = translate_options[options.metType] - b_tag_bin = translate_options[options.bjetbin] - path_to_JSON = '{path}/{com}TeV/{variable}/{phase_space}/' - path_to_JSON = path_to_JSON.format(path = options.path, com = options.CoM, - variable = variable, - phase_space = phase_space, - ) -# path_to_JSON = options.path + '/' + str( measurement_config.centre_of_mass_energy ) + 'TeV/' + variable + '/' - - categories = deepcopy( measurement_config.categories_and_prefixes.keys() ) - ttbar_generator_systematics = [ttbar_theory_systematic_prefix + systematic for systematic in measurement_config.generator_systematics] - vjets_generator_systematics = [vjets_theory_systematic_prefix + systematic for systematic in measurement_config.generator_systematics] - categories.extend( ttbar_generator_systematics ) - # categories.extend( vjets_generator_systematics ) + path_to_DF = '{path}/{com}TeV/{variable}/{phase_space}/' + path_to_DF = path_to_DF.format( + path = args.path, + com = args.CoM, + variable = variable, + phase_space = phase_space, + ) + all_measurements = deepcopy( measurement_config.measurements ) pdf_uncertainties = ['PDFWeights_%d' % index for index in range( measurement_config.pdfWeightMin, measurement_config.pdfWeightMax )] - # # all MET uncertainties except JES as this is already included - # met_uncertainties = [met_type + suffix for suffix in met_systematics if not 'JetEn' in suffix and not 'JetRes' in suffix] - # new_uncertainties = ['QCD_shape'] - rate_changing_systematics = [systematic for systematic in measurement_config.rate_changing_systematics.keys()] - - all_measurements = deepcopy( categories ) - # all_measurements.extend( pdf_uncertainties ) - # all_measurements.extend( met_uncertainties ) - # 
all_measurements.extend( new_uncertainties ) - all_measurements.extend( rate_changing_systematics ) - # for channel in ['electron', 'muon', 'combined', 'combinedBeforeUnfolding']: - for channel in ['combinedBeforeUnfolding', 'combined']: + all_measurements.extend( pdf_uncertainties ) + + channel = [ + # 'electron', + # 'muon', + 'combined', + # 'combinedBeforeUnfolding', + ] + for ch in channel: for category in all_measurements: - if not category == 'central' and not options.additional_plots: continue - - # if variable == 'HT' and category in met_uncertainties: - # continue - # setting up systematic MET for JES up/down samples for reading fit templates - met_type = translate_options[options.metType] - if category == 'JES_up': - met_type += 'JetEnUp' - elif category == 'JES_down': - met_type += 'JetEnDown' - - # if not channel == 'combined': - # #Don't make additional plots for e.g. generator systematics, mass systematics, k value systematics and pdf systematics because they are now done \ - # #in the unfolding process with BLT unfolding files. 
- # if category in ttbar_generator_systematics or category in ttbar_mass_systematics or category in kValue_systematics or category in pdf_uncertainties: - # continue - # fit_templates, fit_results = read_fit_templates_and_results_as_histograms( category, channel ) - # make_template_plots( fit_templates, category, channel ) - # plot_fit_results( fit_results, category, channel ) - - # change back to original MET type - met_type = translate_options[options.metType] - if met_type == 'PFMET': - met_type = 'patMETsPFlow' - - histograms_normalised_xsection_different_generators, histograms_normalised_xsection_systematics_shifts = read_xsection_measurement_results( category, channel ) - histname = '{variable}_normalised_xsection_{channel}_{phase_space}' - histname = histname.format(variable = variable, channel = channel, - phase_space = phase_space) - if method != 'RooUnfoldSvd': - histname += '_' + method - make_plots( histograms_normalised_xsection_different_generators, category, output_folder, histname + '_different_generators', show_generator_ratio = show_generator_ratio ) - make_plots( histograms_normalised_xsection_systematics_shifts, category, output_folder, histname + '_systematics_shifts' ) + # Show central only. 
TODO Add in additional systematic comparison plots + if not category == 'central' and not args.additional_plots: continue + if variable in measurement_config.variables_no_met and category in measurement_config.met_specific_systematics: continue + + # Read the xsection results from dataframe + histograms_normalised_xsection_different_generators, histograms_normalised_xsection_systematics_shifts = read_xsection_measurement_results( category, ch ) + + histname = '{variable}_normalised_xsection_{ch}_{phase_space}_{method}' + histname = histname.format( + variable = variable, + ch = ch, + phase_space = phase_space, + method = method + ) + + make_plots( + histograms_normalised_xsection_different_generators, + category, + output_folder, + histname + '_different_generators', + show_ratio = True, + show_generator_ratio = show_generator_ratio + ) + make_plots( + histograms_normalised_xsection_systematics_shifts, + category, + output_folder, + histname + '_systematics_shifts', + show_ratio = True, + ) del histograms_normalised_xsection_different_generators, histograms_normalised_xsection_systematics_shifts - if options.additional_plots: - plot_central_and_systematics( channel, categories, exclude = ttbar_generator_systematics ) - - plot_central_and_systematics( channel, ttbar_generator_systematics, suffix = 'ttbar_generator_only' ) - -# exclude = set( pdf_uncertainties ).difference( set( pdf_uncertainties_1_to_11 ) ) -# plot_central_and_systematics( channel, pdf_uncertainties_1_to_11, exclude = exclude, suffix = 'PDF_1_to_11' ) -# -# exclude = set( pdf_uncertainties ).difference( set( pdf_uncertainties_12_to_22 ) ) -# plot_central_and_systematics( channel, pdf_uncertainties_12_to_22, exclude = exclude, suffix = 'PDF_12_to_22' ) -# -# exclude = set( pdf_uncertainties ).difference( set( pdf_uncertainties_23_to_33 ) ) -# plot_central_and_systematics( channel, pdf_uncertainties_23_to_33, exclude = exclude, suffix = 'PDF_23_to_33' ) -# -# exclude = set( pdf_uncertainties 
).difference( set( pdf_uncertainties_34_to_45 ) ) -# plot_central_and_systematics( channel, pdf_uncertainties_34_to_45, exclude = exclude, suffix = 'PDF_34_to_45' ) -# -# plot_central_and_systematics( channel, met_uncertainties, suffix = 'MET_only' ) -# plot_central_and_systematics( channel, new_uncertainties, suffix = 'new_only' ) - plot_central_and_systematics( channel, rate_changing_systematics, suffix = 'rate_changing_only' ) + # if args.additional_plots: + # TODO + # Generator Only + # PDF Only + # MET Only + # Rate Changing Only + # etc... + # plot_central_and_systematics( ch, measurements, exclude = ttbar_generator_systematics ) + # plot_central_and_systematics( ch, ttbar_generator_systematics, suffix = 'ttbar_generator_only' ) + # plot_central_and_systematics( ch, rate_changing_systematics, suffix = 'rate_changing_only' ) diff --git a/dps/analysis/xsection/05_make_systematic_plots.py b/dps/analysis/xsection/05_make_systematic_plots.py index 2bf342c4..665ddb07 100644 --- a/dps/analysis/xsection/05_make_systematic_plots.py +++ b/dps/analysis/xsection/05_make_systematic_plots.py @@ -14,54 +14,90 @@ # dynamic matplotlib settings from matplotlib import rc -rc( 'font', **CMS.font ) -rc( 'text', usetex = False ) +from operator import itemgetter -def plot_systematic_uncertainties(systematic_uncertainties, bin_edges, variable, output_folder): +# rc( 'font', **CMS.font ) +# rc( 'text', usetex = False ) + +def plot_systematic_uncertainties(systematic_uncertainties, bin_edges, variable, output_folder, subcategories = [], subname = '', plot_largest = False): ''' Plot the systematic uncertainties ''' + print subcategories + if not subcategories: subcategories = systematic_uncertainties.keys() + x_limits = [bin_edges[0], bin_edges[-1]] - y_limits = [0,0.6] + # y_limits = [-0.6,0.6] + y_limits = [0,0.4] + fig_syst = plt.figure( figsize = ( 20, 16 ), dpi = 400, facecolor = 'white' ) ax_syst = fig_syst.add_subplot(1, 1, 1) ax_syst.minorticks_on() ax_syst.xaxis.labelpad = 12 
ax_syst.yaxis.labelpad = 12 - error_hists = {} + error_hists_up = {} + error_hists_down = {} stat_hist = None for syst, vals in systematic_uncertainties.iteritems(): - if syst == 'statistical': - stat_hist = values_and_errors_to_hist( vals, [], bin_edges ) + if syst == 'central': + n = len(systematic_uncertainties[syst]) + continue + elif syst == 'statistical': + stat_hist_up = values_and_errors_to_hist( vals, [], bin_edges ) + stat_hist_down = values_and_errors_to_hist( -vals, [], bin_edges ) elif syst == 'systematic': - full_syst_hist = values_and_errors_to_hist( vals, [], bin_edges ) - elif syst == 'central': - central_hist = values_and_errors_to_hist( vals, [], bin_edges ) - else: - error_hists[syst] = values_and_errors_to_hist( vals, [], bin_edges ) + syst_hist_up = values_and_errors_to_hist( vals, [], bin_edges ) + syst_hist_down = values_and_errors_to_hist( -vals, [], bin_edges ) + elif syst in subcategories: + error_hists_up[syst] = values_and_errors_to_hist( vals, [], bin_edges ) + error_hists_down[syst] = values_and_errors_to_hist( -vals, [], bin_edges ) + else: continue + + if plot_largest: + largest_syst = [] + for bin_i in range( n ): + high = [] + for syst, vals in systematic_uncertainties.iteritems(): + if syst == 'central': continue + if syst == 'statistical': continue + if syst == 'systematic': continue + high.append([syst,vals[bin_i]]) + high = sorted(high, key = itemgetter(1), reverse=True) + # Retrieve highest systematics + if high[0][0] not in largest_syst: largest_syst.append(high[0][0]) + elif high[1][0] not in largest_syst: largest_syst.append(high[1][0]) + else: continue + + rplt.fill_between( syst_hist_up, syst_hist_down, color = 'yellow', label='Syst.' ) + rplt.fill_between( stat_hist_down, stat_hist_up, color = 'grey', label='Stat.' 
) plt.tick_params( **CMS.axis_label_major ) plt.tick_params( **CMS.axis_label_minor ) - colours = ['red', 'blue', 'green', 'chartreuse', 'indigo', 'magenta', 'darkmagenta', 'hotpink', 'cyan', 'darkred', 'darkgoldenrod', 'mediumvioletred', 'mediumspringgreen', 'gold', 'darkgoldenrod', 'slategray', 'dodgerblue', 'cadetblue', 'darkblue', 'seagreen', 'deeppink' ] - for source, colour in zip (error_hists.keys(), colours): - hist = error_hists[source] - hist.linewidth = 4 - hist.color = colour - rplt.hist( hist, stacked=False, axes = ax_syst, label = source ) - - stat_hist.linewidth = 4 - stat_hist.color = 'black' - stat_hist.linestyle = 'dashed' - rplt.hist( stat_hist, stacked=False, axes = ax_syst, label = 'stat.' ) + colours = ['red', 'blue', 'green', 'chartreuse', 'indigo', 'magenta', 'darkmagenta', 'hotpink', 'cyan', 'darkred', 'darkgoldenrod', 'mediumvioletred', 'mediumspringgreen', 'gold', 'darkgoldenrod', 'slategray', 'dodgerblue', 'cadetblue', 'darkblue', 'seagreen', 'deeppink', 'deepskyblue' ] + # if len(colours) < len(error_hists.keys()): + # print '---> Need to add more colours!!!' - full_syst_hist.linewidth = 4 - full_syst_hist.color = 'black' - rplt.hist( full_syst_hist, stacked=False, axes = ax_syst, label = 'tot syst.' 
) + for error_hists in [error_hists_up, error_hists_down]: + for i, source, in enumerate(error_hists.keys()): + hist = error_hists[source] + hist.linewidth = 4 + hist.color = colours[i] + if plot_largest: + if source not in largest_syst: + hist.linestyle = 'dashed' + hist.alpha = 0.4 + hist.linewidth = 2 + # Only label systematic once + if error_hists == error_hists_up: + rplt.hist( hist, stacked=False, label = source ) + else: + rplt.hist( hist, stacked=False, label = '' ) - leg = plt.legend(loc=1,prop={'size':30},ncol=2) + leg = plt.legend(loc='upper right',prop={'size':25},ncol=3) + # leg = plt.legend(loc='upper right',prop={'size':20},ncol=4) leg.draw_frame(False) x_title = variables_NonLatex[variable] @@ -72,16 +108,53 @@ def plot_systematic_uncertainties(systematic_uncertainties, bin_edges, variable, ax_syst.set_ylim( y_limits ) plt.xlabel( x_title, CMS.x_axis_title ) plt.ylabel( 'Relative Uncertainty', CMS.y_axis_title) + + template = '%.1f fb$^{-1}$ (%d TeV)' + label = template % ( measurement_config.new_luminosity/1000, measurement_config.centre_of_mass_energy) + plt.title( label,loc='right', **CMS.title ) + + logo_location = (0.05, 0.98) + prelim_location = (0.05, 0.92) + channel_location = ( 0.05, 0.86) + # plt.text(logo_location[0], logo_location[1], + # "CMS", + # transform=ax_syst.transAxes, + # fontsize=42, + # verticalalignment='top', + # horizontalalignment='left' + # ) + # # preliminary + # plt.text(prelim_location[0], prelim_location[1], + # r"\emph{Preliminary}", + # transform=ax_syst.transAxes, + # fontsize=42, + # verticalalignment='top', + # horizontalalignment='left' + # ) + # # channel text + # plt.text(channel_location[0], channel_location[1], + # r"\emph{%s}" % channel, + # transform=ax_syst.transAxes, + # fontsize=40, + # verticalalignment='top', + # horizontalalignment='left' + # ) + plt.tight_layout() - file_template = output_folder + '{var}_systematics_{com}TeV.pdf'.format( + file_template = output_folder + 
'{var}_systematics_{com}TeV'.format( var = variable, com = measurement_config.centre_of_mass_energy, ) + if subname: file_template = file_template + '_' + subname + file_template += '.pdf' fig_syst.savefig(file_template) print "Written plots to {f}".format(f = file_template) + # plt.show() return + + if __name__ == '__main__': parser = ArgumentParser(__doc__) parser.add_argument( @@ -135,18 +208,22 @@ def plot_systematic_uncertainties(systematic_uncertainties, bin_edges, variable, variable = args.variable output_folder = args.output_folder ps_vis = args.visiblePS - + + phase_space = 'FullPS' bin_edges = bin_edges_full[variable] if ps_vis: phase_space = 'VisiblePS' bin_edges = bin_edges_vis[variable] measurement_config = XSectionConfig(com) + # for keys in measurement_config.rate_changing_systematics_values.keys(): + # print keys + # print measurement_config.rate_changing_systematics_values[keys].scale for channel in ['electron', 'muon', 'combined', 'combinedBeforeUnfolding']: - - input_file = '{basepath}/{com}TeV/{var}/{ps}/central/normalised_xsection_{channel}_{method}_summary_relative.txt'.format( + if channel != 'combined':continue + input_file = '{basepath}/{com}TeV/{var}/{ps}/central/xsection_normalised_{channel}_{method}_summary_relative.txt'.format( basepath = path, com = com, var = variable, @@ -162,5 +239,25 @@ def plot_systematic_uncertainties(systematic_uncertainties, bin_edges, variable, systematic_uncertainties = pu.file_to_df(input_file) - plot_systematic_uncertainties(systematic_uncertainties, bin_edges, variable, output_folder) + # any group of systematics you want to plot + l_xsec = [] + l_mc = [] + l_weight = [] + l_met = [] + l_shape = [] + for k in systematic_uncertainties.keys(): + if 'cross_section' in k: l_xsec.append(k) + elif 'TTJets_' in k: l_mc.append(k) + elif ('Electron' in k or 'Muon' in k or 'PileUp' in k or 'luminosity' in k or 'BJet' in k) and 'En' not in k: l_weight.append(k) + elif 'En' in k: l_met.append(k) + elif 'JES' in k or 
'JER' in k or 'QCD_shape' in k or 'PDF' in k: l_shape.append(k) + else : print ' Not including {}'.format(k) + # # Plot them + plot_systematic_uncertainties(systematic_uncertainties, bin_edges, variable, output_folder) + plot_systematic_uncertainties(systematic_uncertainties, bin_edges, variable, output_folder, plot_largest = True, subname = 'largest') + plot_systematic_uncertainties(systematic_uncertainties, bin_edges, variable, output_folder,l_xsec, "xsection") + plot_systematic_uncertainties(systematic_uncertainties, bin_edges, variable, output_folder,l_mc, "mc") + plot_systematic_uncertainties(systematic_uncertainties, bin_edges, variable, output_folder,l_weight, "weight") + plot_systematic_uncertainties(systematic_uncertainties, bin_edges, variable, output_folder,l_met, "met") + plot_systematic_uncertainties(systematic_uncertainties, bin_edges, variable, output_folder,l_shape, "shape") diff --git a/dps/analysis/xsection/create_measurement.py b/dps/analysis/xsection/create_measurement.py index 659e91ea..b9f61236 100644 --- a/dps/analysis/xsection/create_measurement.py +++ b/dps/analysis/xsection/create_measurement.py @@ -9,478 +9,330 @@ Example: python src/cross_section_measurement/create_measurement.py -c ''' -from __future__ import print_function -from optparse import OptionParser +from argparse import ArgumentParser from dps.config.xsection import XSectionConfig from dps.config import variable_binning -from dps.utils.input import Input from dps.utils.logger import log -from copy import deepcopy -from dps.utils.measurement import Measurement, Systematic +from dps.utils.file_utilities import write_data_to_JSON # define logger for this module create_measurement_log = log["01b_get_ttjet_normalisation"] cml = create_measurement_log # alias - @cml.trace() def main(): - parser = OptionParser(__doc__) - parser.add_option("-c", "--centre-of-mass-energy", dest="CoM", default=13, type=int, - help="set the centre of mass energy for analysis. 
Default = 13 [TeV]") - parser.add_option('-d', '--debug', dest="debug", action="store_true", - help="Print the debug information") - (options, _) = parser.parse_args() - centre_of_mass_energy = options.CoM - # set global variables - debug = options.debug - if debug: - log.setLevel(log.DEBUG) - - measurement_config = XSectionConfig(centre_of_mass_energy) - categories = ['QCD_shape'] - categories.extend(measurement_config.categories_and_prefixes.keys()) - categories.extend(measurement_config.rate_changing_systematics_names) - categories.extend([measurement_config.vjets_theory_systematic_prefix + scale for scale in ['scaleup', 'scaledown']]) - - for variable in measurement_config.variables: - for category in categories: - for channel in ['electron', 'muon']: - if channel == 'electron' and (category == 'Muon_down' or category == 'Muon_up'): - continue - elif channel == 'muon' and (category == 'Electron_down' or category == 'Electron_up'): - continue - # create_measurement( - # centre_of_mass_energy, category, variable, channel, - # phase_space='FullPS', norm_method='background_subtraction') - # and the visible phase space - create_measurement( - centre_of_mass_energy, category, variable, channel, - phase_space='VisiblePS', norm_method='background_subtraction') - - -@cml.trace() -def create_measurement(com, category, variable, channel, phase_space, norm_method): - if com == 13: - # exclude non existing systematics - if 'VJets' in category and 'scale' in category: - print('Excluding {0} for now'.format(category)) - return - config = XSectionConfig(com) - met_type = get_met_type(category, config) - should_not_run_systematic = category in config.met_systematics_suffixes and variable in config.variables_no_met and not 'JES' in category and not 'JER' in category - if should_not_run_systematic: - # no MET uncertainty on HT (but JES and JER of course) - return - - m = None - if category == 'central': - m = Measurement(category) - else: - vjet_systematics = 
[config.vjets_theory_systematic_prefix + - systematic for systematic in config.generator_systematics] - if category in config.categories_and_prefixes.keys() or \ - category in config.met_systematics_suffixes or \ - category in vjet_systematics: - m = Systematic(category, - stype=Systematic.SHAPE, - affected_samples=config.samples) - elif category in config.rate_changing_systematics_names: - m = config.rate_changing_systematics_values[category] - - elif category == 'QCD_shape': - m = Systematic(category, - stype=Systematic.SHAPE, - affected_samples=['QCD'], - ) - - m.setVariable(variable) - m.setCentreOfMassEnergy(com) - m.setChannel(channel) - m.setMETType(met_type) - - inputs = { - 'channel': config.analysis_types[channel], - 'met_type': met_type, - 'selection': 'Ref selection', - 'btag': config.translate_options['2m'], # 2 or more - 'energy': com, - 'variable': variable, - 'category': category, - 'phase_space': phase_space, - 'norm_method': norm_method, - 'lepton': channel.title(), - } - variable_template = config.variable_path_templates[ - variable].format(**inputs) - - template_category = category - if category == 'QCD_shape' or category in config.rate_changing_systematics_names: - template_category = 'central' - if category in [config.vjets_theory_systematic_prefix + systematic for systematic in config.generator_systematics]: - template_category = 'central' - - m.addSample( - 'TTJet', - False, - input=create_input( - config, 'TTJet', variable, template_category, channel, - variable_template, phase_space=phase_space, measurement=m, - ), - ) - m.addSample( - 'V+Jets', - False, - input=create_input( - config, 'V+Jets', variable, template_category, channel, - variable_template, phase_space=phase_space, measurement=m, - ), - ) - m.addSample( - 'SingleTop', - False, - input=create_input( - config, 'SingleTop', variable, template_category, channel, - variable_template, phase_space=phase_space, measurement=m, - ), - ) - m.addSample( - 'QCD', - False, - 
input=create_input( - config, 'QCD', variable, template_category, channel, - variable_template, phase_space=phase_space, measurement=m, - ), - ) - variable_template_data = variable_template.replace( - met_type, config.translate_options['type1']) - - m.addSample( - 'data', - False, - input=create_input( - config, 'data', variable, template_category, channel, - variable_template_data, phase_space=phase_space, measurement=m, - ), - ) - - m_qcd = Measurement(category) - m_qcd.setVariable(variable) - m_qcd.setCentreOfMassEnergy(com) - - qcd_template = get_qcd_template(config, variable, category, channel) - - # we want "measurement = m" here since all rate systematics should apply - # to the control regions as well - m_qcd.addSample( - 'TTJet', - False, - input=create_input( - config, 'TTJet', variable, template_category, channel, - qcd_template, phase_space=phase_space, measurement=m, - ), - ) - m_qcd.addSample( - 'V+Jets', - False, - input=create_input( - config, 'V+Jets', variable, template_category, channel, - qcd_template, phase_space=phase_space, measurement=m, - ), - ) - m_qcd.addSample( - 'SingleTop', - False, - input=create_input( - config, 'SingleTop', variable, template_category, channel, - qcd_template, phase_space=phase_space, measurement=m, - ), + parser = ArgumentParser(__doc__) + parser.add_argument( + "-c", + "--centre-of-mass-energy", + dest="CoM", + default=13, + type=int, + help="set the centre of mass energy for analysis. 
Default = 13 [TeV]" ) - m_qcd.addSample( - 'QCD', - False, - input=create_input( - config, 'QCD', variable, template_category, channel, - qcd_template, phase_space=phase_space, measurement=m, - ), + parser.add_argument( + '-d', + '--debug', + dest="debug", + action="store_true", + help="Print the debug information" ) - m_qcd.addSample( - 'data', - False, - input=create_input( - config, 'data', variable, template_category, channel, - qcd_template, phase_space=phase_space, measurement=m, - ), + parser.add_argument( + '-q', + '--qcd_from_data', + dest="data_driven_qcd", + default=True, + help="Use the data-driven QCD estimate" ) - - m.addShapeForSample('QCD', m_qcd, False) - norm_qcd = deepcopy(m_qcd) - # we want QCD shape and normalisation to be separate - if category == 'QCD_shape': - for sample in norm_qcd.samples.keys(): - tree = norm_qcd.samples[sample]['input'].tree_name - if channel == 'electron': - tree = tree.replace(config.electron_control_region_systematic, - config.electron_control_region) - else: - tree = tree.replace(config.muon_control_region_systematic, - config.muon_control_region) - norm_qcd.samples[sample]['input'].tree_name = tree - if 'QCD_cross_section' in category: - for sample in norm_qcd.samples.keys(): - tree = norm_qcd.samples[sample]['input'].tree_name - if channel == 'electron': - tree = tree.replace(config.electron_control_region, - config.electron_control_region_systematic) - else: - tree = tree.replace(config.muon_control_region, - config.muon_control_region_systematic) - norm_qcd.samples[sample]['input'].tree_name = tree - - m.addNormForSample('QCD', norm_qcd, False) - - if category in [config.vjets_theory_systematic_prefix + systematic for systematic in config.generator_systematics]: - v_template_category = category.replace( - config.vjets_theory_systematic_prefix, '') - m_vjets = Measurement(category) - m_vjets.setVariable(variable) - m_vjets.setCentreOfMassEnergy(com) - m_vjets.addSample( - 'V+Jets', - False, - input=create_input( - 
config, 'V+Jets', variable, v_template_category, - channel, - variable_template, - config.generator_systematic_vjets_templates[ - v_template_category]), - phase_space=phase_space, measurement=m, - ) - m.addShapeForSample('V+Jets', m_vjets, False) - - inputs['channel'] = channel - base_path = 'config/measurements/{norm_method}/{energy}TeV/' - base_path += '{channel}/{variable}/{phase_space}/' - if category == 'central': - path = base_path + '{category}.json' - m.toJSON(path.format(**inputs)) - else: - if m.type == Systematic.SHAPE: - inputs['type'] = 'shape_systematic' - else: - inputs['type'] = 'rate_systematic' - if category in config.met_systematics_suffixes and category not in ['JES_up', 'JES_down', 'JER_up', 'JER_down']: - inputs['category'] = met_type - path = base_path + '{category}_{type}.json' - m.toJSON(path.format(**inputs)) - - -@cml.trace() -def get_met_type(category, config): - met_type = config.translate_options['type1'] - if category == 'JES_up': - met_type += 'JetEnUp' - elif category == 'JES_down': - met_type += 'JetEnDown' - elif category == 'JER_up': - met_type += 'JetResUp' - elif category == 'JER_down': - met_type += 'JetResDown' - - isJetSystematic = 'JetEn' in category or 'JetRes' in category - isJetSystematic = isJetSystematic or 'JES' in category - isJetSystematic = isJetSystematic or 'JER' in category - - if category in config.met_systematics_suffixes: - # already done them - if not isJetSystematic: - met_type = met_type + category - - return met_type + args = parser.parse_args() + + options = {} + options['com'] = args.CoM + options['data_driven_qcd'] = args.data_driven_qcd + if args.debug: log.setLevel(log.DEBUG) + + + xsec_config = XSectionConfig(options['com']) + categories = xsec_config.normalisation_systematics + print categories + + # Create specific configs required + for ps in ['VisiblePS', 'FullPS']: + options['ps']=ps + for channel in ['electron', 'muon']: + options['channel']=channel + for variable in xsec_config.variables: + 
options['variable']=variable + for category in categories: + if channel == 'electron' and (category == 'Muon_down' or category == 'Muon_up'): + continue + elif channel == 'muon' and (category == 'Electron_down' or category == 'Electron_up'): + continue + elif variable in xsec_config.variables_no_met and category in xsec_config.met_specific_systematics: + continue + options['category']=category + + m = create_measurement( + options, + norm_method='background_subtraction', + ) + + write_measurement( + options, + m, + norm_method='background_subtraction', + ) @cml.trace() -def get_file(config, sample, category, channel): - use_trees = True if config.centre_of_mass_energy == 13 else False - if channel == 'electron': - qcd_template = config.electron_QCD_MC_category_templates[category] - data_template = config.data_file_electron - qcd_template_tree = config.electron_QCD_MC_category_templates_trees[ - category] - data_template_tree = config.data_file_electron_trees - else: - qcd_template = config.muon_QCD_MC_category_templates[category] - data_template = config.data_file_muon - qcd_template_tree = config.muon_QCD_MC_category_templates_trees[ - category] - data_template_tree = config.data_file_muon_trees - - tree_files = { - 'TTJet': config.ttbar_category_templates_trees[category], - 'V+Jets': config.VJets_category_templates_trees[category], - 'SingleTop': config.SingleTop_category_templates_trees[category], - 'QCD': qcd_template_tree, - 'data': data_template_tree - } - files = { - 'TTJet': config.ttbar_category_templates[category], - 'V+Jets': config.VJets_category_templates[category], - 'SingleTop': config.SingleTop_category_templates[category], - 'QCD': qcd_template, - 'data': data_template, - } - - if use_trees: - return tree_files[sample] - else: - return files[sample] +def create_measurement(options, norm_method): + ''' + Create the config file + ''' + # Create dictionary to write to config file + measurement = {} + xsec_config = XSectionConfig(options['com']) + + # 
Generate basic normalisation config info + measurement["com"] = options['com'] + measurement["channel"] = options['channel'] + measurement["variable"] = options['variable'] + measurement["name"] = options['category'] + measurement["data_driven_qcd"] = options['data_driven_qcd'] + + # Add specific samples to config + measurement["samples"] = get_samples(options, xsec_config) + return measurement @cml.trace() -def get_qcd_template(config, variable, category, channel): - qcd_inputs = { - 'channel': config.analysis_types[channel], - 'met_type': config.translate_options['type1'], # always central MET - 'selection': 'Ref selection', - 'btag': config.translate_options['2m'], # 2 or more - 'energy': config.centre_of_mass_energy, - 'variable': variable, - 'category': 'central', # always central - 'lepton': channel.title(), - } - - qcd_template = config.variable_path_templates[ - variable].format(**qcd_inputs) - if channel == 'electron': - qcd_template = qcd_template.replace( - 'Ref selection', config.electron_control_region) - if category == 'QCD_shape': - qcd_template = qcd_template.replace( - config.electron_control_region, - config.electron_control_region_systematic) - else: - qcd_template = qcd_template.replace( - 'Ref selection', config.muon_control_region) - if category == 'QCD_shape': - qcd_template = qcd_template.replace( - config.muon_control_region, - config.muon_control_region_systematic) +def get_samples(options, xsec_config): + ''' + Return the dictionary of all sample information + ''' + # create samples dictionary + samples = {} + for s in xsec_config.samples: + samples[s] = get_sample_info(options, xsec_config, s) - return qcd_template + return samples @cml.trace() -def create_input(config, sample, variable, category, channel, template, - input_file=None, phase_space=None, **kwargs): - tree, branch, hist = None, None, None - selection = '1' - if not input_file: - input_file = get_file(config, sample, category, channel) - - if config.centre_of_mass_energy == 
13: - branch = template.split('/')[-1] - tree = template.replace('/' + branch, '') - - if 'absolute_eta' in branch: - branch = 'abs(lepton_eta)' - - if sample != 'data': - if category in config.met_systematics_suffixes and not variable in config.variables_no_met: - branch = template.split('/')[-1] - branch += '_METUncertainties[%s]' % config.met_systematics[ - category] - - if 'JES_down' in category or 'JES_up' in category or 'JER_down' in category or 'JER_up' in category: - tree += config.categories_and_prefixes[category] - - if not sample == 'data': - if 'JES_down' in category: - input_file = input_file.replace('tree', 'minusJES_tree') - elif 'JES_up' in category: - input_file = input_file.replace('tree', 'plusJES_tree') - elif 'JER_up' in category: - input_file = input_file.replace('tree', 'plusJER_tree') - elif 'JER_down' in category: - input_file = input_file.replace('tree', 'minusJER_tree') - - selection = '{0} >= 0'.format(branch) - if variable == 'abs_lepton_eta': - selection += ' && {0} <= 3'.format(branch) +def get_sample_info(options, xsec_config, sample): + ''' + Generate each measurements information + ''' + # create sample info + sample_info = {} + + # Branch (variable) + sample_info["branch"] = options['variable'] + if 'abs_lepton_eta' in options['variable']: + sample_info["branch"] = 'abs(lepton_eta)' + + # Selections + sample_info["selection"] = get_selection(options['variable']) + + # MET Systematics + # Only Met Variables + if options['variable'] not in xsec_config.variables_no_met: + # Only MET Syst measurement + if options['category'] in xsec_config.met_specific_systematics: + sample_info["branch"] += '_METUncertainties[{index}]'.format(index = str(xsec_config.met_systematics[options['category']])) + + # Bin Edges + if options['ps'] == 'VisiblePS': + sample_info["bin_edges"] = variable_binning.reco_bin_edges_vis[options['variable']] + elif options['ps'] == 'FullPS': + sample_info["bin_edges"] = 
variable_binning.reco_bin_edges_full[options['variable']] else: - hist = template - - lumi_scale = config.luminosity_scale - scale = 1. + sample_info["bin_edges"] = None + + # Lumi Scale (Rate) + # Normal lumi scale + ls = 1.0 + # If want to rescale MC to new lumi + if 'data' not in sample: + ls = xsec_config.luminosity_scale + sample_info["lumi_scale"]=ls + lumi_scale = xsec_config.rate_changing_systematics['luminosity'] + if options['category'] == 'luminosity+': + sample_info["lumi_scale"]= ls*(1+lumi_scale) + elif options['category'] == 'luminosity-': + sample_info["lumi_scale"]= ls*(1-lumi_scale) + + # Generator Scale (Rate) + sample_info["scale"]=1.0 + + if sample == 'V+Jets': + generator_scale = xsec_config.rate_changing_systematics['V+Jets_cross_section'] + if options['category'] == 'V+Jets_cross_section+': + sample_info["scale"] = 1.0 + 1.0*generator_scale + elif options['category'] == 'V+Jets_cross_section-': + sample_info["scale"] = 1.0 - 1.0*generator_scale + + if sample == 'SingleTop': + generator_scale = xsec_config.rate_changing_systematics['SingleTop_cross_section'] + if options['category'] == 'SingleTop_cross_section+': + sample_info["scale"] = 1.0 + 1.0*generator_scale + elif options['category'] == 'SingleTop_cross_section-': + sample_info["scale"] = 1.0 - 1.0*generator_scale - m = kwargs['measurement'] - if m.type == Systematic.RATE: - if 'luminosity' in m.name: - lumi_scale = lumi_scale * m.scale - else: - if sample in m.affected_samples: - scale = m.scale - if sample == 'data': # data is not scaled in any way - lumi_scale = 1. - scale = 1. 
+ # scaling will always have some non zero value + if sample_info["scale"] <= 0.0001: sample_info["scale"] = 0.0001 - edges = variable_binning.reco_bin_edges_full[variable] - if phase_space == 'VisiblePS': - edges = variable_binning.reco_bin_edges_vis[variable] + # Weight branches (Shape) weight_branches = [] if sample == 'data': weight_branches.append('1') else: weight_branches.append('EventWeight') - if 'PileUp' not in category: - weight_branches.append('PUWeight') - elif category == 'PileUp_up': + # PU Weights + if options['category'] == 'PileUp_up': weight_branches.append('PUWeight_up') - elif category == 'PileUp_down': + elif options['category'] == 'PileUp_down': weight_branches.append('PUWeight_down') else: - weight_branches.append('1') + weight_branches.append('PUWeight') - if category == 'BJet_down': - weight_branches.append('BJetDownWeight') - elif category == 'BJet_up': + # BJet Weights + if options['category'] == 'BJet_up': weight_branches.append('BJetUpWeight') - elif category == 'LightJet_down': - weight_branches.append('LightJetDownWeight') - elif category == 'LightJet_up': + elif options['category'] == 'BJet_down': + weight_branches.append('BJetDownWeight') + elif options['category'] == 'LightJet_up': weight_branches.append('LightJetUpWeight') + elif options['category'] == 'LightJet_down': + weight_branches.append('LightJetDownWeight') else: weight_branches.append('BJetWeight') - # if not 'QCD' in tree: - # if channel == 'muon': - # if category == 'Muon_down': - # weight_branches.append('MuonDown') - # elif category == 'Muon_up': - # weight_branches.append('MuonUp') - # else: - # weight_branches.append('MuonEfficiencyCorrection') - # elif channel == 'electron': - # if category == 'Electron_down': - # weight_branches.append('ElectronDown') - # elif category == 'Electron_up': - # weight_branches.append('ElectronUp') - # else: - # weight_branches.append('ElectronEfficiencyCorrection') - - i = Input( - input_file=input_file, - hist=hist, - tree=tree, - 
branch=branch, - selection=selection, - bin_edges=edges, - lumi_scale=lumi_scale, - scale=scale, - weight_branches=weight_branches, + # Lepton Weights + # Lepton weights for nonisolated leptons are removed in measurement.py + # The lepton sf are not derived for non isolated leptons + # if options['channel'] == 'muon': + # if options['category'] == 'Muon_down': + # weight_branches.append('MuonDown') + # elif options['category'] == 'Muon_up': + # weight_branches.append('MuonUp') + # else: + # weight_branches.append('MuonEfficiencyCorrection') + # elif options['channel'] == 'electron': + # if options['category'] == 'Electron_down': + # weight_branches.append('ElectronDown') + # elif options['category'] == 'Electron_up': + # weight_branches.append('ElectronUp') + # else: + # weight_branches.append('ElectronEfficiencyCorrection') + sample_info["weight_branches"] = weight_branches + + # Input File and Tree + # QCD Control Regions (Shape) JES and JER + sample_info["input_file"] = get_file(xsec_config, sample, options) + sample_info["tree"], sample_info["qcd_control_region"], sample_info["qcd_normalisation_region"] = get_tree(xsec_config, options) + if sample != 'data': + if options['category'] == 'JES_up': + sample_info["input_file"] = sample_info["input_file"].replace('tree', 'plusJES_tree') + sample_info["tree"] = sample_info["tree"].replace('FitVariables', 'FitVariables_JESUp') + sample_info["qcd_control_region"] = sample_info["qcd_control_region"].replace('FitVariables', 'FitVariables_JESUp') + elif options['category'] == 'JES_down': + sample_info["input_file"] = sample_info["input_file"].replace('tree', 'minusJES_tree') + sample_info["tree"] = sample_info["tree"].replace('FitVariables', 'FitVariables_JESDown') + sample_info["qcd_control_region"] = sample_info["qcd_control_region"].replace('FitVariables', 'FitVariables_JESDown') + elif options['category'] == 'JER_up': + sample_info["input_file"] = sample_info["input_file"].replace('tree', 'plusJER_tree') + 
sample_info["tree"] = sample_info["tree"].replace('FitVariables', 'FitVariables_JERUp') + sample_info["qcd_control_region"] = sample_info["qcd_control_region"].replace('FitVariables', 'FitVariables_JERUp') + elif options['category'] == 'JER_down': + sample_info["input_file"] = sample_info["input_file"].replace('tree', 'minusJER_tree') + sample_info["tree"] = sample_info["tree"].replace('FitVariables', 'FitVariables_JERDown') + sample_info["qcd_control_region"] = sample_info["qcd_control_region"].replace('FitVariables', 'FitVariables_JERDown') + + if sample_info["qcd_normalisation_region"] is None: + sample_info["qcd_normalisation_region"] = sample_info["qcd_control_region"] + return sample_info + +@cml.trace() +def get_selection(var): + ''' + Return a selection for the branch used by ROOT.Tree.Draw() + ''' + sel = str(var)+" >= 0" + if 'abs_lepton_eta' in var: + sel = "abs(lepton_eta) >= 0 && abs(lepton_eta) <= 3" + return sel + + +@cml.trace() +def get_file(config, sample, options): + ''' + Return a specific sample file + ''' + if options['channel'] == 'electron': + qcd = config.electron_QCD_MC_trees[options['category']] + data = config.data_file_electron + else: + qcd = config.muon_QCD_MC_trees[options['category']] + data = config.data_file_muon + + files = { + 'TTBar': config.ttbar_trees[options['category']], + 'V+Jets': config.VJets_trees[options['category']], + 'SingleTop': config.SingleTop_trees[options['category']], + 'QCD': qcd, + 'data': data + } + return files[sample] + + +@cml.trace() +def get_tree(config, options): + ''' + Return a specific sample tree + ''' + tree = config.tree_path[options['channel']] + qcd_tree = None + qcd_normalisation_tree = None + if options["data_driven_qcd"]: + # QCD control region + qcd_tree = tree.replace( + "Ref selection", config.qcd_control_region[options['channel']]) + # QCD shape systematic + if "QCD_shape" in options['category']: + qcd_tree = tree.replace( + "Ref selection", 
config.qcd_shape_syst_region[options['channel']]) + qcd_normalisation_tree = tree.replace( + "Ref selection", config.qcd_control_region[options['channel']]) + elif "QCD_cross_section" in options['category']: + qcd_tree = tree.replace( + "Ref selection", config.qcd_control_region[options['channel']]) + qcd_normalisation_tree = tree.replace( + "Ref selection", config.qcd_shape_syst_region[options['channel']]) + + return tree, qcd_tree, qcd_normalisation_tree + + +@cml.trace() +def write_measurement(options, measurement, norm_method): + ''' + Write the config + ''' + base_path = 'config/measurements/{norm_method}/{energy}TeV/{channel}/{variable}/{phase_space}/' + path = base_path + '{category}.json' + + path = path.format( + norm_method = norm_method, + energy = options['com'], + channel = options['channel'], + variable = options['variable'], + phase_space = options['ps'], + category = options['category'], ) - return i + write_data_to_JSON(measurement, path, indent = True) + return if __name__ == '__main__': main() diff --git a/dps/analysis/xsection/make_binning_plots.py b/dps/analysis/xsection/make_binning_plots.py index bb310041..7b78f11b 100644 --- a/dps/analysis/xsection/make_binning_plots.py +++ b/dps/analysis/xsection/make_binning_plots.py @@ -32,6 +32,12 @@ def make_scatter_plot( input_file, histogram, bin_edges, channel, variable, title ): global output_folder, output_formats, options scatter_plot = get_histogram_from_file( histogram, input_file ) + + # Finding max value in scatterplot for colourmap normalisation + max_bin = scatter_plot.GetMaximumBin() + max_bin_content = scatter_plot.GetBinContent(max_bin) + norm = mpl.colors.LogNorm(vmin = 1, vmax = int(max_bin_content+1)) + # scatter_plot.Rebin2D( 5, 5 ) x_limits = [bin_edges[variable][0], bin_edges[variable][-1]] @@ -51,7 +57,9 @@ def make_scatter_plot( input_file, histogram, bin_edges, channel, variable, titl plt.tick_params( **CMS.axis_label_minor ) ax0.xaxis.labelpad = 12 ax0.yaxis.labelpad = 12 - im = 
rplt.imshow( scatter_plot, axes = ax0, cmap = my_cmap, vmin = 0.001 ) + + im = rplt.imshow( scatter_plot, axes = ax0, cmap = my_cmap, norm=norm ) + colorbar = plt.colorbar( im ) colorbar.ax.tick_params( **CMS.axis_label_major ) diff --git a/dps/analysis/xsection/make_control_plots_fromTrees.py b/dps/analysis/xsection/make_control_plots_fromTrees.py index 34a2b642..828126b9 100644 --- a/dps/analysis/xsection/make_control_plots_fromTrees.py +++ b/dps/analysis/xsection/make_control_plots_fromTrees.py @@ -1,4 +1,4 @@ -from optparse import OptionParser +from argparse import ArgumentParser from dps.config.latex_labels import b_tag_bins_latex, samples_latex, channel_latex, \ variables_latex, fit_variables_latex, control_plots_latex from dps.config.variable_binning import fit_variable_bin_edges, control_plots_bins @@ -31,31 +31,33 @@ def getHistograms( histogram_files, x_limits ): global measurement_config - b_Selection = signal_region_tree.split('/')[-2] + # 2b or No_b selection + b_Selection = signal_region_tree.split('/')[-2] + # Names of QCD regions to use - qcd_data_region = '' - qcd_data_region_electron = 'QCDConversions' - qcd_data_region_muon = 'QCD non iso mu+jets 1p5to3' + qcd_data_region = '' + qcd_data_region_electron = 'QCDConversions' + qcd_data_region_muon = 'QCD non iso mu+jets 1p5to3' # Channel specific files and weights if 'electron' in channel: - histogram_files['data'] = measurement_config.data_file_electron_trees - histogram_files['QCD'] = measurement_config.electron_QCD_MC_category_templates_trees[category] - if normalise_to_fit: - normalisation = normalisations_electron[norm_variable] + histogram_files['data'] = measurement_config.data_file_electron + histogram_files['QCD'] = measurement_config.electron_QCD_MC_trees[category] if use_qcd_data_region: - qcd_data_region = qcd_data_region_electron + qcd_data_region = qcd_data_region_electron + # No Lepton Eff in QCD CR and PU distributions # if not 'QCD' in channel and not 'NPU' in branchName: - # 
weightBranchSignalRegion += ' * ElectronEfficiencyCorrection' + # weightBranchSignalRegion += ' * ElectronEfficiencyCorrection' + if 'muon' in channel: - histogram_files['data'] = measurement_config.data_file_muon_trees - histogram_files['QCD'] = measurement_config.muon_QCD_MC_category_templates_trees[category] - if normalise_to_fit: - normalisation = normalisations_muon[norm_variable] + histogram_files['data'] = measurement_config.data_file_muon + histogram_files['QCD'] = measurement_config.muon_QCD_MC_trees[category] if use_qcd_data_region: - qcd_data_region = qcd_data_region_muon + qcd_data_region = qcd_data_region_muon # if not 'QCD' in channel: - # weightBranchSignalRegion += ' * MuonEfficiencyCorrection' + # weightBranchSignalRegion += ' * MuonEfficiencyCorrection' + + # Print all the weights applied to this plot print weightBranchSignalRegion # Apply selection to avoid non-physical values @@ -66,47 +68,105 @@ def getHistograms( histogram_files, histograms = {} histograms_QCDControlRegion = {} - # Get histograms for combined channel - if channel == 'combined': - histogram_files_electron = dict(histogram_files) - histogram_files_electron['data'] = measurement_config.data_file_electron_trees - histogram_files_electron['QCD'] = measurement_config.electron_QCD_MC_category_templates_trees[category] - - histogram_files_muon = dict(histogram_files) - histogram_files_muon['data'] = measurement_config.data_file_muon_trees - histogram_files_muon['QCD'] = measurement_config.muon_QCD_MC_category_templates_trees[category] - # histograms_electron = get_histograms_from_trees( trees = [signal_region_tree.replace('COMBINED','EPlusJets')], branch = branchName, weightBranch = weightBranchSignalRegion + ' * ElectronEfficiencyCorrection', files = histogram_files_electron, nBins = nBins, xMin = x_limits[0], xMax = x_limits[-1], selection = selection ) - # histograms_muon = get_histograms_from_trees( trees = [signal_region_tree.replace('COMBINED','MuPlusJets')], branch = 
branchName, weightBranch = weightBranchSignalRegion + ' * MuonEfficiencyCorrection', files = histogram_files_muon, nBins = nBins, xMin = x_limits[0], xMax = x_limits[-1], selection = selection ) - histograms_muon = get_histograms_from_trees( trees = [signal_region_tree.replace('COMBINED','MuPlusJets')], branch = branchName, weightBranch = weightBranchSignalRegion, files = histogram_files_muon, nBins = nBins, xMin = x_limits[0], xMax = x_limits[-1], selection = selection ) - histograms_electron = get_histograms_from_trees( trees = [signal_region_tree.replace('COMBINED','EPlusJets')], branch = branchName, weightBranch = weightBranchSignalRegion, files = histogram_files_electron, nBins = nBins, xMin = x_limits[0], xMax = x_limits[-1], selection = selection ) + # Retrieve histograms for the combined channel + if channel == 'combined': + histogram_files_electron = dict(histogram_files) + histogram_files_electron['data'] = measurement_config.data_file_electron + histogram_files_electron['QCD'] = measurement_config.electron_QCD_MC_trees[category] + + histogram_files_muon = dict(histogram_files) + histogram_files_muon['data'] = measurement_config.data_file_muon + histogram_files_muon['QCD'] = measurement_config.muon_QCD_MC_trees[category] + + histograms_electron = get_histograms_from_trees( + trees = [signal_region_tree.replace('COMBINED','EPlusJets')], + branch = branchName, + # weightBranch = weightBranchSignalRegion + ' * ElectronEfficiencyCorrection', + weightBranch = weightBranchSignalRegion, + files = histogram_files_electron, + nBins = nBins, + xMin = x_limits[0], + xMax = x_limits[-1], + selection = selection + ) + histograms_muon = get_histograms_from_trees( + trees = [signal_region_tree.replace('COMBINED','MuPlusJets')], + branch = branchName, + # weightBranch = weightBranchSignalRegion + ' * MuonEfficiencyCorrection', + weightBranch = weightBranchSignalRegion, + files = histogram_files_muon, + nBins = nBins, + xMin = x_limits[0], + xMax = x_limits[-1], + 
selection = selection + ) if use_qcd_data_region: - qcd_control_region = signal_region_tree.replace(b_Selection ,'QCD_Control') + qcd_control_region = signal_region_tree.replace( b_Selection ,'QCD_Control') qcd_control_region_electron = signal_region_tree.replace( b_Selection , qcd_data_region_electron ).replace('COMBINED','EPlusJets') - histograms_electron_QCDControlRegion = get_histograms_from_trees( trees = [qcd_control_region_electron], branch = branchName, weightBranch = 'EventWeight', files = histogram_files_electron, nBins = nBins, xMin = x_limits[0], xMax = x_limits[-1], selection = selection ) - qcd_control_region_muon = signal_region_tree.replace( b_Selection , qcd_data_region_muon ).replace('COMBINED','MuPlusJets') - histograms_muon_QCDControlRegion = get_histograms_from_trees( trees = [qcd_control_region_muon], branch = branchName, weightBranch = 'EventWeight', files = histogram_files_muon, nBins = nBins, xMin = x_limits[0], xMax = x_limits[-1], selection = selection ) - + qcd_control_region_muon = signal_region_tree.replace( b_Selection , qcd_data_region_muon ).replace('COMBINED','MuPlusJets') + histograms_electron_QCDControlRegion = get_histograms_from_trees( + trees = [qcd_control_region_electron], + branch = branchName, + weightBranch = 'EventWeight', + files = histogram_files_electron, + nBins = nBins, + xMin = x_limits[0], + xMax = x_limits[-1], + selection = selection + ) + histograms_muon_QCDControlRegion = get_histograms_from_trees( + trees = [qcd_control_region_muon], + branch = branchName, + weightBranch = 'EventWeight', + files = histogram_files_muon, + nBins = nBins, + xMin = x_limits[0], + xMax = x_limits[-1], + selection = selection + ) + + # Combine the electron and muon histograms for sample in histograms_electron: h_electron = histograms_electron[sample][signal_region_tree.replace('COMBINED','EPlusJets')] - h_muon = histograms_muon[sample][signal_region_tree.replace('COMBINED','MuPlusJets')] + h_muon = 
histograms_muon[sample][signal_region_tree.replace('COMBINED','MuPlusJets')] h_combined = h_electron + h_muon histograms[sample] = { signal_region_tree : h_combined} if use_qcd_data_region: h_qcd_electron = histograms_electron_QCDControlRegion[sample][qcd_control_region_electron] - h_qcd_muon = histograms_muon_QCDControlRegion[sample][qcd_control_region_muon] + h_qcd_muon = histograms_muon_QCDControlRegion[sample][qcd_control_region_muon] h_qcd_combined = h_qcd_electron + h_qcd_muon histograms_QCDControlRegion[sample] = { qcd_control_region : h_qcd_combined } - # Get hsitgorams for specific channel + + # Now for histograms for a single channel else : - histograms = get_histograms_from_trees( trees = [signal_region_tree], branch = branchName, weightBranch = weightBranchSignalRegion, files = histogram_files, nBins = nBins, xMin = x_limits[0], xMax = x_limits[-1], selection = selection ) + histograms = get_histograms_from_trees( + trees = [signal_region_tree], + branch = branchName, + weightBranch = weightBranchSignalRegion, + files = histogram_files, + nBins = nBins, + xMin = x_limits[0], + xMax = x_limits[-1], + selection = selection + ) if use_qcd_data_region: qcd_control_region = signal_region_tree.replace( b_Selection , qcd_data_region ) - histograms_QCDControlRegion = get_histograms_from_trees( trees = [qcd_control_region], branch = branchName, weightBranch = 'EventWeight', files = histogram_files, nBins = nBins, xMin = x_limits[0], xMax = x_limits[-1], selection = selection ) + histograms_QCDControlRegion = get_histograms_from_trees( + trees = [qcd_control_region], + branch = branchName, + weightBranch = 'EventWeight', + files = histogram_files, + nBins = nBins, + xMin = x_limits[0], + xMax = x_limits[-1], + selection = selection + ) # Technical step, don't need key for tree + # Book a dictionary full of the histograms to be used signal_region_hists = {} control_region_hists = {} for sample in histograms.keys(): @@ -115,34 +175,37 @@ def getHistograms( 
histogram_files, if use_qcd_data_region: control_region_hists[sample] = histograms_QCDControlRegion[sample][qcd_control_region] - # Prepare histograms - if normalise_to_fit: - # only scale signal region to fit (results are invalid for control region) - prepare_histograms( signal_region_hists, rebin = rebin, - scale_factor = measurement_config.luminosity_scale, - normalisation = normalisation ) - elif normalise_to_data: + # Prepare histograms - Scale to data or luminosity + if normalise_to_data: totalMC = 0 for sample in signal_region_hists: if sample is 'data' : continue totalMC += signal_region_hists[sample].Integral() newScale = signal_region_hists['data'].Integral() / totalMC - - prepare_histograms( signal_region_hists, rebin = rebin, - scale_factor = newScale, - ) + prepare_histograms( + signal_region_hists, + rebin = rebin, + scale_factor = newScale, + ) else: - prepare_histograms( signal_region_hists, rebin = rebin, - scale_factor = measurement_config.luminosity_scale ) - prepare_histograms( control_region_hists, rebin = rebin, - scale_factor = measurement_config.luminosity_scale ) - - # Use qcd from data control region or not + prepare_histograms( + signal_region_hists, + rebin = rebin, + scale_factor = measurement_config.luminosity_scale + ) + prepare_histograms( + control_region_hists, + rebin = rebin, + scale_factor = measurement_config.luminosity_scale + ) + + # Using QCD from data? 
qcd_from_data = None if use_qcd_data_region: - qcd_from_data = clean_control_region( control_region_hists, - - subtract = ['TTJet', 'V+Jets', 'SingleTop'] ) + qcd_from_data = clean_control_region( + control_region_hists, + subtract = ['TTJet', 'V+Jets', 'SingleTop'] + ) # Normalise control region correctly nBins = signal_region_hists['QCD'].GetNbinsX() n, error = signal_region_hists['QCD'].integral(0,nBins+1,error=True) @@ -157,8 +220,6 @@ def getHistograms( histogram_files, if not n_qcd_control_region == 0: dataDrivenQCDScale = n_qcd_predicted_mc_signal / n_qcd_predicted_mc_control qcd_from_data.Scale( dataDrivenQCDScale.nominal_value ) - # signalToControlScale = n_qcd_predicted_mc_signal / n_qcd_control_region - # dataToMCscale = n_qcd_control_region / n_qcd_predicted_mc_control else: qcd_from_data = signal_region_hists['QCD'] @@ -179,8 +240,8 @@ def make_plot( channel, x_axis_title, y_axis_title, ratio_y_limits = [0.5, 1.5], normalise = False, ): - global output_folder, measurement_config, category, normalise_to_fit, showErrorBandOnRatio - global preliminary, norm_variable, sum_bins, b_tag_bin, histogram_files + global output_folder, measurement_config, category, showErrorBandOnRatio + global preliminary, norm_variable, b_tag_bin, histogram_files # Lumi title of plots title = title_template % ( measurement_config.new_luminosity/1000, measurement_config.centre_of_mass_energy ) @@ -188,241 +249,260 @@ def make_plot( channel, x_axis_title, y_axis_title, # Define weights weightBranchSignalRegion = 'EventWeight' + # Apply PU Weights if not "_NPUNoWeight" in name_prefix: if '_NPUUp' in name_prefix: weightBranchSignalRegion += ' * PUWeight_up' elif '_NPUDown' in name_prefix: weightBranchSignalRegion += ' * PUWeight_down' else: weightBranchSignalRegion += ' * PUWeight' + # Apply B Jet Weights if not "_NBJetsNoWeight" in name_prefix: if '_NBJetsUp' in name_prefix: weightBranchSignalRegion += ' * BJetUpWeight' elif '_NBJetsDown' in name_prefix: weightBranchSignalRegion += 
' * BJetDownWeight' elif '_NBJets_LightUp' in name_prefix: weightBranchSignalRegion += ' * LightJetUpWeight' elif '_NBJets_LightDown' in name_prefix: weightBranchSignalRegion += ' * LightJetDownWeight' else: weightBranchSignalRegion += ' * BJetWeight' + # Get all histograms - signal_region_hists, control_region_hists, qcd_from_data = getHistograms( histogram_files, signal_region_tree, use_qcd_data_region, channel, branchName, weightBranchSignalRegion, nBins, rebin, x_limits ) - + signal_region_hists, control_region_hists, qcd_from_data = getHistograms( + histogram_files, + signal_region_tree, + use_qcd_data_region, + channel, + branchName, + weightBranchSignalRegion, + nBins, + rebin, + x_limits + ) # Which histograms to draw, and properties histograms_to_draw = [] - histogram_lables = [] - histogram_colors = [] - - histograms_to_draw = [signal_region_hists['data'], - qcd_from_data, - signal_region_hists['V+Jets'], - signal_region_hists['SingleTop'], - signal_region_hists['TTJet']] - histogram_lables = ['data', - 'QCD', - 'V+Jets', - 'Single-Top', - samples_latex['TTJet']] - histogram_colors = [colours['data'], - colours['QCD'], - colours['V+Jets'], - colours['Single-Top'], - colours['TTJet'] ] - - - # Printout on normalisation of different samples - print 'Normalisation after selection' - print 'Data :',signal_region_hists['data'].integral(overflow=True) - print 'TTJet :',signal_region_hists['TTJet'].integral(overflow=True) - print 'Single Top :',signal_region_hists['SingleTop'].integral(overflow=True) - print 'V+Jets :',signal_region_hists['V+Jets'].integral(overflow=True) - print 'QCD :',qcd_from_data.integral(overflow=True) - - mcSum = signal_region_hists['TTJet'].integral(overflow=True) + signal_region_hists['SingleTop'].integral(overflow=True) + signal_region_hists['V+Jets'].integral(overflow=True) + qcd_from_data.integral(overflow=True) - print 'Total MC :',mcSum - + histogram_lables = [] + histogram_colors = [] + + histograms_to_draw = [ + 
signal_region_hists['data'], + qcd_from_data, + signal_region_hists['V+Jets'], + signal_region_hists['SingleTop'], + signal_region_hists['TTJet'], + ] + histogram_lables = [ + 'data', + 'QCD', + 'V+Jets', + 'Single-Top', + samples_latex['TTJet'], + ] + histogram_colors = [ + colours['data'], + colours['QCD'], + colours['V+Jets'], + colours['Single-Top'], + colours['TTJet'], + ] + + # Print sample event yields + print_output(signal_region_hists, qcd_from_data) + + # Find maximum y of samples maxData = max( list(signal_region_hists['data'].y()) ) y_limits = [0, maxData * 1.4] if log_y: y_limits = [0.1, maxData * 100 ] - # for i in range(0,signal_region_hists['data'].GetNbinsX()): - # print signal_region_hists['data'].GetBinContent() - # print i - # for h in signal_region_hists: - # print signal_region_hists[h].GetBinContent(i) - - # More histogram settings + # More histogram settings to look semi decent histogram_properties = Histogram_properties() - histogram_properties.name = name_prefix + b_tag_bin + histogram_properties.name = name_prefix + b_tag_bin if category != 'central': - histogram_properties.name += '_' + category - histogram_properties.title = title - histogram_properties.x_axis_title = x_axis_title - histogram_properties.y_axis_title = y_axis_title - histogram_properties.x_limits = x_limits - histogram_properties.y_limits = y_limits - histogram_properties.y_max_scale = y_max_scale - histogram_properties.xerr = None + histogram_properties.name += '_' + category + if normalise_to_data: + histogram_properties.name += '_normToData' + histogram_properties.title = title + histogram_properties.x_axis_title = x_axis_title + histogram_properties.y_axis_title = y_axis_title + histogram_properties.x_limits = x_limits + histogram_properties.y_limits = y_limits + histogram_properties.y_max_scale = y_max_scale + histogram_properties.xerr = None # workaround for rootpy issue #638 - histogram_properties.emptybins = True + histogram_properties.emptybins = True + 
histogram_properties.additional_text = channel_latex[channel] if b_tag_bin: - histogram_properties.additional_text = channel_latex[channel] + ', ' + b_tag_bins_latex[b_tag_bin] - else: - histogram_properties.additional_text = channel_latex[channel] - histogram_properties.legend_location = legend_location - histogram_properties.cms_logo_location = cms_logo_location - histogram_properties.preliminary = preliminary - histogram_properties.set_log_y = log_y - histogram_properties.legend_color = legend_color + histogram_properties.additional_text += b_tag_bins_latex[b_tag_bin] + histogram_properties.legend_location = legend_location + histogram_properties.cms_logo_location = cms_logo_location + histogram_properties.preliminary = preliminary + histogram_properties.set_log_y = log_y + histogram_properties.legend_color = legend_color if ratio_y_limits: - histogram_properties.ratio_y_limits = ratio_y_limits - + histogram_properties.ratio_y_limits = ratio_y_limits if branchName in ['NJets', 'NBJets', 'NBJetsNoWeight']: histogram_properties.integerXVariable = True - # if normalise_to_fit: - # histogram_properties.mc_error = get_normalisation_error( normalisation ) - # histogram_properties.mc_errors_label = 'fit uncertainty' - - if normalise_to_data: - histogram_properties.name += '_normToData' output_folder_to_use = output_folder if use_qcd_data_region: output_folder_to_use += 'WithQCDFromControl/' make_folder_if_not_exists(output_folder_to_use) - if branchName == 'NPU': - getPUWeights(histograms_to_draw, histogram_lables) + # Prints the ratio of PU in Data/MC + # if branchName == 'NPU': + # getPUWeights(histograms_to_draw, histogram_lables) - # Actually draw histograms - # make_data_mc_comparison_plot( histograms_to_draw, histogram_lables, histogram_colors, - # histogram_properties, save_folder = output_folder_to_use, - # show_ratio = False, normalise = normalise, - # ) - # Draw same histogram, but with ratio plot + # Draw histogram with ratio plot histogram_properties.name 
+= '_with_ratio' loc = histogram_properties.legend_location # adjust legend location as it is relative to canvas! histogram_properties.legend_location = ( loc[0], loc[1] + 0.05 ) - make_data_mc_comparison_plot( histograms_to_draw, histogram_lables, histogram_colors, - histogram_properties, save_folder = output_folder_to_use, - show_ratio = True, normalise = normalise - ) + make_data_mc_comparison_plot( + histograms_to_draw, + histogram_lables, + histogram_colors, + histogram_properties, + save_folder = output_folder_to_use, + show_ratio = True, + normalise = normalise + ) print ("Plot written to : ", output_folder_to_use) - # make_plot_tmp( qcd_from_data, histogram_properties, save_folder = output_folder_to_use+'test' ) + return + +def print_output(signal_region_hists, qcd_from_data): + '''Printout on normalisation of different samples''' + print 'Normalisation after selection' + print 'Data :', signal_region_hists['data'].integral(overflow=True) + print 'TTJet :', signal_region_hists['TTJet'].integral(overflow=True) + print 'Single Top :', signal_region_hists['SingleTop'].integral(overflow=True) + print 'V+Jets :', signal_region_hists['V+Jets'].integral(overflow=True) + print 'QCD :', qcd_from_data.integral(overflow=True) + print '-'*60 + mcSum = signal_region_hists['TTJet'].integral(overflow=True) + signal_region_hists['SingleTop'].integral(overflow=True) + signal_region_hists['V+Jets'].integral(overflow=True) + qcd_from_data.integral(overflow=True) + print 'Total DATA :', signal_region_hists['data'].integral(overflow=True) + print 'Total MC :', mcSum + print '='*60 + return + +def parse_arguments(): + parser = ArgumentParser(__doc__) + parser.add_argument( "-o", "--output_folder", + dest = "output_folder", + default = 'plots/control_plots/', + help = "set path to save plots" + ) + parser.add_argument( "-c", "--centre-of-mass-energy", + dest = "CoM", + default = 13, + type = int, + help = "set the centre of mass energy for analysis. 
Default = 13 [TeV]" + ) + parser.add_argument( "--category", + dest = "category", + default = 'central', + help = "set the category to take the fit results from (default: central)" + ) + parser.add_argument( "--generator", + dest = "generator", + default = 'PowhegPythia8', + help = "set the generator (PowhegPythia8, powhegHerwigpp, amc, amcatnloHerwigpp, madgraph)" + ) + parser.add_argument( "-d", "--normalise_to_data", + dest = "normalise_to_data", + action = "store_true", + help = "normalise the MC to data" + ) + parser.add_argument( "-a", "--additional-plots", + action = "store_true", + dest = "additional_QCD_plots", + help = "creates a set of QCD plots for exclusive bins for all variables" + ) + args = parser.parse_args() + return args if __name__ == '__main__': set_root_defaults() - parser = OptionParser() - parser.add_option( "-p", "--path", dest = "path", default = 'data/M3_angle_bl/', - help = "set path to JSON files" ) - parser.add_option( "-o", "--output_folder", dest = "output_folder", default = 'plots/control_plots/', - help = "set path to save plots" ) - parser.add_option( "-m", "--metType", dest = "metType", default = 'type1', - help = "set MET type used in the analysis of MET-dependent variables" ) - parser.add_option( "-c", "--centre-of-mass-energy", dest = "CoM", default = 13, type = int, - help = "set the centre of mass energy for analysis. 
Default = 13 [TeV]" ) - parser.add_option( "--category", dest = "category", default = 'central', - help = "set the category to take the fit results from (default: central)" ) - parser.add_option( "--generator", dest = "generator", default = 'PowhegPythia8', - help = "set the generator (PowhegPythia8, powhegHerwigpp, amc, amcatnloHerwigpp, madgraph)" ) - parser.add_option( "-n", "--normalise_to_fit", dest = "normalise_to_fit", action = "store_true", - help = "normalise the MC to fit results" ) - parser.add_option( "-d", "--normalise_to_data", dest = "normalise_to_data", action = "store_true", - help = "normalise the MC to data" ) - parser.add_option( "-a", "--additional-plots", action = "store_true", dest = "additional_QCD_plots", - help = "creates a set of QCD plots for exclusive bins for all variables" ) - - ( options, args ) = parser.parse_args() - measurement_config = XSectionConfig( options.CoM ) - # caching of variables for shorter access - translate_options = measurement_config.translate_options + args = parse_arguments() + + measurement_config = XSectionConfig( args.CoM ) - path_to_JSON = '%s/%dTeV/' % ( options.path, measurement_config.centre_of_mass_energy ) - normalise_to_fit = options.normalise_to_fit - normalise_to_data = options.normalise_to_data - if normalise_to_fit: - output_folder = '%s/after_fit/%dTeV/' % ( options.output_folder, measurement_config.centre_of_mass_energy ) - else: - output_folder = '%s' % ( options.output_folder ) - make_folder_if_not_exists( output_folder ) + normalise_to_data = args.normalise_to_data + + output_folder = '{o}/'.format( o = args.output_folder ) output_folder_base = output_folder - category = options.category - generator = options.generator - met_type = translate_options[options.metType] - make_additional_QCD_plots = options.additional_QCD_plots + make_folder_if_not_exists( output_folder_base ) + + category = args.category + generator = args.generator + make_additional_QCD_plots = args.additional_QCD_plots + # 
Retreive the appropriate sample histograms histogram_files = { - 'TTJet': measurement_config.ttbar_category_templates_trees[category], - 'V+Jets': measurement_config.VJets_category_templates_trees[category], - 'QCD': measurement_config.electron_QCD_MC_category_templates_trees[category], - 'SingleTop': measurement_config.SingleTop_category_templates_trees[category], + 'TTJet' : measurement_config.ttbar_trees[category], + 'V+Jets' : measurement_config.VJets_trees[category], + 'QCD' : measurement_config.electron_QCD_MC_trees[category], + 'SingleTop' : measurement_config.SingleTop_trees[category], } - - if (generator != 'PowhegPythia8'): - histogram_files['TTJet'] = measurement_config.ttbar_generator_category_templates_trees[generator] - - # Leftover from run1, when fit method was used - # Leave implementation for now - normalisations_electron = { - } - normalisations_muon = { - } + if 'PowhegPythia8' not in generator: + histogram_files['TTJet'] = measurement_config.ttbar_trees[category].replace('PowhegPythia8', generator) preliminary = True useQCDControl = True # showErrorBandOnRatio = True - b_tag_bin = '2orMoreBtags' norm_variable = 'MET' + # comment out plots you don't want include_plots = [ - 'HT', - 'MET', - 'ST', - 'WPT', - 'NVertex', - 'NVertexNoWeight', - 'NVertexUp', - 'NVertexDown', - 'LeptonPt', - 'AbsLeptonEta', - 'NJets', - 'NBJets', - 'NBJetsNoWeight', - 'NBJetsUp', - 'NBJetsDown', - 'NBJets_LightUp', - 'NBJets_LightDown', - 'JetPt', - 'RelIso', - # 'sigmaietaieta' - ] + 'HT', + 'MET', + 'ST', + 'WPT', + 'NVertex', + # 'NVertexNoWeight', + # 'NVertexUp', + # 'NVertexDown', + 'LeptonPt', + 'AbsLeptonEta', + 'NJets', + 'NBJets', + + # 'NBJetsNoWeight', + # 'NBJetsUp', + # 'NBJetsDown', + # 'NBJets_LightUp', + # 'NBJets_LightDown', + # 'JetPt', + # 'RelIso', + # 'sigmaietaieta' + ] additional_qcd_plots = [ - 'QCDHT', - 'QCDMET', - 'QCDST', - 'QCDWPT', - 'QCDAbsLeptonEta', - 'QCDLeptonPt', - 'QCDNJets', - - # 'QCDsigmaietaieta', - 'QCDRelIso', - # 
'QCDHT_dataControl_mcSignal', - ] + 'QCDHT', + 'QCDMET', + 'QCDST', + 'QCDWPT', + 'QCDAbsLeptonEta', + 'QCDLeptonPt', + 'QCDNJets', + + # 'QCDsigmaietaieta', + 'QCDRelIso', + # 'QCDHT_dataControl_mcSignal', + ] if make_additional_QCD_plots: include_plots.extend( additional_qcd_plots ) selection = 'Ref selection' # also 'Ref selection NoBSelection' for channel, label in { - 'electron' : 'EPlusJets', - 'muon' : 'MuPlusJets', - 'combined' : 'COMBINED' - }.iteritems() : - b_tag_bin = '2orMoreBtags' + 'electron' : 'EPlusJets', + 'muon' : 'MuPlusJets', + 'combined' : 'COMBINED' + }.iteritems() : # Set folder for this batch of plots + b_tag_bin = '2orMoreBtags' output_folder = output_folder_base + "/Variables/" + selection + "/" make_folder_if_not_exists(output_folder) print '--->', channel @@ -610,7 +690,7 @@ def make_plot( channel, x_axis_title, y_axis_title, x_limits = control_plots_bins['NJets'], nBins = len(control_plots_bins['NJets'])-1, rebin = 1, - legend_location = ( 1, 0.78 ), + legend_location = ( 1.0, 0.78 ), cms_logo_location = 'left', use_qcd_data_region = useQCDControl, log_y = True, @@ -946,232 +1026,232 @@ def make_plot( channel, x_axis_title, y_axis_title, use_qcd_data_region = useQCDControl, ) + ################################################### + # QCD Control Region + ################################################### + for channel, label in { + 'electronQCDNonIso' : 'EPlusJets/QCD non iso e+jets', + 'electronQCDConversions' : 'EPlusJets/QCDConversions', + 'muonQCDNonIso' : 'MuPlusJets/QCD non iso mu+jets 3toInf', + 'muonQCDNonIso2' : 'MuPlusJets/QCD non iso mu+jets 1p5to3', + }.iteritems() : + b_tag_bin = '0btag' + + # Set folder for this batch of plots + output_folder = output_folder_base + "QCDControl/Variables/%s/" % channel + # output_folder = output_folder_base + "QCDControl/Variables/%s/TightElectron/" % channel + make_folder_if_not_exists(output_folder) + + print 'Control region :',label + + treeName = 'EPlusJets/QCD non iso e+jets' + 
signalTreeName = 'EPlusJets/Ref selection' + if channel == 'electronQCDConversions': + treeName = 'EPlusJets/QCDConversions' + elif channel == 'muonQCDNonIso': + treeName = 'MuPlusJets/QCD non iso mu+jets 3toInf' + signalTreeName = 'MuPlusJets/Ref selection' + elif channel == 'muonQCDNonIso2': + treeName = 'MuPlusJets/QCD non iso mu+jets 1p5to3' + signalTreeName = 'MuPlusJets/Ref selection' + + ################################################### + # HT + ################################################### + norm_variable = 'HT' + if 'QCDHT' in include_plots: + print '---> QCD HT' + make_plot( channel, + x_axis_title = '$%s$ [GeV]' % variables_latex['HT'], + y_axis_title = 'Events/(%i GeV)' % binWidth(control_plots_bins['HT']), + signal_region_tree = 'TTbar_plus_X_analysis/%s/FitVariables' % treeName, + control_region_tree = 'TTbar_plus_X_analysis/%s/FitVariables' % treeName, + branchName = 'HT', + name_prefix = '%s_HT_' % channel, + x_limits = control_plots_bins['HT'], + nBins = 20, + rebin = 1, + legend_location = ( 0.95, 0.78 ), + cms_logo_location = 'right', + ) + + if 'QCDHT_dataControl_mcSignal' in include_plots: + print '---> QCD HT data to signal QCD' + make_plot( channel, + x_axis_title = '$%s$ [GeV]' % variables_latex['HT'], + y_axis_title = 'Events/(%i GeV)' % binWidth(control_plots_bins['HT']), + signal_region_tree = 'TTbar_plus_X_analysis/%s/FitVariables' % signalTreeName, + control_region_tree = 'TTbar_plus_X_analysis/%s/FitVariables' % treeName, + branchName = 'HT', + name_prefix = '%s_HT_' % channel, + x_limits = control_plots_bins['HT'], + nBins = 20, + rebin = 1, + legend_location = ( 0.95, 0.78 ), + cms_logo_location = 'right', + ) + ################################################### - # QCD Control Region + # MET ################################################### - for channel, label in { - 'electronQCDNonIso' : 'EPlusJets/QCD non iso e+jets', - 'electronQCDConversions' : 'EPlusJets/QCDConversions', - # 'muonQCDNonIso' : 'MuPlusJets/QCD non iso 
mu+jets 3toInf', - # 'muonQCDNonIso2' : 'MuPlusJets/QCD non iso mu+jets 1p5to3', - }.iteritems() : - b_tag_bin = '0btag' - - # Set folder for this batch of plots - output_folder = output_folder_base + "QCDControl/Variables/%s/" % channel - # output_folder = output_folder_base + "QCDControl/Variables/%s/TightElectron/" % channel - make_folder_if_not_exists(output_folder) - - print 'Control region :',label - - treeName = 'EPlusJets/QCD non iso e+jets' - signalTreeName = 'EPlusJets/Ref selection' - if channel == 'electronQCDConversions': - treeName = 'EPlusJets/QCDConversions' - elif channel == 'muonQCDNonIso': - treeName = 'MuPlusJets/QCD non iso mu+jets 3toInf' - signalTreeName = 'MuPlusJets/Ref selection' - elif channel == 'muonQCDNonIso2': - treeName = 'MuPlusJets/QCD non iso mu+jets 1p5to3' - signalTreeName = 'MuPlusJets/Ref selection' - - ################################################### - # HT - ################################################### - norm_variable = 'HT' - if 'QCDHT' in include_plots: - print '---> QCD HT' - make_plot( channel, - x_axis_title = '$%s$ [GeV]' % variables_latex['HT'], - y_axis_title = 'Events/(%i GeV)' % binWidth(control_plots_bins['HT']), - signal_region_tree = 'TTbar_plus_X_analysis/%s/FitVariables' % treeName, - control_region_tree = 'TTbar_plus_X_analysis/%s/FitVariables' % treeName, - branchName = 'HT', - name_prefix = '%s_HT_' % channel, - x_limits = control_plots_bins['HT'], - nBins = 20, - rebin = 1, - legend_location = ( 0.95, 0.78 ), - cms_logo_location = 'right', - ) - - if 'QCDHT_dataControl_mcSignal' in include_plots: - print '---> QCD HT data to signal QCD' - make_plot( channel, - x_axis_title = '$%s$ [GeV]' % variables_latex['HT'], - y_axis_title = 'Events/(%i GeV)' % binWidth(control_plots_bins['HT']), - signal_region_tree = 'TTbar_plus_X_analysis/%s/FitVariables' % signalTreeName, - control_region_tree = 'TTbar_plus_X_analysis/%s/FitVariables' % treeName, - branchName = 'HT', - name_prefix = '%s_HT_' % channel, - 
x_limits = control_plots_bins['HT'], - nBins = 20, - rebin = 1, - legend_location = ( 0.95, 0.78 ), - cms_logo_location = 'right', - ) - - ################################################### - # MET - ################################################### - norm_variable = 'MET' - if 'QCDMET' in include_plots: - print '---> QCD MET' - make_plot( channel, - x_axis_title = '$%s$ [GeV]' % variables_latex['MET'], - y_axis_title = 'Events/(%i GeV)' % binWidth(control_plots_bins['MET']), - signal_region_tree = 'TTbar_plus_X_analysis/%s/FitVariables' % treeName, - control_region_tree = 'TTbar_plus_X_analysis/%s/FitVariables' % treeName, - branchName = 'MET', - name_prefix = '%s_MET_' % channel, - x_limits = control_plots_bins['MET'], - nBins = 20, - rebin = 1, - legend_location = ( 0.95, 0.78 ), - cms_logo_location = 'right', - ) - - ################################################### - # ST - ################################################### - norm_variable = 'ST' - if 'QCDST' in include_plots: - print '---> QCD ST' - make_plot( channel, - x_axis_title = '$%s$ [GeV]' % variables_latex['ST'], - y_axis_title = 'Events/(%i GeV)' % binWidth(control_plots_bins['ST']), - signal_region_tree = 'TTbar_plus_X_analysis/%s/FitVariables' % treeName, - control_region_tree = 'TTbar_plus_X_analysis/%s/FitVariables' % treeName, - branchName = 'ST', - name_prefix = '%s_ST_' % channel, - x_limits = control_plots_bins['ST'], - nBins = 20, - rebin = 1, - legend_location = ( 0.95, 0.78 ), - cms_logo_location = 'right', - ) - - ################################################### - # WPT - ################################################### - norm_variable = 'WPT' - if 'QCDWPT' in include_plots: - print '---> QCD WPT' - make_plot( channel, - x_axis_title = '$%s$ [GeV]' % variables_latex['WPT'], - y_axis_title = 'Events/(%i GeV)' % binWidth(control_plots_bins['WPT']), - signal_region_tree = 'TTbar_plus_X_analysis/%s/FitVariables' % treeName, - control_region_tree = 
'TTbar_plus_X_analysis/%s/FitVariables' % treeName, - branchName = 'WPT', - name_prefix = '%s_WPT_' % channel, - x_limits = control_plots_bins['WPT'], - nBins = 20, - rebin = 1, - legend_location = ( 0.95, 0.78 ), - cms_logo_location = 'right', - ) - - ################################################### - # Abs Lepton Eta - ################################################### - if 'QCDAbsLeptonEta' in include_plots: - print '---> QCD Abs Lepton Eta' - make_plot( channel, - x_axis_title = '$%s$' % control_plots_latex['eta'], - y_axis_title = 'Events/(%.1f)' % binWidth(control_plots_bins['AbsLeptonEtaQCD']), - signal_region_tree = 'TTbar_plus_X_analysis/%s/FitVariables' % ( treeName ), - control_region_tree = 'TTbar_plus_X_analysis/%s/FitVariables' % ( treeName ), - branchName = 'abs(lepton_eta)', - name_prefix = '%s_AbsLeptonEta_' % channel, - x_limits = control_plots_bins['AbsLeptonEtaQCD'], - nBins = len(control_plots_bins['AbsLeptonEtaQCD'])-1, - rebin = 1, - legend_location = ( 0.95, 0.78 ), - cms_logo_location = 'right', - ) - - ################################################### - # Lepton Pt - ################################################### - if 'QCDLeptonPt' in include_plots: - print '---> QCD Lepton Pt' - binsLabel = 'ElectronPt' - if channel == 'muon': - binsLabel = 'MuonPt' - - make_plot( channel, - x_axis_title = '$%s$' % control_plots_latex['pt'], - y_axis_title = 'Events/(%i GeV)' % binWidth(control_plots_bins[binsLabel]), - signal_region_tree = 'TTbar_plus_X_analysis/%s/FitVariables' % ( treeName ), - control_region_tree = 'TTbar_plus_X_analysis/%s/FitVariables' % ( treeName ), - branchName = 'lepton_pt', - name_prefix = '%s_LeptonPt_' % channel, - x_limits = control_plots_bins[binsLabel], - nBins = len(control_plots_bins[binsLabel])-1, - rebin = 1, - legend_location = ( 0.95, 0.78 ), - cms_logo_location = 'right', - ) - - ################################################### - # NJets - ################################################### - if 
'QCDNJets' in include_plots: - print '---> QCD NJets' - make_plot( channel, - x_axis_title = '$%s$' % control_plots_latex['NJets'], - y_axis_title = 'Events', - signal_region_tree = 'TTbar_plus_X_analysis/%s/FitVariables' % ( treeName ), - control_region_tree = 'TTbar_plus_X_analysis/%s/FitVariables' % ( treeName ), - branchName = 'NJets', - name_prefix = '%s_NJets_' % channel, - x_limits = control_plots_bins['NJets'], - nBins = len(control_plots_bins['NJets'])-1, - rebin = 1, - legend_location = ( 0.95, 0.78 ), - cms_logo_location = 'right', - ) - - # # Set folder for this batch of plots - # output_folder = output_folder_base + "QCDControl/Control/%s/" % channel - # # output_folder = output_folder_base + "QCDControl/Control/%s/TightElectron/" % channel - # make_folder_if_not_exists(output_folder) - # ################################################### - # # Rel iso - # ################################################### - if 'QCDRelIso' in include_plots: - print '---> QCD Rel iso' - make_plot( channel, - x_axis_title = '$%s$' % control_plots_latex['relIso'], - y_axis_title = 'Events', - signal_region_tree = 'TTbar_plus_X_analysis/%s/FitVariables' % label, - control_region_tree = 'TTbar_plus_X_analysis/%s/FitVariables' % label, - branchName = '%s' % 'lepton_isolation', - name_prefix = '%s_relIso_' % channel, - x_limits = control_plots_bins['relIsoQCD'], - nBins = len(control_plots_bins['relIsoQCD'])-1, - rebin = 1, - legend_location = ( 0.95, 0.78 ), - cms_logo_location = 'right', - ) - # ################################################### - # # Sigma ieta ieta - # ################################################### - - # norm_variable = 'sigmaietaieta' - # if 'QCDsigmaietaieta' in include_plots and not 'MuPlusJets' in treeName: - # print '---> sigmaietaieta' - # make_plot( channel, - # x_axis_title = '$%s$' % variables_latex['sigmaietaieta'], - # y_axis_title = 'Events/(%i GeV)' % binWidth(control_plots_bins['sigmaietaieta']), - # signal_region_tree = 
'TTbar_plus_X_analysis/%s/FitVariables' % ( treeName ), - # control_region_tree = 'TTbar_plus_X_analysis/%s/FitVariables' % ( treeName ), - # branchName = 'sigmaIetaIeta', - # name_prefix = '%s_sigmaIetaIeta_' % channel, - # x_limits = control_plots_bins['sigmaietaieta'], - # y_max_scale = 1.5, - # nBins = len(control_plots_bins['sigmaietaieta'])-1, - # rebin = 1, - # legend_location = ( 0.95, 0.85 ), - # cms_logo_location = 'left', - # ) + norm_variable = 'MET' + if 'QCDMET' in include_plots: + print '---> QCD MET' + make_plot( channel, + x_axis_title = '$%s$ [GeV]' % variables_latex['MET'], + y_axis_title = 'Events/(%i GeV)' % binWidth(control_plots_bins['MET']), + signal_region_tree = 'TTbar_plus_X_analysis/%s/FitVariables' % treeName, + control_region_tree = 'TTbar_plus_X_analysis/%s/FitVariables' % treeName, + branchName = 'MET', + name_prefix = '%s_MET_' % channel, + x_limits = control_plots_bins['MET'], + nBins = 20, + rebin = 1, + legend_location = ( 0.95, 0.78 ), + cms_logo_location = 'right', + ) + + ################################################### + # ST + ################################################### + norm_variable = 'ST' + if 'QCDST' in include_plots: + print '---> QCD ST' + make_plot( channel, + x_axis_title = '$%s$ [GeV]' % variables_latex['ST'], + y_axis_title = 'Events/(%i GeV)' % binWidth(control_plots_bins['ST']), + signal_region_tree = 'TTbar_plus_X_analysis/%s/FitVariables' % treeName, + control_region_tree = 'TTbar_plus_X_analysis/%s/FitVariables' % treeName, + branchName = 'ST', + name_prefix = '%s_ST_' % channel, + x_limits = control_plots_bins['ST'], + nBins = 20, + rebin = 1, + legend_location = ( 0.95, 0.78 ), + cms_logo_location = 'right', + ) + + ################################################### + # WPT + ################################################### + norm_variable = 'WPT' + if 'QCDWPT' in include_plots: + print '---> QCD WPT' + make_plot( channel, + x_axis_title = '$%s$ [GeV]' % variables_latex['WPT'], + y_axis_title 
= 'Events/(%i GeV)' % binWidth(control_plots_bins['WPT']), + signal_region_tree = 'TTbar_plus_X_analysis/%s/FitVariables' % treeName, + control_region_tree = 'TTbar_plus_X_analysis/%s/FitVariables' % treeName, + branchName = 'WPT', + name_prefix = '%s_WPT_' % channel, + x_limits = control_plots_bins['WPT'], + nBins = 20, + rebin = 1, + legend_location = ( 0.95, 0.78 ), + cms_logo_location = 'right', + ) + + ################################################### + # Abs Lepton Eta + ################################################### + if 'QCDAbsLeptonEta' in include_plots: + print '---> QCD Abs Lepton Eta' + make_plot( channel, + x_axis_title = '$%s$' % control_plots_latex['eta'], + y_axis_title = 'Events/(%.1f)' % binWidth(control_plots_bins['AbsLeptonEtaQCD']), + signal_region_tree = 'TTbar_plus_X_analysis/%s/FitVariables' % ( treeName ), + control_region_tree = 'TTbar_plus_X_analysis/%s/FitVariables' % ( treeName ), + branchName = 'abs(lepton_eta)', + name_prefix = '%s_AbsLeptonEta_' % channel, + x_limits = control_plots_bins['AbsLeptonEtaQCD'], + nBins = len(control_plots_bins['AbsLeptonEtaQCD'])-1, + rebin = 1, + legend_location = ( 0.95, 0.78 ), + cms_logo_location = 'right', + ) + + ################################################### + # Lepton Pt + ################################################### + if 'QCDLeptonPt' in include_plots: + print '---> QCD Lepton Pt' + binsLabel = 'ElectronPt' + if channel == 'muon': + binsLabel = 'MuonPt' + + make_plot( channel, + x_axis_title = '$%s$' % control_plots_latex['pt'], + y_axis_title = 'Events/(%i GeV)' % binWidth(control_plots_bins[binsLabel]), + signal_region_tree = 'TTbar_plus_X_analysis/%s/FitVariables' % ( treeName ), + control_region_tree = 'TTbar_plus_X_analysis/%s/FitVariables' % ( treeName ), + branchName = 'lepton_pt', + name_prefix = '%s_LeptonPt_' % channel, + x_limits = control_plots_bins[binsLabel], + nBins = len(control_plots_bins[binsLabel])-1, + rebin = 1, + legend_location = ( 0.95, 0.78 ), + 
cms_logo_location = 'right', + ) + + ################################################### + # NJets + ################################################### + if 'QCDNJets' in include_plots: + print '---> QCD NJets' + make_plot( channel, + x_axis_title = '$%s$' % control_plots_latex['NJets'], + y_axis_title = 'Events', + signal_region_tree = 'TTbar_plus_X_analysis/%s/FitVariables' % ( treeName ), + control_region_tree = 'TTbar_plus_X_analysis/%s/FitVariables' % ( treeName ), + branchName = 'NJets', + name_prefix = '%s_NJets_' % channel, + x_limits = control_plots_bins['NJets'], + nBins = len(control_plots_bins['NJets'])-1, + rebin = 1, + legend_location = ( 0.95, 0.78 ), + cms_logo_location = 'right', + ) + + # # Set folder for this batch of plots + # output_folder = output_folder_base + "QCDControl/Control/%s/" % channel + # # output_folder = output_folder_base + "QCDControl/Control/%s/TightElectron/" % channel + # make_folder_if_not_exists(output_folder) + # ################################################### + # # Rel iso + # ################################################### + if 'QCDRelIso' in include_plots: + print '---> QCD Rel iso' + make_plot( channel, + x_axis_title = '$%s$' % control_plots_latex['relIso'], + y_axis_title = 'Events', + signal_region_tree = 'TTbar_plus_X_analysis/%s/FitVariables' % label, + control_region_tree = 'TTbar_plus_X_analysis/%s/FitVariables' % label, + branchName = '%s' % 'lepton_isolation', + name_prefix = '%s_relIso_' % channel, + x_limits = control_plots_bins['relIsoQCD'], + nBins = len(control_plots_bins['relIsoQCD'])-1, + rebin = 1, + legend_location = ( 0.95, 0.78 ), + cms_logo_location = 'right', + ) + # ################################################### + # # Sigma ieta ieta + # ################################################### + + # norm_variable = 'sigmaietaieta' + # if 'QCDsigmaietaieta' in include_plots and not 'MuPlusJets' in treeName: + # print '---> sigmaietaieta' + # make_plot( channel, + # x_axis_title = '$%s$' % 
variables_latex['sigmaietaieta'], + # y_axis_title = 'Events/(%i GeV)' % binWidth(control_plots_bins['sigmaietaieta']), + # signal_region_tree = 'TTbar_plus_X_analysis/%s/FitVariables' % ( treeName ), + # control_region_tree = 'TTbar_plus_X_analysis/%s/FitVariables' % ( treeName ), + # branchName = 'sigmaIetaIeta', + # name_prefix = '%s_sigmaIetaIeta_' % channel, + # x_limits = control_plots_bins['sigmaietaieta'], + # y_max_scale = 1.5, + # nBins = len(control_plots_bins['sigmaietaieta'])-1, + # rebin = 1, + # legend_location = ( 0.95, 0.85 ), + # cms_logo_location = 'left', + # ) diff --git a/dps/condor/prepare_dps.sh b/dps/condor/prepare_dps.sh index acb1bf99..5414b097 100755 --- a/dps/condor/prepare_dps.sh +++ b/dps/condor/prepare_dps.sh @@ -8,9 +8,10 @@ if [ -f "dps.tar" ]; then fi echo "... creating tar file (dps.tar)" mkdir -p jobs -tar -zcf dps.tar bin dps config jobs src tools experimental \ +tar -zcf dps.tar dps bin config jobs \ --exclude="*.pyc" --exclude="jobs/*/logs" \ ---exclude "*.tar" --exclude="config/unfolding" --exclude="experimental/topReco" +--exclude="*.tar" --exclude="config/unfolding" \ +--exclude="dps/legacy/*" # hadoop fs -mkdir -p $1 # hadoop fs -copyFromLocal dps.tar $1 diff --git a/dps/config/variable_binning.py b/dps/config/variable_binning.py index 731e4d98..b33da4b3 100644 --- a/dps/config/variable_binning.py +++ b/dps/config/variable_binning.py @@ -64,19 +64,19 @@ def produce_reco_bin_edges( gen_bin_edges ): control_plots_bins = { 'NJets' : [i + 0.5 for i in range ( 3, 16 + 1 )], 'JetPt' : [i * 5 for i in range ( 5, 40 )], - 'MuonPt' : [i * 24 for i in range ( 1, 10 )], - 'ElectronPt' : [i * 10 for i in range ( 1, 20 )], + 'MuonPt' : [i * 10 for i in range ( 1, 40 )], + 'ElectronPt' : [i * 10 for i in range ( 1, 40 )], 'LeptonEta' : [i*0.5 for i in range( -25, 25 )], 'AbsLeptonEta' : [i*0.1 for i in range( 0, 25 )], 'NBJets' : [i - 0.5 for i in range ( 0, 6 + 1 )], 'NVertex' : [i for i in range ( 0,40 + 1 )], 'relIso' : [i*0.01 for 
i in range(0,20)], - 'relIsoQCD' : [i*0.05 for i in range(0,20)], + 'relIsoQCD' : [i*0.025 for i in range(0,40)], 'AbsLeptonEtaQCD' : [i*0.2 for i in range( 0, 24 )], - 'MET' : [i * 15 for i in range ( 0, 21 )], - 'WPT' : [i * 25 for i in range ( 0, 17 )], - 'HT' : [i * 50 for i in range ( 0, 21 )], - 'ST' : [i * 50 for i in range ( 2, 25 )], + 'MET' : [i * 15 for i in range ( 0, 40 )], + 'WPT' : [i * 25 for i in range ( 0, 35 )], + 'HT' : [i * 50 for i in range ( 0, 40 )], + 'ST' : [i * 50 for i in range ( 2, 40 )], 'sigmaietaieta' : [i * 0.002 for i in range ( 0, 20 )], } diff --git a/dps/config/xsection.py b/dps/config/xsection.py index 232a7ed0..39d69afd 100644 --- a/dps/config/xsection.py +++ b/dps/config/xsection.py @@ -21,7 +21,6 @@ class XSectionConfig(): 'general_category_templates_trees', 'generator_systematic_vjets_templates', 'generator_systematics', - 'generator_mcsamples', 'higgs_category_templates', 'higgs_file', 'include_higgs', 'tau_values_electron', 'tau_values_muon', @@ -49,13 +48,16 @@ class XSectionConfig(): 'vjets_theory_systematic_prefix' ] + # Used in 01 samples = [ - 'TTJet', + 'data', + 'TTBar', 'V+Jets', 'SingleTop', 'QCD' ] + # Used in 01 variables = [ 'HT', 'MET', @@ -66,6 +68,7 @@ class XSectionConfig(): 'abs_lepton_eta' ] + # Used in 01 variables_no_met = [ 'HT', 'NJets', @@ -86,16 +89,16 @@ def __init__( self, centre_of_mass_energy ): def __fill_defaults__( self ): self.met_type = 'patType1CorrectedPFMet' - # self.path_to_files = self.current_analysis_path + str( self.centre_of_mass_energy ) + 'TeV/2016/' self.path_to_files = self.current_analysis_path - self.path_to_unfolding_histograms = '/hdfs/TopQuarkGroup/run2/unfolding/13TeV/Moriond2017/' - path_to_files = self.path_to_files + + self.path_to_unfolding_histograms = '/hdfs/TopQuarkGroup/run2/unfolding/13TeV/Moriond2017/' + # self.path_to_unfolding_histograms = 'unfolding/13TeV/' path_to_unfolding_histograms = self.path_to_unfolding_histograms self.luminosity = 
self.luminosities[self.centre_of_mass_energy] - # general + # Used in 01 self.met_systematics = { 'JER_up' : 0, 'JER_down' : 1, @@ -109,22 +112,31 @@ def __fill_defaults__( self ): 'TauEnDown' : 9, 'UnclusteredEnUp' : 10, 'UnclusteredEnDown' : 11, - # 'ElectronEn_up' : 6, - # 'ElectronEn_down' : 7, - # 'MuonEn_up' : 4, - # 'MuonEn_down' : 5, - # 'TauEn_up' : 8, - # 'TauEn_down' : 9, - # 'UnclusteredEn_up' : 10, - # 'UnclusteredEn_down' : 11, } + # Remove? + self.met_systematics_suffixes = self.met_systematics.keys() + + # Used in 01 - combine with self.met_systematics? + self.met_specific_systematics = [ + 'ElectronEnUp', + 'ElectronEnDown', + 'MuonEnUp', + 'MuonEnDown', + 'TauEnUp', + 'TauEnDown', + 'UnclusteredEnUp', + 'UnclusteredEnDown', + ] + + self.analysis_types = { - 'electron':'EPlusJets', - 'muon':'MuPlusJets', - 'combined':'combined' + 'electron' : 'EPlusJets', + 'muon' : 'MuPlusJets', + 'combined' : 'combined', } + # Needed? Where? # measurement script options self.translate_options = { 'all':'', @@ -142,66 +154,125 @@ def __fill_defaults__( self ): 'type1':'patType1CorrectedPFMet', } + # Needed? 
self.ttbar_theory_systematic_prefix = 'TTJets_' self.vjets_theory_systematic_prefix = 'VJets_' # files self.middle = '_' + str( self.luminosity ) + 'pb_PFElectron_PFMuon_PF2PATJets_PFMET' middle = self.middle - self.data_file_muon = path_to_files + 'data_muon_tree.root' - self.data_file_electron = path_to_files + 'data_electron_tree.root' + # self.data_file_muon = path_to_files + 'data_muon_tree.root' + # self.data_file_electron = path_to_files + 'data_electron_tree.root' + self.data_file_muon = '/hdfs/TopQuarkGroup/ec6821/1.0.0/atOutput/combined/data_muon_tree.root' + self.data_file_electron = '/hdfs/TopQuarkGroup/ec6821/1.0.0/atOutput/combined/data_electron_tree.root' + + self.higgs_file = path_to_files + 'central/TTH_Inclusive_M-125' + middle + '.root' + + # self.categories_and_prefixes = { + # 'central':'', + # 'Electron_down':'ElectronDown', + # 'Electron_up':'ElectronUp', + # 'Muon_down':'MuonDown', + # 'Muon_up':'MuonUp', + # 'BJet_down':'BJetDown', + # 'BJet_up':'BJetUp', + # 'LightJet_down':'LightJetDown', + # 'LightJet_up':'LightJetUp', + # 'JES_down':'_JESDown', + # 'JES_up':'_JESUp', + # # 'JES_down_alphaCorr':'_JESDown_alphaCorr', + # # 'JES_up_alphaCorr':'_JESUp_alphaCorr', + # 'JER_down':'_JERDown', + # 'JER_up':'_JERUp', + + # 'PileUp_up' : '', + # 'PileUp_down' : '', + + # # Other MET uncertainties not already included + # 'ElectronEnUp' : '', + # 'ElectronEnDown' : '', + # 'MuonEnUp' : '', + # 'MuonEnDown' : '', + # 'TauEnUp' : '', + # 'TauEnDown' : '', + # 'UnclusteredEnUp' : '', + # 'UnclusteredEnDown' : '', + # } + + # Used in 01 + # Rename to normalisation_measurements? 
+ self.normalisation_systematics = [ + 'central', + + 'JES_up', + 'JES_down', + 'JER_up', + 'JER_down', + + 'BJet_up', + 'BJet_down', + 'LightJet_up', + 'LightJet_down', + + 'PileUp_up', + 'PileUp_down', + + 'Electron_up', + 'Electron_down', + 'Muon_up', + 'Muon_down', - self.data_file_muon_trees = path_to_files + 'data_muon_tree.root' - # self.data_file_muon_trees = '/storage/ec6821/AnalysisTools/CMSSW_8_0_17/src/tree_SingleMuon_15930pb_PFElectron_PFMuon_PF2PATJets_MET_201.root' - self.data_file_electron_trees = path_to_files + 'data_electron_tree.root' + 'ElectronEnUp', + 'ElectronEnDown', + 'MuonEnUp', + 'MuonEnDown', + 'TauEnUp', + 'TauEnDown', + 'UnclusteredEnUp', + 'UnclusteredEnDown', - self.muon_QCD_file = path_to_files + 'QCD_data_mu.root' - self.SingleTop_file = path_to_files + 'SingleTop.root' - self.electron_QCD_MC_file = path_to_files + 'QCD_Electron.root' - self.muon_QCD_MC_file = path_to_files + 'QCD_data_mu.root' + 'luminosity+', + 'luminosity-', - self.SingleTop_tree_file = path_to_files + 'SingleTop_tree.root' - self.muon_QCD_tree_file = path_to_files + 'QCD_Muon_tree.root' - self.electron_QCD_MC_tree_file = path_to_files + 'QCD_Electron_tree.root' - self.muon_QCD_MC_tree_file = path_to_files + 'QCD_Muon_tree.root' + 'V+Jets_cross_section-', + 'V+Jets_cross_section+', + 'SingleTop_cross_section+', + 'SingleTop_cross_section-', - self.higgs_file = path_to_files + 'central/TTH_Inclusive_M-125' + middle + '.root' + 'QCD_cross_section', + 'QCD_shape', + ] - self.categories_and_prefixes = { - 'central':'', - 'Electron_down':'ElectronDown', - 'Electron_up':'ElectronUp', - 'Muon_down':'MuonDown', - 'Muon_up':'MuonUp', - 'BJet_down':'BJetDown', - 'BJet_up':'BJetUp', - 'LightJet_down':'LightJetDown', - 'LightJet_up':'LightJetUp', - 'JES_down':'_JESDown', - 'JES_up':'_JESUp', - # 'JES_down_alphaCorr':'_JESDown_alphaCorr', - # 'JES_up_alphaCorr':'_JESUp_alphaCorr', - 'JER_down':'_JERDown', - 'JER_up':'_JERUp', - - 'PileUp_up' : '', - 'PileUp_down' : '', - - 
# Other MET uncertainties not already included - 'ElectronEnUp' : '', - 'ElectronEnDown' : '', - 'MuonEnUp' : '', - 'MuonEnDown' : '', - 'TauEnUp' : '', - 'TauEnDown' : '', - 'UnclusteredEnUp' : '', - 'UnclusteredEnDown' : '', - } + # Rename to generator_measurements? + self.generator_systematics = [ + 'TTJets_massup', + 'TTJets_massdown', + 'TTJets_alphaSup', + 'TTJets_alphaSdown', + 'TTJets_hadronisation', + 'TTJets_topPt', + 'TTJets_factorisationup', + 'TTJets_factorisationdown', + 'TTJets_renormalisationup', + 'TTJets_renormalisationdown', + 'TTJets_combinedup', + 'TTJets_combineddown', + 'TTJets_matchingup', + 'TTJets_matchingdown', + 'TTJets_fsrup', + 'TTJets_fsrdown', + 'TTJets_isrup', + 'TTJets_isrdown', + 'TTJets_ueup', + 'TTJets_uedown' + ] + + self.measurements = self.normalisation_systematics + self.generator_systematics self.list_of_systematics = { # Theoretical Uncertainties (Rate Changing) 'V+Jets_cross_section' : ['V+Jets_cross_section+', 'V+Jets_cross_section-'], - 'QCD_cross_section' : ['QCD_cross_section+', 'QCD_cross_section-'], + 'QCD_cross_section' : ['QCD_cross_section', 'QCD_cross_section'], 'SingleTop_cross_section' : ['SingleTop_cross_section+', 'SingleTop_cross_section-'], 'luminosity' : ['luminosity+', 'luminosity-'], # QCD Shape @@ -209,13 +280,16 @@ def __fill_defaults__( self ): # Generator Uncertainties 'TTJets_mass' : ['TTJets_massup', 'TTJets_massdown'], 'TTJets_hadronisation' : ['TTJets_hadronisation', 'TTJets_hadronisation'], - 'TTJets_ue' : ['TTJets_ueup', 'TTJets_uedown'], + 'TTJets_ue' : ['TTJets_ueup', 'TTJets_uedown'], + 'TTJets_topPt' : ['TTJets_topPt', 'TTJets_topPt'], 'TTJets_envelope' : ['TTJets_factorisationup', 'TTJets_factorisationdown', 'TTJets_renormalisationup', 'TTJets_renormalisationdown', 'TTJets_combinedup', 'TTJets_combineddown', 'TTJets_fsrup', 'TTJets_fsrdown', - 'TTJets_isrup', 'TTJets_isrdown', - ], + 'TTJets_isrup', 'TTJets_isrdown'], + + 'TTJets_alphaS' : ['TTJets_alphaSup', 'TTJets_alphaSdown'], + 
'TTJets_matching' : ['TTJets_matchingup', 'TTJets_matchingdown'], # Event Reweighting 'PileUp' : ['PileUp_up', 'PileUp_down'], @@ -234,85 +308,15 @@ def __fill_defaults__( self ): 'TauEn' : ['TauEnUp', 'TauEnDown'], 'UnclusteredEn' : ['UnclusteredEnUp', 'UnclusteredEnDown'], # Top Reweighting Uncertainties - # 'Top_pt_reweight' : ['Top_pt_reweight_up', 'Top_pt_reweight_down'], + # 'Top_pt_reweight' : ['Top_pt_reweight', 'Top_pt_reweight'], # 'Top_eta_reweight' : ['Top_eta_reweight_up', 'Top_eta_reweight_down'], } - self.met_specific_systematics = [ - 'ElectronEnUp', - 'ElectronEnDown', - 'MuonEnUp', - 'MuonEnDown', - 'TauEnUp', - 'TauEnDown', - 'UnclusteredEnUp', - 'UnclusteredEnDown', - ] - - - - self.met_systematics_suffixes = self.met_systematics.keys() - # now fill in the centre of mass dependent values self.__fill_defaults_13TeV__() - self.generator_systematics = [ - 'scaleup', 'scaledown', - 'massup', 'massdown', - 'hadronisation', - 'NLOgenerator', - 'factorisationup', 'factorisationdown', - 'renormalisationup', 'renormalisationdown', - 'combinedup', 'combineddown', - 'fsrup', 'fsrdown', - 'isrup', 'isrdown', - 'ueup', 'uedown' - ] - - self.generator_mcsamples = [ - 'PowhegPythia8', - 'powhegHerwigpp', - 'amc', - 'amcatnloHerwigpp', - 'madgraph' - ] - - self.rate_changing_systematics_values = {} - for systematic in self.rate_changing_systematics.keys(): - affected_samples = XSectionConfig.samples # all samples - if 'SingleTop' in systematic: - affected_samples = ['SingleTop'] - if 'TTJet' in systematic: - affected_samples = ['TTJet'] - if 'VJets' in systematic: - affected_samples = ['V+Jets'] - if 'QCD' in systematic: - affected_samples = ['QCD'] - - sp = dps.utils.measurement.Systematic( - systematic + '+', - # systematic + '_up', - stype = dps.utils.measurement.Systematic.RATE, - affected_samples = affected_samples, - scale = 1 + self.rate_changing_systematics[systematic], - ) - scale = 1 - self.rate_changing_systematics[systematic] - if scale <= 0: scale 
= 10e-5 - - sm = dps.utils.measurement.Systematic( - systematic + '-', - # systematic + '_down', - stype = dps.utils.measurement.Systematic.RATE, - affected_samples = affected_samples, - scale = scale, - ) - self.rate_changing_systematics_values[sp.name] = sp - self.rate_changing_systematics_values[sm.name] = sm - - self.rate_changing_systematics_names = self.rate_changing_systematics_values.keys() - self.topMass_systematics = [ 'TTJets_massup', 'TTJets_massdown'] - # self.topMass_systematics = [ 'TTJets_mass_up', 'TTJets_mass_down'] + self.topMasses = [ 169.5, 172.5, @@ -320,6 +324,7 @@ def __fill_defaults__( self ): ] self.topMassUncertainty = 1.0 # GeV from https://twiki.cern.ch/twiki/bin/view/LHCPhysics/TtbarNNLO + # Needed? self.central_general_template = path_to_files + 'central/%s' + middle + '.root' self.generator_systematic_vjets_templates = {} for systematic in self.generator_systematics: @@ -329,58 +334,60 @@ def __fill_defaults__( self ): tmp = tmp.format(systematic, self.luminosity) self.generator_systematic_vjets_templates[systematic] = tmp - categories_and_prefixes = self.categories_and_prefixes - generator_mcsamples = self.generator_mcsamples - - # File Templates - self.general_category_templates = {category: path_to_files + category + '/%s' + middle + prefix + '.root' for category, prefix in categories_and_prefixes.iteritems()} - self.ttbar_category_templates = {category: path_to_files + 'TTJets_PowhegPythia8.root' for category, prefix in categories_and_prefixes.iteritems()} - self.SingleTop_category_templates = {category: path_to_files + '/SingleTop.root' for ( category, prefix ) in categories_and_prefixes.iteritems()} - self.VJets_category_templates = {category: path_to_files + '/VJets.root' for ( category, prefix ) in categories_and_prefixes.iteritems()} - self.higgs_category_templates = {category: path_to_files + '/TTH_Inclusive_M-125' + middle + prefix + '.root' for ( category, prefix ) in categories_and_prefixes.iteritems()} - 
self.electron_QCD_MC_category_templates = {category: path_to_files + '/QCD_Electron.root' for ( category, prefix ) in categories_and_prefixes.iteritems()} - self.muon_QCD_MC_category_templates = {category: path_to_files + '/QCD_Muon.root' for ( category, prefix ) in categories_and_prefixes.iteritems()} - - self.general_category_templates_trees = {category: path_to_files + category + '/%s' + middle + prefix + '.root' for category, prefix in categories_and_prefixes.iteritems()} - self.ttbar_category_templates_trees = {category: path_to_files + '/TTJets_PowhegPythia8_tree.root' for category, prefix in categories_and_prefixes.iteritems()} - self.SingleTop_category_templates_trees = {category: path_to_files + '/SingleTop_tree.root' for ( category, prefix ) in categories_and_prefixes.iteritems()} - self.VJets_category_templates_trees = {category: path_to_files + '/VJets_tree.root' for ( category, prefix ) in categories_and_prefixes.iteritems()} - self.electron_QCD_MC_category_templates_trees = {category: path_to_files + '/QCD_Electron_tree.root' for ( category, prefix ) in categories_and_prefixes.iteritems()} - self.muon_QCD_MC_category_templates_trees = {category: path_to_files + '/QCD_Muon_tree.root' for ( category, prefix ) in categories_and_prefixes.iteritems()} - self.ttbar_generator_category_templates_trees = {category: path_to_files + '/TTJets_' + category + '_tree.root' for category in generator_mcsamples} - - self.ttbar_fsrup_category_templates_trees = path_to_files + '/TTJets_PowhegPythia8_fsrup_tree.root' - self.ttbar_fsrdown_category_templates_trees = path_to_files + '/TTJets_PowhegPythia8_fsrdown_tree.root' - self.ttbar_isrup_category_templates_trees = path_to_files + '/TTJets_PowhegPythia8_isrup_tree.root' - self.ttbar_isrdown_category_templates_trees = path_to_files + '/TTJets_PowhegPythia8_isrdown_tree.root' - self.ttbar_ueup_category_templates_trees = path_to_files + '/TTJets_PowhegPythia8_up_tree.root' - self.ttbar_uedown_category_templates_trees = 
path_to_files + '/TTJets_PowhegPythia8_down_tree.root' - - self.ttbar_amc_category_templates_trees = path_to_files + '/TTJets_amc_tree.root' - self.ttbar_madgraph_category_templates_trees = path_to_files + '/TTJets_madgraph_tree.root' - self.ttbar_powhegpythia8_category_templates_trees = path_to_files + '/TTJets_powhegPythia8_tree.root' - self.ttbar_powhegherwigpp_category_templates_trees = path_to_files + '/TTJets_powhegHerwigpp_tree.root' - self.ttbar_amcatnloherwigpp_category_templates_trees = path_to_files + '/TTJets_amcatnloHerwigpp_tree.root' - self.ttbar_mtop1695_category_templates_trees = path_to_files + '/TTJets_PowhegPythia8_mtop1695_tree.root' - self.ttbar_mtop1755_category_templates_trees = path_to_files + '/TTJets_PowhegPythia8_mtop1755_tree.root' - self.ttbar_jesup_category_templates_trees = path_to_files + '/TTJets_PowhegPythia8_plusJES_tree.root' - self.ttbar_jesdown_category_templates_trees = path_to_files + '/TTJets_PowhegPythia8_minusJES_tree.root' - self.ttbar_jerup_category_templates_trees = path_to_files + '/TTJets_PowhegPythia8_plusJER_tree.root' - self.ttbar_jerdown_category_templates_trees = path_to_files + '/TTJets_PowhegPythia8_minusJER_tree.root' - + # categories_and_prefixes = self.categories_and_prefixes + + # Used in 01 + # self.general_trees = { + # category: path_to_files + category + '/%s' + middle + prefix + '.root' for category, prefix in categories_and_prefixes.iteritems()} + self.ttbar_trees = { + category: path_to_files + 'TTJets_PowhegPythia8_tree.root' for category in self.normalisation_systematics} + self.SingleTop_trees = { + category: path_to_files + 'SingleTop_tree.root' for category in self.normalisation_systematics} + self.VJets_trees = { + category: path_to_files + 'VJets_tree.root' for category in self.normalisation_systematics} + self.electron_QCD_MC_trees = { + category: path_to_files + 'QCD_Electron_tree.root' for category in self.normalisation_systematics} + self.muon_QCD_MC_trees = { + category: path_to_files + 
'QCD_Muon_tree.root' for category in self.normalisation_systematics} + + self.ttbar_amc_trees = path_to_files + '/TTJets_amc_tree.root' + self.ttbar_madgraph_trees = path_to_files + '/TTJets_madgraph_tree.root' + self.ttbar_powhegpythia8_trees = path_to_files + '/TTJets_PowhegPythia8_tree.root' + self.ttbar_powhegherwigpp_trees = path_to_files + '/TTJets_powhegHerwigpp_tree.root' + self.ttbar_amcatnloherwigpp_trees = path_to_files + '/TTJets_amcatnloHerwigpp_tree.root' + + self.ttbar_mtop1695_trees = path_to_files + '/TTJets_PowhegPythia8_mtop1695_tree.root' + self.ttbar_mtop1755_trees = path_to_files + '/TTJets_PowhegPythia8_mtop1755_tree.root' + self.ttbar_jesup_trees = path_to_files + '/TTJets_PowhegPythia8_plusJES_tree.root' + self.ttbar_jesdown_trees = path_to_files + '/TTJets_PowhegPythia8_minusJES_tree.root' + self.ttbar_jerup_trees = path_to_files + '/TTJets_PowhegPythia8_plusJER_tree.root' + self.ttbar_jerdown_trees = path_to_files + '/TTJets_PowhegPythia8_minusJER_tree.root' + + # Underlying Event trees + self.ttbar_ueup_trees = path_to_files + '/TTJets_powhegPythia8_up_tree.root' + self.ttbar_uedown_trees = path_to_files + '/TTJets_powhegPythia8_down_tree.root' + # Initial(Final) State Radiation event Trees + self.ttbar_isrup_trees = path_to_files + '/TTJets_powhegPythia8_isrup_tree.root' + self.ttbar_isrdown_trees = path_to_files + '/TTJets_powhegPythia8_isrdown_tree.root' + self.ttbar_fsrup_trees = path_to_files + '/TTJets_powhegPythia8_fsrup_tree.root' + self.ttbar_fsrdown_trees = path_to_files + '/TTJets_powhegPythia8_fsrdown_tree.root' + + + # Needed? 
self.data_muon_category_templates = { 'central': self.data_file_muon, 'JES_up': self.data_file_muon, 'JES_down': self.data_file_muon } - self.data_muon_category_templates_trees = self.data_file_muon_trees + self.data_muon_category_templates_trees = self.data_file_muon self.data_electron_category_templates = { 'central': self.data_file_electron, 'JES_up': self.data_file_electron, 'JES_down': self.data_file_electron, } - self.data_electron_category_templates_trees = self.data_file_electron_trees + self.data_electron_category_templates_trees = self.data_file_electron + # Unfolding MC Different Generator Samples self.unfolding_powheg_pythia8_raw = path_to_unfolding_histograms + 'unfolding_TTJets_%dTeV.root' % self.centre_of_mass_energy @@ -401,8 +408,7 @@ def __fill_defaults__( self ): self.unfolding_central = self.unfolding_powheg_pythia8 - self.unfolding_ptreweight_up = path_to_unfolding_histograms + 'unfolding_TTJets_%dTeV_asymmetric_withTopPtReweighting_up.root' % self.centre_of_mass_energy - self.unfolding_ptreweight_down = path_to_unfolding_histograms + 'unfolding_TTJets_%dTeV_asymmetric_withTopPtReweighting_down.root' % self.centre_of_mass_energy + self.unfolding_ptreweight = path_to_unfolding_histograms + 'unfolding_TTJets_%dTeV_asymmetric_withTopPtReweighting.root' % self.centre_of_mass_energy self.unfolding_renormalisation_down = path_to_unfolding_histograms + 'unfolding_TTJets_%dTeV_asymmetric_05muR1muF.root' % self.centre_of_mass_energy self.unfolding_renormalisation_up = path_to_unfolding_histograms + 'unfolding_TTJets_%dTeV_asymmetric_2muR1muF.root' % self.centre_of_mass_energy @@ -410,14 +416,18 @@ def __fill_defaults__( self ): self.unfolding_factorisation_up = path_to_unfolding_histograms + 'unfolding_TTJets_%dTeV_asymmetric_1muR2muF.root' % self.centre_of_mass_energy self.unfolding_combined_down = path_to_unfolding_histograms + 'unfolding_TTJets_%dTeV_asymmetric_05muR05muF.root' % self.centre_of_mass_energy self.unfolding_combined_up = 
path_to_unfolding_histograms + 'unfolding_TTJets_%dTeV_asymmetric_2muR2muF.root' % self.centre_of_mass_energy - - self.unfolding_fsr_down = path_to_unfolding_histograms + 'unfolding_TTJets_%dTeV_fsrup_asymmetric.root' % self.centre_of_mass_energy + self.unfolding_fsr_down = path_to_unfolding_histograms + 'unfolding_TTJets_%dTeV_fsrdown_asymmetric.root' % self.centre_of_mass_energy self.unfolding_fsr_up = path_to_unfolding_histograms + 'unfolding_TTJets_%dTeV_fsrup_asymmetric.root' % self.centre_of_mass_energy self.unfolding_isr_down = path_to_unfolding_histograms + 'unfolding_TTJets_%dTeV_isrdown_asymmetric.root' % self.centre_of_mass_energy self.unfolding_isr_up = path_to_unfolding_histograms + 'unfolding_TTJets_%dTeV_isrup_asymmetric.root' % self.centre_of_mass_energy self.unfolding_ue_down = path_to_unfolding_histograms + 'unfolding_TTJets_%dTeV_uedown_asymmetric.root' % self.centre_of_mass_energy self.unfolding_ue_up = path_to_unfolding_histograms + 'unfolding_TTJets_%dTeV_ueup_asymmetric.root' % self.centre_of_mass_energy - + self.unfolding_topPtSystematic = path_to_unfolding_histograms + 'unfolding_TTJets_%dTeV_topPtSystematic_asymmetric.root' % self.centre_of_mass_energy + self.unfolding_alphaS_down = path_to_unfolding_histograms + 'unfolding_TTJets_%dTeV_asymmetric_alphaS_down.root' % self.centre_of_mass_energy + self.unfolding_alphaS_up = path_to_unfolding_histograms + 'unfolding_TTJets_%dTeV_asymmetric_alphaS_up.root' % self.centre_of_mass_energy + self.unfolding_matching_down = path_to_unfolding_histograms + 'unfolding_TTJets_%dTeV_asymmetric_matching_down.root' % self.centre_of_mass_energy + self.unfolding_matching_up = path_to_unfolding_histograms + 'unfolding_TTJets_%dTeV_asymmetric_matching_up.root' % self.centre_of_mass_energy + self.unfolding_mass_down = path_to_unfolding_histograms + 'unfolding_TTJets_%dTeV_massdown_asymmetric.root' % self.centre_of_mass_energy self.unfolding_mass_up = path_to_unfolding_histograms + 
'unfolding_TTJets_%dTeV_massup_asymmetric.root' % self.centre_of_mass_energy self.unfolding_Lepton_down = path_to_unfolding_histograms + 'unfolding_TTJets_%dTeV_leptondown_asymmetric.root' % self.centre_of_mass_energy @@ -443,21 +453,25 @@ def __fill_defaults__( self ): self.unfolding_PUSystematic_up = path_to_unfolding_histograms + 'unfolding_TTJets_%dTeV_pileupUp_asymmetric.root' % self.centre_of_mass_energy self.unfolding_PUSystematic_down = path_to_unfolding_histograms + 'unfolding_TTJets_%dTeV_pileupDown_asymmetric.root' % self.centre_of_mass_energy - self.pdfWeightMin = 0 self.pdfWeightMax = 100 self.unfolding_pdfweights = {index : path_to_unfolding_histograms + 'unfolding_TTJets_%dTeV_asymmetric_pdfWeight_%d.root' % (self.centre_of_mass_energy, index) for index in range( self.pdfWeightMin, self.pdfWeightMax )} - self.tree_path_templates = { + # Used in 01 + self.tree_path = { 'electron' : 'TTbar_plus_X_analysis/EPlusJets/Ref selection/FitVariables', - 'muon' : 'TTbar_plus_X_analysis/MuPlusJets/Ref selection/FitVariables' + 'muon' : 'TTbar_plus_X_analysis/MuPlusJets/Ref selection/FitVariables', } - - self.tree_path_control_templates = { - 'electron' : 'TTbar_plus_X_analysis/EPlusJets/QCD non iso e+jets/FitVariables', - 'muon' : 'TTbar_plus_X_analysis/MuPlusJets/QCD non iso mu+jets 3toInf/FitVariables' + self.qcd_control_region = { + 'electron' : 'QCDConversions', + 'muon' : 'QCD non iso mu+jets 1p5to3', + } + self.qcd_shape_syst_region = { + 'electron' : 'QCD non iso e+jets', + 'muon' : 'QCD non iso mu+jets 3toInf', } + # Needed? self.variable_path_templates = { 'MET' : 'TTbar_plus_X_analysis/{channel}/{selection}/FitVariables/MET', 'HT' : 'TTbar_plus_X_analysis/{channel}/{selection}/FitVariables/HT', @@ -473,9 +487,9 @@ def __fill_defaults__( self ): 'abs_bjets_eta': 'TTbar_plus_X_analysis/{channel}/{selection}/Jets/abs(bjet_eta)', } + # Needed? 
self.electron_control_region = 'QCDConversions' self.electron_control_region_systematic = 'QCD non iso e+jets' - self.muon_control_region = 'QCD non iso mu+jets 1p5to3' self.muon_control_region_systematic = 'QCD non iso mu+jets 3toInf' @@ -483,6 +497,7 @@ def __fill_defaults__( self ): self.luminosity_scale = self.new_luminosity / self.luminosity + # Needed? # structure # { summary_name : [(Electron_down, Electron_up)), (TTJets_hadronisation, TTJets_hadronisation) self.typical_systematics_summary = { @@ -509,7 +524,7 @@ def __fill_defaults__( self ): ('luminosity-', 'luminosity+'), ], 'QCD Normalisation': [ - ('QCD_cross_section-', 'QCD_cross_section+'), + ('QCD_cross_section', 'QCD_cross_section'), ], 'QCD shape': [ ('QCD_shape', 'QCD_shape') @@ -548,11 +563,9 @@ def __fill_defaults_13TeV__( self ): self.ttbar_xsection = 831.76 # pb self.rate_changing_systematics = {#TODO check where this is used - 'luminosity': 0.027, # Best guess for 13 TeV 4.8->2.7 + 'luminosity': 0.062, 'SingleTop_cross_section': 0.05, # Currently same as 8 TeV - # 'TTJet_cross_section': 0.043, # Currently same as 8 TeV 'V+Jets_cross_section': 0.5, - 'QCD_cross_section' : 1., } self.tau_values_electron = { diff --git a/dps/experimental/DougsBTagEff/makeBTagEfficiencies.py b/dps/experimental/DougsBTagEff/makeBTagEfficiencies.py index 5c8a5d67..9b8c674d 100644 --- a/dps/experimental/DougsBTagEff/makeBTagEfficiencies.py +++ b/dps/experimental/DougsBTagEff/makeBTagEfficiencies.py @@ -6,6 +6,7 @@ import math import os from optparse import OptionParser +from dps.utils.file_utilities import make_folder_if_not_exists ROOT.gROOT.SetBatch(True) if __name__ == '__main__': @@ -13,22 +14,22 @@ gStyle.SetOptStat("") parser = OptionParser() - parser.add_option("-t", "--test", dest="test", default=False, + parser.add_option("-t", "--test", dest="test", action = "store_true", help="Run over a few events only") - parser.add_option("-p", "--plots", dest="make_plots", default=True, + parser.add_option("-p", 
"--plots", dest="make_plots", action = "store_true", help="Print out files to .png") - parser.add_option("-o", "--only_plots", dest="only_plots", default=False, + parser.add_option("-o", "--only_plots", dest="only_plots", action = "store_true", help="Print out files to .png") (options, args) = parser.parse_args() if options.test : print "RUNNING OVER TEST SAMPLE" - basepath = "/hdfs/TopQuarkGroup/run2/atOutput/13TeV/25ns/20_05_16/" + basepath = "/hdfs/TopQuarkGroup/ec6821/0.0.10/atOutput/combined/" input_files = { 0 : ["TTJets_PowhegPythia8_tree.root" , "PowhegPythia8"], 1 : ["TTJets_powhegHerwigpp_tree.root" , "PowhegHerwigpp"], 2 : ["TTJets_amc_tree.root" , "aMCatNLOPythia8"], - 3 : ["TTJets_amcatnloHerwigpp_tree.root" , "aMCatNLOHerwigpp"], - 4 : ["TTJets_madgraph_tree.root" , "Madgraph"], + 3 : ["TTJets_madgraph_tree.root" , "Madgraph"], + # 4 : ["TTJets_amcatnloHerwigpp_tree.root" , "aMCatNLOHerwigpp"], } partonHists = [ @@ -60,7 +61,6 @@ in_file = input_files[key][0] sample = input_files[key][1] input_file = basepath+in_file - print "Generator : ", sample directory = out_file.mkdir( sample ) @@ -79,7 +79,6 @@ Mu_inputTree = "TTbar_plus_X_analysis/MuPlusJets/Ref selection NoBSelection/BTagEfficiencies/Jets" Mu_Chain = TChain(Mu_inputTree) Mu_Chain.Add(input_file) - Chain = { 0 : E_Chain, 1 : Mu_Chain, @@ -95,6 +94,7 @@ n=n+1 if options.test : if n == 10000 : break + # if n == 10 : break NJets = event.__getattr__("NJets") pt = event.__getattr__("pt") eta = event.__getattr__("eta") @@ -108,14 +108,29 @@ puWeight = event.__getattr__("PUWeight") if key == 0 : leptonWeight = event.__getattr__("ElectronEfficiencyCorrection") else : leptonWeight = event.__getattr__("MuonEfficiencyCorrection") - + + + weight = eventWeight * puWeight * leptonWeight + - if (NJets == 0): continue; + if (NJets <= 0): continue; for JetIndex in range (0,int(NJets)): - - if (pt[JetIndex] < 25): continue; + if (pt[JetIndex] < 30): continue; + if (abs(eta[JetIndex]) > 2.4): continue; + # print 
"- "*10 + # print 'leptonweight chosen ' , leptonWeight + # print 'electron wieght ' ,event.__getattr__("ElectronEfficiencyCorrection") + # print 'muon weight ' ,event.__getattr__("MuonEfficiencyCorrection") + # print "NJets ", NJets + # print "Weight ", weight + # print "eventWeight ", eventWeight + # print "puWeight ", puWeight + # print "lepWeight ", leptonWeight + # print "HadronFlavour ", hadronFlavour[JetIndex] + # print "is Med BTagged", isMedium[JetIndex] + # print "Pt jet", pt[JetIndex] if (hadronFlavour[JetIndex] == 5): bQuarkJets_Total_Hist[sample].Fill(pt[JetIndex], eta[JetIndex], weight) @@ -168,7 +183,7 @@ if options.make_plots: f = TFile("BTagEfficiency.root", "OPEN") - + make_folder_if_not_exists('plots/') for key in range (0, len(input_files)): generator = input_files[key][1] diff --git a/dps/experimental/condor/01b/run01_forAllOptions.py b/dps/experimental/condor/01b/run01_forAllOptions.py index d34e29a9..b37c9f7c 100644 --- a/dps/experimental/condor/01b/run01_forAllOptions.py +++ b/dps/experimental/condor/01b/run01_forAllOptions.py @@ -27,7 +27,7 @@ # 'ttbarRap', ] -jobOptions = ['-v %s -i config/measurements/background_subtraction' % ( var ) for var in vars ] +jobOptions = ['-v %s' % ( var ) for var in vars ] parser = OptionParser("Merge histogram files on DICE") parser.add_option("-n", dest="jobNumber", default=-1, type='int', diff --git a/dps/experimental/howMuchLumiInNtuples.py b/dps/experimental/howMuchLumiInNtuples.py index d7905f7d..b8d89106 100644 --- a/dps/experimental/howMuchLumiInNtuples.py +++ b/dps/experimental/howMuchLumiInNtuples.py @@ -34,5 +34,6 @@ # print run,lumi # print outputJson with open('json.txt', 'w') as outfile: - print json.dumps(outputJson) - json.dump(outputJson, outfile) \ No newline at end of file + print json.dumps(outputJson) + json.dump(outputJson, outfile) + diff --git a/dps/analysis/BLTUnfold/getOutput.py b/dps/legacy/BLTUnfold/getOutput.py similarity index 100% rename from dps/analysis/BLTUnfold/getOutput.py 
rename to dps/legacy/BLTUnfold/getOutput.py diff --git a/dps/analysis/BLTUnfold/getScaleFactors.py b/dps/legacy/BLTUnfold/getScaleFactors.py similarity index 100% rename from dps/analysis/BLTUnfold/getScaleFactors.py rename to dps/legacy/BLTUnfold/getScaleFactors.py diff --git a/dps/analysis/BLTUnfold/runJobsInteractive.py b/dps/legacy/BLTUnfold/runJobsInteractive.py similarity index 100% rename from dps/analysis/BLTUnfold/runJobsInteractive.py rename to dps/legacy/BLTUnfold/runJobsInteractive.py diff --git a/dps/analysis/BLTUnfold/scaleFactors.py b/dps/legacy/BLTUnfold/scaleFactors.py similarity index 100% rename from dps/analysis/BLTUnfold/scaleFactors.py rename to dps/legacy/BLTUnfold/scaleFactors.py diff --git a/dps/analysis/HLT_scripts_for_Sergeys_thesis/__init__.py b/dps/legacy/HLT_scripts_for_Sergeys_thesis/__init__.py similarity index 100% rename from dps/analysis/HLT_scripts_for_Sergeys_thesis/__init__.py rename to dps/legacy/HLT_scripts_for_Sergeys_thesis/__init__.py diff --git a/dps/analysis/HLT_scripts_for_Sergeys_thesis/make_HLT_plots_Sergey.py b/dps/legacy/HLT_scripts_for_Sergeys_thesis/make_HLT_plots_Sergey.py similarity index 100% rename from dps/analysis/HLT_scripts_for_Sergeys_thesis/make_HLT_plots_Sergey.py rename to dps/legacy/HLT_scripts_for_Sergeys_thesis/make_HLT_plots_Sergey.py diff --git a/dps/analysis/HLT_scripts_for_Sergeys_thesis/make_jet_response_plot_pt_bins.py b/dps/legacy/HLT_scripts_for_Sergeys_thesis/make_jet_response_plot_pt_bins.py similarity index 100% rename from dps/analysis/HLT_scripts_for_Sergeys_thesis/make_jet_response_plot_pt_bins.py rename to dps/legacy/HLT_scripts_for_Sergeys_thesis/make_jet_response_plot_pt_bins.py diff --git a/dps/analysis/HLT_scripts_for_Sergeys_thesis/make_jet_response_plots_correction_levels.py b/dps/legacy/HLT_scripts_for_Sergeys_thesis/make_jet_response_plots_correction_levels.py similarity index 100% rename from 
dps/analysis/HLT_scripts_for_Sergeys_thesis/make_jet_response_plots_correction_levels.py rename to dps/legacy/HLT_scripts_for_Sergeys_thesis/make_jet_response_plots_correction_levels.py diff --git a/dps/analysis/check_CRAB_jobs.py b/dps/legacy/check_CRAB_jobs.py similarity index 100% rename from dps/analysis/check_CRAB_jobs.py rename to dps/legacy/check_CRAB_jobs.py diff --git a/dps/analysis/estimate_QCD_rate.py b/dps/legacy/estimate_QCD_rate.py similarity index 100% rename from dps/analysis/estimate_QCD_rate.py rename to dps/legacy/estimate_QCD_rate.py diff --git a/dps/analysis/generate_QCD_template_from_data.py b/dps/legacy/generate_QCD_template_from_data.py similarity index 100% rename from dps/analysis/generate_QCD_template_from_data.py rename to dps/legacy/generate_QCD_template_from_data.py diff --git a/dps/analysis/grid.py b/dps/legacy/grid.py similarity index 100% rename from dps/analysis/grid.py rename to dps/legacy/grid.py diff --git a/dps/analysis/make_CRAB_configuration.py b/dps/legacy/make_CRAB_configuration.py similarity index 100% rename from dps/analysis/make_CRAB_configuration.py rename to dps/legacy/make_CRAB_configuration.py diff --git a/dps/analysis/make_HLT_plots.py b/dps/legacy/make_HLT_plots.py similarity index 100% rename from dps/analysis/make_HLT_plots.py rename to dps/legacy/make_HLT_plots.py diff --git a/dps/analysis/make_ntuples_CRAB_configurations.sh b/dps/legacy/make_ntuples_CRAB_configurations.sh similarity index 100% rename from dps/analysis/make_ntuples_CRAB_configurations.sh rename to dps/legacy/make_ntuples_CRAB_configurations.sh diff --git a/dps/analysis/make_unfolding_CRAB_configurations.sh b/dps/legacy/make_unfolding_CRAB_configurations.sh similarity index 100% rename from dps/analysis/make_unfolding_CRAB_configurations.sh rename to dps/legacy/make_unfolding_CRAB_configurations.sh diff --git a/dps/analysis/read_processed_events.py b/dps/legacy/read_processed_events.py similarity index 100% rename from 
dps/analysis/read_processed_events.py rename to dps/legacy/read_processed_events.py diff --git a/dps/analysis/read_skim_information.py b/dps/legacy/read_skim_information.py similarity index 100% rename from dps/analysis/read_skim_information.py rename to dps/legacy/read_skim_information.py diff --git a/dps/analysis/search/__init__.py b/dps/legacy/search/__init__.py similarity index 100% rename from dps/analysis/search/__init__.py rename to dps/legacy/search/__init__.py diff --git a/dps/analysis/search/translate_results_to_theta.py b/dps/legacy/search/translate_results_to_theta.py similarity index 100% rename from dps/analysis/search/translate_results_to_theta.py rename to dps/legacy/search/translate_results_to_theta.py diff --git a/dps/analysis/search/validate_systematic_method.py b/dps/legacy/search/validate_systematic_method.py similarity index 100% rename from dps/analysis/search/validate_systematic_method.py rename to dps/legacy/search/validate_systematic_method.py diff --git a/dps/legacy/tools/measurement.py b/dps/legacy/tools/measurement.py new file mode 100644 index 00000000..df463b3d --- /dev/null +++ b/dps/legacy/tools/measurement.py @@ -0,0 +1,316 @@ +''' + Provides the classes Measurement and Systematic +''' +from __future__ import division +from . import log +import copy +from rootpy.io.file import Directory +from dps.utils.ROOT_utils import get_histogram_from_file +from dps.utils.file_utilities import make_folder_if_not_exists,\ + write_data_to_JSON, read_data_from_JSON +from dps.utils.input import Input +from dps.utils.hist_utilities import clean_control_region +# define logger for this module +meas_log = log["dps.utils.measurement"] + + +class Measurement(): + + ''' + The Measurement class combines files and histogram paths into + one container. It also allows to provide separate shapes for the + histograms while using the normalisation from the initial set. 
+ ''' + + @meas_log.trace() + def __init__(self, name): + self.name = name + self.variable = '' + self.centre_of_mass_energy = 0 + self.channel = '' + self.samples = {} + self.shapes = {} + self.norms = {} + self.histograms = {} + self.fit_variables = {} + + self.have_read_samples = False + self.have_read_shapes = False + self.have_read_norms = False + + self.met_type = '' + + self.type = 0 + + self.aux_info_norms = {} + + @meas_log.trace() + def addSample(self, sample, read=True, **kwargs): + self.samples[sample] = kwargs + # TODO: add tree & branch, selection etc + # whatever get_histograms_from_trees needs + if read: + self.read_sample(sample) + + @meas_log.trace() + def addShapeForSample(self, sample, measurement, read=True): + self.shapes[sample] = measurement + if read: + self.read_shape(sample) + + @meas_log.trace() + def addNormForSample(self, sample, measurement, read=True): + self.norms[sample] = measurement + if read: + self.read_norm(sample) + + @meas_log.trace() + def addFitVariable(self, variable, measurement): + self.fit_variables[variable] = measurement + + @meas_log.trace() + def toJSON(self, JSON_file): + output = self.toDict() + filename = JSON_file.split('/')[-1] + directory = JSON_file.replace(filename, '') + make_folder_if_not_exists(directory) + write_data_to_JSON(output, JSON_file) + + @meas_log.trace() + def toDict(self): + output = {} + output['class'] = str(self.__class__) + output['name'] = self.name + output['variable'] = self.variable + output['centre_of_mass_energy'] = self.centre_of_mass_energy + output['samples'] = self.samples + output['shapes'] = {shape: meas.toDict() + for shape, meas in self.shapes.items()} + output['norms'] = {norm: meas.toDict() + for norm, meas in self.norms.items()} + output['channel'] = self.channel + output['met_type'] = self.met_type + for sample in output['samples'].keys(): + if output['samples'][sample].has_key('input'): + output['samples'][sample]['input'] = output[ + 
'samples'][sample]['input'].toDict() + + return output + + @staticmethod + def fromJSON(JSON_file): + src = read_data_from_JSON(JSON_file) + m = Measurement.fromDict(src) + + return m + + @staticmethod + def fromDict(d): + m = None + if d['class'] == 'dps.utils.measurement.Measurement': + m = Measurement(d['name']) + if d['class'] == 'dps.utils.measurement.Systematic': + m = Systematic(d['name'], d['type'], + affected_samples=d['affected_samples'], scale=d['scale']) + m.setVariable(d['variable']) + m.setCentreOfMassEnergy(int(d['centre_of_mass_energy'])) + m.setChannel(d['channel']) + m.setMETType(d['met_type']) + for sample, i in d['samples'].items(): + if i.has_key('input'): + inp = Input(**i['input']) + m.addSample(sample, read=True, input=inp) + else: + m.addSample(sample, i['file'], i['hist'], read=True) + for shape, obj in d['shapes'].items(): + m.addShapeForSample(shape, Measurement.fromDict(obj), read=True) + for norm, obj in d['norms'].items(): + m.addNormForSample( + norm, Measurement.fromDict(obj), read=True) + return m + + @meas_log.trace() + def toROOT(self): + ''' + Converts measurement into something that can be stored in a ROOT + file + ''' + return + d = Directory(name=self.name) + # create shape and norm folders if there is anything to be saved + # what needs to be saved + # MET type + return d + + @meas_log.trace() + def setVariable(self, variable): + self.variable = variable + + @meas_log.trace() + def setCentreOfMassEnergy(self, com): + self.centre_of_mass_energy = com + + @meas_log.trace() + def setChannel(self, channel): + self.channel = channel + + @meas_log.trace() + def setMETType(self, met_type): + self.met_type = met_type + + @meas_log.trace() + def getCleanedShape(self, sample): + subtract = copy.copy(self.histograms.keys()) + subtract.remove(sample) + subtract.remove('data') + hist = clean_control_region(self.histograms, + data_label='data', + subtract=subtract, + fix_to_zero=True) + return hist + + @meas_log.trace() + def read(self): 
+ self.read_samples() + self.read_shapes() + self.read_norms() + + @meas_log.trace() + def read_samples(self): + if self.have_read_samples: + return + for sample in self.samples.keys(): + self.read_sample(sample) + self.have_read_samples = True + + @meas_log.trace() + def read_sample(self, sample): + if self.samples[sample].has_key('input'): + i = self.samples[sample]['input'] + if isinstance(i, dict): + i = Input(**self.samples[sample]['input']) + self.histograms[sample] = i.read() + return + input_file = self.samples[sample]['input_file'] + if self.samples[sample].has_key('hist'): + hist = self.samples[sample]['hist'] + self.histograms[sample] = get_histogram_from_file( + hist, input_file) + + @meas_log.trace() + def read_shapes(self): + if self.have_read_shapes: + return + if not self.have_read_samples: + self.read_samples() + for sample in self.shapes.keys(): + self.read_shape(sample) + self.have_read_shapes = True + + @meas_log.trace() + def read_norms(self): + if self.have_read_norms: + return + if not self.have_read_samples: + self.read_samples() + for sample in self.norms.keys(): + self.read_norm(sample) + self.have_read_norms = True + + @meas_log.trace() + def read_shape(self, sample): + ''' + Shape from a Control Region (CR) is currently treated as: + - define process A for which you which to get the shape + - define CR + - subtract other processes from data in the CR + - normalise the result to process A in signal region + - replace process A in signal region with the new histogram + ''' + measurement = self.shapes[sample] + shape = measurement.getCleanedShape(sample) + if sample in self.histograms.keys(): + n_shape = shape.Integral() + mc = self.histograms[sample] + n_mc = mc.Integral() + scale = 1 + if not n_shape == 0: + if not n_mc == 0: + scale = 1 / n_shape * n_mc + else: + scale = 1 / n_shape + shape.Scale(scale) + self.histograms[sample] = shape + else: + meas_log.warning( + 'No MC entry found for sample "{0}", using shape 
normalisation'.format(sample)) + self.histograms[sample] = shape + + @meas_log.trace() + def read_norm(self, sample): + ''' + Normalisation from a Control Region (CR) is currently treated as: + - define normalisation for process A + - define CR + - subtract other processes from data in the CR + - calculate the ratio between process A and data (both in CR) + - apply ratio to process A in signal region + ''' + measurement = self.norms[sample] + self.aux_info_norms[sample] = {} + # get ratio from control region + norm = measurement.getCleanedShape(sample) + mc_in_control = measurement.histograms[sample] + # scale sample to this ratio + if sample in self.histograms.keys(): + n_data_control = norm.Integral() + n_mc_control = mc_in_control.Integral() + ratio = n_data_control / n_mc_control + meas_log.debug('Ratio from control region {0}'.format(ratio)) + n_mc_signal_region = self.histograms[sample].integral() + self.histograms[sample].Scale(ratio) + self.aux_info_norms[sample]['norm_factor'] = round(ratio, 2) + self.aux_info_norms[sample]['n_mc_control'] = n_mc_control + self.aux_info_norms[sample][ + 'n_mc_signal_region'] = n_mc_signal_region + self.aux_info_norms[sample]['n_data_control'] = n_data_control + else: + meas_log.warning( + 'No MC entry found for sample "{0}", using control region normalisation'.format(sample)) + self.histograms[sample] = norm + + +class Systematic(Measurement): + + ''' + The Systematic class is an extension of the Measurement class. + It allows to implement systematic specific functionality + (e.g. rate systematics). 
+ ''' + + SHAPE = 10 + RATE = 20 + + @meas_log.trace() + def __init__(self, name, + stype=SHAPE, + affected_samples=[], + scale=1.): + ''' + Constructor + ''' + Measurement.__init__(self, name) + self.type = stype + + self.affected_samples = affected_samples + + self.scale = scale + + @meas_log.trace() + def toDict(self): + output = Measurement.toDict(self) + output['type'] = self.type + output['affected_samples'] = self.affected_samples + output['scale'] = self.scale + + return output diff --git a/dps/analysis/trash/README b/dps/legacy/trash/README similarity index 100% rename from dps/analysis/trash/README rename to dps/legacy/trash/README diff --git a/dps/analysis/xsection/01_get_fit_results.py b/dps/legacy/xsection/01_get_fit_results.py similarity index 100% rename from dps/analysis/xsection/01_get_fit_results.py rename to dps/legacy/xsection/01_get_fit_results.py diff --git a/dps/legacy/xsection/01_get_ttjet_normalisation.py b/dps/legacy/xsection/01_get_ttjet_normalisation.py new file mode 100644 index 00000000..c0b6e954 --- /dev/null +++ b/dps/legacy/xsection/01_get_ttjet_normalisation.py @@ -0,0 +1,261 @@ +''' + Takes AnalysisSoftware (https://github.com/BristolTopGroup/AnalysisSoftware) + output files and extracts the TTJet normalisation for each measured variable + by subtracting backgrounds from data. + + Usage: + python dps/analysis/xsection/01_get_ttjet_normalisation.py \ + -c -v -i \ + -p + + Example: + python dps/analysis/xsection/01_get_ttjet_normalisation.py \ + -c 13 -v MET -i config/measurements/background_subtraction/ + + TODO: In the end this and 01_get_fit_results.py should be merged. 
+ All should come down to the function to extract the # events from TTJet +''' +from __future__ import division +from argparse import ArgumentParser +from dps.utils.logger import log +from dps.config.xsection import XSectionConfig +from dps.analysis.xsection.lib import closure_tests +from dps.utils.file_utilities import write_data_to_JSON, get_files_in_path +from dps.utils.hist_utilities import clean_control_region, \ + hist_to_value_error_tuplelist, fix_overflow + +import os +from copy import deepcopy +from dps.utils.Calculation import combine_complex_results +from dps.utils.measurement import Measurement +from dps.utils.ROOT_utils import set_root_defaults + +# define logger for this module +mylog = log["01b_get_ttjet_normalisation"] + + +class TTJetNormalisation(object): + ''' + Determines the normalisation for top quark pair production. + Unless stated otherwise all templates and (initial) normalisations + are taken from simulation, except for QCD where the template is + extracted from data. + + Subtracts the known backgrounds from data to obtain TTJet template + and normalisation + ''' + + @mylog.trace() + def __init__(self, + config, + measurement, + phase_space='FullPS'): + self.config = config + self.variable = measurement.variable + self.category = measurement.name + self.channel = measurement.channel + self.phase_space = phase_space + self.measurement = measurement + self.measurement.read() + + self.normalisation = {} + self.initial_normalisation = {} + # self.unity_normalisation = {} + self.auxiliary_info = {} + + self.have_normalisation = False + + # for sample, hist in self.measurement.histograms.items(): + # h = deepcopy(hist) + # h_norm = h.integral() + # if h_norm > 0: + # h.Scale(1 / h.integral()) + # self.unity_normalisation[sample] = hist_to_value_error_tuplelist(h) + + self.auxiliary_info['norms'] = measurement.aux_info_norms + + @mylog.trace() + def calculate_normalisation(self): + ''' + 1. get file names + 2. get histograms from files + 3. ??? 
+ 4. calculate normalisation + ''' + if self.have_normalisation: + return + histograms = self.measurement.histograms + + for sample, hist in histograms.items(): + # TODO: this should be a list of bin-contents + hist = fix_overflow(hist) + histograms[sample] = hist + self.initial_normalisation[sample] = hist_to_value_error_tuplelist(hist) + self.normalisation[sample] = self.initial_normalisation[sample] + + self.background_subtraction(histograms) + + # next, let's round all numbers (they are event numbers after all + for sample, values in self.normalisation.items(): + new_values = [(round(v, 1), round(e, 1)) for v, e in values] + self.normalisation[sample] = new_values + + self.have_normalisation = True + + @mylog.trace() + def background_subtraction(self, histograms): + ttjet_hist = clean_control_region( + histograms, + subtract=['QCD', 'V+Jets', 'SingleTop'] + ) + self.normalisation['TTJet'] = hist_to_value_error_tuplelist(ttjet_hist) + + @mylog.trace() + def save(self, output_path): + if not self.have_normalisation: + self.calculate_normalisation() + + file_template = '{type}_{channel}.txt' + folder_template = '{path}/normalisation/{method}/{CoM}TeV/{variable}/{phase_space}/{category}/' + output_folder = folder_template.format( + path = output_path, + CoM = self.config.centre_of_mass_energy, + variable = self.variable, + category = self.category, + method = 'background_subtraction', + phase_space = self.phase_space, + ) + + write_data_to_JSON( + self.normalisation, + output_folder + file_template.format(type='normalisation', channel=self.channel) + ) + write_data_to_JSON( + self.initial_normalisation, + output_folder + file_template.format(type='initial_normalisation', channel=self.channel) + ) + # write_data_to_JSON( + # self.unity_normalisation, + # output_folder + file_template.format(type='unity_normalisation', channel=self.channel) + # ) + write_data_to_JSON( + self.auxiliary_info, + output_folder + file_template.format(type='auxiliary_info', 
channel=self.channel) + ) + return output_folder + + @mylog.trace() + def combine(self, other): + if not self.have_normalisation or not other.have_normalisation: + mylog.warn( + 'One of the TTJetNormalisations does not have a normalisation, aborting.') + return + + self.normalisation = combine_complex_results( + self.normalisation, other.normalisation) + self.initial_normalisation = combine_complex_results( + self.initial_normalisation, other.initial_normalisation) + # self.unity_normalisation = combine_complex_results( + # self.unity_normalisation, other.unity_normalisation) + self.channel = 'combined' + + +def parse_arguments(): + parser = ArgumentParser(__doc__) + parser.add_argument("-p", "--path", dest="path", default='data', + help="set output path for JSON files. Default is 'data'.") + parser.add_argument("-i", "--input", dest="input", + default='config/measurements/background_subtraction/', + help="set output path for JSON files") + parser.add_argument("-v", "--variable", dest="variable", default='MET', + help="set the variable to analyse (MET, HT, ST, MT, WPT). Default is MET.") + parser.add_argument("-c", "--centre-of-mass-energy", dest="CoM", default=13, type=int, + help="set the centre of mass energy for analysis. 
Default = 13 [TeV]") + parser.add_argument('-d', '--debug', dest="debug", action="store_true", + help="Print the debug information") + parser.add_argument('--closure_test', dest="closure_test", action="store_true", + help="Perform fit on data == sum(MC) * scale factor (MC process)") + parser.add_argument('--closure_test_type', dest="closure_test_type", default='simple', + help="Type of closure test (relative normalisation):" + '|'.join(closure_tests.keys())) + parser.add_argument('--test', dest="test", action="store_true", + help="Just run the central measurement") + parser.add_argument('--visiblePS', dest="visiblePS", action="store_true", + help="Unfold to visible phase space") + + args = parser.parse_args() + # fix some of the inputs + if not args.path.endswith('/'): + args.path = args.path + '/' + if not args.input.endswith('/'): + args.input = args.input + '/' + + return args + +@mylog.trace() +def main(): + # construct categories from files: + input_template = args.input + '{energy}TeV/{channel}/{variable}/{phase_space}/' + + phase_space = 'FullPS' + if args.visiblePS: + phase_space = 'VisiblePS' + results = {} + + for channel in ['electron', 'muon']: + measurement_filepath = input_template.format( + energy = args.CoM, + channel = channel, + variable = variable, + phase_space = phase_space, + ) + measurement_files = get_files_in_path(measurement_filepath, file_ending='.json') + + for f in sorted(measurement_files): + if args.test and 'central' not in f: continue + + print('Processing file ' + f) + measurement = Measurement.fromJSON(f) + # for each measurement + norm = TTJetNormalisation( + config=measurement_config, + measurement=measurement, + phase_space=phase_space, + ) + norm.calculate_normalisation() + mylog.info('Saving results to {0}'.format(output_path)) + norm.save(output_path) + # store results for later combination + r_name = f.replace(channel, '') + if not results.has_key(r_name): + results[r_name] = [norm] + else: + results[r_name].append(norm) + 
+ for f, r_list in results.items(): + if not len(r_list) == 2: + msg = 'Only found results ({0}) for one channel, not combining.' + mylog.warn(msg.format(f)) + continue + n1, n2 = r_list + n1.combine(n2) + n1.save(output_path) + +if __name__ == '__main__': + set_root_defaults() + + args = parse_arguments() + + # set global variables + debug = args.debug + if debug: + log.setLevel(log.DEBUG) + + measurement_config = XSectionConfig(args.CoM) + # caching of variables for shorter access + variable = args.variable + output_path = args.path + if args.closure_test: + output_path += '/closure_test/' + output_path += args.closure_test_type + '/' + + main() diff --git a/dps/analysis/xsection/06_compare_energies.py b/dps/legacy/xsection/06_compare_energies.py similarity index 100% rename from dps/analysis/xsection/06_compare_energies.py rename to dps/legacy/xsection/06_compare_energies.py diff --git a/dps/analysis/xsection/98_fit_cross_checks.py b/dps/legacy/xsection/98_fit_cross_checks.py similarity index 100% rename from dps/analysis/xsection/98_fit_cross_checks.py rename to dps/legacy/xsection/98_fit_cross_checks.py diff --git a/dps/analysis/xsection/98b_fit_cross_checks.py b/dps/legacy/xsection/98b_fit_cross_checks.py similarity index 100% rename from dps/analysis/xsection/98b_fit_cross_checks.py rename to dps/legacy/xsection/98b_fit_cross_checks.py diff --git a/dps/analysis/xsection/98c_fit_cross_checks.py b/dps/legacy/xsection/98c_fit_cross_checks.py similarity index 100% rename from dps/analysis/xsection/98c_fit_cross_checks.py rename to dps/legacy/xsection/98c_fit_cross_checks.py diff --git a/dps/legacy/xsection/create_measurement.py b/dps/legacy/xsection/create_measurement.py new file mode 100644 index 00000000..dfcdf93e --- /dev/null +++ b/dps/legacy/xsection/create_measurement.py @@ -0,0 +1,486 @@ +''' + Translates the current config (for a given centre-of-mass energy) + into JSON configs. 
The configs will be written to + config/measurements/background_subtraction/TeV/ + + Usage: + python src/cross_section_measurement/create_measurement.py -c + + Example: + python src/cross_section_measurement/create_measurement.py -c +''' +from __future__ import print_function +from optparse import OptionParser +from dps.config.xsection import XSectionConfig +from dps.config import variable_binning +from dps.utils.input import Input +from dps.utils.logger import log +from copy import deepcopy +from dps.utils.measurement import Measurement, Systematic + +# define logger for this module +create_measurement_log = log["01b_get_ttjet_normalisation"] +cml = create_measurement_log # alias + + +@cml.trace() +def main(): + parser = OptionParser(__doc__) + parser.add_option("-c", "--centre-of-mass-energy", dest="CoM", default=13, type=int, + help="set the centre of mass energy for analysis. Default = 13 [TeV]") + parser.add_option('-d', '--debug', dest="debug", action="store_true", + help="Print the debug information") + (options, _) = parser.parse_args() + centre_of_mass_energy = options.CoM + # set global variables + debug = options.debug + if debug: + log.setLevel(log.DEBUG) + + measurement_config = XSectionConfig(centre_of_mass_energy) + categories = ['QCD_shape'] + categories.extend(measurement_config.categories_and_prefixes.keys()) + categories.extend(measurement_config.rate_changing_systematics_names) + categories.extend([measurement_config.vjets_theory_systematic_prefix + scale for scale in ['scaleup', 'scaledown']]) + + for variable in measurement_config.variables: + for category in categories: + for channel in ['electron', 'muon']: + if channel == 'electron' and (category == 'Muon_down' or category == 'Muon_up'): + continue + elif channel == 'muon' and (category == 'Electron_down' or category == 'Electron_up'): + continue + # create_measurement( + # centre_of_mass_energy, category, variable, channel, + # phase_space='FullPS', norm_method='background_subtraction') + 
# and the visible phase space + create_measurement( + centre_of_mass_energy, category, variable, channel, + phase_space='VisiblePS', norm_method='background_subtraction') + + +@cml.trace() +def create_measurement(com, category, variable, channel, phase_space, norm_method): + if com == 13: + # exclude non existing systematics + if 'VJets' in category and 'scale' in category: + print('Excluding {0} for now'.format(category)) + return + config = XSectionConfig(com) + met_type = get_met_type(category, config) + should_not_run_systematic = category in config.met_systematics_suffixes and variable in config.variables_no_met and not 'JES' in category and not 'JER' in category + if should_not_run_systematic: + # no MET uncertainty on HT (but JES and JER of course) + return + + m = None + if category == 'central': + m = Measurement(category) + else: + vjet_systematics = [config.vjets_theory_systematic_prefix + + systematic for systematic in config.generator_systematics] + if category in config.categories_and_prefixes.keys() or \ + category in config.met_systematics_suffixes or \ + category in vjet_systematics: + m = Systematic(category, + stype=Systematic.SHAPE, + affected_samples=config.samples) + elif category in config.rate_changing_systematics_names: + m = config.rate_changing_systematics_values[category] + + elif category == 'QCD_shape': + m = Systematic(category, + stype=Systematic.SHAPE, + affected_samples=['QCD'], + ) + + m.setVariable(variable) + m.setCentreOfMassEnergy(com) + m.setChannel(channel) + m.setMETType(met_type) + + inputs = { + 'channel': config.analysis_types[channel], + 'met_type': met_type, + 'selection': 'Ref selection', + 'btag': config.translate_options['2m'], # 2 or more + 'energy': com, + 'variable': variable, + 'category': category, + 'phase_space': phase_space, + 'norm_method': norm_method, + 'lepton': channel.title(), + } + variable_template = config.variable_path_templates[ + variable].format(**inputs) + + template_category = category + if 
category == 'QCD_shape' or category in config.rate_changing_systematics_names: + template_category = 'central' + if category in [config.vjets_theory_systematic_prefix + systematic for systematic in config.generator_systematics]: + template_category = 'central' + + m.addSample( + 'TTJet', + False, + input=create_input( + config, 'TTJet', variable, template_category, channel, + variable_template, phase_space=phase_space, measurement=m, + ), + ) + m.addSample( + 'V+Jets', + False, + input=create_input( + config, 'V+Jets', variable, template_category, channel, + variable_template, phase_space=phase_space, measurement=m, + ), + ) + m.addSample( + 'SingleTop', + False, + input=create_input( + config, 'SingleTop', variable, template_category, channel, + variable_template, phase_space=phase_space, measurement=m, + ), + ) + m.addSample( + 'QCD', + False, + input=create_input( + config, 'QCD', variable, template_category, channel, + variable_template, phase_space=phase_space, measurement=m, + ), + ) + variable_template_data = variable_template.replace( + met_type, config.translate_options['type1']) + + m.addSample( + 'data', + False, + input=create_input( + config, 'data', variable, template_category, channel, + variable_template_data, phase_space=phase_space, measurement=m, + ), + ) + + m_qcd = Measurement(category) + m_qcd.setVariable(variable) + m_qcd.setCentreOfMassEnergy(com) + + qcd_template = get_qcd_template(config, variable, category, channel) + + # we want "measurement = m" here since all rate systematics should apply + # to the control regions as well + m_qcd.addSample( + 'TTJet', + False, + input=create_input( + config, 'TTJet', variable, template_category, channel, + qcd_template, phase_space=phase_space, measurement=m, + ), + ) + m_qcd.addSample( + 'V+Jets', + False, + input=create_input( + config, 'V+Jets', variable, template_category, channel, + qcd_template, phase_space=phase_space, measurement=m, + ), + ) + m_qcd.addSample( + 'SingleTop', + False, + 
input=create_input( + config, 'SingleTop', variable, template_category, channel, + qcd_template, phase_space=phase_space, measurement=m, + ), + ) + m_qcd.addSample( + 'QCD', + False, + input=create_input( + config, 'QCD', variable, template_category, channel, + qcd_template, phase_space=phase_space, measurement=m, + ), + ) + m_qcd.addSample( + 'data', + False, + input=create_input( + config, 'data', variable, template_category, channel, + qcd_template, phase_space=phase_space, measurement=m, + ), + ) + + m.addShapeForSample('QCD', m_qcd, False) + norm_qcd = deepcopy(m_qcd) + # we want QCD shape and normalisation to be separate + if category == 'QCD_shape': + for sample in norm_qcd.samples.keys(): + tree = norm_qcd.samples[sample]['input'].tree_name + if channel == 'electron': + tree = tree.replace(config.electron_control_region_systematic, + config.electron_control_region) + else: + tree = tree.replace(config.muon_control_region_systematic, + config.muon_control_region) + norm_qcd.samples[sample]['input'].tree_name = tree + if 'QCD_cross_section' in category: + for sample in norm_qcd.samples.keys(): + tree = norm_qcd.samples[sample]['input'].tree_name + if channel == 'electron': + tree = tree.replace(config.electron_control_region, + config.electron_control_region_systematic) + else: + tree = tree.replace(config.muon_control_region, + config.muon_control_region_systematic) + norm_qcd.samples[sample]['input'].tree_name = tree + + m.addNormForSample('QCD', norm_qcd, False) + + if category in [config.vjets_theory_systematic_prefix + systematic for systematic in config.generator_systematics]: + v_template_category = category.replace( + config.vjets_theory_systematic_prefix, '') + m_vjets = Measurement(category) + m_vjets.setVariable(variable) + m_vjets.setCentreOfMassEnergy(com) + m_vjets.addSample( + 'V+Jets', + False, + input=create_input( + config, 'V+Jets', variable, v_template_category, + channel, + variable_template, + config.generator_systematic_vjets_templates[ 
+ v_template_category]), + phase_space=phase_space, measurement=m, + ) + m.addShapeForSample('V+Jets', m_vjets, False) + + inputs['channel'] = channel + base_path = 'config/measurements/{norm_method}/{energy}TeV/' + base_path += '{channel}/{variable}/{phase_space}/' + if category == 'central': + path = base_path + '{category}.json' + m.toJSON(path.format(**inputs)) + else: + if m.type == Systematic.SHAPE: + inputs['type'] = 'shape_systematic' + else: + inputs['type'] = 'rate_systematic' + if category in config.met_systematics_suffixes and category not in ['JES_up', 'JES_down', 'JER_up', 'JER_down']: + inputs['category'] = met_type + path = base_path + '{category}_{type}.json' + m.toJSON(path.format(**inputs)) + + +@cml.trace() +def get_met_type(category, config): + met_type = config.translate_options['type1'] + if category == 'JES_up': + met_type += 'JetEnUp' + elif category == 'JES_down': + met_type += 'JetEnDown' + elif category == 'JER_up': + met_type += 'JetResUp' + elif category == 'JER_down': + met_type += 'JetResDown' + + isJetSystematic = 'JetEn' in category or 'JetRes' in category + isJetSystematic = isJetSystematic or 'JES' in category + isJetSystematic = isJetSystematic or 'JER' in category + + if category in config.met_systematics_suffixes: + # already done them + if not isJetSystematic: + met_type = met_type + category + + return met_type + + +@cml.trace() +def get_file(config, sample, category, channel): + use_trees = True if config.centre_of_mass_energy == 13 else False + if channel == 'electron': + qcd_template = config.electron_QCD_MC_category_templates[category] + data_template = config.data_file_electron + qcd_template_tree = config.electron_QCD_MC_category_templates_trees[ + category] + data_template_tree = config.data_file_electron_trees + else: + qcd_template = config.muon_QCD_MC_category_templates[category] + data_template = config.data_file_muon + qcd_template_tree = config.muon_QCD_MC_category_templates_trees[ + category] + 
data_template_tree = config.data_file_muon_trees + + tree_files = { + 'TTJet': config.ttbar_category_templates_trees[category], + 'V+Jets': config.VJets_category_templates_trees[category], + 'SingleTop': config.SingleTop_category_templates_trees[category], + 'QCD': qcd_template_tree, + 'data': data_template_tree + } + files = { + 'TTJet': config.ttbar_category_templates[category], + 'V+Jets': config.VJets_category_templates[category], + 'SingleTop': config.SingleTop_category_templates[category], + 'QCD': qcd_template, + 'data': data_template, + } + + if use_trees: + return tree_files[sample] + else: + return files[sample] + + +@cml.trace() +def get_qcd_template(config, variable, category, channel): + qcd_inputs = { + 'channel': config.analysis_types[channel], + 'met_type': config.translate_options['type1'], # always central MET + 'selection': 'Ref selection', + 'btag': config.translate_options['2m'], # 2 or more + 'energy': config.centre_of_mass_energy, + 'variable': variable, + 'category': 'central', # always central + 'lepton': channel.title(), + } + + qcd_template = config.variable_path_templates[ + variable].format(**qcd_inputs) + if channel == 'electron': + qcd_template = qcd_template.replace( + 'Ref selection', config.electron_control_region) + if category == 'QCD_shape': + qcd_template = qcd_template.replace( + config.electron_control_region, + config.electron_control_region_systematic) + else: + qcd_template = qcd_template.replace( + 'Ref selection', config.muon_control_region) + if category == 'QCD_shape': + qcd_template = qcd_template.replace( + config.muon_control_region, + config.muon_control_region_systematic) + + return qcd_template + + +@cml.trace() +def create_input(config, sample, variable, category, channel, template, + input_file=None, phase_space=None, **kwargs): + tree, branch, hist = None, None, None + selection = '1' + if not input_file: + input_file = get_file(config, sample, category, channel) + + if config.centre_of_mass_energy == 13: + 
branch = template.split('/')[-1] + tree = template.replace('/' + branch, '') + + if 'absolute_eta' in branch: + branch = 'abs(lepton_eta)' + + if sample != 'data': + if category in config.met_systematics_suffixes and not variable in config.variables_no_met: + branch = template.split('/')[-1] + branch += '_METUncertainties[%s]' % config.met_systematics[ + category] + + if 'JES_down' in category or 'JES_up' in category or 'JER_down' in category or 'JER_up' in category: + tree += config.categories_and_prefixes[category] + + if not sample == 'data': + if 'JES_down' in category: + input_file = input_file.replace('tree', 'minusJES_tree') + elif 'JES_up' in category: + input_file = input_file.replace('tree', 'plusJES_tree') + elif 'JER_up' in category: + input_file = input_file.replace('tree', 'plusJER_tree') + elif 'JER_down' in category: + input_file = input_file.replace('tree', 'minusJER_tree') + + selection = '{0} >= 0'.format(branch) + if variable == 'abs_lepton_eta': + selection += ' && {0} <= 3'.format(branch) + else: + hist = template + + lumi_scale = config.luminosity_scale + scale = 1. + + m = kwargs['measurement'] + if m.type == Systematic.RATE: + if 'luminosity' in m.name: + lumi_scale = lumi_scale * m.scale + else: + if sample in m.affected_samples: + scale = m.scale + if sample == 'data': # data is not scaled in any way + lumi_scale = 1. + scale = 1. 
+ + edges = variable_binning.reco_bin_edges_full[variable] + if phase_space == 'VisiblePS': + edges = variable_binning.reco_bin_edges_vis[variable] + + weight_branches = [] + if sample == 'data': + weight_branches.append('1') + else: + weight_branches.append('EventWeight') + + if 'PileUp' not in category: + weight_branches.append('PUWeight') + elif category == 'PileUp_up': + weight_branches.append('PUWeight_up') + elif category == 'PileUp_down': + weight_branches.append('PUWeight_down') + else: + weight_branches.append('1') + + if category == 'BJet_down': + weight_branches.append('BJetDownWeight') + elif category == 'BJet_up': + weight_branches.append('BJetUpWeight') + elif category == 'LightJet_down': + weight_branches.append('LightJetDownWeight') + elif category == 'LightJet_up': + weight_branches.append('LightJetUpWeight') + else: + weight_branches.append('BJetWeight') + + if not 'QCD' in tree: + if channel == 'muon': + if category == 'Muon_down': + weight_branches.append('MuonDown') + elif category == 'Muon_up': + weight_branches.append('MuonUp') + else: + weight_branches.append('MuonEfficiencyCorrection') + elif channel == 'electron': + if category == 'Electron_down': + weight_branches.append('ElectronDown') + elif category == 'Electron_up': + weight_branches.append('ElectronUp') + else: + weight_branches.append('ElectronEfficiencyCorrection') + + i = Input( + input_file=input_file, + hist=hist, + tree=tree, + branch=branch, + selection=selection, + bin_edges=edges, + lumi_scale=lumi_scale, + scale=scale, + weight_branches=weight_branches, + ) + return i + +if __name__ == '__main__': + main() diff --git a/dps/analysis/xsection/make_control_plots.py b/dps/legacy/xsection/make_control_plots.py similarity index 100% rename from dps/analysis/xsection/make_control_plots.py rename to dps/legacy/xsection/make_control_plots.py diff --git a/dps/analysis/xsection/make_cutflow_8TeV.py b/dps/legacy/xsection/make_cutflow_8TeV.py similarity index 100% rename from 
dps/analysis/xsection/make_cutflow_8TeV.py rename to dps/legacy/xsection/make_cutflow_8TeV.py diff --git a/dps/analysis/xsection/make_fit_variable_plots.py b/dps/legacy/xsection/make_fit_variable_plots.py similarity index 100% rename from dps/analysis/xsection/make_fit_variable_plots.py rename to dps/legacy/xsection/make_fit_variable_plots.py diff --git a/dps/analysis/xsection/make_new_physics_plots_8TeV.py b/dps/legacy/xsection/make_new_physics_plots_8TeV.py similarity index 100% rename from dps/analysis/xsection/make_new_physics_plots_8TeV.py rename to dps/legacy/xsection/make_new_physics_plots_8TeV.py diff --git a/dps/analysis/zprime_analysis/__init__.py b/dps/legacy/zprime_analysis/__init__.py similarity index 100% rename from dps/analysis/zprime_analysis/__init__.py rename to dps/legacy/zprime_analysis/__init__.py diff --git a/dps/analysis/zprime_analysis/estimate_QCD_rate.py b/dps/legacy/zprime_analysis/estimate_QCD_rate.py similarity index 100% rename from dps/analysis/zprime_analysis/estimate_QCD_rate.py rename to dps/legacy/zprime_analysis/estimate_QCD_rate.py diff --git a/dps/analysis/zprime_analysis/make_control_plots.py b/dps/legacy/zprime_analysis/make_control_plots.py similarity index 100% rename from dps/analysis/zprime_analysis/make_control_plots.py rename to dps/legacy/zprime_analysis/make_control_plots.py diff --git a/dps/analysis/zprime_analysis/make_control_region_plots.py b/dps/legacy/zprime_analysis/make_control_region_plots.py similarity index 100% rename from dps/analysis/zprime_analysis/make_control_region_plots.py rename to dps/legacy/zprime_analysis/make_control_region_plots.py diff --git a/dps/analysis/zprime_analysis/make_control_region_plots_2.py b/dps/legacy/zprime_analysis/make_control_region_plots_2.py similarity index 100% rename from dps/analysis/zprime_analysis/make_control_region_plots_2.py rename to dps/legacy/zprime_analysis/make_control_region_plots_2.py diff --git a/dps/utils/file_utilities.py b/dps/utils/file_utilities.py 
index 64ca1af6..c0282e6e 100644 --- a/dps/utils/file_utilities.py +++ b/dps/utils/file_utilities.py @@ -52,9 +52,19 @@ def read_data_from_JSON(JSON_input_file): return data def get_files_in_path(path, file_ending = '.root'): - path += '/*' + file_ending - files = glob.glob(path) - return files + ''' + Return the files for a given path + ''' + input_files=[] + print path + if os.path.exists(path): + for root, dirs, files in os.walk(path): + for name in files: + if file_ending in name: + input_files.append(os.path.join(root, name)) + else: + print "Could not find required folder" + return input_files def check_ROOT_file(filename): passesCheck = can_open_ROOT_file(filename) diff --git a/dps/utils/hist_utilities.py b/dps/utils/hist_utilities.py index 03db4003..d81e398d 100644 --- a/dps/utils/hist_utilities.py +++ b/dps/utils/hist_utilities.py @@ -12,6 +12,7 @@ from rootpy.plotting.hist import Hist2D import random import string +from math import sqrt from copy import deepcopy from .file_utilities import read_data_from_JSON from .logger import log @@ -33,17 +34,27 @@ def values_and_errors_to_hist( values, errors, bins ): value_error_tuplelist = zip( values, errors ) return value_error_tuplelist_to_hist( value_error_tuplelist, bins ) -def value_errors_tuplelist_to_graph( value_errors_tuplelist, bin_edges ): - value_error_tuplelist = [( value, 0 ) for value, lower_error, upper_error in value_errors_tuplelist] +def value_errors_tuplelist_to_graph( value_errors_tuplelist, bin_edges, is_symmetric_errors=False ): + value_error_tuplelist = [] + if is_symmetric_errors: + value_error_tuplelist = [( value, 0 ) for value, error in value_errors_tuplelist] + else: + value_error_tuplelist = [( value, 0 ) for value, lower_error, upper_error in value_errors_tuplelist] + hist = value_error_tuplelist_to_hist( value_error_tuplelist, bin_edges ) rootpy_graph = asrootpy( TGraphAsymmErrors( hist ) ) -# rootpy_graph = Graph(hist = hist) + set_lower_error = rootpy_graph.SetPointEYlow 
set_upper_error = rootpy_graph.SetPointEYhigh - for point_i, ( value, lower_error, upper_error ) in enumerate( value_errors_tuplelist ): - set_lower_error( point_i, lower_error ) - set_upper_error( point_i, upper_error ) + if is_symmetric_errors: + for point_i, ( value, error ) in enumerate( value_errors_tuplelist ): + set_lower_error( point_i, error ) + set_upper_error( point_i, error ) + else: + for point_i, ( value, lower_error, upper_error ) in enumerate( value_errors_tuplelist ): + set_lower_error( point_i, lower_error ) + set_upper_error( point_i, upper_error ) return rootpy_graph @@ -195,7 +206,7 @@ def fix_overflow( hist ): overflow_error= hist.GetBinError( overflow_bin ) new_last_bin_content = hist.GetBinContent( last_bin ) + overflow - new_last_bin_error = hist.GetBinError( last_bin ) + overflow_error + new_last_bin_error = sqrt(hist.GetBinError( last_bin ) ** 2 + overflow_error ** 2) hist.SetBinContent( last_bin, new_last_bin_content ) hist.SetBinError( last_bin, new_last_bin_error ) @@ -215,7 +226,7 @@ def fix_overflow( hist ): hist.SetBinContent( x, overflow_bin_y, 0. ) hist.SetBinContent( x, last_bin_y, overflow_y + last_bin_content_y ) - hist.SetBinError( x, last_bin_y, overflow_error_y + last_bin_error_y ) + hist.SetBinError( x, last_bin_y, sqrt( overflow_error_y ** 2 + last_bin_error_y ** 2 ) ) # now all x-overflow for y in range( 1, overflow_bin_y +1): overflow_x = hist.GetBinContent( overflow_bin_x, y ) @@ -226,7 +237,7 @@ def fix_overflow( hist ): hist.SetBinContent( overflow_bin_x, y, 0. 
) hist.SetBinContent( last_bin_x, y, overflow_x + last_bin_content_x ) - hist.SetBinError( last_bin_x, y, overflow_error_x + last_bin_error_x ) + hist.SetBinError( last_bin_x, y, sqrt( overflow_error_x ** 2 + last_bin_error_x ** 2 ) ) # and now the final bin (both x and y overflow) overflow_x_y = hist.GetBinContent( overflow_bin_x, overflow_bin_y ) last_bin_content_x_y = hist.GetBinContent( last_bin_x, last_bin_y ) diff --git a/dps/utils/measurement.py b/dps/utils/measurement.py index df463b3d..a604b885 100644 --- a/dps/utils/measurement.py +++ b/dps/utils/measurement.py @@ -3,314 +3,240 @@ ''' from __future__ import division from . import log -import copy -from rootpy.io.file import Directory -from dps.utils.ROOT_utils import get_histogram_from_file -from dps.utils.file_utilities import make_folder_if_not_exists,\ - write_data_to_JSON, read_data_from_JSON -from dps.utils.input import Input -from dps.utils.hist_utilities import clean_control_region +from dps.utils.hist_utilities import hist_to_value_error_tuplelist, clean_control_region + # define logger for this module meas_log = log["dps.utils.measurement"] - class Measurement(): - ''' The Measurement class combines files and histogram paths into one container. It also allows to provide separate shapes for the histograms while using the normalisation from the initial set. 
''' - - @meas_log.trace() - def __init__(self, name): - self.name = name - self.variable = '' - self.centre_of_mass_energy = 0 - self.channel = '' - self.samples = {} - self.shapes = {} - self.norms = {} - self.histograms = {} - self.fit_variables = {} - - self.have_read_samples = False - self.have_read_shapes = False - self.have_read_norms = False - - self.met_type = '' - - self.type = 0 - - self.aux_info_norms = {} - - @meas_log.trace() - def addSample(self, sample, read=True, **kwargs): - self.samples[sample] = kwargs - # TODO: add tree & branch, selection etc - # whatever get_histograms_from_trees needs - if read: - self.read_sample(sample) - - @meas_log.trace() - def addShapeForSample(self, sample, measurement, read=True): - self.shapes[sample] = measurement - if read: - self.read_shape(sample) - @meas_log.trace() - def addNormForSample(self, sample, measurement, read=True): - self.norms[sample] = measurement - if read: - self.read_norm(sample) - - @meas_log.trace() - def addFitVariable(self, variable, measurement): - self.fit_variables[variable] = measurement + def __init__(self, measurement): + self.measurement = measurement + self.histograms = {} + self.cr_histograms = {} + self.cr_histograms_for_normalisation = {} + self.normalisation = {} + self.variable = None + self.com = None + self.channel = None + self.name = None + self.is_normalised = False + self.central = False + self.samples = {} + self.__setFromConfig() + + def __setFromConfig(self): + self.variable = self.measurement["variable"] + self.com = self.measurement["com"] + self.channel = self.measurement["channel"] + self.samples = self.measurement["samples"] + self.name = self.measurement["name"] + data_driven_qcd = self.measurement["data_driven_qcd"] + + # Is this central or a systematic? 
+ if "central" in self.name: + self.central = True + + # Retrieve histograms from files for SR and CR + for sample, histogram_info in self.samples.iteritems(): + self.histograms[sample] = self.__return_histogram(histogram_info) + if data_driven_qcd: + self.cr_histograms[sample] = self.__return_histogram(histogram_info, useQCDControl=True) + + if histogram_info["qcd_normalisation_region"] != histogram_info["qcd_control_region"]: + self.cr_histograms_for_normalisation[sample] = self.__return_histogram(histogram_info, useQCDControl=True, useQCDSystematicControl=True) + + # print(hist_to_value_error_tuplelist(self.histograms[sample])) + # print(hist_to_value_error_tuplelist(self.cr_histograms[sample])) + + # Replace QCD MC with data-driven MC + if data_driven_qcd: + self.__qcd_from_data() + return - @meas_log.trace() - def toJSON(self, JSON_file): - output = self.toDict() - filename = JSON_file.split('/')[-1] - directory = JSON_file.replace(filename, '') - make_folder_if_not_exists(directory) - write_data_to_JSON(output, JSON_file) + def __qcd_from_data(self): + ''' + Replace Signal region mc qcd with data driven qcd - @meas_log.trace() - def toDict(self): - output = {} - output['class'] = str(self.__class__) - output['name'] = self.name - output['variable'] = self.variable - output['centre_of_mass_energy'] = self.centre_of_mass_energy - output['samples'] = self.samples - output['shapes'] = {shape: meas.toDict() - for shape, meas in self.shapes.items()} - output['norms'] = {norm: meas.toDict() - for norm, meas in self.norms.items()} - output['channel'] = self.channel - output['met_type'] = self.met_type - for sample in output['samples'].keys(): - if output['samples'][sample].has_key('input'): - output['samples'][sample]['input'] = output[ - 'samples'][sample]['input'].toDict() - - return output - - @staticmethod - def fromJSON(JSON_file): - src = read_data_from_JSON(JSON_file) - m = Measurement.fromDict(src) - - return m - - @staticmethod - def fromDict(d): - m = None 
- if d['class'] == 'dps.utils.measurement.Measurement': - m = Measurement(d['name']) - if d['class'] == 'dps.utils.measurement.Systematic': - m = Systematic(d['name'], d['type'], - affected_samples=d['affected_samples'], scale=d['scale']) - m.setVariable(d['variable']) - m.setCentreOfMassEnergy(int(d['centre_of_mass_energy'])) - m.setChannel(d['channel']) - m.setMETType(d['met_type']) - for sample, i in d['samples'].items(): - if i.has_key('input'): - inp = Input(**i['input']) - m.addSample(sample, read=True, input=inp) - else: - m.addSample(sample, i['file'], i['hist'], read=True) - for shape, obj in d['shapes'].items(): - m.addShapeForSample(shape, Measurement.fromDict(obj), read=True) - for norm, obj in d['norms'].items(): - m.addNormForSample( - norm, Measurement.fromDict(obj), read=True) - return m + N MC QCD in SR + Data in CR * -------------- + N MC QCD in CR - @meas_log.trace() - def toROOT(self): - ''' - Converts measurement into something that can be stored in a ROOT - file + Shape transfer factor + from control to + signal region ''' + # Get the shape of the data driven qcd in the control region + data_driven_qcd = clean_control_region( + self.cr_histograms, + subtract=['TTBar', 'V+Jets', 'SingleTop'] + ) + # print(hist_to_value_error_tuplelist(data_driven_qcd)) + # Calculate transfer factor from signal to control region + n_mc_sr = self.histograms['QCD'].Integral() + n_mc_cr = 1 + transfer_factor = 1 + if self.cr_histograms_for_normalisation == {}: + n_mc_cr = self.cr_histograms['QCD'].Integral() + transfer_factor = n_mc_sr/n_mc_cr + else : + # Treatment for QCD systematic uncertainties + # Use shape from the control region + # and the normalisation derived from a different control region + n_mc_cr = self.cr_histograms['QCD'].Integral() + n_mc_cr_norm = self.cr_histograms_for_normalisation['QCD'].Integral() + data_driven_qcd_normalisation = clean_control_region( + self.cr_histograms_for_normalisation, + subtract=['TTBar', 'V+Jets', 'SingleTop'] + ) + 
n_data_cr_norm = data_driven_qcd_normalisation.Integral() + transfer_factor = n_mc_sr/ n_mc_cr_norm * n_data_cr_norm / data_driven_qcd.Integral() + + data_driven_qcd.Scale( transfer_factor ) + + # Replace QCD histogram with datadriven one + self.histograms['QCD'] = data_driven_qcd return - d = Directory(name=self.name) - # create shape and norm folders if there is anything to be saved - # what needs to be saved - # MET type - return d - - @meas_log.trace() - def setVariable(self, variable): - self.variable = variable - @meas_log.trace() - def setCentreOfMassEnergy(self, com): - self.centre_of_mass_energy = com + def __return_histogram(self, d_hist_info, ignoreUnderflow=True, useQCDControl=False, useQCDSystematicControl=False): + ''' + Takes basic histogram info and returns histo. + Maybe this can move to ROOT_utilities? + ''' + from rootpy.io.file import File + from rootpy.plotting import Hist + from dps.utils.hist_utilities import fix_overflow + + f = d_hist_info['input_file'] + tree = d_hist_info['tree'] + qcd_tree = d_hist_info["qcd_control_region"] + qcd_tree_for_normalisation = d_hist_info["qcd_normalisation_region"] + var = d_hist_info['branch'] + bins = d_hist_info['bin_edges'] + lumi_scale = d_hist_info['lumi_scale'] + scale = d_hist_info['scale'] + weights = d_hist_info['weight_branches'] + selection = d_hist_info['selection'] + + if useQCDControl: + # replace SR tree with CR tree + if useQCDSystematicControl: + tree = qcd_tree_for_normalisation + else: + tree = qcd_tree + # Remove the Lepton reweighting for the datadriven qcd (SF not derived for unisolated leptons) + for weight in weights: + if 'Electron' in weight: weights.remove(weight) + elif 'Muon' in weight: weights.remove(weight) - @meas_log.trace() - def setChannel(self, channel): - self.channel = channel + weights = "*".join(weights) + # Selection will return a weight 0 or 1 depending on whether event passes selection + weights_and_selection = '( {0} ) * ( {1} )'.format(weights, selection) - 
@meas_log.trace() - def setMETType(self, met_type): - self.met_type = met_type + scale *= lumi_scale - @meas_log.trace() - def getCleanedShape(self, sample): - subtract = copy.copy(self.histograms.keys()) - subtract.remove(sample) - subtract.remove('data') - hist = clean_control_region(self.histograms, - data_label='data', - subtract=subtract, - fix_to_zero=True) - return hist + root_file = File( f ) + root_tree = root_file.Get( tree ) - @meas_log.trace() - def read(self): - self.read_samples() - self.read_shapes() - self.read_norms() + root_histogram = Hist( bins ) + # Draw histogram of var for selection into root_histogram + root_tree.Draw(var, selection = weights_and_selection, hist = root_histogram) + root_histogram.Scale(scale) - @meas_log.trace() - def read_samples(self): - if self.have_read_samples: - return - for sample in self.samples.keys(): - self.read_sample(sample) - self.have_read_samples = True + # When a tree is filled with a dummy variable, it will end up in the underflow, so ignore it + if ignoreUnderflow: + root_histogram.SetBinContent(0, 0) + root_histogram.SetBinError(0,0) - @meas_log.trace() - def read_sample(self, sample): - if self.samples[sample].has_key('input'): - i = self.samples[sample]['input'] - if isinstance(i, dict): - i = Input(**self.samples[sample]['input']) - self.histograms[sample] = i.read() - return - input_file = self.samples[sample]['input_file'] - if self.samples[sample].has_key('hist'): - hist = self.samples[sample]['hist'] - self.histograms[sample] = get_histogram_from_file( - hist, input_file) + # Fix overflow (Moves entries from overflow bin into last bin i.e. 
last bin not |..| but |--> ) + root_histogram = fix_overflow(root_histogram) - @meas_log.trace() - def read_shapes(self): - if self.have_read_shapes: - return - if not self.have_read_samples: - self.read_samples() - for sample in self.shapes.keys(): - self.read_shape(sample) - self.have_read_shapes = True + root_file.Close() + return root_histogram - @meas_log.trace() - def read_norms(self): - if self.have_read_norms: - return - if not self.have_read_samples: - self.read_samples() - for sample in self.norms.keys(): - self.read_norm(sample) - self.have_read_norms = True - @meas_log.trace() - def read_shape(self, sample): + def __background_subtraction(self, histograms): ''' - Shape from a Control Region (CR) is currently treated as: - - define process A for which you which to get the shape - - define CR - - subtract other processes from data in the CR - - normalise the result to process A in signal region - - replace process A in signal region with the new histogram + Subtracts the backgrounds from data to give amount of ttbar in data. 
+ Also adds all backgrounds to normalisation output ''' - measurement = self.shapes[sample] - shape = measurement.getCleanedShape(sample) - if sample in self.histograms.keys(): - n_shape = shape.Integral() - mc = self.histograms[sample] - n_mc = mc.Integral() - scale = 1 - if not n_shape == 0: - if not n_mc == 0: - scale = 1 / n_shape * n_mc - else: - scale = 1 / n_shape - shape.Scale(scale) - self.histograms[sample] = shape - else: - meas_log.warning( - 'No MC entry found for sample "{0}", using shape normalisation'.format(sample)) - self.histograms[sample] = shape + ttjet_hist = clean_control_region( + histograms, + subtract=['QCD', 'V+Jets', 'SingleTop'] + ) + self.normalisation['TTJet'] = hist_to_value_error_tuplelist(ttjet_hist) + self.normalisation['data'] = hist_to_value_error_tuplelist(histograms['data']) + # self.normalisation['TTBar'] = hist_to_value_error_tuplelist(histograms['TTBar']) + self.normalisation['SingleTop'] = hist_to_value_error_tuplelist(histograms['SingleTop']) + self.normalisation['V+Jets'] = hist_to_value_error_tuplelist(histograms['V+Jets']) + self.normalisation['QCD'] = hist_to_value_error_tuplelist(histograms['QCD']) + return - @meas_log.trace() - def read_norm(self, sample): + def calculate_normalisation(self): ''' - Normalisation from a Control Region (CR) is currently treated as: - - define normalisation for process A - - define CR - - subtract other processes from data in the CR - - calculate the ratio between process A and data (both in CR) - - apply ratio to process A in signal region + Calls the normalisation of the ttbar samples ''' - measurement = self.norms[sample] - self.aux_info_norms[sample] = {} - # get ratio from control region - norm = measurement.getCleanedShape(sample) - mc_in_control = measurement.histograms[sample] - # scale sample to this ratio - if sample in self.histograms.keys(): - n_data_control = norm.Integral() - n_mc_control = mc_in_control.Integral() - ratio = n_data_control / n_mc_control - 
meas_log.debug('Ratio from control region {0}'.format(ratio)) - n_mc_signal_region = self.histograms[sample].integral() - self.histograms[sample].Scale(ratio) - self.aux_info_norms[sample]['norm_factor'] = round(ratio, 2) - self.aux_info_norms[sample]['n_mc_control'] = n_mc_control - self.aux_info_norms[sample][ - 'n_mc_signal_region'] = n_mc_signal_region - self.aux_info_norms[sample]['n_data_control'] = n_data_control - else: - meas_log.warning( - 'No MC entry found for sample "{0}", using control region normalisation'.format(sample)) - self.histograms[sample] = norm - - -class Systematic(Measurement): + # normalisation already calculated + if self.is_normalised: return - ''' - The Systematic class is an extension of the Measurement class. - It allows to implement systematic specific functionality - (e.g. rate systematics). - ''' + histograms = self.histograms + self.__background_subtraction(histograms) - SHAPE = 10 - RATE = 20 + # next, let's round all numbers (they are event numbers after all) + for sample, values in self.normalisation.items(): + new_values = [(round(v, 1), round(e, 1)) for v, e in values] + self.normalisation[sample] = new_values + self.is_normalised = True + return - @meas_log.trace() - def __init__(self, name, - stype=SHAPE, - affected_samples=[], - scale=1.): + def save(self, phase_space): ''' - Constructor + Saves the normalisation output into a JSON. 
+ I would like to change this to a pandas Dataframe at somepoint after + a few issues have been worked out ''' - Measurement.__init__(self, name) - self.type = stype - - self.affected_samples = affected_samples - - self.scale = scale - - @meas_log.trace() - def toDict(self): - output = Measurement.toDict(self) - output['type'] = self.type - output['affected_samples'] = self.affected_samples - output['scale'] = self.scale + from dps.utils.pandas_utilities import write_tuple_to_df + from dps.utils.file_utilities import make_folder_if_not_exists + # If normalisation hasnt been calculated - then go calculate it! + if not self.is_normalised: self.calculate_normalisation() + + output_folder = 'data/normalisation/background_subtraction/{com}TeV/{var}/{ps}/{cat}/' + output_folder = output_folder.format( + com = self.com, + var = self.variable, + ps = phase_space, + cat = self.name, + ) + make_folder_if_not_exists(output_folder) + + file_template = '{type}_{channel}.txt' + f = file_template.format( + type='normalisation', + channel=self.channel + ) + + write_tuple_to_df( + self.normalisation, + output_folder + f + ) + return + + def combine(self, other): + ''' + Combines the electron and muon measurements + ''' + from dps.utils.Calculation import combine_complex_results + if not self.is_normalised or not other.is_normalised: + mylog.warn( + 'One of the TTJetNormalisations does not have a normalisation, aborting.') + return - return output + self.normalisation = combine_complex_results( + self.normalisation, other.normalisation) + self.channel = 'combined' + return diff --git a/dps/utils/pandas_utilities.py b/dps/utils/pandas_utilities.py index 291ab140..416d8b65 100644 --- a/dps/utils/pandas_utilities.py +++ b/dps/utils/pandas_utilities.py @@ -5,6 +5,7 @@ pd.set_option('display.max_colwidth', 4096) pd.set_option('display.max_rows', 50) pd.set_option('display.width', 1000) +pd.set_option('precision',12) def dict_to_df(d): ''' @@ -20,13 +21,18 @@ def list_to_series(l): s = 
pd.Series( l ) return s -def df_to_file(filename, df): +def df_to_file(filename, df, index=True): ''' Save a dataframe to an output text file Nicely human readable ''' + # Make the folder if it doesnt exist + import os + from dps.utils.file_utilities import make_folder_if_not_exists + make_folder_if_not_exists(os.path.dirname(filename)) + with open(filename,'w') as f: - df.to_string(f, index=True) + df.to_string(f, index=index) f.write('\n') print('DataFrame written to {}'.format(f)) f.close() @@ -64,4 +70,102 @@ def divide_by_series(s1, s2): Divide one series by another ''' s = s1.div(s2) - return s \ No newline at end of file + return s + +def tupleise_cols(vals, errs): + ''' + tupleising two cols + ''' + vals_errs = [ (v, e) for v,e in zip(vals, errs)] + return vals_errs + +def write_tuple_to_df( d_norm, filename ): + ''' + Writing tuples to a dataframe + + Takes a pandas dataframe of tuples of the form: + A | B + (v,e) | (v,e) + + Write a pandas output file of the form: + A | A_Unc | B | B_Unc + (v) | (e) | (v) | (e) + + ''' + # First create the dataframe + df = dict_to_df(d_norm) + + # pandas really cant handle reading in tuples. 
Have to split here + for col in df.columns: + df[[col, col+'_Unc']] = df[col].apply(pd.Series) + # Make columns alphabetical for easy reading + l=df.columns.tolist() + l.sort() + df = df[l] + + # Write dataframe + df_to_file(filename, df, index=False) + return + +def read_tuple_from_file( filename ): + ''' + Reading the output of 01 to a dataframe + + Reads a pandas output file of the form: + A | A_Unc | B | B_Unc + (v) | (e) | (v) | (e) + + Returns a pandas dataframe of the form: + A | B + (v,e) | (v,e) + + ''' + from dps.config.xsection import XSectionConfig + config = XSectionConfig(13) + + # First read the dataframe + df = file_to_df(filename) + l=df.columns.tolist() + + # Now to retupleise the columns + for sample in l: + if '_Unc' in sample: continue + vals = df[sample] + errs = df[sample+'_Unc'] + df[sample] = tupleise_cols(vals, errs) + del df[sample+'_Unc'] + return df + +def combine_complex_df( df1, df2 ): + ''' + Takes a 2 pandii dataframes of the form: + A | B A | B + (v,e) | (v,e) (v,e) | (v,e) + + Returns 1 pandas dataframe of the form + A | B + (v,e) | (v,e) + ''' + from uncertainties import ufloat + l1=df1.columns.tolist() + l2=df2.columns.tolist() + if l1 != l2: + print "Trying to combine two non compatible dataframes" + print l1 + print l2 + return + + combined_result = {} + for sample in l1: + results = [] + for entry1, entry2 in zip(df1[sample], df2[sample]): + v1 = ufloat(entry1[0], entry1[1]) + v2 = ufloat(entry2[0], entry2[1]) + s = v1 + v2 + results.append( ( s.nominal_value, s.std_dev ) ) + combined_result[sample] = results + df = dict_to_df(combined_result) + return df + + + diff --git a/dps/utils/systematic.py b/dps/utils/systematic.py index 89f43dc4..89c1c9b7 100644 --- a/dps/utils/systematic.py +++ b/dps/utils/systematic.py @@ -1,7 +1,7 @@ from __future__ import division, print_function -from dps.utils.file_utilities import read_data_from_JSON, write_data_to_JSON, deprecated +from dps.utils.file_utilities import write_data_to_JSON, 
deprecated from dps.utils.Calculation import combine_errors_in_quadrature -from dps.utils.pandas_utilities import dict_to_df, list_to_series, df_to_file, divide_by_series +from dps.utils.pandas_utilities import read_tuple_from_file, dict_to_df, list_to_series, df_to_file, divide_by_series from copy import deepcopy from math import sqrt import numpy as np @@ -12,13 +12,13 @@ def write_normalised_xsection_measurement(options, measurement, measurement_unfo [Central Value, Lower Systemtic, Upper Systematic] to a json. Different combinations of systematic uncertainty are stored as different json by appending different 'summary' ''' - path_to_JSON=options['path_to_JSON'] + path_to_DF=options['path_to_DF'] method=options['method'] channel=options['channel'] - output_file = '{path_to_JSON}/central/normalised_xsection_{channel}_{method}_with_errors.txt' + output_file = '{path_to_DF}/central/xsection_normalised_{channel}_{method}_with_errors.txt' output_file = output_file.format( - path_to_JSON = path_to_JSON, + path_to_DF = path_to_DF, channel = channel, method = method, ) @@ -36,13 +36,13 @@ def write_systematic_xsection_measurement(options, systematic, total_syst, summa ''' Write systematics to a df. 
''' - path_to_JSON=options['path_to_JSON'] + path_to_DF=options['path_to_DF'] method=options['method'] channel=options['channel'] - output_file = '{path_to_JSON}/central/normalised_xsection_{channel}_{method}_summary_absolute.txt' + output_file = '{path_to_DF}/central/xsection_normalised_{channel}_{method}_summary_absolute.txt' output_file = output_file.format( - path_to_JSON = path_to_JSON, + path_to_DF = path_to_DF, channel = channel, method = method, ) @@ -66,7 +66,7 @@ def write_systematic_xsection_measurement(options, systematic, total_syst, summa d_abs = dict_to_df(all_uncertainties) df_to_file(output_file, d_abs) - # Create Relative Paths + # Create Relative Uncertainties output_file = output_file.replace('absolute', 'relative') for uncertainty, vals in all_uncertainties.iteritems(): if uncertainty == 'central': continue @@ -97,27 +97,27 @@ def read_normalised_xsection_measurement(options, category): variable=options['variable'] variables_no_met=options['variables_no_met'] met_specific_systematics=options['met_specific_systematics'] - path_to_JSON=options['path_to_JSON'] + path_to_DF=options['path_to_DF'] method=options['method'] channel=options['channel'] - filename = '{path}/{category}/normalised_xsection_{channel}_{method}.txt' + filename = '{path}/{category}/xsection_normalised_{channel}_{method}.txt' # Disregarding Met Uncertainties if variable does not use MET if (category in met_specific_systematics) and (variable in variables_no_met): filename = filename.format( - path = path_to_JSON, + path = path_to_DF, channel = channel, category = 'central', method = method, ) else: filename = filename.format( - path = path_to_JSON, + path = path_to_DF, channel = channel, category = category, method = method ) - normalised_xsection = read_data_from_JSON( filename ) - measurement = normalised_xsection['TTJet_measured']#should this be measured without fakes??? 
+ normalised_xsection = read_tuple_from_file( filename ) + measurement = normalised_xsection['TTJet_measured_withoutFakes'] measurement_unfolded = normalised_xsection['TTJet_unfolded'] return measurement, measurement_unfolded @@ -288,7 +288,7 @@ def calculate_total_PDFuncertainty(options, central_measurement, pdf_uncertainty return pdf_sym -def get_symmetrised_systematic_uncertainty(norm_syst_unc_x_secs, options): +def get_symmetrised_systematic_uncertainty(options, norm_syst_unc_x_secs ): ''' Returns the symmetrised uncertainties on the normalised cross sections. @@ -346,15 +346,15 @@ def get_symmetrised_systematic_uncertainty(norm_syst_unc_x_secs, options): normalised_x_sections_with_symmetrised_systematics['BJet'][0] = bJet_tot # Combine PDF with alphaS variations - # alphaS = normalised_x_sections_with_symmetrised_systematics['TTJets_alphaS'][0] - # pdf = normalised_x_sections_with_symmetrised_systematics['PDF'][0] - # pdf_tot = [combine_errors_in_quadrature([e1, e2]) for e1, e2 in zip(alphaS, pdf)] - # normalised_x_sections_with_symmetrised_systematics['PDF'][0] = pdf_tot + alphaS = normalised_x_sections_with_symmetrised_systematics['TTJets_alphaS'][0] + pdf = normalised_x_sections_with_symmetrised_systematics['PDF'][0] + pdf_tot = [combine_errors_in_quadrature([e1, e2]) for e1, e2 in zip(alphaS, pdf)] + normalised_x_sections_with_symmetrised_systematics['PDF'][0] = pdf_tot # TODO combine the signs.... # Now alphaS is combined with pdfs dont need it in dictionary anymore. 
nor LightJet del normalised_x_sections_with_symmetrised_systematics['LightJet'] - # del normalised_x_sections_with_symmetrised_systematics['TTJets_alphaS'] + del normalised_x_sections_with_symmetrised_systematics['TTJets_alphaS'] return normalised_x_sections_with_symmetrised_systematics @@ -511,6 +511,7 @@ def make_covariance_plot( options, systematic, matrix, label='Covariance' ): Saves to plots/covariance_matrices/{PhaseSpace}/{Channel}/{Variable}/ ''' from dps.config.variable_binning import bin_edges_vis + from dps.utils.file_utilities import make_folder_if_not_exists from ROOT import TH2F, TCanvas, TPad, gROOT, gStyle from array import array gROOT.SetBatch(True) @@ -518,9 +519,18 @@ def make_covariance_plot( options, systematic, matrix, label='Covariance' ): variable = options['variable'] channel = options['channel'] - covariance_matrix_output_path = options['covariance_matrix_output_path'] + phase_space = options['phase_space'] - x_binning = array ( 'f' , bin_edges_vis[variable] ) + # Output folder of covariance matrices + covariance_matrix_output_path = 'plots/covariance_matrices/{phase_space}/{channel}/{variable}/' + covariance_matrix_output_path = covariance_matrix_output_path.format( + variable = variable, + channel = channel, + phase_space = phase_space, + ) + make_folder_if_not_exists(covariance_matrix_output_path) + + x_binning = array ( 'f', bin_edges_vis[variable] ) y_binning = array ( 'f', bin_edges_vis[variable] ) n_xbins = len( x_binning ) - 1 n_ybins = len( y_binning ) - 1