diff --git a/doc/cpptraj.lyx b/doc/cpptraj.lyx index 236aef1bae..b99e66b481 100644 --- a/doc/cpptraj.lyx +++ b/doc/cpptraj.lyx @@ -1230,6 +1230,10 @@ watershell gist \end_layout +\begin_layout LyX-Code +radial +\end_layout + \begin_layout Section General Concepts \end_layout @@ -35041,6 +35045,14 @@ Note that correct normalization of the RDF depends on the number of atoms be off. \end_layout +\begin_layout Standard +The basic (i.e. + no center1/center2/byres1/byres2/bymol1/bymol2) RDF calculations are now + CUDA parallelized. + However, the calculation is done in single-precision on GPUs so the resulting + histograms may differ slightly from the CPU (on the order of 0.0002 - 0.0004). +\end_layout + \begin_layout Subsection randomizeions \end_layout diff --git a/src/Action_Radial.cpp b/src/Action_Radial.cpp index 30cb36cf91..8ce1fcbf61 100644 --- a/src/Action_Radial.cpp +++ b/src/Action_Radial.cpp @@ -7,6 +7,10 @@ #ifdef _OPENMP # include #endif +#ifdef CUDA +# include "Gpu.h" +# include "cuda_kernels/kernel_rdf.cuh" +#endif // CONSTRUCTOR Action_Radial::Action_Radial() : @@ -17,6 +21,7 @@ Action_Radial::Action_Radial() : currentParm_(0), intramol_distances_(0), useVolume_(false), + mask2_is_mask1_(false), volume_(0), maximum2_(0), spacing_(-1), @@ -119,6 +124,10 @@ Action::RetType Action_Radial::Init(ArgList& actionArgs, ActionInit& init, int d Help(); return Action::ERR; } + if (spacing_ > maximum) { + mprinterr("Error: Bin spacing %g is larger than the maximum %g\n", spacing_, maximum); + return Action::ERR; + } // Store max^2, distances^2 greater than max^2 do not need to be // binned and therefore do not need a sqrt calc. maximum2_ = maximum * maximum; @@ -132,12 +141,14 @@ Action::RetType Action_Radial::Init(ArgList& actionArgs, ActionInit& init, int d if (Mask1_.SetMaskString(mask1)) return Action::ERR; // Check for second mask - if none specified use first mask + mask2_is_mask1_ = false; if (needMask2) { std::string mask2 = actionArgs.GetMaskNext(); if (!mask2.empty()) { if (Mask2_.SetMaskString(mask2)) return Action::ERR; } else { if (Mask2_.SetMaskString(mask1)) return Action::ERR; + mask2_is_mask1_ = true; } } // If filename not yet specified check for backwards compat. @@ -257,6 +268,11 @@ Action::RetType Action_Radial::Init(ArgList& actionArgs, ActionInit& init, int d mprintf("\tImaging disabled.\n"); if (numthreads_ > 1) mprintf("\tParallelizing RDF calculation with %i threads.\n",numthreads_); +#ifdef CUDA + if (rmode_ == NORMAL) { + mprintf("\tRDF calculation will be accelerated with CUDA.\n"); + } +#endif return Action::OK; } @@ -439,6 +455,89 @@ Action::RetType Action_Radial::Setup(ActionSetup& setup) { return Action::OK; } +#ifdef CUDA +/** Place coords for selected atoms into arrays. */ +static inline std::vector mask_to_xyz(AtomMask const& Mask, Frame const& frm) +{ + std::vector outerxyz; + outerxyz.reserve( Mask.Nselected()*3 ); + for (AtomMask::const_iterator at = Mask.begin(); at != Mask.end(); ++at) { + const double* xyz = frm.XYZ( *at ); + outerxyz.push_back( (CpptrajGpu::FpType)xyz[0] ); + outerxyz.push_back( (CpptrajGpu::FpType)xyz[1] ); + outerxyz.push_back( (CpptrajGpu::FpType)xyz[2] ); + } + return outerxyz; +} +#endif + +void Action_Radial::calcRDF_singleMask(Frame const& frmIn) { + int outer_max = OuterMask_.Nselected(); + int idx1; + + long unsigned* myRDF = &RDF_[0]; +# ifdef _OPENMP +# pragma omp parallel private(idx1, myRDF) + { + //mprintf("OPENMP: %i threads\n",omp_get_num_threads()); + myRDF = &(rdf_thread_[omp_get_thread_num()][0]); +# pragma omp for schedule(dynamic) +# endif + for (idx1 = 0; idx1 < outer_max; idx1++) { + const double* xyz1 = frmIn.XYZ( OuterMask_[idx1] ); + for (int idx2 = idx1 + 1; idx2 < outer_max; idx2++) { + const double* xyz2 = frmIn.XYZ( OuterMask_[idx2] ); + double D2 = DIST2( imageOpt_.ImagingType(), xyz1, xyz2, frmIn.BoxCrd() ); + if (D2 <= maximum2_) { + // NOTE: Can we modify the histogram to store D^2? + double dist = sqrt(D2); + //mprintf("MASKLOOP: %10i %10i %10.4f\n",atom1,atom2,D); + int idx = (int) (dist * one_over_spacing_); + //if (idx > -1 && idx < numBins_) { + myRDF[idx] += 2; + //} + } + } // END inner loop + } // END outer loop +# ifdef _OPENMP + } // END pragma omp parallel +# endif +} + +void Action_Radial::calcRDF_twoMask(Frame const& frmIn) { + int outer_max = OuterMask_.Nselected(); + int inner_max = InnerMask_.Nselected(); + int idx1; + + long unsigned* myRDF = &RDF_[0]; +# ifdef _OPENMP +# pragma omp parallel private(idx1, myRDF) + { + //mprintf("OPENMP: %i threads\n",omp_get_num_threads()); + myRDF = &(rdf_thread_[omp_get_thread_num()][0]); +# pragma omp for +# endif + for (idx1 = 0; idx1 < outer_max; idx1++) { + const double* xyz1 = frmIn.XYZ( OuterMask_[idx1] ); + for (int idx2 = 0; idx2 < inner_max; idx2++) { + const double* xyz2 = frmIn.XYZ( InnerMask_[idx2] ); + double D2 = DIST2( imageOpt_.ImagingType(), xyz1, xyz2, frmIn.BoxCrd() ); + if (D2 <= maximum2_) { + // NOTE: Can we modify the histogram to store D^2? + double dist = sqrt(D2); + //mprintf("MASKLOOP: %10i %10i %10.4f\n",atom1,atom2,D); + int idx = (int) (dist * one_over_spacing_); + //if (idx > -1 && idx < numBins_) { + myRDF[idx]++; + //} + } + } // END inner loop + } // END outer loop +# ifdef _OPENMP + } // END pragma omp parallel +# endif +} + // Action_Radial::DoAction() /** Calculate distances from atoms in mask1 to atoms in mask 2 and * bin them. @@ -458,41 +557,40 @@ Action::RetType Action_Radial::DoAction(int frameNum, ActionFrame& frm) { if (useVolume_) volume_ += frm.Frm().BoxCrd().CellVolume(); // --------------------------------------------- - if ( rmode_ == NORMAL ) { + if ( rmode_ == NORMAL ) { +#ifdef CUDA + // Copy atoms for GPU + std::vector outerxyz = mask_to_xyz(OuterMask_, frm.Frm()); + const CpptrajGpu::FpType* outerxyzPtr = &outerxyz[0]; + std::vector innerxyz; + const CpptrajGpu::FpType* innerxyzPtr = 0; + if (!mask2_is_mask1_) { + innerxyz = mask_to_xyz(InnerMask_, frm.Frm()); + innerxyzPtr = &innerxyz[0]; + } + CpptrajGpu::FpType gpu_box[3]; + gpu_box[0] = (CpptrajGpu::FpType)frm.Frm().BoxCrd().Param(Box::X); + gpu_box[1] = (CpptrajGpu::FpType)frm.Frm().BoxCrd().Param(Box::Y); + gpu_box[2] = (CpptrajGpu::FpType)frm.Frm().BoxCrd().Param(Box::Z); + CpptrajGpu::FpType gpu_ucell[9], gpu_frac[9]; + for (int ibox = 0; ibox != 9; ibox++) { + gpu_ucell[ibox] = (CpptrajGpu::FpType)frm.Frm().BoxCrd().UnitCell()[ibox]; + gpu_frac[ibox] = (CpptrajGpu::FpType)frm.Frm().BoxCrd().FracCell()[ibox]; + } + Cpptraj_GPU_RDF( &RDF_[0], RDF_.size(), maximum2_, one_over_spacing_, + outerxyzPtr, OuterMask_.Nselected(), + innerxyzPtr, InnerMask_.Nselected(), + imageOpt_.ImagingType(), + gpu_box, + gpu_ucell, + gpu_frac ); +#else /* CUDA */ // Calculation of all atoms in Mask1 to all atoms in Mask2 - int outer_max = OuterMask_.Nselected(); - int inner_max = InnerMask_.Nselected(); -# ifdef _OPENMP -# pragma omp parallel private(nmask1,nmask2,atom1,atom2,D,idx,mythread) - { - //mprintf("OPENMP: %i threads\n",omp_get_num_threads()); - mythread = omp_get_thread_num(); -# pragma omp for -# endif - for (nmask1 = 0; nmask1 < outer_max; nmask1++) { - atom1 = OuterMask_[nmask1]; - for (nmask2 = 0; nmask2 < inner_max; nmask2++) { - atom2 = InnerMask_[nmask2]; - if (atom1 != atom2) { - D = DIST2( imageOpt_.ImagingType(), frm.Frm().XYZ(atom1), frm.Frm().XYZ(atom2), frm.Frm().BoxCrd() ); - if (D <= maximum2_) { - // NOTE: Can we modify the histogram to store D^2? - D = sqrt(D); - //mprintf("MASKLOOP: %10i %10i %10.4f\n",atom1,atom2,D); - idx = (int) (D * one_over_spacing_); - if (idx > -1 && idx < numBins_) -# ifdef _OPENMP - ++rdf_thread_[mythread][idx]; -# else - ++RDF_[idx]; -# endif - } - } - } // END loop over 2nd mask - } // END loop over 1st mask -# ifdef _OPENMP - } // END pragma omp parallel -# endif + if (mask2_is_mask1_) + calcRDF_singleMask( frm.Frm() ); + else + calcRDF_twoMask( frm.Frm() ); +#endif /* CUDA */ // --------------------------------------------- } else if ( rmode_ == NO_INTRAMOL ) { // Calculation of all atoms in Mask1 to all atoms in Mask2, ignoring diff --git a/src/Action_Radial.h b/src/Action_Radial.h index 7982d7c636..dd58b6eaa2 100644 --- a/src/Action_Radial.h +++ b/src/Action_Radial.h @@ -22,6 +22,10 @@ class Action_Radial: public Action { # endif typedef std::vector Iarray; + void calcRDF_singleMask(Frame const&); + + void calcRDF_twoMask(Frame const&); + # ifdef _OPENMP bool threadsCombined_; ///< True if CombineRdfThreads() has been called. # endif @@ -43,6 +47,7 @@ class Action_Radial: public Action { Topology* currentParm_; ///< Current topology, needed for NO_INTERMOL int intramol_distances_; ///< # of intra-molecular distances for NO_INTERMOL. bool useVolume_; ///< If true normalize based on input volume. + bool mask2_is_mask1_; ///< True is mask1 and mask2 are the same. double volume_; ///< Hold sum of volume for averaging. double maximum2_; ///< Largest distance squared that can be binned. double spacing_; ///< Bin spacing. diff --git a/src/Cpptraj.cpp b/src/Cpptraj.cpp index 5d94b8dacb..a740122c9a 100644 --- a/src/Cpptraj.cpp +++ b/src/Cpptraj.cpp @@ -8,6 +8,7 @@ #include "Cpptraj.h" #include "CpptrajStdio.h" #include "Command.h" +#include "Gpu.h" #include "ReadLine.h" #include "Version.h" #include "ParmFile.h" // ProcessMask @@ -122,6 +123,16 @@ void Cpptraj::Intro() const { mprintf("| CUDA device: %s\n", deviceProp.name); mprintf("| Available GPU memory: %s\n", ByteString(deviceProp.totalGlobalMem, BYTE_DECIMAL).c_str()); + mprintf("| Compute capability: %i.%i\n", deviceProp.major, deviceProp.minor); + CpptrajGpu::SetComputeVersion( deviceProp.major ); + if (State_.Debug() > 0) { + mprintf("| Max threads/block: %i\n", deviceProp.maxThreadsPerBlock); + mprintf("| Max threads dim: %i %i %i\n", + deviceProp.maxThreadsDim[0], + deviceProp.maxThreadsDim[1], + deviceProp.maxThreadsDim[2]); + mprintf("| Max threads/multiprocessor: %i\n", deviceProp.maxThreadsPerMultiProcessor); + } } } # endif diff --git a/src/Gpu.cpp b/src/Gpu.cpp new file mode 100644 index 0000000000..f76bce9e10 --- /dev/null +++ b/src/Gpu.cpp @@ -0,0 +1,15 @@ +#include "Gpu.h" + +static unsigned int cpptraj_gpu_major_ = 0; + +void CpptrajGpu::SetComputeVersion(int major) { + if (major > -1) + cpptraj_gpu_major_ = major; +} + +unsigned int CpptrajGpu::MaxBlockDim_2D() { + if (cpptraj_gpu_major_ < 4) + return 16; + else + return 32; +} diff --git a/src/Gpu.h b/src/Gpu.h new file mode 100644 index 0000000000..a637fbca64 --- /dev/null +++ b/src/Gpu.h @@ -0,0 +1,11 @@ +#ifndef INC_CPPTRAJ_GPU_H +#define INC_CPPTRAJ_GPU_H +namespace CpptrajGpu { +/// Set the major CUDA compute version number +void SetComputeVersion(int); +/// \return Max block dimensions if using 2D blocks +unsigned int MaxBlockDim_2D(); +/// Default floating point type +typedef float FpType; +} +#endif diff --git a/src/Makefile b/src/Makefile index b5cd4fb95a..d5384d97ca 100644 --- a/src/Makefile +++ b/src/Makefile @@ -127,7 +127,7 @@ noarpack: @echo "Skipping bundled ARPACK build" @echo "" -cuda_kernels/libcpptraj_cuda.a: +cuda_kernels/libcpptraj_cuda.a:: cd cuda_kernels && $(MAKE) all # Dependency targets diff --git a/src/Version.h b/src/Version.h index 47ba1d64ec..965a5cf0d2 100644 --- a/src/Version.h +++ b/src/Version.h @@ -12,7 +12,7 @@ * Whenever a number that precedes is incremented, all subsequent * numbers should be reset to 0. */ -#define CPPTRAJ_INTERNAL_VERSION "V6.10.0" +#define CPPTRAJ_INTERNAL_VERSION "V6.11.0" /// PYTRAJ relies on this #define CPPTRAJ_VERSION_STRING CPPTRAJ_INTERNAL_VERSION #endif diff --git a/src/cpptrajdepend b/src/cpptrajdepend index 8ad033e732..c68e2f9e36 100644 --- a/src/cpptrajdepend +++ b/src/cpptrajdepend @@ -64,7 +64,7 @@ Action_Principal.o : Action_Principal.cpp Action.h ActionState.h Action_Principa Action_Projection.o : Action_Projection.cpp Action.h ActionFrameCounter.h ActionState.h Action_Projection.h ArgList.h Array1D.h ArrayIterator.h AssociatedData.h Atom.h AtomMask.h AtomType.h BaseIOtype.h Box.h Constants.h CoordinateInfo.h CpptrajFile.h CpptrajStdio.h DataFile.h DataFileList.h DataSet.h DataSetList.h DataSet_1D.h DataSet_2D.h DataSet_Coords.h DataSet_Coords_REF.h DataSet_MatrixDbl.h DataSet_Modes.h Dimension.h DispatchObject.h FileIO.h FileName.h FileTypes.h Frame.h MaskToken.h Matrix.h Matrix_3x3.h MetaData.h Molecule.h NameType.h Parallel.h ParameterHolders.h ParameterSet.h ParameterTypes.h Range.h ReferenceFrame.h ReplicaDimArray.h Residue.h Segment.h StringRoutines.h SymbolExporting.h TextFormat.h Timer.h Topology.h TypeNameHolder.h Unit.h Vec3.h Action_Pucker.o : Action_Pucker.cpp Action.h ActionState.h Action_Pucker.h ArgList.h AssociatedData.h Atom.h AtomMask.h AtomType.h BaseIOtype.h Box.h Constants.h CoordinateInfo.h CpptrajFile.h CpptrajStdio.h DataFile.h DataFileList.h DataSet.h DataSetList.h DataSet_Coords.h DataSet_Coords_REF.h Dimension.h DispatchObject.h FileIO.h FileName.h FileTypes.h Frame.h MaskToken.h Matrix_3x3.h MetaData.h Molecule.h NameType.h Parallel.h ParameterHolders.h ParameterSet.h ParameterTypes.h Range.h ReferenceFrame.h ReplicaDimArray.h Residue.h Segment.h SymbolExporting.h TextFormat.h Timer.h Topology.h TorsionRoutines.h TypeNameHolder.h Unit.h Vec3.h Action_Radgyr.o : Action_Radgyr.cpp Action.h ActionState.h Action_Radgyr.h ArgList.h AssociatedData.h Atom.h AtomMask.h AtomType.h BaseIOtype.h Box.h Constants.h CoordinateInfo.h CpptrajFile.h CpptrajStdio.h DataFile.h DataFileList.h DataSet.h DataSetList.h DataSet_Coords.h DataSet_Coords_REF.h Dimension.h DispatchObject.h FileIO.h FileName.h FileTypes.h Frame.h MaskToken.h Matrix_3x3.h MetaData.h Molecule.h NameType.h Parallel.h ParameterHolders.h ParameterSet.h ParameterTypes.h Range.h ReferenceFrame.h ReplicaDimArray.h Residue.h Segment.h SymbolExporting.h TextFormat.h Timer.h Topology.h TypeNameHolder.h Unit.h Vec3.h -Action_Radial.o : Action_Radial.cpp Action.h ActionState.h Action_Radial.h ArgList.h AssociatedData.h Atom.h AtomMask.h AtomType.h BaseIOtype.h Box.h Constants.h CoordinateInfo.h CpptrajFile.h CpptrajStdio.h DataFile.h DataFileList.h DataSet.h DataSetList.h DataSet_Coords.h DataSet_Coords_REF.h Dimension.h DispatchObject.h DistRoutines.h FileIO.h FileName.h FileTypes.h Frame.h ImageOption.h MaskToken.h Matrix_3x3.h MetaData.h Molecule.h NameType.h Parallel.h ParameterHolders.h ParameterSet.h ParameterTypes.h Range.h ReferenceFrame.h ReplicaDimArray.h Residue.h Segment.h SymbolExporting.h TextFormat.h Timer.h Topology.h TypeNameHolder.h Unit.h Vec3.h +Action_Radial.o : Action_Radial.cpp Action.h ActionState.h Action_Radial.h ArgList.h AssociatedData.h Atom.h AtomMask.h AtomType.h BaseIOtype.h Box.h Constants.h CoordinateInfo.h CpptrajFile.h CpptrajStdio.h DataFile.h DataFileList.h DataSet.h DataSetList.h DataSet_Coords.h DataSet_Coords_REF.h Dimension.h DispatchObject.h DistRoutines.h FileIO.h FileName.h FileTypes.h Frame.h Gpu.h ImageOption.h MaskToken.h Matrix_3x3.h MetaData.h Molecule.h NameType.h Parallel.h ParameterHolders.h ParameterSet.h ParameterTypes.h Range.h ReferenceFrame.h ReplicaDimArray.h Residue.h Segment.h SymbolExporting.h TextFormat.h Timer.h Topology.h TypeNameHolder.h Unit.h Vec3.h cuda_kernels/kernel_rdf.cuh Action_RandomizeIons.o : Action_RandomizeIons.cpp Action.h ActionState.h Action_RandomizeIons.h ArgList.h AssociatedData.h Atom.h AtomMask.h AtomType.h BaseIOtype.h Box.h Constants.h CoordinateInfo.h CpptrajFile.h CpptrajStdio.h DataFile.h DataFileList.h DataSet.h DataSetList.h DataSet_Coords.h DataSet_Coords_REF.h Dimension.h DispatchObject.h DistRoutines.h FileIO.h FileName.h FileTypes.h Frame.h ImageOption.h MaskToken.h Matrix_3x3.h MetaData.h Molecule.h NameType.h Parallel.h ParameterHolders.h ParameterSet.h ParameterTypes.h Random.h Range.h ReferenceFrame.h ReplicaDimArray.h Residue.h Segment.h SymbolExporting.h TextFormat.h Timer.h Topology.h TypeNameHolder.h Unit.h Vec3.h Action_Remap.o : Action_Remap.cpp Action.h ActionState.h ActionTopWriter.h Action_Remap.h ArgList.h AssociatedData.h Atom.h AtomMask.h AtomType.h BaseIOtype.h Box.h Constants.h CoordinateInfo.h CpptrajFile.h CpptrajStdio.h DataFile.h DataFileList.h DataSet.h DataSetList.h DataSet_1D.h DataSet_Coords.h DataSet_Coords_REF.h Dimension.h DispatchObject.h FileIO.h FileName.h FileTypes.h Frame.h MaskToken.h Matrix_3x3.h MetaData.h Molecule.h NameType.h Parallel.h ParameterHolders.h ParameterSet.h ParameterTypes.h Range.h ReferenceFrame.h ReplicaDimArray.h Residue.h Segment.h SymbolExporting.h TextFormat.h Timer.h Topology.h TypeNameHolder.h Unit.h Vec3.h Action_ReplicateCell.o : Action_ReplicateCell.cpp Action.h ActionFrameCounter.h ActionState.h ActionTopWriter.h Action_ReplicateCell.h ArgList.h AssociatedData.h Atom.h AtomMask.h AtomType.h BaseIOtype.h Box.h Constants.h CoordinateInfo.h CpptrajFile.h CpptrajStdio.h DataFile.h DataFileList.h DataSet.h DataSetList.h DataSet_Coords.h DataSet_Coords_REF.h Dimension.h DispatchObject.h FileIO.h FileName.h FileTypes.h Frame.h MaskToken.h Matrix_3x3.h MetaData.h Molecule.h NameType.h OutputTrajCommon.h Parallel.h ParameterHolders.h ParameterSet.h ParameterTypes.h Range.h ReferenceFrame.h ReplicaDimArray.h Residue.h Segment.h SymbolExporting.h TextFormat.h Timer.h Topology.h TrajectoryFile.h Trajout_Single.h TypeNameHolder.h Unit.h Vec3.h @@ -188,7 +188,7 @@ ControlBlock_For.o : ControlBlock_For.cpp Action.h ActionList.h ActionState.h An CoordinateInfo.o : CoordinateInfo.cpp Box.h CoordinateInfo.h CpptrajStdio.h Matrix_3x3.h Parallel.h ReplicaDimArray.h Vec3.h Corr.o : Corr.cpp ArrayIterator.h ComplexArray.h Corr.h PubFFT.h Cph.o : Cph.cpp Cph.h NameType.h -Cpptraj.o : Cpptraj.cpp Action.h ActionList.h ActionState.h Analysis.h AnalysisList.h AnalysisState.h ArgList.h AssociatedData.h Atom.h AtomMask.h AtomType.h BaseIOtype.h Box.h Cmd.h CmdInput.h CmdList.h Command.h Constants.h CoordinateInfo.h Cpptraj.h CpptrajFile.h CpptrajState.h CpptrajStdio.h DataFile.h DataFileList.h DataSet.h DataSetList.h DataSet_Coords.h DataSet_Coords_REF.h Dimension.h DispatchObject.h EnsembleIn.h EnsembleOutList.h FileIO.h FileName.h FileTypes.h Frame.h FramePtrArray.h HipDefinitions.h InputTrajCommon.h MaskToken.h Matrix_3x3.h MetaData.h Molecule.h NameType.h Parallel.h ParameterHolders.h ParameterSet.h ParameterTypes.h ParmFile.h Range.h ReadLine.h ReferenceFrame.h ReplicaDimArray.h ReplicaInfo.h Residue.h Segment.h StringRoutines.h SymbolExporting.h TextFormat.h Timer.h TopInfo.h Topology.h TrajFrameCounter.h TrajectoryFile.h Trajin.h TrajinList.h TrajoutList.h TypeNameHolder.h Unit.h Vec3.h Version.h +Cpptraj.o : Cpptraj.cpp Action.h ActionList.h ActionState.h Analysis.h AnalysisList.h AnalysisState.h ArgList.h AssociatedData.h Atom.h AtomMask.h AtomType.h BaseIOtype.h Box.h Cmd.h CmdInput.h CmdList.h Command.h Constants.h CoordinateInfo.h Cpptraj.h CpptrajFile.h CpptrajState.h CpptrajStdio.h DataFile.h DataFileList.h DataSet.h DataSetList.h DataSet_Coords.h DataSet_Coords_REF.h Dimension.h DispatchObject.h EnsembleIn.h EnsembleOutList.h FileIO.h FileName.h FileTypes.h Frame.h FramePtrArray.h Gpu.h HipDefinitions.h InputTrajCommon.h MaskToken.h Matrix_3x3.h MetaData.h Molecule.h NameType.h Parallel.h ParameterHolders.h ParameterSet.h ParameterTypes.h ParmFile.h Range.h ReadLine.h ReferenceFrame.h ReplicaDimArray.h ReplicaInfo.h Residue.h Segment.h StringRoutines.h SymbolExporting.h TextFormat.h Timer.h TopInfo.h Topology.h TrajFrameCounter.h TrajectoryFile.h Trajin.h TrajinList.h TrajoutList.h TypeNameHolder.h Unit.h Vec3.h Version.h CpptrajFile.o : CpptrajFile.cpp CpptrajFile.h CpptrajStdio.h FileIO.h FileIO_Bzip2.h FileIO_Gzip.h FileIO_Mpi.h FileIO_MpiShared.h FileIO_Std.h FileName.h Parallel.h StringRoutines.h CpptrajState.o : CpptrajState.cpp Action.h ActionList.h ActionState.h Action_CreateCrd.h Analysis.h AnalysisList.h AnalysisState.h ArgList.h AssociatedData.h Atom.h AtomMask.h AtomType.h BaseIOtype.h Box.h CompactFrameArray.h Constants.h CoordinateInfo.h CpptrajFile.h CpptrajState.h CpptrajStdio.h DataFile.h DataFileList.h DataSet.h DataSetList.h DataSet_Coords.h DataSet_Coords_CRD.h DataSet_Coords_REF.h DataSet_Coords_TRJ.h DataSet_Topology.h Dimension.h DispatchObject.h EnsembleIn.h EnsembleNavigator.h EnsembleOutList.h FileIO.h FileName.h FileTypes.h Frame.h FrameArray.h FramePtrArray.h InputTrajCommon.h MaskToken.h Matrix_3x3.h MetaData.h Molecule.h NameType.h Parallel.h ParameterHolders.h ParameterSet.h ParameterTypes.h ProgressBar.h Random.h Range.h ReferenceFrame.h ReplicaDimArray.h ReplicaInfo.h Residue.h Segment.h SymbolExporting.h TextFormat.h Timer.h Topology.h TrajFrameCounter.h TrajFrameIndex.h Trajin.h TrajinList.h TrajoutList.h TypeNameHolder.h Unit.h Vec3.h CpptrajStdio.o : CpptrajStdio.cpp Parallel.h @@ -338,6 +338,7 @@ ForLoop_overSets.o : ForLoop_overSets.cpp Action.h ActionList.h ActionState.h An Frame.o : Frame.cpp Atom.h AtomMask.h Box.h Constants.h CoordinateInfo.h CpptrajStdio.h Frame.h MaskToken.h Matrix_3x3.h Molecule.h NameType.h Parallel.h ReplicaDimArray.h Residue.h Segment.h SymbolExporting.h Unit.h Vec3.h GIST_PME.o : GIST_PME.cpp Atom.h AtomMask.h AtomType.h Box.h Constants.h CoordinateInfo.h CpptrajStdio.h EnergyKernel_Adjust.h EnergyKernel_Nonbond.h Ewald.h Ewald_ParticleMesh.h ExclusionArray.h FileName.h Frame.h GIST_PME.h MaskToken.h Matrix_3x3.h Molecule.h NameType.h PairList.h Parallel.h ParameterHolders.h ParameterSet.h ParameterTypes.h Range.h ReplicaDimArray.h Residue.h Segment.h SplineFxnTable.h SymbolExporting.h Timer.h Topology.h TypeNameHolder.h Unit.h Vec3.h helpme_standalone.h GistEntropyUtils.o : GistEntropyUtils.cpp GistEntropyUtils.h Vec3.h +Gpu.o : Gpu.cpp Gpu.h GridAction.o : GridAction.cpp ArgList.h ArrayIterator.h AssociatedData.h Atom.h AtomMask.h AtomType.h Box.h Constants.h CoordinateInfo.h CpptrajFile.h CpptrajStdio.h DataSet.h DataSetList.h DataSet_3D.h DataSet_Coords.h DataSet_Coords_REF.h DataSet_GridFlt.h Dimension.h FileIO.h FileName.h Frame.h Grid.h GridAction.h GridBin.h MaskToken.h Matrix_3x3.h MetaData.h Molecule.h NameType.h Parallel.h ParameterHolders.h ParameterSet.h ParameterTypes.h Range.h ReferenceFrame.h ReplicaDimArray.h Residue.h Segment.h StringRoutines.h SymbolExporting.h TextFormat.h Timer.h Topology.h TypeNameHolder.h Unit.h Vec3.h GridBin.o : GridBin.cpp Box.h CpptrajStdio.h GridBin.h Matrix_3x3.h Parallel.h Vec3.h HistBin.o : HistBin.cpp Constants.h CpptrajStdio.h Dimension.h HistBin.h diff --git a/src/cpptrajfiles b/src/cpptrajfiles index 20c15abef5..2ce3ad56b7 100644 --- a/src/cpptrajfiles +++ b/src/cpptrajfiles @@ -309,6 +309,7 @@ COMMON_SOURCES= \ Frame.cpp \ GistEntropyUtils.cpp \ GIST_PME.cpp \ + Gpu.cpp \ GridAction.cpp \ GridBin.cpp \ HistBin.cpp \ diff --git a/src/cuda_kernels/CMakeLists.txt b/src/cuda_kernels/CMakeLists.txt index 5f305ce7f1..9d24d42906 100644 --- a/src/cuda_kernels/CMakeLists.txt +++ b/src/cuda_kernels/CMakeLists.txt @@ -1,4 +1,4 @@ -set(CPPTRAJ_CUDA_SOURCES core_kernels.cu kernel_wrappers.cu GistCudaCalc.cu GistCudaSetup.cu) +set(CPPTRAJ_CUDA_SOURCES core_kernels.cu kernel_wrappers.cu GistCudaCalc.cu GistCudaSetup.cu kernel_rdf.cu) cuda_add_library(cpptraj_cuda_routines STATIC ${CPPTRAJ_CUDA_SOURCES}) #make_pic_if_needed does not appear to work for NVCC. Instead, added diff --git a/src/cuda_kernels/Makefile b/src/cuda_kernels/Makefile index 68a05651d8..835f53ae01 100644 --- a/src/cuda_kernels/Makefile +++ b/src/cuda_kernels/Makefile @@ -13,7 +13,7 @@ TARGET = libcpptraj_cuda.a # Source files -CUDA_SOURCES=core_kernels.cu kernel_wrappers.cu GistCudaCalc.cu GistCudaSetup.cu +CUDA_SOURCES=core_kernels.cu kernel_wrappers.cu GistCudaCalc.cu GistCudaSetup.cu kernel_rdf.cu # Objects diff --git a/src/cuda_kernels/NonOrtho_dist2.cuh b/src/cuda_kernels/NonOrtho_dist2.cuh new file mode 100644 index 0000000000..a1826cae5f --- /dev/null +++ b/src/cuda_kernels/NonOrtho_dist2.cuh @@ -0,0 +1,277 @@ +#ifndef INC_NONORTHO_DIST2_CUH +#define INC_NONORTHO_DIST2_CUH +/** \return Shortest imaged distance^2 between given coordinates in fractional space. */ +template +__device__ T NonOrtho_dist2(T a0, T a1, T a2, + T b0, T b1, T b2, + const T *ucell) +{ + //int ixyz[3]; + T minIn = -1.0; + + //double closest2 + // The floor() calls serve to bring each point back in the main unit cell. + T fx = a0 - floor(a0); + T fy = a1 - floor(a1); + T fz = a2 - floor(a2); + T f2x = b0 - floor(b0); + T f2y = b1 - floor(b1); + T f2z = b2 - floor(b2); + // f2 back in Cartesian space + T X_factor = (f2x*ucell[0] + f2y*ucell[3] + f2z*ucell[6]); + T Y_factor = (f2x*ucell[1] + f2y*ucell[4] + f2z*ucell[7]); + T Z_factor = (f2x*ucell[2] + f2y*ucell[5] + f2z*ucell[8]); + // Precompute some factors + T fxm1 = fx - 1.0; + T fxp1 = fx + 1.0; + T fym1 = fy - 1.0; + T fyp1 = fy + 1.0; + T fzm1 = fz - 1.0; + T fzp1 = fz + 1.0; + + T fxm1u0 = fxm1 * ucell[0]; + T fxu0 = fx * ucell[0]; + T fxp1u0 = fxp1 * ucell[0]; + T fxm1u1 = fxm1 * ucell[1]; + T fxu1 = fx * ucell[1]; + T fxp1u1 = fxp1 * ucell[1]; + T fxm1u2 = fxm1 * ucell[2]; + T fxu2 = fx * ucell[2]; + T fxp1u2 = fxp1 * ucell[2]; + + T fym1u3 = fym1 * ucell[3]; + T fyu3 = fy * ucell[3]; + T fyp1u3 = fyp1 * ucell[3]; + T fym1u4 = fym1 * ucell[4]; + T fyu4 = fy * ucell[4]; + T fyp1u4 = fyp1 * ucell[4]; + T fym1u5 = fym1 * ucell[5]; + T fyu5 = fy * ucell[5]; + T fyp1u5 = fyp1 * ucell[5]; + + T fzm1u6 = fzm1 * ucell[6]; + T fzu6 = fz * ucell[6]; + T fzp1u6 = fzp1 * ucell[6]; + T fzm1u7 = fzm1 * ucell[7]; + T fzu7 = fz * ucell[7]; + T fzp1u7 = fzp1 * ucell[7]; + T fzm1u8 = fzm1 * ucell[8]; + T fzu8 = fz * ucell[8]; + T fzp1u8 = fzp1 * ucell[8]; + + // Calc ix iy iz = 0 case + T x = (fxu0 + fyu3 + fzu6) - X_factor; + T y = (fxu1 + fyu4 + fzu7) - Y_factor; + T z = (fxu2 + fyu5 + fzu8) - Z_factor; + // DEBUG + //mprintf("DEBUG: a2: %g %g %g\n",(fxu0 + fyu3 + fzu6), (fxu1 + fyu4 + fzu7), (fxu2 + fyu5 + fzu8)); + //mprintf("DEBUG: a1: %g %g %g\n", X_factor, Y_factor, Z_factor); + T min = (x*x) + (y*y) + (z*z); + + if (minIn > 0.0 && minIn < min) min = minIn; + + //ixyz[0] = 0; + //ixyz[1] = 0; + //ixyz[2] = 0; + + // -1 -1 -1 + x = (fxm1u0 + fym1u3 + fzm1u6) - X_factor; + y = (fxm1u1 + fym1u4 + fzm1u7) - Y_factor; + z = (fxm1u2 + fym1u5 + fzm1u8) - Z_factor; + T D = (x*x) + (y*y) + (z*z); + //if (D < min) { min = D; ixyz[0] = -1; ixyz[1] = -1; ixyz[2] = -1; } + if (D < min) min = D; + // -1 -1 0 + x = (fxm1u0 + fym1u3 + fzu6 ) - X_factor; + y = (fxm1u1 + fym1u4 + fzu7 ) - Y_factor; + z = (fxm1u2 + fym1u5 + fzu8 ) - Z_factor; + D = (x*x) + (y*y) + (z*z); + //if (D < min) { min = D; ixyz[0] = -1; ixyz[1] = -1; ixyz[2] = 0; } + if (D < min) min = D; + // -1 -1 +1 + x = (fxm1u0 + fym1u3 + fzp1u6) - X_factor; + y = (fxm1u1 + fym1u4 + fzp1u7) - Y_factor; + z = (fxm1u2 + fym1u5 + fzp1u8) - Z_factor; + D = (x*x) + (y*y) + (z*z); + //if (D < min) { min = D; ixyz[0] = -1; ixyz[1] = -1; ixyz[2] = 1; } + if (D < min) min = D; + // -1 0 -1 + x = (fxm1u0 + fyu3 + fzm1u6) - X_factor; + y = (fxm1u1 + fyu4 + fzm1u7) - Y_factor; + z = (fxm1u2 + fyu5 + fzm1u8) - Z_factor; + D = (x*x) + (y*y) + (z*z); + //if (D < min) { min = D; ixyz[0] = -1; ixyz[1] = 0; ixyz[2] = -1; } + if (D < min) min = D; + // -1 0 0 + x = (fxm1u0 + fyu3 + fzu6 ) - X_factor; + y = (fxm1u1 + fyu4 + fzu7 ) - Y_factor; + z = (fxm1u2 + fyu5 + fzu8 ) - Z_factor; + D = (x*x) + (y*y) + (z*z); + //if (D < min) { min = D; ixyz[0] = -1; ixyz[1] = 0; ixyz[2] = 0; } + if (D < min) min = D; + // -1 0 +1 + x = (fxm1u0 + fyu3 + fzp1u6) - X_factor; + y = (fxm1u1 + fyu4 + fzp1u7) - Y_factor; + z = (fxm1u2 + fyu5 + fzp1u8) - Z_factor; + D = (x*x) + (y*y) + (z*z); + //if (D < min) { min = D; ixyz[0] = -1; ixyz[1] = 0; ixyz[2] = 1; } + if (D < min) min = D; + // -1 +1 -1 + x = (fxm1u0 + fyp1u3 + fzm1u6) - X_factor; + y = (fxm1u1 + fyp1u4 + fzm1u7) - Y_factor; + z = (fxm1u2 + fyp1u5 + fzm1u8) - Z_factor; + D = (x*x) + (y*y) + (z*z); + //if (D < min) { min = D; ixyz[0] = -1; ixyz[1] = 1; ixyz[2] = -1; } + if (D < min) min = D; + // -1 +1 0 + x = (fxm1u0 + fyp1u3 + fzu6 ) - X_factor; + y = (fxm1u1 + fyp1u4 + fzu7 ) - Y_factor; + z = (fxm1u2 + fyp1u5 + fzu8 ) - Z_factor; + D = (x*x) + (y*y) + (z*z); + //if (D < min) { min = D; ixyz[0] = -1; ixyz[1] = 1; ixyz[2] = 0; } + if (D < min) min = D; + // -1 +1 +1 + x = (fxm1u0 + fyp1u3 + fzp1u6) - X_factor; + y = (fxm1u1 + fyp1u4 + fzp1u7) - Y_factor; + z = (fxm1u2 + fyp1u5 + fzp1u8) - Z_factor; + D = (x*x) + (y*y) + (z*z); + //if (D < min) { min = D; ixyz[0] = -1; ixyz[1] = 1; ixyz[2] = 1; } + if (D < min) min = D; + + // 0 -1 -1 + x = (fxu0 + fym1u3 + fzm1u6) - X_factor; + y = (fxu1 + fym1u4 + fzm1u7) - Y_factor; + z = (fxu2 + fym1u5 + fzm1u8) - Z_factor; + D = (x*x) + (y*y) + (z*z); + //if (D < min) { min = D; ixyz[0] = 0; ixyz[1] = -1; ixyz[2] = -1; } + if (D < min) min = D; + // 0 -1 0 + x = (fxu0 + fym1u3 + fzu6 ) - X_factor; + y = (fxu1 + fym1u4 + fzu7 ) - Y_factor; + z = (fxu2 + fym1u5 + fzu8 ) - Z_factor; + D = (x*x) + (y*y) + (z*z); + //if (D < min) { min = D; ixyz[0] = 0; ixyz[1] = -1; ixyz[2] = 0; } + if (D < min) min = D; + // 0 -1 +1 + x = (fxu0 + fym1u3 + fzp1u6) - X_factor; + y = (fxu1 + fym1u4 + fzp1u7) - Y_factor; + z = (fxu2 + fym1u5 + fzp1u8) - Z_factor; + D = (x*x) + (y*y) + (z*z); + //if (D < min) { min = D; ixyz[0] = 0; ixyz[1] = -1; ixyz[2] = 1; } + if (D < min) min = D; + // 0 0 -1 + x = (fxu0 + fyu3 + fzm1u6) - X_factor; + y = (fxu1 + fyu4 + fzm1u7) - Y_factor; + z = (fxu2 + fyu5 + fzm1u8) - Z_factor; + D = (x*x) + (y*y) + (z*z); + //if (D < min) { min = D; ixyz[0] = 0; ixyz[1] = 0; ixyz[2] = -1; } + if (D < min) min = D; + // 0 0 0 + // 0 0 +1 + x = (fxu0 + fyu3 + fzp1u6) - X_factor; + y = (fxu1 + fyu4 + fzp1u7) - Y_factor; + z = (fxu2 + fyu5 + fzp1u8) - Z_factor; + D = (x*x) + (y*y) + (z*z); + //if (D < min) { min = D; ixyz[0] = 0; ixyz[1] = 0; ixyz[2] = 1; } + if (D < min) min = D; + // 0 +1 -1 + x = (fxu0 + fyp1u3 + fzm1u6) - X_factor; + y = (fxu1 + fyp1u4 + fzm1u7) - Y_factor; + z = (fxu2 + fyp1u5 + fzm1u8) - Z_factor; + D = (x*x) + (y*y) + (z*z); + //if (D < min) { min = D; ixyz[0] = 0; ixyz[1] = 1; ixyz[2] = -1; } + if (D < min) min = D; + // 0 +1 0 + x = (fxu0 + fyp1u3 + fzu6 ) - X_factor; + y = (fxu1 + fyp1u4 + fzu7 ) - Y_factor; + z = (fxu2 + fyp1u5 + fzu8 ) - Z_factor; + D = (x*x) + (y*y) + (z*z); + //if (D < min) { min = D; ixyz[0] = 0; ixyz[1] = 1; ixyz[2] = 0; } + if (D < min) min = D; + // 0 +1 +1 + x = (fxu0 + fyp1u3 + fzp1u6) - X_factor; + y = (fxu1 + fyp1u4 + fzp1u7) - Y_factor; + z = (fxu2 + fyp1u5 + fzp1u8) - Z_factor; + D = (x*x) + (y*y) + (z*z); + //if (D < min) { min = D; ixyz[0] = 0; ixyz[1] = 1; ixyz[2] = 1; } + if (D < min) min = D; + + // +1 -1 -1 + x = (fxp1u0 + fym1u3 + fzm1u6) - X_factor; + y = (fxp1u1 + fym1u4 + fzm1u7) - Y_factor; + z = (fxp1u2 + fym1u5 + fzm1u8) - Z_factor; + D = (x*x) + (y*y) + (z*z); + //if (D < min) { min = D; ixyz[0] = 1; ixyz[1] = -1; ixyz[2] = -1; } + if (D < min) min = D; + // +1 -1 0 + x = (fxp1u0 + fym1u3 + fzu6 ) - X_factor; + y = (fxp1u1 + fym1u4 + fzu7 ) - Y_factor; + z = (fxp1u2 + fym1u5 + fzu8 ) - Z_factor; + D = (x*x) + (y*y) + (z*z); + //if (D < min) { min = D; ixyz[0] = 1; ixyz[1] = -1; ixyz[2] = 0; } + if (D < min) min = D; + // +1 -1 +1 + x = (fxp1u0 + fym1u3 + fzp1u6) - X_factor; + y = (fxp1u1 + fym1u4 + fzp1u7) - Y_factor; + z = (fxp1u2 + fym1u5 + fzp1u8) - Z_factor; + D = (x*x) + (y*y) + (z*z); + //if (D < min) { min = D; ixyz[0] = 1; ixyz[1] = -1; ixyz[2] = 1; } + if (D < min) min = D; + // +1 0 -1 + x = (fxp1u0 + fyu3 + fzm1u6) - X_factor; + y = (fxp1u1 + fyu4 + fzm1u7) - Y_factor; + z = (fxp1u2 + fyu5 + fzm1u8) - Z_factor; + D = (x*x) + (y*y) + (z*z); + //if (D < min) { min = D; ixyz[0] = 1; ixyz[1] = 0; ixyz[2] = -1; } + if (D < min) min = D; + // +1 0 0 + x = (fxp1u0 + fyu3 + fzu6 ) - X_factor; + y = (fxp1u1 + fyu4 + fzu7 ) - Y_factor; + z = (fxp1u2 + fyu5 + fzu8 ) - Z_factor; + D = (x*x) + (y*y) + (z*z); + //if (D < min) { min = D; ixyz[0] = 1; ixyz[1] = 0; ixyz[2] = 0; } + if (D < min) min = D; + // +1 0 +1 + x = (fxp1u0 + fyu3 + fzp1u6) - X_factor; + y = (fxp1u1 + fyu4 + fzp1u7) - Y_factor; + z = (fxp1u2 + fyu5 + fzp1u8) - Z_factor; + D = (x*x) + (y*y) + (z*z); + //if (D < min) { min = D; ixyz[0] = 1; ixyz[1] = 0; ixyz[2] = 1; } + if (D < min) min = D; + // +1 +1 -1 + x = (fxp1u0 + fyp1u3 + fzm1u6) - X_factor; + y = (fxp1u1 + fyp1u4 + fzm1u7) - Y_factor; + z = (fxp1u2 + fyp1u5 + fzm1u8) - Z_factor; + D = (x*x) + (y*y) + (z*z); + //if (D < min) { min = D; ixyz[0] = 1; ixyz[1] = 1; ixyz[2] = -1; } + if (D < min) min = D; + // +1 +1 0 + x = (fxp1u0 + fyp1u3 + fzu6 ) - X_factor; + y = (fxp1u1 + fyp1u4 + fzu7 ) - Y_factor; + z = (fxp1u2 + fyp1u5 + fzu8 ) - Z_factor; + D = (x*x) + (y*y) + (z*z); + //if (D < min) { min = D; ixyz[0] = 1; ixyz[1] = 1; ixyz[2] = 0; } + if (D < min) min = D; + // +1 +1 +1 + x = (fxp1u0 + fyp1u3 + fzp1u6) - X_factor; + y = (fxp1u1 + fyp1u4 + fzp1u7) - Y_factor; + z = (fxp1u2 + fyp1u5 + fzp1u8) - Z_factor; + D = (x*x) + (y*y) + (z*z); + //if (D < min) { min = D; ixyz[0] = 1; ixyz[1] = 1; ixyz[2] = 1; } + if (D < min) min = D; + + //if (closest2 != 0.0 && min < closest2) return (min); +// this->ClosestImage(a1, a2, ixyz); +// fprintf(stdout,"DEBUG: Predict = %2i %2i %2i\n",ixyz[0],ixyz[1],ixyz[2]); + +// ix = ixyz[0]; +// iy = ixyz[1]; +// iz = ixyz[2]; + +//D = sqrt(min); +// fprintf(stdout,"DEBUG: MinDist = %2i %2i %2i = %8.3f\n", ixmin, iymin, izmin, D); +// printf("---------------------------------------------------------------\n"); + return(min); + +} +#endif diff --git a/src/cuda_kernels/core_kernels.cu b/src/cuda_kernels/core_kernels.cu index 9ddb51794d..a412a63f34 100644 --- a/src/cuda_kernels/core_kernels.cu +++ b/src/cuda_kernels/core_kernels.cu @@ -1,284 +1,10 @@ #include "core_kernels.cuh" +#include "NonOrtho_dist2.cuh" +#include "ortho_dist2.cuh" +//#include // DEBUG #define BLOCKDIM 512 #define RSIZE 512 -// ----------------------------------------------------------------------------- -/** \return Shortest imaged distance between given coordinates in fractional space. - * NOTE: This function is complicated hence we will put into a __device__ only function. - */ -__device__ double NonOrtho_dist(double a0, double a1, double a2, - double b0, double b1, double b2, - const double *ucell) -{ - //int ixyz[3]; - double minIn = -1.0; - - //double closest2 - // The floor() calls serve to bring each point back in the main unit cell. - double fx = a0 - floor(a0); - double fy = a1 - floor(a1); - double fz = a2 - floor(a2); - double f2x = b0 - floor(b0); - double f2y = b1 - floor(b1); - double f2z = b2 - floor(b2); - // f2 back in Cartesian space - double X_factor = (f2x*ucell[0] + f2y*ucell[3] + f2z*ucell[6]); - double Y_factor = (f2x*ucell[1] + f2y*ucell[4] + f2z*ucell[7]); - double Z_factor = (f2x*ucell[2] + f2y*ucell[5] + f2z*ucell[8]); - // Precompute some factors - double fxm1 = fx - 1.0; - double fxp1 = fx + 1.0; - double fym1 = fy - 1.0; - double fyp1 = fy + 1.0; - double fzm1 = fz - 1.0; - double fzp1 = fz + 1.0; - - double fxm1u0 = fxm1 * ucell[0]; - double fxu0 = fx * ucell[0]; - double fxp1u0 = fxp1 * ucell[0]; - double fxm1u1 = fxm1 * ucell[1]; - double fxu1 = fx * ucell[1]; - double fxp1u1 = fxp1 * ucell[1]; - double fxm1u2 = fxm1 * ucell[2]; - double fxu2 = fx * ucell[2]; - double fxp1u2 = fxp1 * ucell[2]; - - double fym1u3 = fym1 * ucell[3]; - double fyu3 = fy * ucell[3]; - double fyp1u3 = fyp1 * ucell[3]; - double fym1u4 = fym1 * ucell[4]; - double fyu4 = fy * ucell[4]; - double fyp1u4 = fyp1 * ucell[4]; - double fym1u5 = fym1 * ucell[5]; - double fyu5 = fy * ucell[5]; - double fyp1u5 = fyp1 * ucell[5]; - - double fzm1u6 = fzm1 * ucell[6]; - double fzu6 = fz * ucell[6]; - double fzp1u6 = fzp1 * ucell[6]; - double fzm1u7 = fzm1 * ucell[7]; - double fzu7 = fz * ucell[7]; - double fzp1u7 = fzp1 * ucell[7]; - double fzm1u8 = fzm1 * ucell[8]; - double fzu8 = fz * ucell[8]; - double fzp1u8 = fzp1 * ucell[8]; - - // Calc ix iy iz = 0 case - double x = (fxu0 + fyu3 + fzu6) - X_factor; - double y = (fxu1 + fyu4 + fzu7) - Y_factor; - double z = (fxu2 + fyu5 + fzu8) - Z_factor; - // DEBUG - //mprintf("DEBUG: a2: %g %g %g\n",(fxu0 + fyu3 + fzu6), (fxu1 + fyu4 + fzu7), (fxu2 + fyu5 + fzu8)); - //mprintf("DEBUG: a1: %g %g %g\n", X_factor, Y_factor, Z_factor); - double min = (x*x) + (y*y) + (z*z); - - if (minIn > 0.0 && minIn < min) min = minIn; - - //ixyz[0] = 0; - //ixyz[1] = 0; - //ixyz[2] = 0; - - // -1 -1 -1 - x = (fxm1u0 + fym1u3 + fzm1u6) - X_factor; - y = (fxm1u1 + fym1u4 + fzm1u7) - Y_factor; - z = (fxm1u2 + fym1u5 + fzm1u8) - Z_factor; - double D = (x*x) + (y*y) + (z*z); - //if (D < min) { min = D; ixyz[0] = -1; ixyz[1] = -1; ixyz[2] = -1; } - if (D < min) min = D; - // -1 -1 0 - x = (fxm1u0 + fym1u3 + fzu6 ) - X_factor; - y = (fxm1u1 + fym1u4 + fzu7 ) - Y_factor; - z = (fxm1u2 + fym1u5 + fzu8 ) - Z_factor; - D = (x*x) + (y*y) + (z*z); - //if (D < min) { min = D; ixyz[0] = -1; ixyz[1] = -1; ixyz[2] = 0; } - if (D < min) min = D; - // -1 -1 +1 - x = (fxm1u0 + fym1u3 + fzp1u6) - X_factor; - y = (fxm1u1 + fym1u4 + fzp1u7) - Y_factor; - z = (fxm1u2 + fym1u5 + fzp1u8) - Z_factor; - D = (x*x) + (y*y) + (z*z); - //if (D < min) { min = D; ixyz[0] = -1; ixyz[1] = -1; ixyz[2] = 1; } - if (D < min) min = D; - // -1 0 -1 - x = (fxm1u0 + fyu3 + fzm1u6) - X_factor; - y = (fxm1u1 + fyu4 + fzm1u7) - Y_factor; - z = (fxm1u2 + fyu5 + fzm1u8) - Z_factor; - D = (x*x) + (y*y) + (z*z); - //if (D < min) { min = D; ixyz[0] = -1; ixyz[1] = 0; ixyz[2] = -1; } - if (D < min) min = D; - // -1 0 0 - x = (fxm1u0 + fyu3 + fzu6 ) - X_factor; - y = (fxm1u1 + fyu4 + fzu7 ) - Y_factor; - z = (fxm1u2 + fyu5 + fzu8 ) - Z_factor; - D = (x*x) + (y*y) + (z*z); - //if (D < min) { min = D; ixyz[0] = -1; ixyz[1] = 0; ixyz[2] = 0; } - if (D < min) min = D; - // -1 0 +1 - x = (fxm1u0 + fyu3 + fzp1u6) - X_factor; - y = (fxm1u1 + fyu4 + fzp1u7) - Y_factor; - z = (fxm1u2 + fyu5 + fzp1u8) - Z_factor; - D = (x*x) + (y*y) + (z*z); - //if (D < min) { min = D; ixyz[0] = -1; ixyz[1] = 0; ixyz[2] = 1; } - if (D < min) min = D; - // -1 +1 -1 - x = (fxm1u0 + fyp1u3 + fzm1u6) - X_factor; - y = (fxm1u1 + fyp1u4 + fzm1u7) - Y_factor; - z = (fxm1u2 + fyp1u5 + fzm1u8) - Z_factor; - D = (x*x) + (y*y) + (z*z); - //if (D < min) { min = D; ixyz[0] = -1; ixyz[1] = 1; ixyz[2] = -1; } - if (D < min) min = D; - // -1 +1 0 - x = (fxm1u0 + fyp1u3 + fzu6 ) - X_factor; - y = (fxm1u1 + fyp1u4 + fzu7 ) - Y_factor; - z = (fxm1u2 + fyp1u5 + fzu8 ) - Z_factor; - D = (x*x) + (y*y) + (z*z); - //if (D < min) { min = D; ixyz[0] = -1; ixyz[1] = 1; ixyz[2] = 0; } - if (D < min) min = D; - // -1 +1 +1 - x = (fxm1u0 + fyp1u3 + fzp1u6) - X_factor; - y = (fxm1u1 + fyp1u4 + fzp1u7) - Y_factor; - z = (fxm1u2 + fyp1u5 + fzp1u8) - Z_factor; - D = (x*x) + (y*y) + (z*z); - //if (D < min) { min = D; ixyz[0] = -1; ixyz[1] = 1; ixyz[2] = 1; } - if (D < min) min = D; - - // 0 -1 -1 - x = (fxu0 + fym1u3 + fzm1u6) - X_factor; - y = (fxu1 + fym1u4 + fzm1u7) - Y_factor; - z = (fxu2 + fym1u5 + fzm1u8) - Z_factor; - D = (x*x) + (y*y) + (z*z); - //if (D < min) { min = D; ixyz[0] = 0; ixyz[1] = -1; ixyz[2] = -1; } - if (D < min) min = D; - // 0 -1 0 - x = (fxu0 + fym1u3 + fzu6 ) - X_factor; - y = (fxu1 + fym1u4 + fzu7 ) - Y_factor; - z = (fxu2 + fym1u5 + fzu8 ) - Z_factor; - D = (x*x) + (y*y) + (z*z); - //if (D < min) { min = D; ixyz[0] = 0; ixyz[1] = -1; ixyz[2] = 0; } - if (D < min) min = D; - // 0 -1 +1 - x = (fxu0 + fym1u3 + fzp1u6) - X_factor; - y = (fxu1 + fym1u4 + fzp1u7) - Y_factor; - z = (fxu2 + fym1u5 + fzp1u8) - Z_factor; - D = (x*x) + (y*y) + (z*z); - //if (D < min) { min = D; ixyz[0] = 0; ixyz[1] = -1; ixyz[2] = 1; } - if (D < min) min = D; - // 0 0 -1 - x = (fxu0 + fyu3 + fzm1u6) - X_factor; - y = (fxu1 + fyu4 + fzm1u7) - Y_factor; - z = (fxu2 + fyu5 + fzm1u8) - Z_factor; - D = (x*x) + (y*y) + (z*z); - //if (D < min) { min = D; ixyz[0] = 0; ixyz[1] = 0; ixyz[2] = -1; } - if (D < min) min = D; - // 0 0 0 - // 0 0 +1 - x = (fxu0 + fyu3 + fzp1u6) - X_factor; - y = (fxu1 + fyu4 + fzp1u7) - Y_factor; - z = (fxu2 + fyu5 + fzp1u8) - Z_factor; - D = (x*x) + (y*y) + (z*z); - //if (D < min) { min = D; ixyz[0] = 0; ixyz[1] = 0; ixyz[2] = 1; } - if (D < min) min = D; - // 0 +1 -1 - x = (fxu0 + fyp1u3 + fzm1u6) - X_factor; - y = (fxu1 + fyp1u4 + fzm1u7) - Y_factor; - z = (fxu2 + fyp1u5 + fzm1u8) - Z_factor; - D = (x*x) + (y*y) + (z*z); - //if (D < min) { min = D; ixyz[0] = 0; ixyz[1] = 1; ixyz[2] = -1; } - if (D < min) min = D; - // 0 +1 0 - x = (fxu0 + fyp1u3 + fzu6 ) - X_factor; - y = (fxu1 + fyp1u4 + fzu7 ) - Y_factor; - z = (fxu2 + fyp1u5 + fzu8 ) - Z_factor; - D = (x*x) + (y*y) + (z*z); - //if (D < min) { min = D; ixyz[0] = 0; ixyz[1] = 1; ixyz[2] = 0; } - if (D < min) min = D; - // 0 +1 +1 - x = (fxu0 + fyp1u3 + fzp1u6) - X_factor; - y = (fxu1 + fyp1u4 + fzp1u7) - Y_factor; - z = (fxu2 + fyp1u5 + fzp1u8) - Z_factor; - D = (x*x) + (y*y) + (z*z); - //if (D < min) { min = D; ixyz[0] = 0; ixyz[1] = 1; ixyz[2] = 1; } - if (D < min) min = D; - - // +1 -1 -1 - x = (fxp1u0 + fym1u3 + fzm1u6) - X_factor; - y = (fxp1u1 + fym1u4 + fzm1u7) - Y_factor; - z = (fxp1u2 + fym1u5 + fzm1u8) - Z_factor; - D = (x*x) + (y*y) + (z*z); - //if (D < min) { min = D; ixyz[0] = 1; ixyz[1] = -1; ixyz[2] = -1; } - if (D < min) min = D; - // +1 -1 0 - x = (fxp1u0 + fym1u3 + fzu6 ) - X_factor; - y = (fxp1u1 + fym1u4 + fzu7 ) - Y_factor; - z = (fxp1u2 + fym1u5 + fzu8 ) - Z_factor; - D = (x*x) + (y*y) + (z*z); - //if (D < min) { min = D; ixyz[0] = 1; ixyz[1] = -1; ixyz[2] = 0; } - if (D < min) min = D; - // +1 -1 +1 - x = (fxp1u0 + fym1u3 + fzp1u6) - X_factor; - y = (fxp1u1 + fym1u4 + fzp1u7) - Y_factor; - z = (fxp1u2 + fym1u5 + fzp1u8) - Z_factor; - D = (x*x) + (y*y) + (z*z); - //if (D < min) { min = D; ixyz[0] = 1; ixyz[1] = -1; ixyz[2] = 1; } - if (D < min) min = D; - // +1 0 -1 - x = (fxp1u0 + fyu3 + fzm1u6) - X_factor; - y = (fxp1u1 + fyu4 + fzm1u7) - Y_factor; - z = (fxp1u2 + fyu5 + fzm1u8) - Z_factor; - D = (x*x) + (y*y) + (z*z); - //if (D < min) { min = D; ixyz[0] = 1; ixyz[1] = 0; ixyz[2] = -1; } - if (D < min) min = D; - // +1 0 0 - x = (fxp1u0 + fyu3 + fzu6 ) - X_factor; - y = (fxp1u1 + fyu4 + fzu7 ) - Y_factor; - z = (fxp1u2 + fyu5 + fzu8 ) - Z_factor; - D = (x*x) + (y*y) + (z*z); - //if (D < min) { min = D; ixyz[0] = 1; ixyz[1] = 0; ixyz[2] = 0; } - if (D < min) min = D; - // +1 0 +1 - x = (fxp1u0 + fyu3 + fzp1u6) - X_factor; - y = (fxp1u1 + fyu4 + fzp1u7) - Y_factor; - z = (fxp1u2 + fyu5 + fzp1u8) - Z_factor; - D = (x*x) + (y*y) + (z*z); - //if (D < min) { min = D; ixyz[0] = 1; ixyz[1] = 0; ixyz[2] = 1; } - if (D < min) min = D; - // +1 +1 -1 - x = (fxp1u0 + fyp1u3 + fzm1u6) - X_factor; - y = (fxp1u1 + fyp1u4 + fzm1u7) - Y_factor; - z = (fxp1u2 + fyp1u5 + fzm1u8) - Z_factor; - D = (x*x) + (y*y) + (z*z); - //if (D < min) { min = D; ixyz[0] = 1; ixyz[1] = 1; ixyz[2] = -1; } - if (D < min) min = D; - // +1 +1 0 - x = (fxp1u0 + fyp1u3 + fzu6 ) - X_factor; - y = (fxp1u1 + fyp1u4 + fzu7 ) - Y_factor; - z = (fxp1u2 + fyp1u5 + fzu8 ) - Z_factor; - D = (x*x) + (y*y) + (z*z); - //if (D < min) { min = D; ixyz[0] = 1; ixyz[1] = 1; ixyz[2] = 0; } - if (D < min) min = D; - // +1 +1 +1 - x = (fxp1u0 + fyp1u3 + fzp1u6) - X_factor; - y = (fxp1u1 + fyp1u4 + fzp1u7) - Y_factor; - z = (fxp1u2 + fyp1u5 + fzp1u8) - Z_factor; - D = (x*x) + (y*y) + (z*z); - //if (D < min) { min = D; ixyz[0] = 1; ixyz[1] = 1; ixyz[2] = 1; } - if (D < min) min = D; - - //if (closest2 != 0.0 && min < closest2) return (min); -// this->ClosestImage(a1, a2, ixyz); -// fprintf(stdout,"DEBUG: Predict = %2i %2i %2i\n",ixyz[0],ixyz[1],ixyz[2]); - -// ix = ixyz[0]; -// iy = ixyz[1]; -// iz = ixyz[2]; - -//D = sqrt(min); -// fprintf(stdout,"DEBUG: MinDist = %2i %2i %2i = %8.3f\n", ixmin, iymin, izmin, D); -// printf("---------------------------------------------------------------\n"); - return(min); - -} - // ----------------------------------------------------------------------------- //try thread coarsening /** Calculate the closest distances of atoms of solvent molecules to a point. */ @@ -321,7 +47,6 @@ __global__ void kClosestDistsToPt_NoImage(double* D_, const double* maskCenter, } } - // ----------------------------------------------------------------------------- /** Calculate closest distances of atoms of solvent molecules to solute atoms. */ @@ -456,29 +181,11 @@ __global__ void kClosestDistsToPt_Ortho(double *D_, const double* maskCenter, double dist; for(int offset = 0 ; offset < NAtoms ; offset++ ) { - double x = a0 - SolventMols_[sIndex++]; - double y = a1 - SolventMols_[sIndex++]; - double z = a2 - SolventMols_[sIndex++]; - - // Get rid of sign info - if (x<0) x=-x; - if (y<0) y=-y; - if (z<0) z=-z; - // Get rid of multiples of box lengths - //TODO WIERD that should be a way to simplify it - while (x > box[0]) x = x - box[0]; - while (y > box[1]) y = y - box[1]; - while (z > box[2]) z = z - box[2]; - // Find shortest distance in periodic reference - double D = box[0] - x; - if (D < x) x = D; - D = box[1] - y; - if (D < y) y = D; - D = box[2] - z; - if (D < z) z = D; - - //Dist = x*x + y*y + z*z; - dist = x*x + y*y + z*z; + dist = ortho_dist2( a0, a1, a2, + SolventMols_[sIndex], SolventMols_[sIndex+1], SolventMols_[sIndex+2], + box ); + sIndex += 3; + if (box[0]==0.0 || box[1]==0.0 || box[2]==0.0) dist= -1.0; @@ -551,34 +258,10 @@ __global__ void kClosestDistsToAtoms_Ortho(double* D_, const double* SolventMols for (j = start ; j < end; j+=3 ) { //int offset = start + (j + threadIdx.x)%(end - start); - double x = Solute_atoms[j + 0] - a0; - double y = Solute_atoms[j + 1] - a1; - double z = Solute_atoms[j + 2] - a2; + dist = ortho_dist2( Solute_atoms[j], Solute_atoms[j+1], Solute_atoms[j+2], + a0, a1, a2, + box ); - // Get rid of sign info - if (x<0) x=-x; - if (y<0) y=-y; - if (z<0) z=-z; - // Get rid of multiples of box lengths - //TODO WIERD that should be a way to simplify it - while (x > box[0]) x = x - box[0]; - while (y > box[1]) y = y - box[1]; - while (z > box[2]) z = z - box[2]; - - //below is actually slower! - //x = x - box[0]*((int)x/box[0]); - //y = y - box[0]*((int)y/box[1]); - //z = z - box[0]*((int)z/box[2]); - // Find shortest distance in periodic reference - double D = box[0] - x; - if (D < x) x = D; - D = box[1] - y; - if (D < y) y = D; - D = box[2] - z; - if (D < z) z = D; - - //Dist = x*x + y*y + z*z; - dist = x*x + y*y + z*z; if (box[0]==0.0 || box[1]==0.0 || box[2]==0.0) dist = -1.0; @@ -651,7 +334,7 @@ __global__ void kClosestDistsToPt_Nonortho(double* D_, const double* maskCenter, double x = recip[0]*SolventMols_[sIndex + offset + 0] + recip[1]*SolventMols_[sIndex + offset + 1] + recip[2]*SolventMols_[sIndex + offset + 2]; double y = recip[3]*SolventMols_[sIndex + offset + 0] + recip[4]*SolventMols_[sIndex + offset + 1] + recip[5]*SolventMols_[sIndex + offset + 2]; double z = recip[6]*SolventMols_[sIndex + offset + 0] + recip[7]*SolventMols_[sIndex + offset + 1] + recip[8]*SolventMols_[sIndex + offset + 2]; - double dist = NonOrtho_dist(x,y,z,a0,a1,a2,ucell); + double dist = NonOrtho_dist2(x,y,z,a0,a1,a2,ucell); // if (mol == 0) // printf("dist = %f\n",dist); @@ -733,7 +416,7 @@ __global__ void kClosestDistsToAtoms_Nonortho(double*D_, double y = recip[3]*Solute_atoms[j + 0] + recip[4]*Solute_atoms[j + 1] + recip[5]*Solute_atoms[j + 2] ; double z = recip[6]*Solute_atoms[j + 0] + recip[7]*Solute_atoms[j + 1] + recip[8]*Solute_atoms[j + 2] ; - dist = NonOrtho_dist(x,y,z,a0,a1,a2,ucell); + dist = NonOrtho_dist2(x,y,z,a0,a1,a2,ucell); //if (mol == 11) // printf("min = %f\n",min_val); min_val = min(min_val,dist); @@ -771,4 +454,107 @@ __global__ void kClosestDistsToAtoms_Nonortho(double*D_, } } +// ----------------------------------------------------------------------------- +/** Bin distances from two non-overlapping sets of coords. */ +__global__ void kBinDistances_nonOverlap_NoImage(int* RDF, + const CpptrajGpu::FpType* xyz1, int N1, const CpptrajGpu::FpType* xyz2, int N2, + CpptrajGpu::FpType maximum2, CpptrajGpu::FpType one_over_spacing) +{ + int a1 = blockIdx.x * blockDim.x + threadIdx.x; + int a2 = blockIdx.y * blockDim.y + threadIdx.y; + + if (a1 < N1 && a2 < N2) { + int idx1 = a1 * 3; + CpptrajGpu::FpType a1x = xyz1[idx1 ]; + CpptrajGpu::FpType a1y = xyz1[idx1+1]; + CpptrajGpu::FpType a1z = xyz1[idx1+2]; + + int idx2 = a2 * 3; + CpptrajGpu::FpType x = a1x - xyz2[idx2 ]; + CpptrajGpu::FpType y = a1y - xyz2[idx2+1]; + CpptrajGpu::FpType z = a1z - xyz2[idx2+2]; + + CpptrajGpu::FpType dist2 = (x*x) + (y*y) + (z*z); + if (dist2 > 0 && dist2 <= maximum2) { + CpptrajGpu::FpType dist = sqrt(dist2); + int histIdx = (int) (dist * one_over_spacing); + //printf("DEBUG: a1= %i a2= %i dist= %f bin=%i\n", a1+1, a2+1, dist, histIdx); + //printf("DEBUG: xyz1= %f %f %f\n", a1x, a1y, a1z); + //printf("DEBUG: a1= %i a2= %i dist= %f bin=%i xyz1=%f %f %f xyz2=%f %f %f\n", a1+1, a2+1, dist, histIdx, + // a1x, a1y, a1z, a2x, a2y, a2z); + atomicAdd( RDF + histIdx, 1 ); + } + } +} + +/** Bin distances from two non-overlapping sets of coords. */ +__global__ void kBinDistances_nonOverlap_Ortho(int* RDF, + const CpptrajGpu::FpType* xyz1, int N1, const CpptrajGpu::FpType* xyz2, int N2, + const CpptrajGpu::FpType* box, + CpptrajGpu::FpType maximum2, CpptrajGpu::FpType one_over_spacing) +{ + int a1 = blockIdx.x * blockDim.x + threadIdx.x; + int a2 = blockIdx.y * blockDim.y + threadIdx.y; + + if (a1 < N1 && a2 < N2) { + int idx1 = a1 * 3; + CpptrajGpu::FpType a1x = xyz1[idx1 ]; + CpptrajGpu::FpType a1y = xyz1[idx1+1]; + CpptrajGpu::FpType a1z = xyz1[idx1+2]; + + int idx2 = a2 * 3; + CpptrajGpu::FpType a2x = xyz2[idx2 ]; + CpptrajGpu::FpType a2y = xyz2[idx2+1]; + CpptrajGpu::FpType a2z = xyz2[idx2+2]; + + CpptrajGpu::FpType dist2 = ortho_dist2(a1x, a1y, a1z, a2x, a2y, a2z, box); + if (dist2 > 0 && dist2 <= maximum2) { + CpptrajGpu::FpType dist = sqrt(dist2); + int histIdx = (int) (dist * one_over_spacing); + //printf("DEBUG: a1= %i a2= %i dist= %f bin=%i\n", a1+1, a2+1, dist, histIdx); + //printf("DEBUG: xyz1= %f %f %f\n", a1x, a1y, a1z); + //printf("DEBUG: a1= %i a2= %i dist= %f bin=%i xyz1=%f %f %f xyz2=%f %f %f\n", a1+1, a2+1, dist, histIdx, + // a1x, a1y, a1z, a2x, a2y, a2z); + atomicAdd( RDF + histIdx, 1 ); + } + } +} +/** Bin distances from two non-overlapping sets of coords. */ +__global__ void kBinDistances_nonOverlap_nonOrtho(int* RDF, + const CpptrajGpu::FpType* xyz1, int N1, const CpptrajGpu::FpType* xyz2, int N2, + const CpptrajGpu::FpType* frac, const CpptrajGpu::FpType* ucell, + CpptrajGpu::FpType maximum2, CpptrajGpu::FpType one_over_spacing) +{ + int a1 = blockIdx.x * blockDim.x + threadIdx.x; + int a2 = blockIdx.y * blockDim.y + threadIdx.y; + + if (a1 < N1 && a2 < N2) { + int idx1 = a1 * 3; + CpptrajGpu::FpType a1x = xyz1[idx1 ]; + CpptrajGpu::FpType a1y = xyz1[idx1+1]; + CpptrajGpu::FpType a1z = xyz1[idx1+2]; + CpptrajGpu::FpType f1x = frac[0]*a1x + frac[1]*a1y + frac[2]*a1z; + CpptrajGpu::FpType f1y = frac[3]*a1x + frac[4]*a1y + frac[5]*a1z; + CpptrajGpu::FpType f1z = frac[6]*a1x + frac[7]*a1y + frac[8]*a1z; + + int idx2 = a2 * 3; + CpptrajGpu::FpType a2x = xyz2[idx2 ]; + CpptrajGpu::FpType a2y = xyz2[idx2+1]; + CpptrajGpu::FpType a2z = xyz2[idx2+2]; + CpptrajGpu::FpType f2x = frac[0]*a2x + frac[1]*a2y + frac[2]*a2z; + CpptrajGpu::FpType f2y = frac[3]*a2x + frac[4]*a2y + frac[5]*a2z; + CpptrajGpu::FpType f2z = frac[6]*a2x + frac[7]*a2y + frac[8]*a2z; + + CpptrajGpu::FpType dist2 = NonOrtho_dist2(f2x, f2y, f2z, f1x ,f1y, f1z, ucell); + if (dist2 > 0 && dist2 <= maximum2) { + CpptrajGpu::FpType dist = sqrt(dist2); + int histIdx = (int) (dist * one_over_spacing); + //printf("DEBUG: a1= %i a2= %i dist= %f bin=%i\n", a1+1, a2+1, dist, histIdx); + //printf("DEBUG: xyz1= %f %f %f\n", a1x, a1y, a1z); + //printf("DEBUG: a1= %i a2= %i dist= %f bin=%i xyz1=%f %f %f xyz2=%f %f %f\n", a1+1, a2+1, dist, histIdx, + // a1x, a1y, a1z, a2x, a2y, a2z); + atomicAdd( RDF + histIdx, 1 ); + } + } +} diff --git a/src/cuda_kernels/core_kernels.cuh b/src/cuda_kernels/core_kernels.cuh index 3563c926d3..c49816c045 100644 --- a/src/cuda_kernels/core_kernels.cuh +++ b/src/cuda_kernels/core_kernels.cuh @@ -1,5 +1,6 @@ #ifndef INC_CORE_KERNELS_CUH #define INC_CORE_KERNELS_CUH +#include "../Gpu.h" #if defined(__HIP_PLATFORM_HCC__) #include #endif @@ -13,4 +14,13 @@ __global__ void kClosestDistsToAtoms_Ortho(double*,const double*,const double*,d // Non-orthorhombic imaging __global__ void kClosestDistsToPt_Nonortho(double*,const double*,const double*,double,const double*,const double*,int,int,int); __global__ void kClosestDistsToAtoms_Nonortho(double*,const double*,const double*,double,const double*,const double*,int,int,int,int); +// RDF no imaging +__global__ void kBinDistances_nonOverlap_NoImage(int*, const CpptrajGpu::FpType*, int, const CpptrajGpu::FpType*, int, + CpptrajGpu::FpType, CpptrajGpu::FpType); +// RDF ortho imaging +__global__ void kBinDistances_nonOverlap_Ortho(int*, const CpptrajGpu::FpType*, int, const CpptrajGpu::FpType*, int, + const CpptrajGpu::FpType*, CpptrajGpu::FpType, CpptrajGpu::FpType); +// RDF nonortho imaging +__global__ void kBinDistances_nonOverlap_nonOrtho(int*, const CpptrajGpu::FpType*, int, const CpptrajGpu::FpType*, int, + const CpptrajGpu::FpType*, const CpptrajGpu::FpType*, CpptrajGpu::FpType, CpptrajGpu::FpType); #endif diff --git a/src/cuda_kernels/cudadepend b/src/cuda_kernels/cudadepend index b931699125..d28d3ad970 100644 --- a/src/cuda_kernels/cudadepend +++ b/src/cuda_kernels/cudadepend @@ -1,4 +1,5 @@ GistCudaCalc.o : GistCudaCalc.cu GistCudaCalc.cuh GistCudaSetup.o : GistCudaSetup.cu ../HipDefinitions.h GistCudaCalc.cuh GistCudaSetup.cuh -core_kernels.o : core_kernels.cu core_kernels.cuh -kernel_wrappers.o : kernel_wrappers.cu ../HipDefinitions.h ../ImageOption.h core_kernels.cuh kernel_wrappers.cuh +core_kernels.o : core_kernels.cu ../Gpu.h NonOrtho_dist2.cuh core_kernels.cuh ortho_dist2.cuh +kernel_rdf.o : kernel_rdf.cu ../CpptrajStdio.h ../Gpu.h ../HipDefinitions.h ../ImageOption.h core_kernels.cuh kernel_rdf.cuh +kernel_wrappers.o : kernel_wrappers.cu ../Gpu.h ../HipDefinitions.h ../ImageOption.h core_kernels.cuh kernel_wrappers.cuh diff --git a/src/cuda_kernels/kernel_rdf.cu b/src/cuda_kernels/kernel_rdf.cu new file mode 100644 index 0000000000..c007d14db9 --- /dev/null +++ b/src/cuda_kernels/kernel_rdf.cu @@ -0,0 +1,119 @@ +#include "kernel_rdf.cuh" +#include "core_kernels.cuh" +#include "../CpptrajStdio.h" +#if defined(__HIP_PLATFORM_HCC__) +#include +#include "../HipDefinitions.h" +#endif + +static inline int calc_nblocks(int ntotal, int nthreadsPerBlock) +{ + int nblocks = ntotal / nthreadsPerBlock; + if ( (ntotal % nthreadsPerBlock) != 0 ) + nblocks++; + return nblocks; +} + +/** Report any cuda errors. */ +static inline int Cuda_check(cudaError_t err, const char* desc) { + //cudaError_t err = cudaGetLastError(); + if (err != cudaSuccess) { + mprintf("Warning: CUDA Error %s: %s\n", desc, cudaGetErrorString(err)); + mprinterr("Error: CUDA Error %s: %s\n", desc, cudaGetErrorString(err)); + //return 1; + } + return 0; +} + +/** Calculate distances between pairs of atoms and bin them into a 1D histogram. */ +int Cpptraj_GPU_RDF(unsigned long* bins, int nbins, CpptrajGpu::FpType maximum2, CpptrajGpu::FpType one_over_spacing, + const CpptrajGpu::FpType* xyz1, int N1, + const CpptrajGpu::FpType* xyz2, int N2, + ImageOption::Type imageType, + const CpptrajGpu::FpType* box, const CpptrajGpu::FpType* ucell, const CpptrajGpu::FpType* recip) +{ + int* device_rdf; + Cuda_check(cudaMalloc(((void**)(&device_rdf)), nbins * sizeof(int)), "Allocating rdf bins"); + Cuda_check(cudaMemset( device_rdf, 0, nbins*sizeof(int) ), "Setting rdf bins to 0"); + + CpptrajGpu::FpType* device_xyz1; + Cuda_check(cudaMalloc(((void**)(&device_xyz1)), N1 * 3 * sizeof(CpptrajGpu::FpType)), "Allocating xyz1"); + Cuda_check(cudaMemcpy(device_xyz1, xyz1, N1 * 3 * sizeof(CpptrajGpu::FpType), cudaMemcpyHostToDevice), "Copying xyz1"); + + CpptrajGpu::FpType* device_xyz2; + if (xyz2 != 0) { + Cuda_check(cudaMalloc(((void**)(&device_xyz2)), N2 * 3 * sizeof(CpptrajGpu::FpType)), "Allocating xyz2"); + Cuda_check(cudaMemcpy(device_xyz2, xyz2, N2 * 3 * sizeof(CpptrajGpu::FpType), cudaMemcpyHostToDevice), "Copying xyz2"); + } else { + device_xyz2 = device_xyz1; + } + + CpptrajGpu::FpType *boxDev; + CpptrajGpu::FpType *ucellDev, *fracDev; + if (imageType == ImageOption::ORTHO) { + Cuda_check(cudaMalloc(((void**)(&boxDev)), 3 * sizeof(CpptrajGpu::FpType)), "Allocating box"); + Cuda_check(cudaMemcpy(boxDev,box, 3 * sizeof(CpptrajGpu::FpType), cudaMemcpyHostToDevice), "Copying box"); + } else if (imageType == ImageOption::NONORTHO) { + Cuda_check(cudaMalloc(((void**)(&ucellDev)), 9 * sizeof(CpptrajGpu::FpType)), "Allocating ucell"); + Cuda_check(cudaMalloc(((void**)(&fracDev)), 9 * sizeof(CpptrajGpu::FpType)), "Allocating frac"); + Cuda_check(cudaMemcpy(ucellDev,ucell, 9 * sizeof(CpptrajGpu::FpType), cudaMemcpyHostToDevice), "Copying ucell"); + Cuda_check(cudaMemcpy(fracDev,recip, 9 * sizeof(CpptrajGpu::FpType), cudaMemcpyHostToDevice), "Copying frac"); + } + + // Determine number of blocks + unsigned int BLOCKDIM = CpptrajGpu::MaxBlockDim_2D(); + + dim3 threadsPerBlock(BLOCKDIM, BLOCKDIM); + dim3 numBlocks(calc_nblocks(N1, threadsPerBlock.x), calc_nblocks(N2, threadsPerBlock.y)); + //mprintf("#Atoms = %i, %i; Threads per block = %i, %i; #Blocks = %i, %i\n", + // N1, N2, threadsPerBlock.x, threadsPerBlock.y, numBlocks.x, numBlocks.y); + + // Launch kernel + //if (xyz2 != 0) { + // Non-overlapping coords + switch (imageType) { + case ImageOption::NONORTHO: + kBinDistances_nonOverlap_nonOrtho<<>>( + device_rdf, device_xyz1, N1, device_xyz2, N2, fracDev, ucellDev, maximum2, one_over_spacing); + break; + case ImageOption::ORTHO: + kBinDistances_nonOverlap_Ortho<<>>( + device_rdf, device_xyz1, N1, device_xyz2, N2, boxDev, maximum2, one_over_spacing); + break; + case ImageOption::NO_IMAGE: + kBinDistances_nonOverlap_NoImage<<>>( + device_rdf, device_xyz1, N1, device_xyz2, N2, maximum2, one_over_spacing); + break; + //default: + // mprinterr("Internal Error: kernel_rdf: Unhandled image type.\n"); + // return 1; + } + //} else { + // // Overlapping coords +// +// return 1;// FIXME +// } + // Error check + Cuda_check(cudaGetLastError(), "kernel launch"); + + // Copy the result back + int* local_bins = new int[ nbins ]; + cudaMemcpy(local_bins, device_rdf, nbins*sizeof(int), cudaMemcpyDeviceToHost); + for (int ibin = 0; ibin != nbins; ibin++) { + //mprintf("DEBUG:\tBin %i = %i (%i)\n", ibin, local_bins[ibin], device_rdf[ibin]); + //mprintf("DEBUG:\tBin %i = %i\n", ibin, local_bins[ibin]); + bins[ibin] += local_bins[ibin]; + } + delete[] local_bins; + // Free device memory + cudaFree(device_rdf); + cudaFree(device_xyz1); + if (xyz2 != 0) cudaFree(device_xyz2); + if (imageType == ImageOption::ORTHO) + cudaFree(boxDev); + else if (imageType == ImageOption::NONORTHO) { + cudaFree(ucellDev); + cudaFree(fracDev); + } + return 0; +} diff --git a/src/cuda_kernels/kernel_rdf.cuh b/src/cuda_kernels/kernel_rdf.cuh new file mode 100644 index 0000000000..3f6f2a7d80 --- /dev/null +++ b/src/cuda_kernels/kernel_rdf.cuh @@ -0,0 +1,9 @@ +#ifndef INC_KERNEL_RDF_CUH +#define INC_KERNEL_RDF_CUH +#include "../Gpu.h" +#include "../ImageOption.h" +int Cpptraj_GPU_RDF(unsigned long*, int, CpptrajGpu::FpType, CpptrajGpu::FpType, + const CpptrajGpu::FpType*, int, const CpptrajGpu::FpType*, int, + ImageOption::Type, const CpptrajGpu::FpType*, + const CpptrajGpu::FpType*, const CpptrajGpu::FpType*); +#endif diff --git a/src/cuda_kernels/ortho_dist2.cuh b/src/cuda_kernels/ortho_dist2.cuh new file mode 100644 index 0000000000..c7ba2e8210 --- /dev/null +++ b/src/cuda_kernels/ortho_dist2.cuh @@ -0,0 +1,34 @@ +#ifndef INC_ORTHO_DIST2_CUH +#define INC_ORTHO_DIST2_CUH +/** \return Shortest imaged distance^2 between given coordinates in an orthorhombic box. */ +template +__device__ T ortho_dist2(T a1x, T a1y, T a1z, + T a2x, T a2y, T a2z, + const T* box) +{ + T x = a1x - a2x; + T y = a1y - a2y; + T z = a1z - a2z; + // Get rid of sign info + if (x<0) x=-x; + if (y<0) y=-y; + if (z<0) z=-z; + // Get rid of multiples of box lengths + while (x > box[0]) x = x - box[0]; + while (y > box[1]) y = y - box[1]; + while (z > box[2]) z = z - box[2]; + //below is actually slower! + //x = x - box[0]*((int)x/box[0]); + //y = y - box[0]*((int)y/box[1]); + //z = z - box[0]*((int)z/box[2]); + // Find shortest distance in periodic reference + T D2 = box[0] - x; + if (D2 < x) x = D2; + D2 = box[1] - y; + if (D2 < y) y = D2; + D2 = box[2] - z; + if (D2 < z) z = D2; + + return (x*x + y*y + z*z); +} +#endif diff --git a/test/MasterTest.sh b/test/MasterTest.sh index 0073be6f73..581feee1ae 100644 --- a/test/MasterTest.sh +++ b/test/MasterTest.sh @@ -75,6 +75,7 @@ WARNCOUNT=0 # Total number of warnings detected by DoTest this test PROGCOUNT=0 # Total number of times RunCpptraj has been called this test. PROGERROR=0 # Total number of program errors this test CHECKERR=0 # Total errors this test from CheckEnv routine. +CHECKSILENT=0 # If 1 make the TestLibrary routine silent TESTNAME='' # Current test name for Requires routine. UNITNAME='' # Current unit name for CheckFor routine. DESCRIP='' # Current test/unit name for CheckEnv routine. @@ -988,8 +989,10 @@ SkipCheck() { # CheckDefines. TestLibrary() { if [ -z "$2" ] ; then - echo " $DESCRIP requires $1." - echo " Cpptraj was compiled without $1 support." + if [ $CHECKSILENT -eq 0 ] ; then + echo " $DESCRIP requires $1." + echo " Cpptraj was compiled without $1 support." + fi ((CHECKERR++)) fi } @@ -1172,6 +1175,16 @@ CheckFor() { return 0 } +# CheckForSilent() +# \return > 0 if anything in is not present, 0 otherwise. +CheckForSilent() { + CHECKSILENT=1 + # Give generic name + DESCRIP='Test' + CheckEnv $* + return $CHECKERR +} + # ------------------------------------------------------------------------------ # TODO remove all deprecated functions below. # FIXME This is a stub and should be removed diff --git a/test/Test_Radial/RunTest.sh b/test/Test_Radial/RunTest.sh index 9d2b00ef9b..74caaff563 100755 --- a/test/Test_Radial/RunTest.sh +++ b/test/Test_Radial/RunTest.sh @@ -5,11 +5,21 @@ CleanFiles radial.in Radial.agr cRadial.agr WatO-Trp4.agr WatO-Trp4.raw.agr \ WatO-Trp4.byres.agr WatO-Trp.agr WatO-Trp.volume.agr \ WatO-Glu5CD.agr noimage.WatO-Glu5CD.agr point.dat \ - point?.agr wat.origin.agr + point?.agr wat.origin.agr tz2.WatO-Prot.agr tz2.WatO.agr \ + tz2ortho.WatO-Prot.agr tz2ortho.WatO.agr \ + tz2noimage.WatO-Prot.agr tz2noimage.WatO.agr TESTNAME='Radial tests' Requires netcdf maxthreads 10 +# CUDA requires a tolerance since it is done in single precision +TOL_ARG='' +CheckForSilent cuda +if [ $? -eq 0 ] ; then + TOL_ARG='-a 0.0002' + echo "Lowering tolerance for single-precision CUDA: $TOL_ARG" +fitz2noimage.WatO-Prot.agr + INPUT="-i radial.in" UNITNAME='Radial test, non-orthogonal imaging' @@ -45,9 +55,9 @@ RunCpptraj "$UNITNAME" DoTest WatO-Glu5CD.agr.save WatO-Glu5CD.agr DoTest noimage.WatO-Glu5CD.agr.save noimage.WatO-Glu5CD.agr +UNITNAME='Radial test, specified point' CheckFor maxthreads 1 if [ $? -eq 0 ] ; then - UNITNAME='Radial test, specified point' cat > radial.in < radial.in < radial.in < radial.in < radial.in < radial.in < radial.in < ^1" +@target G0.S0 +@type xy + 0.125 0.000000 + 0.375 0.000000 + 0.625 0.000000 + 0.875 0.000000 + 1.125 0.000000 + 1.375 0.000000 + 1.625 0.009807 + 1.875 0.090895 + 2.125 0.066963 + 2.375 0.083493 + 2.625 0.219500 + 2.875 0.313730 + 3.125 0.349662 + 3.375 0.378358 + 3.625 0.455962 + 3.875 0.466993 + 4.125 0.460903 + 4.375 0.482029 + 4.625 0.476009 + 4.875 0.527967 + 5.125 0.514599 + 5.375 0.537444 + 5.625 0.541715 + 5.875 0.582414 + 6.125 0.578839 + 6.375 0.613603 + 6.625 0.619699 + 6.875 0.641508 + 7.125 0.657762 + 7.375 0.685561 + 7.625 0.698543 + 7.875 0.699525 + 8.125 0.734832 + 8.375 0.744579 + 8.625 0.762673 + 8.875 0.786471 + 9.125 0.800059 + 9.375 0.818037 + 9.625 0.820301 + 9.875 0.832422 + 10.125 0.848308 + 10.375 0.859424 + 10.625 0.859642 + 10.875 0.868914 + 11.125 0.881561 + 11.375 0.898519 + 11.625 0.896258 + 11.875 0.907679 + 12.125 0.905259 + 12.375 0.919516 + 12.625 0.926709 + 12.875 0.922429 + 13.125 0.932410 + 13.375 0.935351 + 13.625 0.940967 + 13.875 0.944092 + 14.125 0.950874 + 14.375 0.950799 + 14.625 0.958463 + 14.875 0.962108 diff --git a/test/Test_Radial/tz2.WatO.agr.save b/test/Test_Radial/tz2.WatO.agr.save new file mode 100644 index 0000000000..196b709c66 --- /dev/null +++ b/test/Test_Radial/tz2.WatO.agr.save @@ -0,0 +1,68 @@ +@with g0 +@ xaxis label "Distance (Ang)" +@ yaxis label "" +@ legend 0.2, 0.995 +@ legend char size 0.60 +@ s0 legend ":WAT@O => :WAT@O" +@target G0.S0 +@type xy + 0.125 0.000000 + 0.375 0.000000 + 0.625 0.000000 + 0.875 0.000000 + 1.125 0.000000 + 1.375 0.000000 + 1.625 0.000000 + 1.875 0.000000 + 2.125 0.000000 + 2.375 0.008119 + 2.625 1.302900 + 2.875 2.167052 + 3.125 1.206232 + 3.375 0.972103 + 3.625 0.908561 + 3.875 0.931459 + 4.125 0.954274 + 4.375 0.936945 + 4.625 0.934350 + 4.875 0.980699 + 5.125 0.973067 + 5.375 0.951298 + 5.625 0.926571 + 5.875 0.942356 + 6.125 0.945199 + 6.375 0.980169 + 6.625 0.973968 + 6.875 0.981792 + 7.125 0.970289 + 7.375 0.958719 + 7.625 0.969242 + 7.875 0.949171 + 8.125 0.941500 + 8.375 0.952427 + 8.625 0.952719 + 8.875 0.951468 + 9.125 0.955649 + 9.375 0.958501 + 9.625 0.955208 + 9.875 0.967729 + 10.125 0.956302 + 10.375 0.950518 + 10.625 0.947371 + 10.875 0.941653 + 11.125 0.958531 + 11.375 0.951502 + 11.625 0.948216 + 11.875 0.958844 + 12.125 0.960211 + 12.375 0.948780 + 12.625 0.946982 + 12.875 0.948940 + 13.125 0.948910 + 13.375 0.950781 + 13.625 0.951658 + 13.875 0.946405 + 14.125 0.953586 + 14.375 0.953365 + 14.625 0.956983 + 14.875 0.947460 diff --git a/test/Test_Radial/tz2noimage.WatO-Prot.agr.save b/test/Test_Radial/tz2noimage.WatO-Prot.agr.save new file mode 100644 index 0000000000..56d65609a1 --- /dev/null +++ b/test/Test_Radial/tz2noimage.WatO-Prot.agr.save @@ -0,0 +1,68 @@ +@with g0 +@ xaxis label "Distance (Ang)" +@ yaxis label "" +@ legend 0.2, 0.995 +@ legend char size 0.60 +@ s0 legend ":WAT@O => ^1" +@target G0.S0 +@type xy + 0.125 0.000000 + 0.375 0.000000 + 0.625 0.000000 + 0.875 0.000000 + 1.125 0.000000 + 1.375 0.000000 + 1.625 0.008173 + 1.875 0.108091 + 2.125 0.087052 + 2.375 0.086557 + 2.625 0.233297 + 2.875 0.319481 + 3.125 0.377547 + 3.375 0.443251 + 3.625 0.490833 + 3.875 0.482252 + 4.125 0.484533 + 4.375 0.506425 + 4.625 0.503902 + 4.875 0.552527 + 5.125 0.563820 + 5.375 0.570221 + 5.625 0.601981 + 5.875 0.593438 + 6.125 0.615376 + 6.375 0.635202 + 6.625 0.642753 + 6.875 0.662093 + 7.125 0.683146 + 7.375 0.702575 + 7.625 0.734615 + 7.875 0.730067 + 8.125 0.748326 + 8.375 0.770473 + 8.625 0.788309 + 8.875 0.812769 + 9.125 0.813718 + 9.375 0.828320 + 9.625 0.842101 + 9.875 0.848165 + 10.125 0.862270 + 10.375 0.867660 + 10.625 0.880404 + 10.875 0.885259 + 11.125 0.886767 + 11.375 0.901493 + 11.625 0.909090 + 11.875 0.909520 + 12.125 0.912789 + 12.375 0.909322 + 12.625 0.909426 + 12.875 0.920812 + 13.125 0.913432 + 13.375 0.916059 + 13.625 0.920234 + 13.875 0.916597 + 14.125 0.913657 + 14.375 0.914007 + 14.625 0.910080 + 14.875 0.896066 diff --git a/test/Test_Radial/tz2noimage.WatO.agr.save b/test/Test_Radial/tz2noimage.WatO.agr.save new file mode 100644 index 0000000000..7e90ef826f --- /dev/null +++ b/test/Test_Radial/tz2noimage.WatO.agr.save @@ -0,0 +1,68 @@ +@with g0 +@ xaxis label "Distance (Ang)" +@ yaxis label "" +@ legend 0.2, 0.995 +@ legend char size 0.60 +@ s0 legend ":WAT@O => :WAT@O" +@target G0.S0 +@type xy + 0.125 0.000000 + 0.375 0.000000 + 0.625 0.000000 + 0.875 0.000000 + 1.125 0.000000 + 1.375 0.000000 + 1.625 0.000000 + 1.875 0.000000 + 2.125 0.000000 + 2.375 0.002991 + 2.625 1.183779 + 2.875 1.914740 + 3.125 1.027328 + 3.375 0.808214 + 3.625 0.783276 + 3.875 0.748094 + 4.125 0.800100 + 4.375 0.794806 + 4.625 0.789635 + 4.875 0.781317 + 5.125 0.768040 + 5.375 0.732558 + 5.625 0.721379 + 5.875 0.719195 + 6.125 0.717358 + 6.375 0.708327 + 6.625 0.722703 + 6.875 0.715408 + 7.125 0.717762 + 7.375 0.670551 + 7.625 0.679104 + 7.875 0.649474 + 8.125 0.638264 + 8.375 0.645514 + 8.625 0.635123 + 8.875 0.634940 + 9.125 0.617597 + 9.375 0.606620 + 9.625 0.601648 + 9.875 0.588718 + 10.125 0.584387 + 10.375 0.571313 + 10.625 0.563295 + 10.875 0.559449 + 11.125 0.554193 + 11.375 0.542936 + 11.625 0.533000 + 11.875 0.521494 + 12.125 0.518285 + 12.375 0.511268 + 12.625 0.503336 + 12.875 0.496952 + 13.125 0.489966 + 13.375 0.483528 + 13.625 0.474893 + 13.875 0.467818 + 14.125 0.463736 + 14.375 0.453005 + 14.625 0.445415 + 14.875 0.437057 diff --git a/test/Test_Radial/tz2ortho.WatO-Prot.agr.save b/test/Test_Radial/tz2ortho.WatO-Prot.agr.save new file mode 100644 index 0000000000..bd3e2f5dba --- /dev/null +++ b/test/Test_Radial/tz2ortho.WatO-Prot.agr.save @@ -0,0 +1,68 @@ +@with g0 +@ xaxis label "Distance (Ang)" +@ yaxis label "" +@ legend 0.2, 0.995 +@ legend char size 0.60 +@ s0 legend ":WAT@O => ^1" +@target G0.S0 +@type xy + 0.125 0.000000 + 0.375 0.000000 + 0.625 0.000000 + 0.875 0.000000 + 1.125 0.000000 + 1.375 0.000000 + 1.625 0.008173 + 1.875 0.108091 + 2.125 0.087052 + 2.375 0.086557 + 2.625 0.233297 + 2.875 0.319481 + 3.125 0.377547 + 3.375 0.443251 + 3.625 0.490833 + 3.875 0.482252 + 4.125 0.484533 + 4.375 0.506425 + 4.625 0.503902 + 4.875 0.552527 + 5.125 0.563820 + 5.375 0.570221 + 5.625 0.601981 + 5.875 0.593438 + 6.125 0.615376 + 6.375 0.635202 + 6.625 0.642753 + 6.875 0.662093 + 7.125 0.683146 + 7.375 0.702575 + 7.625 0.734690 + 7.875 0.730136 + 8.125 0.748326 + 8.375 0.770658 + 8.625 0.788658 + 8.875 0.813318 + 9.125 0.814238 + 9.375 0.829501 + 9.625 0.843828 + 9.875 0.849628 + 10.125 0.864379 + 10.375 0.870834 + 10.625 0.883929 + 10.875 0.889172 + 11.125 0.892078 + 11.375 0.907108 + 11.625 0.915906 + 11.875 0.919180 + 12.125 0.922349 + 12.375 0.921380 + 12.625 0.924647 + 12.875 0.938499 + 13.125 0.934368 + 13.375 0.940258 + 13.625 0.948398 + 13.875 0.949439 + 14.125 0.951156 + 14.375 0.959693 + 14.625 0.962870 + 14.875 0.956127 diff --git a/test/Test_Radial/tz2ortho.WatO.agr.save b/test/Test_Radial/tz2ortho.WatO.agr.save new file mode 100644 index 0000000000..0e71d30234 --- /dev/null +++ b/test/Test_Radial/tz2ortho.WatO.agr.save @@ -0,0 +1,68 @@ +@with g0 +@ xaxis label "Distance (Ang)" +@ yaxis label "" +@ legend 0.2, 0.995 +@ legend char size 0.60 +@ s0 legend ":WAT@O => :WAT@O" +@target G0.S0 +@type xy + 0.125 0.000000 + 0.375 0.000000 + 0.625 0.000000 + 0.875 0.000000 + 1.125 0.000000 + 1.375 0.000000 + 1.625 0.000000 + 1.875 0.000000 + 2.125 0.000000 + 2.375 0.002991 + 2.625 1.330731 + 2.875 2.176120 + 3.125 1.187506 + 3.375 0.923321 + 3.625 0.916463 + 3.875 0.899886 + 4.125 0.962833 + 4.375 0.969469 + 4.625 0.979084 + 4.875 0.961074 + 5.125 0.959835 + 5.375 0.937324 + 5.625 0.933079 + 5.875 0.938541 + 6.125 0.950825 + 6.375 0.952239 + 6.625 0.982928 + 6.875 0.983850 + 7.125 0.992426 + 7.375 0.948645 + 7.625 0.957272 + 7.875 0.941759 + 8.125 0.940810 + 8.375 0.950494 + 8.625 0.948031 + 8.875 0.977353 + 9.125 0.960639 + 9.375 0.942116 + 9.625 0.952574 + 9.875 0.941151 + 10.125 0.951702 + 10.375 0.949590 + 10.625 0.953104 + 10.875 0.951344 + 11.125 0.956692 + 11.375 0.947691 + 11.625 0.954567 + 11.875 0.942745 + 12.125 0.943635 + 12.375 0.953684 + 12.625 0.944898 + 12.875 0.947955 + 13.125 0.948854 + 13.375 0.942730 + 13.625 0.953365 + 13.875 0.944907 + 14.125 0.950638 + 14.375 0.949681 + 14.625 0.947970 + 14.875 0.945455