diff --git a/doc/CpptrajManual.pdf b/doc/CpptrajManual.pdf index 77fc309b0d..b33dcdefc6 100644 Binary files a/doc/CpptrajManual.pdf and b/doc/CpptrajManual.pdf differ diff --git a/doc/DocumentChecksums.txt b/doc/DocumentChecksums.txt index 7491bdc6e0..8229700e47 100644 --- a/doc/DocumentChecksums.txt +++ b/doc/DocumentChecksums.txt @@ -1,3 +1,3 @@ f6f8cb1a79951d80a9d2656fd9c30f55 CpptrajDevelopmentGuide.lyx -369e9afee9918ff0797c4a4bf08473fd cpptraj.lyx +180095279f879c5da5a6634cc0ff9799 cpptraj.lyx 5d9b5b5ed47a3ded57b6464df99b3585 CpptrajManual.lyx diff --git a/doc/cpptraj.lyx b/doc/cpptraj.lyx index 9169966825..4eed8ee336 100644 --- a/doc/cpptraj.lyx +++ b/doc/cpptraj.lyx @@ -21779,7 +21779,7 @@ Options for pdb format \end_layout \begin_layout LyX-Code - [pdbv3] [teradvance] [terbyres | pdbter | noter] + [pdbv3] [pdbchain] [teradvance] [terbyres | pdbter | noter] \end_layout \begin_layout LyX-Code @@ -21803,7 +21803,7 @@ Options for pdb format \end_layout \begin_layout LyX-Code - [adpdata ] + [adpdata ] [{hybrid36|wrapnumbers}] \end_layout \begin_deeper @@ -21836,6 +21836,11 @@ pdbv3 Use PDB V3 residue/atom names. Same as specifying 'pdbres' and 'pdbatom'. \end_layout +\begin_layout Description +pdbchain Restrict PDB chain ID to a single character (PDB V3 standard), + otherwise allow chain IDs up to 2 characters. +\end_layout + \begin_layout Description topresnum Use topology residue numbers; otherwise use original residue numbers. @@ -22000,6 +22005,14 @@ nolink "false" ) for anisotropic B-factors. \end_layout +\begin_layout Description +hybrid36 Use hybrid36 formatting for large atom/residue numbers (default). +\end_layout + +\begin_layout Description +wrapnumbers Wrap large atom/residue numbers to zero (previous behavior). +\end_layout + \end_deeper \begin_layout Subsubsection diff --git a/src/Action_AtomicFluct.cpp b/src/Action_AtomicFluct.cpp index 08f6e9b110..96dcfad729 100644 --- a/src/Action_AtomicFluct.cpp +++ b/src/Action_AtomicFluct.cpp @@ -257,7 +257,7 @@ void Action_AtomicFluct::Print() { PDBfile& adpout = static_cast( *adpoutfile_ ); adpout.WriteANISOU( atom+1, (*fluctParm_)[atom].c_str(), fluctParm_->Res(resnum).c_str(), - fluctParm_->Res(resnum).ChainID_1char(), fluctParm_->Res(resnum).OriginalResNum(), + fluctParm_->Res(resnum).ChainID_Nchar(2), fluctParm_->Res(resnum).OriginalResNum(), anisou, (*fluctParm_)[atom].ElementName(), 0 ); } } diff --git a/src/PDBfile.cpp b/src/PDBfile.cpp index 97187b9be7..d6cc421427 100644 --- a/src/PDBfile.cpp +++ b/src/PDBfile.cpp @@ -20,7 +20,7 @@ PDBfile::SSBOND::SSBOND() : PDBfile::SSBOND::SSBOND(int idx1, int idx2, Residue const& r1, Residue const& r2) : idx1_( idx1), idx2_( idx2), rnum1_( r1.OriginalResNum()), rnum2_( r2.OriginalResNum()), - chain1_(r1.ChainID_1char()), chain2_(r2.ChainID_1char()), + chain1_(r1.ChainID_Nchar(1)[0]), chain2_(r2.ChainID_Nchar(1)[0]), // FIXME support >1 char chain ID? icode1_(r1.Icode()), icode2_(r2.Icode()) { std::copy(r1.c_str(), r1.c_str()+3, name1_); @@ -121,7 +121,7 @@ PDBfile::Link::Link() : rnum1_(-1), rnum2_(-1), altloc1_(' '), altloc2_(' '), PDBfile::Link::Link(const char* a1, char alt1, const char* r1, char ch1, int rnum1, char code1, const char* a2, char alt2, const char* r2, char ch2, int rnum2, char code2, SymOp const& S1, SymOp const& S2) : - rnum1_(rnum1), rnum2_(rnum2), altloc1_(alt1), altloc2_(alt2), chain1_(ch1), chain2_(ch2), + rnum1_(rnum1), rnum2_(rnum2), altloc1_(alt1), altloc2_(alt2), chain1_(ch1), chain2_(ch2), // FIXME support > 1 char chain ID? icode1_(code1), icode2_(code2), sym1_(S1), sym2_(S2) { std::copy(a1, a1+4, aname1_); aname1_[4] = '\0'; @@ -186,7 +186,8 @@ PDBfile::PDBfile() : wrapType_(HYBRID36), lineLengthWarning_(false), coordOverflow_(false), - useCol21_(false) + useCol21_(false), + has_large_chainid_(false) {} /** Set atom/residue number wrap type */ @@ -468,14 +469,30 @@ Residue PDBfile::pdb_Residue() { linebuffer_[20] = '\0'; NameType resName(linebuffer_+17); linebuffer_[20] = savechar; - // Chain ID (21) + // Chain ID (21) - extended is 20-21 + std::string chainID; + if (linebuffer_[20] != ' ') { + chainID.reserve(2); + chainID += linebuffer_[20]; + has_large_chainid_ = true; + } else + chainID.reserve(1); + chainID += linebuffer_[21]; // Res num (22-26), insertion code (26) char icode = linebuffer_[26]; linebuffer_[26] = '\0'; //int resnum = atoi( linebuffer_+22 ); int resnum = decodeResNum( linebuffer_+22 ); linebuffer_[26] = icode; - return Residue( resName, resnum, icode, std::string(1, linebuffer_[21]) ); + return Residue( resName, resnum, icode, chainID ); +} + +/** Print a warning if the large chain ID flag is set and reset the flag. */ +void PDBfile::WarnLargeChainID() { + if (has_large_chainid_) { + mprintf("Warning: PDB file has large (>1 char) chain IDs.\n"); + has_large_chainid_ = false; + } } // PDBfile::pdb_XYZ() @@ -686,13 +703,13 @@ PDBfile::Link PDBfile::pdb_Link() { // ----------------------------------------------------------------------------- // PDBfile::WriteRecordHeader() void PDBfile::WriteRecordHeader(PDB_RECTYPE Record, int anum, NameType const& name, - char altLoc, NameType const& resnameIn, char chain, + char altLoc, NameType const& resnameIn, std::string const& chain, int resnum, char icode, const char* Elt) { - char resName[6], atomName[5]; + char resName[7], atomName[5]; char resNum[5], atomNum[6]; - resName[5]='\0'; + resName[6]='\0'; atomName[4]='\0'; // Residue number in PDB format can only be 4 digits wide // Atom number in PDB format can only be 5 digits wide @@ -739,6 +756,21 @@ void PDBfile::WriteRecordHeader(PDB_RECTYPE Record, int anum, NameType const& na rn_idx = 3; for (int i = rn_size - 1; i > -1; i--, rn_idx--) resName[rn_idx] = resnameIn[i]; + // Chain ID + // The chain ID normally is a single character in column 22. + // We can support chain IDs of 2 characters (for e.g. hybrid36 PDBs) + // by making use of column 21 as well, even though this is not + // PDB-format compliant. + resName[5] = ' '; + if (!chain.empty()) { + if (chain.size() == 1) + resName[5] = chain[0]; + else if (!useCol21_) { + resName[4] = chain[0]; + resName[5] = chain[1]; + has_large_chainid_ = true; + } + } // Determine size in characters of element name if given. int eNameChars = 0; if (Elt != 0) eNameChars = strlen( Elt ); @@ -766,14 +798,14 @@ void PDBfile::WriteRecordHeader(PDB_RECTYPE Record, int anum, NameType const& na atomName[i+1] = name[i]; } // TODO if REMARK, which #? - Printf("%-6s%5s %-4s%5s%c%4s%c",PDB_RECNAME_[Record], atomNum, atomName, - resName, chain, resNum, icode); + Printf("%-6s%5s %-4s%6s%4s%c",PDB_RECNAME_[Record], atomNum, atomName, + resName, resNum, icode); if (Record == TER) Printf("\n"); } // PDBfile::WriteHET() void PDBfile::WriteHET(int res, double x, double y, double z) { - WriteCoord(HETATM, anum_++, "XX", ' ', "XXX", ' ', + WriteCoord(HETATM, anum_++, "XX", ' ', "XXX", "", res, ' ', x, y, z, 0.0, 0.0, "", 0, false); } @@ -781,7 +813,7 @@ void PDBfile::WriteHET(int res, double x, double y, double z) { void PDBfile::WriteATOM(int res, double x, double y, double z, const char* resnameIn, double Occ) { - WriteCoord(ATOM, anum_++, "XX", ' ', resnameIn, ' ', + WriteCoord(ATOM, anum_++, "XX", ' ', resnameIn, "", res, ' ', x, y, z, (float)Occ, 0.0, "", 0, false); } @@ -789,13 +821,13 @@ void PDBfile::WriteATOM(int res, double x, double y, double z, void PDBfile::WriteATOM(const char* anameIn, int res, double x, double y, double z, const char* resnameIn, double Occ) { - WriteCoord(ATOM, anum_++, anameIn, ' ', resnameIn, ' ', + WriteCoord(ATOM, anum_++, anameIn, ' ', resnameIn, "", res, ' ', x, y, z, (float)Occ, 0.0, "", 0, false); } // PDBfile::WriteCoord() void PDBfile::WriteCoord(PDB_RECTYPE Record, int anum, NameType const& name, - NameType const& resnameIn, char chain, int resnum, + NameType const& resnameIn, std::string const& chain, int resnum, double X, double Y, double Z) { WriteCoord(Record, anum, name, ' ', resnameIn, chain, @@ -807,7 +839,7 @@ void PDBfile::WriteCoord(PDB_RECTYPE Record, int anum, NameType const& aname, double X, double Y, double Z, float Occ, float Bfac, const char* Elt, int charge) { - WriteCoord(Record, anum, aname, ' ', rname, ' ', resnum, ' ', X, Y, Z, + WriteCoord(Record, anum, aname, ' ', rname, "", resnum, ' ', X, Y, Z, Occ, Bfac, Elt, charge, false); } @@ -846,7 +878,7 @@ static inline void SetChargeString(char* charge_buf, int charge) { // PDBfile::WriteCoord() void PDBfile::WriteCoord(PDB_RECTYPE Record, int anum, NameType const& name, char altLoc, - NameType const& resnameIn, char chain, int resnum, + NameType const& resnameIn, std::string const& chain, int resnum, char icode, double X, double Y, double Z, float Occ, float B, const char* Elt, int charge, bool highPrecision) @@ -868,7 +900,7 @@ void PDBfile::WriteCoord(PDB_RECTYPE Record, int anum, NameType const& name, // PDBfile::WriteANISOU() void PDBfile::WriteANISOU(int anum, NameType const& name, - NameType const& resnameIn, char chain, int resnum, + NameType const& resnameIn, std::string const& chain, int resnum, const double* anisou, const char* Elt, int charge) { // TODO icode, altLoc diff --git a/src/PDBfile.h b/src/PDBfile.h index 7bb382b026..718eecf8cf 100644 --- a/src/PDBfile.h +++ b/src/PDBfile.h @@ -36,6 +36,8 @@ class PDBfile : public CpptrajFile { Atom pdb_Atom() { char al; int n; return pdb_Atom(al,n); } /// \return Residue info with name, number, icode, and chainID for ATOM/HETATM. Residue pdb_Residue(); + /// Warn if large chainIDs detected + void WarnLargeChainID(); /// Set given XYZ array with coords from ATOM/HETATM record. void pdb_XYZ(double*); /// Get occupancy and B-factor from ATOM/HETATM record. @@ -61,7 +63,7 @@ class PDBfile : public CpptrajFile { bool UseCol21() const { return useCol21_; } /// Write PDB record header. void WriteRecordHeader(PDB_RECTYPE, int, NameType const&, char, - NameType const&, char, int, char, const char*); + NameType const&, std::string const&, int, char, const char*); /// Write HETATM record using internal atom numbering void WriteHET(int, double, double, double); /// Write no-name ATOM record using internal atom numbering @@ -69,18 +71,18 @@ class PDBfile : public CpptrajFile { /// Write ATOM record with given name using internal atom numbering void WriteATOM(const char*, int, double, double, double, const char*, double); /// Write PDB ATOM/HETATM record, no B-factor, occ, elt, or charge. - void WriteCoord(PDB_RECTYPE, int, NameType const&, NameType const&, char, int, + void WriteCoord(PDB_RECTYPE, int, NameType const&, NameType const&, std::string const&, int, double, double, double); /// Write PDB ATOM/HETATM record, no alt loc, chain ID, icode. void WriteCoord(PDB_RECTYPE, int, NameType const&, NameType const&, int, double, double, double, float, float, const char*, int); /// Write complete PDB ATOM/HETATM record - void WriteCoord(PDB_RECTYPE, int, NameType const&, char, NameType const&, char, int, + void WriteCoord(PDB_RECTYPE, int, NameType const&, char, NameType const&, std::string const&, int, char, double, double, double, float, float, const char *, int, bool); /// \return True if coordinate write has overflowed; reset overflow status. bool CoordOverflow() { bool stat = coordOverflow_; coordOverflow_ = false; return stat; } /// Write ANISOU record. - void WriteANISOU(int, NameType const&, NameType const&, char, int, + void WriteANISOU(int, NameType const&, NameType const&, std::string const&, int, const double*, const char *, int); /// Write TITLE void WriteTITLE(std::string const&); @@ -120,6 +122,7 @@ class PDBfile : public CpptrajFile { bool lineLengthWarning_; ///< True if any read line is shorter than 80 char bool coordOverflow_; ///< True if coords on write exceed field width bool useCol21_; ///< If true, use column 21 for 4-char res name + bool has_large_chainid_; ///< If true, large (>1 char) chain IDs detected /// Recognized PDB record types; corresponds to PDB_RECTYPE static const char* PDB_RECNAME_[]; /// Correspond to NumWrapType diff --git a/src/Parm_PDB.cpp b/src/Parm_PDB.cpp index 5ec25b6b3f..73cd2077d9 100644 --- a/src/Parm_PDB.cpp +++ b/src/Parm_PDB.cpp @@ -180,6 +180,7 @@ int Parm_PDB::ReadParm(FileName const& fname, Topology &TopIn) { } // END loop over PDB records if (infile.HasHybrid36()) mprintf("\tPDB appears to have hybrid36 encoding in atom/residue number fields.\n"); + infile.WarnLargeChainID(); if (hasMissingResidues) { mprintf("\t%zu missing residues.\n", missingResidues.size()); @@ -238,7 +239,7 @@ int Parm_PDB::ReadParm(FileName const& fname, Topology &TopIn) { for (Topology::res_iterator res = TopIn.ResStart(); res != TopIn.ResEnd(); ++res) { if (r1 == TopIn.ResEnd()) { if (link->Rnum1() == res->OriginalResNum() && - link->Chain1() == res->ChainID_1char() && + link->Chain1() == res->ChainID_Nchar(1)[0] && link->Icode1() == res->Icode()) { r1 = res; @@ -247,7 +248,7 @@ int Parm_PDB::ReadParm(FileName const& fname, Topology &TopIn) { } if (r2 == TopIn.ResEnd()) { if (link->Rnum2() == res->OriginalResNum() && - link->Chain2() == res->ChainID_1char() && + link->Chain2() == res->ChainID_Nchar(1)[0] && link->Icode2() == res->Icode()) { r2 = res; diff --git a/src/Residue.cpp b/src/Residue.cpp index a6280438e3..adf4fa14d6 100644 --- a/src/Residue.cpp +++ b/src/Residue.cpp @@ -4,12 +4,22 @@ const char Residue::DEFAULT_CHAINID_ = 'Z'; -/** \return 1 character chain ID, warn if chain ID is larger than that. */ -char Residue::ChainID_1char() const { - if (chainID_.size() == 1) return chainID_[0]; - if (chainID_.empty()) return ' '; - mprintf("Warning: Chain ID '%s' is larger than 1 character. Truncating.\n", chainID_.c_str()); - return chainID_[0]; +/** \return Max N character chain ID, warn if chain ID is larger than that. */ +std::string Residue::ChainID_Nchar(unsigned int max) const { + if (max == 0 || chainID_.empty()) return std::string(""); + // Max is bigger than or equal to the chain ID size, just return the chain ID + if (max >= chainID_.size()) return chainID_; + // Need to truncate + mprintf("Warning: Chain ID '%s' is larger than %u character. Truncating.\n", chainID(), max, chainID_.c_str()); + std::string trunc; + trunc.reserve(max); + //int startIdx = chainID_.size() - max; + //if (startIdx < 0) startIdx = 0; + //for (unsigned int idx = (unsigned int)startIdx; idx < chainID_.size(); idx++) + // trunc += chainID_[idx]; + for (unsigned int idx = 0; idx < max; idx++) + trunc += chainID_[idx]; + return trunc; } /** Check if a blank string has been set as the chain ID; if so, clear it. */ diff --git a/src/Residue.h b/src/Residue.h index 7c41756b7c..676640207c 100644 --- a/src/Residue.h +++ b/src/Residue.h @@ -84,8 +84,8 @@ class Residue { inline std::string const& ChainID() const { return chainID_; } /// \return const char* for printing chain ID to stdout inline const char* chainID() const { return chainID_.c_str(); } - /// \return 1 character chain ID; warn if chainID is more than 1 character - char ChainID_1char() const; + /// \return Chain ID with max specified length; warn if residue's chainID is more than the given max. + std::string ChainID_Nchar(unsigned int) const; /// \return True if chain ID is not blank. inline bool HasChainID() const { return !chainID_.empty(); } inline const char *c_str() const { return *resname_; } diff --git a/src/Traj_PDBfile.cpp b/src/Traj_PDBfile.cpp index b9b333d66c..9c4cc315b0 100644 --- a/src/Traj_PDBfile.cpp +++ b/src/Traj_PDBfile.cpp @@ -16,6 +16,7 @@ Traj_PDBfile::Traj_PDBfile() : conectMode_(NO_CONECT), pdbWriteMode_(NONE), resNumType_(ORIGINAL), + maxpdbchain_(2), pdbAtom_(0), currentSet_(0), ter_num_(0), @@ -237,6 +238,7 @@ void Traj_PDBfile::WriteHelp() { "\tpdbres : Use PDB V3 residue names.\n" "\tpdbatom : Use PDB V3 atom names.\n" "\tpdbv3 : Use PDB V3 residue/atom names.\n" + "\tpdbchain : Restrict PDB chain ID to a single character (PDB V3 standard).\n" "\ttopresnum : Use topology residue numbers; otherwise use original residue numbers.\n" "\tteradvance : Increment record (atom) # for TER records (default no).\n" "\tterbyres : Print TER cards based on residue sequence instead of molecules.\n" @@ -262,6 +264,8 @@ void Traj_PDBfile::WriteHelp() { "\tbfacmax : Max value for bfacscale.\n" "\toccmax : Max value for occscale.\n" "\tadpdata : Use data in for anisotropic B-factors.\n" + "\thybrid36 : Use hybrid36 formatting for large atom/residue numbers (default).\n" + "\twrapnumbers : Wrap large atom/residue numbers to zero (previous behavior).\n" ); } @@ -319,12 +323,16 @@ int Traj_PDBfile::processWriteArgs(ArgList& argIn, DataSetList const& DSLin) { prependExt_ = argIn.hasKey("keepext"); // Implies MULTI if (prependExt_) pdbWriteMode_ = MULTI; space_group_ = argIn.GetStringKey("sg"); + if (argIn.hasKey("pdbchain")) + maxpdbchain_ = 1; + else + maxpdbchain_ = 2; chainchar_ = argIn.GetStringKey("chainid"); if (!chainchar_.empty()) { - if (chainchar_.size() > 1) { - mprintf("Warning: Specified chain ID %s is too big for PDB, truncating to %c\n", - chainchar_.c_str(), chainchar_[0]); - chainchar_.assign(1, chainchar_[0]); + if (chainchar_.size() > maxpdbchain_) { + chainchar_.resize(maxpdbchain_); + mprintf("Warning: Specified chain ID is too big for PDB, truncating to %s\n", + chainchar_.c_str()); } } if (argIn.hasKey("usecol21")) @@ -485,21 +493,28 @@ int Traj_PDBfile::setupTrajout(FileName const& fname, Topology* trajParm, // TODO: Set different chain ID for solute mols and solvent chainID_.clear(); // Default to a blank chain ID unless user requested PDB v3 compliance - char def_chainid; + std::string def_chainid; + def_chainid.reserve(2); if (pdbres_) - def_chainid = Residue::DefaultChainID(); + def_chainid = std::string(1, Residue::DefaultChainID()); else - def_chainid = ' '; + def_chainid = std::string(" "); // If no chain ID specified, determine chain ID. + unsigned int max_chain_id_size = 0; if (chainchar_.empty()) { chainID_.reserve( trajParm->Nres() ); - for (Topology::res_iterator res = trajParm->ResStart(); res != trajParm->ResEnd(); ++res) + for (Topology::res_iterator res = trajParm->ResStart(); res != trajParm->ResEnd(); ++res) { if (res->HasChainID()) - chainID_.push_back( res->ChainID_1char() ); + chainID_.push_back( res->ChainID_Nchar(maxpdbchain_) ); else chainID_.push_back( def_chainid ); + if (chainID_.back().size() > max_chain_id_size) + max_chain_id_size = chainID_.back().size(); + } } else - chainID_.resize(trajParm->Nres(), chainchar_[0]); + chainID_.resize(trajParm->Nres(), chainchar_); + if (max_chain_id_size > 1) + mprintf("Warning: PDB will have large (2 char) chain IDs.\n"); // Save residue names. If pdbres specified convert to PDBV3 residue names. resNames_.clear(); diff --git a/src/Traj_PDBfile.h b/src/Traj_PDBfile.h index 0f3b0d8811..999f67c3b0 100644 --- a/src/Traj_PDBfile.h +++ b/src/Traj_PDBfile.h @@ -63,6 +63,7 @@ class Traj_PDBfile: public TrajectoryIO { CONECT_Mode conectMode_; ///< CONECT record mode. PDBWRITEMODE pdbWriteMode_; RESNUM_Mode resNumType_; ///< What residue numbers will be used + unsigned int maxpdbchain_; ///< Max length of PDB chain IDs (1 is PDB standard, allow up to 2 by default) int pdbAtom_; int currentSet_; int ter_num_; ///< Amount to increment atom number for TER @@ -87,7 +88,7 @@ class Traj_PDBfile: public TrajectoryIO { Iarray ss_atoms_; Topology *pdbTop_; PDBfile file_; - std::vector chainID_; ///< Hold chainID for each residue. + std::vector chainID_; ///< Hold chainID for each residue. std::vector resNames_; ///< Hold residue names. std::string chainchar_; ///< User-specified chain ID char keepAltLoc_; ///< If not blank, only read atoms with this alt. loc. ID diff --git a/src/Version.h b/src/Version.h index ddbee0927b..ff93a2f878 100644 --- a/src/Version.h +++ b/src/Version.h @@ -12,7 +12,7 @@ * Whenever a number that precedes is incremented, all subsequent * numbers should be reset to 0. */ -#define CPPTRAJ_INTERNAL_VERSION "V7.5.0" +#define CPPTRAJ_INTERNAL_VERSION "V7.6.0" /// PYTRAJ relies on this #define CPPTRAJ_VERSION_STRING CPPTRAJ_INTERNAL_VERSION #endif