From ab6e2b3aa182263b601e0062080cf0b52f87045d Mon Sep 17 00:00:00 2001 From: Oleg Sobolev Date: Fri, 3 Nov 2023 11:45:38 -0700 Subject: [PATCH] Change altloc from str1 to std::string for future flexibility. --- iotbx/pdb/atom_selection.cpp | 24 ++++++++--- iotbx/pdb/hierarchy.cpp | 51 ++++++++++++------------ iotbx/pdb/hierarchy.h | 43 ++++++++++---------- iotbx/pdb/hierarchy_atom_bpl.cpp | 3 +- iotbx/pdb/hierarchy_ext.cpp | 2 +- iotbx/pdb/hierarchy_select.cpp | 2 +- iotbx/pdb/hierarchy_write.cpp | 4 +- iotbx/pdb/overall_counts.cpp | 25 +++++++----- iotbx/pdb/tst_atom_selection.py | 2 +- iotbx/pdb/tst_hierarchy.py | 21 ++++++---- iotbx/regression/tst_hierarchy_id_str.py | 7 +++- mmtbx/reduce/Optimizers.cpp | 2 +- mmtbx/secondary_structure/dssp.hpp | 2 +- 13 files changed, 108 insertions(+), 80 deletions(-) diff --git a/iotbx/pdb/atom_selection.cpp b/iotbx/pdb/atom_selection.cpp index edd2e87f46..1b0fcae234 100644 --- a/iotbx/pdb/atom_selection.cpp +++ b/iotbx/pdb/atom_selection.cpp @@ -28,6 +28,18 @@ namespace { } } + void + s_map_array_transfer( + std::map >& map_s, + std::map >& map) + { + typedef typename std::map >::iterator it; + it i_end = map_s.end(); + for(it i=map_s.begin();i!=i_end;i++) { + map[i->first].swap(i->second); + } + } + } // namespace atom_selection_cache::atom_selection_cache( @@ -35,7 +47,7 @@ namespace { bool altloc_only) { std::map > name_s; - std::map > altloc_s; + std::map > altloc_s; std::map > resseq_s; std::map > icode_s; std::map > resid_s; @@ -106,11 +118,11 @@ namespace { } n_seq = i_seq; if (!altloc_only) { - if (altloc_s.find(str1("")) != altloc_s.end()) { - std::vector const& s0 = altloc_s[str1("")]; - std::vector& s1 = altloc_s[str1(" ")]; + if (altloc_s.find("") != altloc_s.end()) { + std::vector const& s0 = altloc_s[""]; + std::vector& s1 = altloc_s[" "]; s1.insert(s1.end(), s0.begin(), s0.end()); - altloc_s.erase(str1("")); + altloc_s.erase(""); } map_array_transfer(name_s, name); map_array_transfer(resseq_s, resseq); @@ -120,7 +132,7 @@ namespace { map_array_transfer(element_s, element); map_array_transfer(charge_s, charge); } - map_array_transfer(altloc_s, altloc); + s_map_array_transfer(altloc_s, altloc); } af::shared get_resid_sequence ( diff --git a/iotbx/pdb/hierarchy.cpp b/iotbx/pdb/hierarchy.cpp index ca00a387d0..7737e4363a 100644 --- a/iotbx/pdb/hierarchy.cpp +++ b/iotbx/pdb/hierarchy.cpp @@ -617,7 +617,7 @@ namespace { altloc = resname = resseq = icode = chain_id = model_id = 0; } else { - altloc = ag->altloc.elems; + altloc = ag->altloc.c_str(); resname = ag->resname.c_str(); shared_ptr rg_lock = ag->parent.lock(); const residue_group_data* rg = rg_lock.get(); @@ -694,7 +694,7 @@ namespace { atom_with_labels const& self, atom_label_columns_formatter& label_formatter) { - label_formatter.altloc = self.altloc.elems; + label_formatter.altloc = self.altloc.c_str(); label_formatter.resname = self.resname.c_str(); label_formatter.resseq = self.resseq.elems; label_formatter.icode = self.icode.elems; @@ -1111,7 +1111,7 @@ namespace { std::string chain_id; str4 resseq; str1 icode; - str1 altloc; + std::string altloc; std::string resname; boost::optional ag = parent(); if (ag) { @@ -1137,7 +1137,7 @@ namespace { chain_id.c_str(), resseq.elems, icode.elems, - altloc.elems, + altloc.c_str(), resname.c_str(), /* is_first_in_chain */ false, /* is_first_after_break */ false); @@ -1330,7 +1330,7 @@ namespace { atom_group::confid() const { std::string result; - result += (data->altloc.size() == 0) ? " " : data->altloc.elems; + result += (data->altloc.size() == 0) ? " " : data->altloc.c_str(); result += (data->resname.size() == 0) ? " " : boost::algorithm::trim_copy(data->resname); return result; } @@ -1583,8 +1583,8 @@ namespace { typedef std::vector::const_iterator agi_t; agi_t ag_end = data->atom_groups.end(); for(agi_t agi=data->atom_groups.begin();agi!=ag_end;agi++) { - char altloc = agi->data->altloc.elems[0]; - if (altloc != '\0' && altloc != blank_altloc_char) { + std::string altloc = agi->data->altloc; + if (altloc != "" && altloc != blank_altloc_string) { return true; } } @@ -1629,8 +1629,8 @@ namespace { unsigned n_ag = atom_groups_size(); for(unsigned i_ag=0;i_agatom_groups[i_ag]; - char altloc = ag.data->altloc.elems[0]; - if (altloc == '\0' || altloc == blank_altloc_char) { + std::string altloc = ag.data->altloc; + if (altloc == "" || altloc == blank_altloc_string) { if (i_ag != n_blank_altloc_atom_groups) { atom_group ag_by_value = ag; remove_atom_group(i_ag); @@ -1656,7 +1656,7 @@ namespace { unsigned i_ag = 0; for(;i_agatom_groups[i_ag]; - ag.data->altloc.elems[0] = '\0'; + ag.data->altloc = ""; ss4& blank_name_set = blank_name_sets[ag.data->resname]; unsigned n_atoms = ag.atoms_size(); for(unsigned i_atom=0;i_atomresname.c_str()); + atom_group new_ag(blank_altloc_string, ag.data->resname.c_str()); insert_atom_group(i, new_ag); new_atom_group = &data->atom_groups[i]; n_blank_but_alt_atom_groups++; @@ -1755,7 +1755,7 @@ namespace { " (this chain must be the parent)."); } typedef std::map s3ag; - typedef std::map s1s3ag; + typedef std::map s1s3ag; s1s3ag altloc_resname_dict; unsigned n_ag = primary.atom_groups_size(); for(unsigned i=0;i const& i_rgs = mrii->second; unsigned n_i_rgs = static_cast(i_rgs.size()); if (n_i_rgs == 1U) continue; - std::set altlocs; - altlocs.insert('\0'); - altlocs.insert(blank_altloc_char); + std::set altlocs; + altlocs.insert(""); + altlocs.insert(blank_altloc_string); unsigned altlocs_size = 2U; for(unsigned i_i_rgs=0;i_i_rgs const& ags = rg.atom_groups(); bool is_pure_altloc = ( n_ags != 0 - && ags[0].data->altloc.elems[0] != '\0'); + && ags[0].data->altloc != ""); if (common_residue_name_class_only != 0) { skip = 1; for(unsigned i_ag=0;i_ag altlocs; - typedef std::map mcu; + // const char nulc = '\0'; + std::string nulc = ""; + std::vector altlocs; + typedef std::map mcu; mcu altloc_indices; bool have_at_least_one_atom_group = false; for(unsigned i_rg=0;i_rg const& ags = rg.atom_groups(); for(unsigned i_ag=0;i_agaltloc.elems[0]; + std::string altloc = ags[i_ag].data->altloc; if (altloc == nulc) continue; if (altloc_indices.find(altloc) == altloc_indices.end()) { altlocs.push_back(altloc); @@ -2026,10 +2027,10 @@ namespace { af::shared result((af::reserve(n_cf))); for(unsigned i_cf=0;i_cf resnames; // allocate once @@ -2046,7 +2047,7 @@ namespace { std::vector const& ags = rg.atom_groups(); for(unsigned i_ag=0;i_agaltloc.elems[0]; + std::string altloc = ag.data->altloc; if (altloc == nulc) { for(unsigned i_cf=0;i_cfinsert(atoms->end(), ag.atoms().begin(), ag.atoms().end()); - if (ag.data->altloc.elems[0] != nulc) { + if (ag.data->altloc != nulc) { resnames_with_altloc.insert(ag.data->resname); } } @@ -2526,7 +2527,7 @@ namespace { chain_id.c_str(), resseq.elems, icode.elems, - altloc.elems, + altloc.c_str(), resname.c_str(), is_first_in_chain, is_first_after_break); diff --git a/iotbx/pdb/hierarchy.h b/iotbx/pdb/hierarchy.h index 16629a7f1c..d823c06936 100644 --- a/iotbx/pdb/hierarchy.h +++ b/iotbx/pdb/hierarchy.h @@ -72,8 +72,7 @@ A residue object is NOT a parent of the atoms. namespace hierarchy { - static const char blank_altloc_char = ' '; - static const char blank_altloc_cstr[2] = {blank_altloc_char, '\0'}; + static const std::string blank_altloc_string(" "); class root_data; class root; @@ -203,7 +202,7 @@ namespace hierarchy { friend struct atom_label_columns_formatter; weak_ptr parent; public: - str1 altloc; + std::string altloc; std::string resname; protected: std::vector atoms; @@ -211,13 +210,13 @@ namespace hierarchy { inline atom_group_data( weak_ptr const& parent_, - const char* altloc_, - const char* resname_); + std::string altloc_, + std::string resname_); inline atom_group_data( - const char* altloc_, - const char* resname_); + std::string altloc_, + std::string resname_); inline atom_group_data( @@ -863,13 +862,13 @@ namespace hierarchy { explicit atom_group( residue_group const& parent, - const char* altloc="", - const char* resname=""); + std::string altloc="", + std::string resname=""); explicit atom_group( - const char* altloc="", - const char* resname="") + std::string altloc="", + std::string resname="") : data(new atom_group_data(altloc, resname)) {} @@ -878,8 +877,8 @@ namespace hierarchy { atom_group(const atom_group_data* other) : data(new atom_group_data( - other->altloc.elems, - other->resname.c_str())) + other->altloc, + other->resname)) {} atom_group( @@ -1683,7 +1682,7 @@ namespace hierarchy { std::string chain_id; str4 resseq; str1 icode; - str1 altloc; + std::string altloc; std::string resname; bool is_first_in_chain; bool is_first_after_break; @@ -1856,21 +1855,21 @@ namespace hierarchy { inline atom_group_data::atom_group_data( weak_ptr const& parent_, - const char* altloc_, - const char* resname_) + std::string altloc_, + std::string resname_) : parent(parent_), altloc(altloc_), - resname((resname_ == 0) ? "" : resname_) + resname(resname_) {} inline atom_group_data::atom_group_data( - const char* altloc_, - const char* resname_) + std::string altloc_, + std::string resname_) : altloc(altloc_), - resname((resname_ == 0) ? "" : resname_) + resname(resname_) {} inline @@ -1886,8 +1885,8 @@ namespace hierarchy { inline atom_group::atom_group( residue_group const& parent, - const char* altloc, - const char* resname) + std::string altloc, + std::string resname) : data(new atom_group_data(parent.data, altloc, resname)) {} diff --git a/iotbx/pdb/hierarchy_atom_bpl.cpp b/iotbx/pdb/hierarchy_atom_bpl.cpp index 01dff20ea8..886ed43dda 100644 --- a/iotbx/pdb/hierarchy_atom_bpl.cpp +++ b/iotbx/pdb/hierarchy_atom_bpl.cpp @@ -454,7 +454,7 @@ namespace { /* HY36_WIDTH_4_MAX */ 2436111) IOTBX_LOC_GET(resseq) IOTBX_LOC_GET_SET(icode) - IOTBX_LOC_GET_SET(altloc) + // IOTBX_LOC_GET_SET(altloc) // IOTBX_LOC_GET_SET(resname) #undef IOTBX_LOC_GET @@ -484,6 +484,7 @@ namespace { IOTBX_LOC_GET_SET_STD_STRING(model_id) IOTBX_LOC_GET_SET_STD_STRING(chain_id) IOTBX_LOC_GET_SET_STD_STRING(resname) + IOTBX_LOC_GET_SET_STD_STRING(altloc) #undef IOTBX_LOC_GET_STD_STRING #undef IOTBX_LOC_SET_STD_STRING diff --git a/iotbx/pdb/hierarchy_ext.cpp b/iotbx/pdb/hierarchy_ext.cpp index 9ebbe09b53..fb84c9bbba 100644 --- a/iotbx/pdb/hierarchy_ext.cpp +++ b/iotbx/pdb/hierarchy_ext.cpp @@ -62,7 +62,7 @@ namespace { { typedef atom_group w_t; - IOTBX_PDB_HIERARCHY_DATA_WRAPPERS_SMALL_STR_GET_SET(altloc) + IOTBX_PDB_HIERARCHY_DATA_WRAPPERS_STD_STRING_GET_SET(altloc) IOTBX_PDB_HIERARCHY_DATA_WRAPPERS_STD_STRING_GET_SET(resname) static diff --git a/iotbx/pdb/hierarchy_select.cpp b/iotbx/pdb/hierarchy_select.cpp index d963c2cfbc..37156ede21 100644 --- a/iotbx/pdb/hierarchy_select.cpp +++ b/iotbx/pdb/hierarchy_select.cpp @@ -31,7 +31,7 @@ namespace iotbx { namespace pdb { namespace hierarchy { for(unsigned i_ag=0;i_agaltloc.elems, ag.data->resname.c_str()); \ + atom_group r_ag(ag.data->altloc, ag.data->resname); \ std::vector const& atoms = ag.atoms(); \ boost::scoped_array r_atoms(new atom[n_ats]); \ unsigned r_n_ats = 0; diff --git a/iotbx/pdb/hierarchy_write.cpp b/iotbx/pdb/hierarchy_write.cpp index 97c147303f..9c1fa81e59 100644 --- a/iotbx/pdb/hierarchy_write.cpp +++ b/iotbx/pdb/hierarchy_write.cpp @@ -28,7 +28,7 @@ namespace iotbx { namespace pdb { namespace hierarchy { unsigned n_ag = rg.atom_groups_size(); for(unsigned i_ag=0;i_agaltloc.elems; + label_formatter.altloc = ag.data->altloc.c_str(); label_formatter.resname = ag.data->resname.c_str(); typedef std::vector va; va const& atoms = ag.atoms(); @@ -55,7 +55,7 @@ namespace iotbx { namespace pdb { namespace hierarchy { for(unsigned i_at=0;i_at ag_data = atom->parent_ptr(); - label_formatter.altloc = ag_data->altloc.elems; + label_formatter.altloc = ag_data->altloc.c_str(); label_formatter.resname = ag_data->resname.c_str(); IOTBX_LOC } diff --git a/iotbx/pdb/overall_counts.cpp b/iotbx/pdb/overall_counts.cpp index 52b25c3c60..ffbcf80bae 100644 --- a/iotbx/pdb/overall_counts.cpp +++ b/iotbx/pdb/overall_counts.cpp @@ -132,7 +132,7 @@ namespace detail { if (chain.residue_groups_size() == 0) n_empty_chains += 1; model_chain_ids[chain.data->id]++; chain_ids[chain.data->id]++; - std::set chain_altlocs; + std::set chain_altlocs; boost::optional chain_alt_conf_proper; boost::optional chain_alt_conf_improper; bool suppress_chain_break = true; @@ -147,19 +147,19 @@ namespace detail { suppress_chain_break = false; bool have_main_conf = false; bool have_blank_altloc = false; - std::set rg_altlocs; + std::set rg_altlocs; std::set rg_resnames; - std::map > altloc_resnames; + std::map > altloc_resnames; unsigned n_ag = rg.atom_groups_size(); for(unsigned i_ag=0;i_agaltloc.elems[0]; - if (altloc == '\0') { + std::string altloc = ag.data->altloc; + if (altloc == "") { have_main_conf = true; } else { - if (altloc == blank_altloc_char) { + if (altloc == blank_altloc_string) { have_blank_altloc = true; } rg_altlocs.insert(altloc); @@ -183,7 +183,7 @@ namespace detail { } } { - typedef std::map >::const_iterator it; + typedef std::map >::const_iterator it; it i_end = altloc_resnames.end(); for(it i=altloc_resnames.begin();i!=i_end;i++) { if (i->second.size() != 1U) { @@ -191,6 +191,13 @@ namespace detail { .push_back(rg); } } + // This should do the same as above, C++11 way. + // for (auto i: altloc_resnames) { + // if (i->second.size() != 1U) { + // residue_groups_with_multiple_resnames_using_same_altloc + // .push_back(rg); + // } + // } } if (have_blank_altloc) { n_alt_conf_improper++; @@ -226,10 +233,10 @@ namespace detail { prev_rg = boost::optional(rg); } { - typedef std::set::const_iterator it; + typedef std::set::const_iterator it; it i_end = chain_altlocs.end(); for(it i=chain_altlocs.begin();i!=i_end;i++) { - alt_conf_ids[std::string(&*i, 1U)]++; + alt_conf_ids[*i]++; n_alt_conf++; } } diff --git a/iotbx/pdb/tst_atom_selection.py b/iotbx/pdb/tst_atom_selection.py index ac81ac1e10..0db0e02293 100644 --- a/iotbx/pdb/tst_atom_selection.py +++ b/iotbx/pdb/tst_atom_selection.py @@ -127,7 +127,7 @@ def exercise_selection(): for conj in ["and ", ""]: assert list(isel(r"altloc a %sname n" % conj)) == [24] assert list(isel(r"altloc b %sname n" % conj)) == [32] - assert list(isel(r"altloc ' ' %sname n" % conj)) == [0,6,17] + assert list(isel(r"altloc ' ' %sname n" % conj)) == [0,6,17], list(isel(r"altloc ' ' %sname n" % conj)) assert list(isel(r"altid ' ' %sname n" % conj)) == [0,6,17] assert list(isel(r"resname hoh")) == [69] assert list(isel(r"resname SO4")) == [64,65,66,67,68] diff --git a/iotbx/pdb/tst_hierarchy.py b/iotbx/pdb/tst_hierarchy.py index 7737bc3d39..b1df62fd4f 100644 --- a/iotbx/pdb/tst_hierarchy.py +++ b/iotbx/pdb/tst_hierarchy.py @@ -468,9 +468,13 @@ def exercise_atom_group(): ag = pdb.hierarchy.atom_group() assert ag.altloc == "" assert ag.resname == "" - ag = pdb.hierarchy.atom_group(altloc=None, resname=None) - assert ag.altloc == "" - assert ag.resname == "" + ag = pdb.hierarchy.atom_group(altloc="abc", resname="longxyz") + assert ag.altloc == "abc" + assert ag.resname == "longxyz" + # Does not work anymore + # ag = pdb.hierarchy.atom_group(altloc=None, resname=None) + # assert ag.altloc == "" + # assert ag.resname == "" ag = pdb.hierarchy.atom_group(altloc="a", resname="xyz") assert ag.altloc == "a" assert ag.resname == "xyz" @@ -586,11 +590,12 @@ def exercise_atom_group(): pass else: raise Exception_expected # - try: pdb.hierarchy.atom_group(altloc="ab") - except (ValueError, RuntimeError) as e: - assert str(e) == "string is too long for target variable " \ - "(maximum length is 1 character, 2 given)." - else: raise Exception_expected + # Now works + # try: pdb.hierarchy.atom_group(altloc="ab") + # except (ValueError, RuntimeError) as e: + # assert str(e) == "string is too long for target variable " \ + # "(maximum length is 1 character, 2 given)." + # else: raise Exception_expected # ag1 = pdb.hierarchy.atom_group() atom = pdb.hierarchy.atom() diff --git a/iotbx/regression/tst_hierarchy_id_str.py b/iotbx/regression/tst_hierarchy_id_str.py index f6500395e0..387284b623 100644 --- a/iotbx/regression/tst_hierarchy_id_str.py +++ b/iotbx/regression/tst_hierarchy_id_str.py @@ -157,8 +157,11 @@ def exercise_ag_id_str(): # The function defined in python ag = pdb.hierarchy.atom_group() ag.altloc='A' - ag.resname == "ALA" - assert ag.id_str() == 'A ', "'%s'" % ag.id_str() + ag.resname = "ALA" + assert ag.id_str() == 'AALA ', "'%s'" % ag.id_str() + ag.altloc="longAlt" + ag.resname = "ALAnine" + assert ag.id_str() == 'longAltALAnine ', "'%s'" % ag.id_str() h = pdb.input(source_info=None, lines="""\ ATOM 6 CA ASN B 2 -6.522 2.038 2.831 1.00 14.10 C""").construct_hierarchy() diff --git a/mmtbx/reduce/Optimizers.cpp b/mmtbx/reduce/Optimizers.cpp index 072379a692..5540d270db 100644 --- a/mmtbx/reduce/Optimizers.cpp +++ b/mmtbx/reduce/Optimizers.cpp @@ -82,7 +82,7 @@ static std::string resNameAndID(iotbx::pdb::hierarchy::atom const& a) std::string chainID = a.parent().get().parent().get().parent().get().data->id; std::string resName = toUpperCase(stripWhitespace(a.parent().get().data->resname)); std::string resID = stripWhitespace(fromSmall(a.parent().get().parent().get().data->resseq.elems, 4)); - std::string altLoc = stripWhitespace(fromSmall(a.parent().get().data->altloc.elems,1)); + std::string altLoc = stripWhitespace(a.parent().get().data->altloc); // Don't print the code if it is a space (blank). std::string insertionCode = stripWhitespace(fromSmall(a.parent().get().parent().get().data->icode.elems,1)); return "chain " + chainID + " " + altLoc + resName + " " + resID + insertionCode; diff --git a/mmtbx/secondary_structure/dssp.hpp b/mmtbx/secondary_structure/dssp.hpp index 19ab489116..6c9cbd85c5 100644 --- a/mmtbx/secondary_structure/dssp.hpp +++ b/mmtbx/secondary_structure/dssp.hpp @@ -76,7 +76,7 @@ namespace mmtbx { namespace secondary_structure { namespace dssp { unsigned n_ags = prev_rg.atom_groups_size(); std::vector const& ags = prev_rg.atom_groups(); for (unsigned i_ag = 0; i_ag < n_ags; i_ag++) { - if (ags[i_ag].data->altloc.elems[0] == ag_N->data->altloc.elems[0]) { + if (ags[i_ag].data->altloc[0] == ag_N->data->altloc[0]) { n_ats = ags[i_ag].atoms_size(); std::vector prev_atoms = ags[i_ag].atoms(); for (unsigned i_at = 0; i_at < n_ats; i_at++) {