Skip to content

Commit

Permalink
More changes from char* to std::string to accommodate variable size of…
Browse files Browse the repository at this point in the history
… chain id and resnames in hierarchy. Necessary test adjustments. At this point PDB format output will not be valid for longer ids - they won't be trimmed, but rather output in full.
  • Loading branch information
olegsobolev committed Oct 25, 2023
1 parent 6c0978d commit f42ddfd
Show file tree
Hide file tree
Showing 6 changed files with 80 additions and 48 deletions.
72 changes: 40 additions & 32 deletions iotbx/pdb/hierarchy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -705,21 +705,22 @@ namespace {

} // namespace <anonymous>

void
std::string
atom::format_atom_record_serial_label_columns(
char* result,
atom_label_columns_formatter* label_formatter) const
{
std::string result;
char blank = ' ';
data->serial.copy_right_justified(result+6, 5U, blank);
result[11] = blank;
result += (boost::format("%5s") % data->serial.elems).str();
result += blank;
if (label_formatter == 0) {
atom_label_columns_formatter().format(result+12, *this);
result += atom_label_columns_formatter().format(*this);
}
else {
label_formatter->name = data->name.elems;
label_formatter->format(result+12);
result += label_formatter->format();
}
return result;
}

unsigned
Expand Down Expand Up @@ -838,20 +839,22 @@ namespace {
{
char blank = ' ';
std::memcpy(result, (data->hetero ? "HETATM" : "ATOM "), 6U);
format_atom_record_serial_label_columns(result, label_formatter);
std::string far = format_atom_record_serial_label_columns(label_formatter);
std::memcpy(result+6, far.c_str(), far.size());
unsigned segid_start;
unsigned blanks_start_at;
size_t far_plus = far.size() + 6;
if (replace_floats_with != 0) {
segid_start = 27U;
segid_start = far_plus;
unsigned i=0;
while (replace_floats_with[i] != '\0' && segid_start != 72U) {
result[segid_start++] = replace_floats_with[i++];
}
blanks_start_at = segid_start + 8U;
}
else {
copy_left_justified(result+27, 3U, 0, 0U, blank);
char *r = result + 30;
copy_left_justified(result+far_plus, 3U, 0, 0U, blank);
char *r = result + far_plus + 3;
// the buffer size in std::snprintf is limited to 640 bytes for
// the 80 column PDB format
for(unsigned i=0;i<3;i++) {
Expand All @@ -860,7 +863,7 @@ namespace {
throw std::runtime_error(
std::string("atom ") + "XYZ"[i] + " coordinate value"
" does not fit into F8.3 format:\n"
+ " \"" + std::string(result, 27U) + "\"\n"
+ " \"" + std::string(result, far_plus) + "\"\n"
+ " value: " + (boost::format("%.3f") % data->xyz[i]).str());
}
r += 8;
Expand All @@ -869,22 +872,21 @@ namespace {
if (r[6] != '\0' && r[4] != '.' && r[5] != '.') {
throw std::runtime_error(
std::string("atom occupancy factor does not fit into F6.2 format:\n")
+ " \"" + std::string(result, 27U) + "\"\n"
+ " \"" + std::string(result, far_plus) + "\"\n"
+ " occupancy factor: " + (boost::format("%.2f") % data->occ).str());
}
r += 6;
std::snprintf(r, 640U, "%6.2f", std::min(std::max(-1.e5, data->b), 1.e6));
if (r[6] != '\0' && r[4] != '.' && r[5] != '.') {
throw std::runtime_error(
std::string("atom B-factor does not fit into F6.2 format:\n")
+ " \"" + std::string(result, 27U) + "\"\n"
+ " \"" + std::string(result, far_plus) + "\"\n"
+ " B-factor: " + (boost::format("%.2f") % data->b).str());
}
segid_start = 72U;
blanks_start_at = 66U;
blanks_start_at = 39+far_plus;
segid_start = blanks_start_at+6;
}
return format_atom_record_segid_element_charge_columns(
result, segid_start, blanks_start_at);
return format_atom_record_segid_element_charge_columns(result, segid_start, blanks_start_at);
}

unsigned
Expand All @@ -894,16 +896,18 @@ namespace {
{
char blank = ' ';
std::memcpy(result, "SIGATM", 6U);
format_atom_record_serial_label_columns(result, label_formatter);
copy_left_justified(result+27, 3U, 0, 0U, blank);
char *r = result + 30;
std::string far = format_atom_record_serial_label_columns(label_formatter);
size_t far_plus = far.size() + 6;
std::memcpy(result+6, far.c_str(), far.size());
copy_left_justified(result+far_plus, 3U, 0, 0U, blank);
char *r = result + far_plus + 3;
for(unsigned i=0;i<3;i++) {
std::snprintf(r, 640U, "%8.3f", std::min(std::max(-1.e7, data->sigxyz[i]),1.e8));
if (r[8] != '\0' && r[5] != '.' && r[6] != '.' && r[7] != '.') {
throw std::runtime_error(
std::string("atom sigma ") + "XYZ"[i] + " coordinate value"
" does not fit into F8.3 format:\n"
+ " \"" + std::string(result, 27U) + "\"\n"
+ " \"" + std::string(result, far_plus) + "\"\n"
+ " value: " + (boost::format("%.3f") % data->sigxyz[i]).str());
}
r += 8;
Expand All @@ -912,7 +916,7 @@ namespace {
if (r[6] != '\0' && r[4] != '.' && r[5] != '.') {
throw std::runtime_error(std::string(
"atom sigma occupancy factor does not fit into F6.2 format:\n")
+ " \"" + std::string(result, 27U) + "\"\n"
+ " \"" + std::string(result, far_plus) + "\"\n"
+ " sigma occupancy factor: "
+ (boost::format("%.2f") % data->sigocc).str());
}
Expand All @@ -921,11 +925,11 @@ namespace {
if (r[6] != '\0' && r[4] != '.' && r[5] != '.') {
throw std::runtime_error(std::string(
"atom sigma B-factor does not fit into F6.2 format:\n")
+ " \"" + std::string(result, 27U) + "\"\n"
+ " \"" + std::string(result, far_plus) + "\"\n"
+ " sigma B-factor: "
+ (boost::format("%.2f") % data->sigb).str());
}
return format_atom_record_segid_element_charge_columns(result, 72U, 66U);
return format_atom_record_segid_element_charge_columns(result, 45+far_plus, 39+far_plus);
}

namespace {
Expand Down Expand Up @@ -955,16 +959,18 @@ namespace {
{
char blank = ' ';
std::memcpy(result, "ANISOU", 6U);
format_atom_record_serial_label_columns(result, label_formatter);
result[27] = blank;
char *r = result + 28;
std::string far = format_atom_record_serial_label_columns(label_formatter);
std::memcpy(result+6, far.c_str(), far.size());
size_t far_plus=far.size() + 6;
result[far_plus] = blank;
char *r = result + far_plus + 1;
for(unsigned i=0;i<6;i++) {
double value = data->uij[i]*10000.;
std::snprintf(r, 640U, "%7.0f", std::min(std::max(-1.e7, value), 1.e8));
r += 7;
if (*r != '\0') throw_f70_error(i, value, result, "");
}
return format_atom_record_segid_element_charge_columns(result, 72U, 70U);
return format_atom_record_segid_element_charge_columns(result, 45+far_plus, 43+far_plus);
}

unsigned
Expand All @@ -974,9 +980,11 @@ namespace {
{
char blank = ' ';
std::memcpy(result, "SIGUIJ", 6U);
format_atom_record_serial_label_columns(result, label_formatter);
result[27] = blank;
char *r = result + 28;
std::string far = format_atom_record_serial_label_columns(label_formatter);
std::memcpy(result+6, far.c_str(), far.size());
size_t far_plus=far.size() + 6;
result[far_plus] = blank;
char *r = result + far_plus +1;
for(unsigned i=0;i<6;i++) {
double value =
#ifdef IOTBX_PDB_ENABLE_ATOM_DATA_SIGUIJ
Expand All @@ -989,7 +997,7 @@ namespace {
r += 7;
if (*r != '\0') throw_f70_error(i, value, result, "sigma ");
}
return format_atom_record_segid_element_charge_columns(result, 72U, 70U);
return format_atom_record_segid_element_charge_columns(result, 45+far_plus, 43+far_plus);
}

std::string
Expand Down
7 changes: 1 addition & 6 deletions iotbx/pdb/hierarchy.h
Original file line number Diff line number Diff line change
Expand Up @@ -692,13 +692,8 @@ namespace hierarchy {
}

//! Not available in Python.
/*! result must point to an array of size 27 (or greater).
The first 6 characters are not modified.
On return, result is NOT null-terminated.
*/
void
std::string
format_atom_record_serial_label_columns(
char* result,
atom_label_columns_formatter* label_formatter=0) const;

//! Not available in Python.
Expand Down
5 changes: 3 additions & 2 deletions iotbx/regression/tst_hierarchy_long_chain_ids_1.py
Original file line number Diff line number Diff line change
Expand Up @@ -260,9 +260,10 @@ def test1():
o_pdb_str = h.as_pdb_string()
# Note here incorrect/trimmed chain id
# There's no way to correctly output chain ids longer than 2 char in PDB format
print(o_pdb_str)
assert_lines_in_text(o_pdb_str, """\
ATOM 61 C SERA- 2 72.898 71.361 62.393 1.00 67.20 C
ATOM 62 O SERA- 2 73.055 70.333 61.737 1.00 65.10 O
ATOM 61 C SERA-2 2 72.898 71.361 62.393 1.00 67.20 C
ATOM 62 O SERA-2 2 73.055 70.333 61.737 1.00 65.10 O
""")

o_cif_str = "%s" % h.as_cif_block()
Expand Down
4 changes: 2 additions & 2 deletions iotbx/regression/tst_hierarchy_long_resname_1.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,8 +103,8 @@ def test1():
ATOM 9 NZ LYS A 279 -3.209 -12.856 44.100 1.00 54.19 N
TER
HETATM 10 CA CA A 301 -17.362 -22.385 28.047 1.00 15.20 CA
HETATM 11 C10 7ZT A 302 -7.646 -6.965 5.796 1.00 22.62 C
HETATM 12 C2 7ZT A 302 -8.462 -5.534 9.265 1.00 16.68 C
HETATM 11 C10 7ZTVU A 302 -7.646 -6.965 5.796 1.00 22.62 C
HETATM 12 C2 7ZTVU A 302 -8.462 -5.534 9.265 1.00 16.68 C
""")

o_cif_str = "%s" % h.as_cif_block()
Expand Down
2 changes: 1 addition & 1 deletion iotbx/regression/tst_hierarchy_long_resname_2.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ def test1():
# print(model_pdb)
for l in [
'LINK NZ LYS A 279 O longHOH A 401 ',
'HETATM 10 O lon A 401 -3.209 -12.856 46.100 1.00 30.11 O']:
'HETATM 10 O longHOH A 401 -3.209 -12.856 46.100 1.00 30.11 O']:
assert_lines_in_text(model_pdb, l)

if (__name__ == "__main__"):
Expand Down
38 changes: 33 additions & 5 deletions iotbx/regression/tst_hierarchy_long_resname_3.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import mmtbx.model
from mmtbx import monomer_library
from libtbx.utils import Sorry
from six.moves import cStringIO as StringIO

# ------------------------------------------------------------------------------

Expand Down Expand Up @@ -178,16 +179,17 @@
def test1():
"""
Creating restraints for long residue name
Not finished yet.
"""
#dumping to disk if needed:
# for name, s in [('model', model_cif), ('restr', ligand_cif)]:
# for name, s in [('model', mm_cif), ('restr', ligand_cif)]:
# with open('%s.cif' % name, 'w') as f:
# f.write(s)
inp = iotbx.pdb.input(lines=mm_cif.split("\n"), source_info=None)
cif_object = iotbx.cif.reader(input_string = ligand_cif).model()
cif_objects = [('bla.cif', cif_object)]
model = mmtbx.model.manager(model_input = inp, restraint_objects = cif_objects)
model = mmtbx.model.manager(
model_input = inp,
restraint_objects = cif_objects)
try:
model.process(make_restraints=True)
geo_str = model.restraints_as_geo()
Expand All @@ -211,12 +213,37 @@ def test1():
]:
assert_lines_in_text(model_cif, l)
model_pdb = model.model_as_pdb()
# print(model_pdb)
print(model_pdb)
for l in [
'HETATM 10 C10 7ZT A 302 -7.646 -6.965 5.796 1.00 22.62 C',
'HETATM 10 C10 7ZTVU A 302 -7.646 -6.965 5.796 1.00 22.62 C',
]:
assert_lines_in_text(model_pdb, l)

def test2():
  """
  Try creating restraints for long residue name without cif.
  Check error message formatting (atom.quote() function)

  The model manager is built from mm_cif only (no restraint objects are
  passed), and everything it logs is captured in a StringIO buffer. When
  model.process() raises Sorry, the captured log must contain the quoted
  atom record with the full-length residue name.
  """
  #dumping to disk if needed:
  # for name, s in [('model', mm_cif), ('restr', ligand_cif)]:
  #   with open('%s.cif' % name, 'w') as f:
  #     f.write(s)
  inp = iotbx.pdb.input(lines=mm_cif.split("\n"), source_info=None)
  # The ligand restraints are intentionally not parsed or supplied here:
  # this test exercises the "no cif available" path.
  mlog = StringIO()
  model = mmtbx.model.manager(
    model_input = inp,
    log = mlog)
  try:
    model.process(make_restraints=True)
  except Sorry:
    mlog_txt = mlog.getvalue()
    # print(mlog_txt)
    assert_lines_in_text(mlog_txt, """ "HETATM 15 C7 7ZTVU A 302 .*. C " """)
  else:
    # Without this branch the test would pass without checking anything
    # if process() ever stopped raising.
    # NOTE(review): assumes process() must raise Sorry when the ligand
    # restraints are missing - confirm against mmtbx.model.manager.
    raise AssertionError(
      "model.process() was expected to raise Sorry when restraints for "
      "the long residue name are not supplied")


if (__name__ == "__main__"):
t0 = time.time()
mon_lib_srv = None
Expand All @@ -226,4 +253,5 @@ def test1():
print("Can not initialize monomer_library, skipping test.")
if mon_lib_srv is not None:
test1()
test2()
print("OK. Time: %8.3f"%(time.time()-t0))

0 comments on commit f42ddfd

Please sign in to comment.