diff --git a/ensemble_md/replica_exchange_EE.py b/ensemble_md/replica_exchange_EE.py index 5b4ad60..85b6e88 100644 --- a/ensemble_md/replica_exchange_EE.py +++ b/ensemble_md/replica_exchange_EE.py @@ -1565,18 +1565,23 @@ def default_coords_fn(self, molA_file_name, molB_file_name): if len(miss_A) != 0: df_atom_swap = coordinate_swap.get_miss_coord(molA, molB, nameA, nameB, df_atom_swap, 'A2B', swap_map[(swap_map['Swap A'] == nameA) & (swap_map['Swap B'] == nameB)]) # noqa: E501 + # Step 5: Parse Current file to ensure atoms are added in the correct order + atom_order_A = gmx_parser.deter_atom_order(molA_file, nameA) + atom_order_B = gmx_parser.deter_atom_order(molB_file, nameB) + + # Step 6: Write the new file # Reprint preamble text line_start = coordinate_swap.print_preamble(molA_file, molB_new, len(miss_B), len(miss_A)) # Print new coordinates to file for molB - coordinate_swap.write_new_file(df_atom_swap, 'A2B', 'B2A', line_start, molA_file, molB_new, nameA, nameB, copy.deepcopy(molA.xyz[0]), miss_A) # noqa: E501 + coordinate_swap.write_new_file(df_atom_swap, 'A2B', 'B2A', line_start, molA_file, molB_new, nameA, nameB, copy.deepcopy(molA.xyz[0]), miss_A, atom_order_B) # noqa: E501 # Print new coordinates to file # Reprint preamble text line_start = coordinate_swap.print_preamble(molB_file, molA_new, len(miss_A), len(miss_B)) # Print new coordinates for molA - coordinate_swap.write_new_file(df_atom_swap, 'B2A', 'A2B', line_start, molB_file, molA_new, nameB, nameA, copy.deepcopy(molB.xyz[0]), miss_B) # noqa: E501 + coordinate_swap.write_new_file(df_atom_swap, 'B2A', 'A2B', line_start, molB_file, molA_new, nameB, nameA, copy.deepcopy(molB.xyz[0]), miss_B, atom_order_A) # noqa: E501 # Rename temp files os.rename('A_hybrid_swap.gro', molB_dir + '/confout.gro') diff --git a/ensemble_md/tests/data/coord_swap/output_C.gro b/ensemble_md/tests/data/coord_swap/output_C.gro new file mode 100644 index 0000000..631c22e --- /dev/null +++ b/ensemble_md/tests/data/coord_swap/output_C.gro @@ -0,0 +1,23 @@ +Protein in water +20 + 1I2J O 1 5.0178633 5.0861511 2.3039777 0.000 0.000 0.000 + 1I2J C1 2 5.0669813 5.1162391 2.4148340 0.000 0.000 0.000 + 1I2J C2 3 5.1077561 5.2565174 2.4496574 0.000 0.000 0.000 + 1I2J C3 4 5.2587881 5.2859540 2.4412327 0.000 0.000 0.000 + 1I2J C4 5 5.3591075 5.2250023 2.5407770 0.000 0.000 0.000 + 1I2J CV5 6 5.5062575 5.2488542 2.5104387 0.000 0.000 0.000 + 1I2J CV6 7 5.5972819 5.1913514 2.6236000 0.000 0.000 0.000 + 1I2J H1 8 5.0745454 5.0403323 2.4957850 0.000 0.000 0.000 + 1I2J H2 9 5.0574427 5.3244996 2.3799186 0.000 0.000 0.000 + 1I2J H3 10 5.0872545 5.2717891 2.5562549 0.000 0.000 0.000 + 1I2J H4 11 5.2938070 5.2602854 2.3405781 0.000 0.000 0.000 + 1I2J H5 12 5.2689505 5.3949656 2.4466877 0.000 0.000 0.000 + 1I2J H6 13 5.3415952 5.1181459 2.5578511 0.000 0.000 0.000 + 1I2J H7 14 5.3279910 5.2632923 2.6386654 0.000 0.000 0.000 + 1I2J H8 15 5.4598603 5.2642937 2.5228457 0.000 0.000 0.000 + 1I2J HV9 16 5.5342641 5.2074494 2.4128795 0.000 0.000 0.000 + 1I2J HV10 17 5.5161352 5.3577881 2.5176795 0.000 0.000 0.000 + 1I2J HV11 18 5.7024980 5.1972165 2.5934060 0.000 0.000 0.000 + 1I2J HV12 19 5.5721917 5.0852852 2.6353056 0.000 0.000 0.000 + 1I2J HV13 20 5.5869956 5.2427306 2.7198856 0.000 0.000 0.000 + 7.06448 7.06448 4.99534 0.00000 0.00000 0.00000 0.00000 3.53224 3.53224 diff --git a/ensemble_md/tests/data/coord_swap/output_D.gro b/ensemble_md/tests/data/coord_swap/output_D.gro new file mode 100644 index 0000000..6955912 --- /dev/null +++ b/ensemble_md/tests/data/coord_swap/output_D.gro @@ -0,0 +1,29 @@ +Protein in water +26 + 1J2K O 1 5.2959995 4.9607749 2.2057471 0.000 0.000 0.000 + 1J2K C1 2 5.2068386 4.9330115 2.2914972 0.000 0.000 0.000 + 1J2K C2 3 5.2367387 4.9143419 2.4417152 0.000 0.000 0.000 + 1J2K C3 4 5.2398911 5.0504713 2.5114973 0.000 0.000 0.000 + 1J2K C4 5 5.1193190 5.1436095 2.4967213 0.000 0.000 0.000 + 1J2K C5 6 4.9869380 5.0971766 2.5653727 0.000 0.000 0.000 + 1J2K C6 7 4.8737483 5.2045283 2.5777171 0.000 0.000 0.000 + 1J2K CV7 8 4.9173560 5.3224721 2.6708064 0.000 0.000 0.000 + 1J2K CV8 9 4.8892651 5.2975769 2.8232861 0.000 0.000 0.000 + 1J2K H1 10 5.1009479 4.9196019 2.2602017 0.000 0.000 0.000 + 1J2K H2 11 5.1548157 4.8529458 2.4809031 0.000 0.000 0.000 + 1J2K H3 12 5.3377852 4.8729944 2.4515378 0.000 0.000 0.000 + 1J2K H4 13 5.2431130 5.0364089 2.6201639 0.000 0.000 0.000 + 1J2K H5 14 5.3289862 5.1061687 2.4802494 0.000 0.000 0.000 + 1J2K H6 15 5.0852985 5.1476994 2.3925943 0.000 0.000 0.000 + 1J2K H7 16 5.1522965 5.2398167 2.5376258 0.000 0.000 0.000 + 1J2K H9 17 4.9479947 5.0155101 2.5034811 0.000 0.000 0.000 + 1J2K H10 18 4.9996490 5.0553794 2.6659110 0.000 0.000 0.000 + 1J2K H11 19 4.7855296 5.1634459 2.6281769 0.000 0.000 0.000 + 1J2K H12 20 4.8469658 5.2264252 2.4736989 0.000 0.000 0.000 + 1J2K H13 21 4.9016318 5.2935543 2.6352797 0.000 0.000 0.000 + 1J2K HV14 21 5.0237503 5.3428926 2.6540799 0.000 0.000 0.000 + 1J2K HV15 22 4.8513546 5.4049191 2.6414304 0.000 0.000 0.000 + 1J2K HV16 23 4.9280410 5.2066817 2.8707309 0.000 0.000 0.000 + 1J2K HV17 24 4.9172225 5.3824968 2.8867180 0.000 0.000 0.000 + 1J2K HV18 25 4.7816763 5.2949572 2.8441288 0.000 0.000 0.000 + 6.81420 6.81420 4.81837 0.00000 0.00000 0.00000 0.00000 3.40710 3.40710 diff --git a/ensemble_md/tests/data/coord_swap/residue_connect_alt.csv b/ensemble_md/tests/data/coord_swap/residue_connect_alt.csv new file mode 100644 index 0000000..5dd1e2a --- /dev/null +++ b/ensemble_md/tests/data/coord_swap/residue_connect_alt.csv @@ -0,0 +1,45 @@ +,Resname,Connect 1,Connect 2,State 1,State 2 +0,I2J,O,C1,-1,-1 +1,I2J,C1,C2,-1,-1 +2,I2J,C1,H1,-1,-1 +3,I2J,C2,C3,-1,-1 +4,I2J,C2,H2,-1,-1 +5,I2J,C2,H3,-1,-1 +6,I2J,C3,C4,-1,-1 +7,I2J,C3,H4,-1,-1 +8,I2J,C3,H5,-1,-1 +9,I2J,C4,CV5,-1,0 +10,I2J,C4,H6,-1,-1 +11,I2J,C4,H7,-1,-1 +12,I2J,C4,H8,-1,1 +13,I2J,CV5,CV6,0,0 +14,I2J,CV5,HV9,0,0 +15,I2J,CV5,HV10,0,0 +16,I2J,CV6,HV11,0,0 +17,I2J,CV6,HV12,0,0 +18,I2J,CV6,HV13,0,0 +0,J2K,O,C1,-1,-1 +1,J2K,C1,C2,-1,-1 +2,J2K,C1,H1,-1,-1 +3,J2K,C2,C3,-1,-1 +4,J2K,C2,H2,-1,-1 +5,J2K,C2,H3,-1,-1 +6,J2K,C3,C4,-1,-1 +7,J2K,C3,H4,-1,-1 +8,J2K,C3,H5,-1,-1 +9,J2K,C4,C5,-1,-1 +10,J2K,C4,H6,-1,-1 +11,J2K,C4,H7,-1,-1 +12,J2K,C5,C6,-1,-1 +13,J2K,C5,H9,-1,-1 +14,J2K,C5,H10,-1,-1 +15,J2K,C6,CV7,-1,0 +16,J2K,C6,H11,-1,-1 +17,J2K,C6,H12,-1,-1 +18,J2K,C6,H13,-1,1 +19,J2K,CV7,CV8,0,0 +20,J2K,CV7,HV14,0,0 +21,J2K,CV7,HV15,0,0 +22,J2K,CV8,HV16,0,0 +23,J2K,CV8,HV17,0,0 +24,J2K,CV8,HV18,0,0 diff --git a/ensemble_md/tests/data/coord_swap/residue_swap_map_alt.csv b/ensemble_md/tests/data/coord_swap/residue_swap_map_alt.csv new file mode 100644 index 0000000..867b65b --- /dev/null +++ b/ensemble_md/tests/data/coord_swap/residue_swap_map_alt.csv @@ -0,0 +1,3 @@ +,Swap A,Swap B,Anchor Atom Name A,Anchor Atom Name B,Alignment Atom A,Alignment Atom B,Angle Atom A,Angle Atom B,Missing Atom Name +0,I2J,J2K,C4,C4,CV5,C5,C3,C3,H8 +0,J2K,I2J,C6,CV6,H13,HV13,C5,CV5,CV7 CV8 HV14 HV15 HV16 HV17 HV18 diff --git a/ensemble_md/tests/data/coord_swap/sim_A/confout.gro b/ensemble_md/tests/data/coord_swap/sim_A/confout_backup.gro similarity index 100% rename from ensemble_md/tests/data/coord_swap/sim_A/confout.gro rename to ensemble_md/tests/data/coord_swap/sim_A/confout_backup.gro diff --git a/ensemble_md/tests/data/coord_swap/sim_B/confout.gro b/ensemble_md/tests/data/coord_swap/sim_B/confout_backup.gro similarity index 100% rename from ensemble_md/tests/data/coord_swap/sim_B/confout.gro rename to ensemble_md/tests/data/coord_swap/sim_B/confout_backup.gro diff --git a/ensemble_md/tests/data/coord_swap/sim_C/confout_backup.gro b/ensemble_md/tests/data/coord_swap/sim_C/confout_backup.gro new file mode 100644 index 0000000..61a0c7f --- /dev/null +++ b/ensemble_md/tests/data/coord_swap/sim_C/confout_backup.gro @@ -0,0 +1,23 @@ +Protein in water + 20 + 1I2J O 1 5.296 4.961 2.206 -0.3686 0.3444 -0.1300 + 1I2J C1 2 5.207 4.933 2.291 -0.2138 0.6863 0.8740 + 1I2J C2 3 5.237 4.914 2.442 -0.1897 -0.2629 -0.3198 + 1I2J C3 4 5.240 5.050 2.511 0.0930 -0.1426 -0.5485 + 1I2J C4 5 5.119 5.144 2.497 -0.4009 0.0472 0.1513 + 1I2J CV5 6 4.987 5.097 2.565 0.0543 0.1262 0.2260 + 1I2J CV6 7 4.874 5.205 2.578 0.9132 -0.3592 -0.0244 + 1I2J H1 8 5.101 4.920 2.260 -0.0394 2.2014 -0.3651 + 1I2J H2 9 5.155 4.853 2.481 0.6299 -1.3414 -0.2963 + 1I2J H3 10 5.338 4.873 2.452 -0.7490 -1.7256 -0.7240 + 1I2J H4 11 5.243 5.036 2.620 0.3644 -4.1763 -1.0785 + 1I2J H5 12 5.329 5.106 2.480 1.2963 -0.7568 1.7877 + 1I2J H6 13 5.085 5.148 2.393 1.4662 -0.9628 -0.4984 + 1I2J H7 14 5.152 5.240 2.538 1.5196 -0.0617 -1.1409 + 1I2J H8 15 5.039 5.112 2.564 0.5121 0.3042 1.3585 + 1I2J HV9 16 4.948 5.016 2.503 1.3134 0.4465 -0.9890 + 1I2J HV10 17 5.000 5.055 2.666 -1.9137 -1.0343 -0.0077 + 1I2J HV11 18 4.786 5.163 2.628 0.6018 -0.1023 -0.3595 + 1I2J HV12 19 4.847 5.226 2.474 -1.7867 0.1242 0.7725 + 1I2J HV13 20 4.902 5.294 2.635 -0.4338 0.4864 -0.6797 + 6.81420 6.81420 4.81837 0.00000 0.00000 0.00000 0.00000 3.40710 3.40710 diff --git a/ensemble_md/tests/data/coord_swap/sim_C/traj.trr b/ensemble_md/tests/data/coord_swap/sim_C/traj.trr new file mode 100644 index 0000000..d75e562 Binary files /dev/null and b/ensemble_md/tests/data/coord_swap/sim_C/traj.trr differ diff --git a/ensemble_md/tests/data/coord_swap/sim_D/confout_backup.gro b/ensemble_md/tests/data/coord_swap/sim_D/confout_backup.gro new file mode 100644 index 0000000..7a3b594 --- /dev/null +++ b/ensemble_md/tests/data/coord_swap/sim_D/confout_backup.gro @@ -0,0 +1,29 @@ +Protein in water + 26 + 1J2K O 1 5.018 5.086 2.304 0.1745 0.5890 -0.1758 + 1J2K C1 2 5.067 5.116 2.415 -0.2291 -0.2305 0.2929 + 1J2K C2 3 5.108 5.257 2.450 -0.5244 -0.1122 0.0621 + 1J2K C3 4 5.259 5.286 2.441 -0.2349 -0.3584 -0.6547 + 1J2K C4 5 5.359 5.225 2.541 0.2423 -0.3055 0.0402 + 1J2K C5 6 5.506 5.249 2.510 -0.3200 0.3017 0.4568 + 1J2K C6 7 5.597 5.191 2.624 0.0359 0.2685 0.3027 + 1J2K CV7 8 5.569 5.267 2.757 0.3904 -0.1473 0.0832 + 1J2K CV8 9 5.468 5.387 2.743 -0.2666 -0.4050 -0.2619 + 1J2K H1 10 5.075 5.040 2.496 -2.0271 -2.3023 -1.4818 + 1J2K H2 11 5.057 5.324 2.380 -0.3736 2.2840 2.2892 + 1J2K H3 12 5.087 5.272 2.556 -0.2398 -0.7192 0.2037 + 1J2K H4 13 5.294 5.260 2.341 -1.2738 0.9886 -1.3596 + 1J2K H5 14 5.269 5.395 2.447 0.5557 -0.5016 0.7326 + 1J2K H6 15 5.342 5.118 2.558 1.2103 -0.6961 -1.4115 + 1J2K H7 16 5.328 5.263 2.639 3.1608 0.2336 0.7570 + 1J2K H9 17 5.534 5.207 2.413 3.0729 0.7986 1.2199 + 1J2K H10 18 5.516 5.358 2.518 0.7974 0.2313 -0.0088 + 1J2K H11 19 5.702 5.197 2.593 -0.1152 0.8857 -0.1042 + 1J2K H12 20 5.572 5.085 2.635 1.2100 0.1165 1.4424 + 1J2K H13 21 5.587 5.243 2.720 0.8334 -1.0594 1.0965 + 1J2K HV14 22 5.533 5.195 2.832 0.0741 -1.2105 -1.1017 + 1J2K HV15 23 5.665 5.313 2.783 -0.1626 1.8344 -1.3512 + 1J2K HV16 24 5.369 5.369 2.698 0.8667 -0.4618 -2.6960 + 1J2K HV17 25 5.451 5.441 2.837 -0.9029 1.3592 -1.3936 + 1J2K HV18 26 5.510 5.468 2.683 1.4067 0.5534 2.2296 + 7.06448 7.06448 4.99534 0.00000 0.00000 0.00000 0.00000 3.53224 3.53224 diff --git a/ensemble_md/tests/data/coord_swap/sim_D/traj.trr b/ensemble_md/tests/data/coord_swap/sim_D/traj.trr new file mode 100644 index 0000000..639faf9 Binary files /dev/null and b/ensemble_md/tests/data/coord_swap/sim_D/traj.trr differ diff --git a/ensemble_md/tests/test_replica_exchange_EE.py b/ensemble_md/tests/test_replica_exchange_EE.py index 3eb5663..7d5589e 100644 --- a/ensemble_md/tests/test_replica_exchange_EE.py +++ b/ensemble_md/tests/test_replica_exchange_EE.py @@ -893,7 +893,7 @@ def test_default_coords_fn(self, params_dict): REXEE = get_REXEE_instance(params_dict) os.system(f'cp {input_path}/coord_swap/residue_connect.csv .') os.system(f'cp {input_path}/coord_swap/residue_swap_map.csv .') - REXEE.default_coords_fn(f'{input_path}/coord_swap/sim_A/confout.gro', f'{input_path}/coord_swap/sim_B/confout.gro') # noqa: E501 + REXEE.default_coords_fn(f'{input_path}/coord_swap/sim_A/confout_backup.gro', f'{input_path}/coord_swap/sim_B/confout_backup.gro') # noqa: E501 true_output_A = open(f'{input_path}/coord_swap/output_A.gro', 'r').readlines() test_output_A = open(f'{input_path}/coord_swap/sim_B/confout.gro', 'r').readlines() @@ -905,6 +905,20 @@ def test_default_coords_fn(self, params_dict): assert true_output_A == test_output_A assert true_output_B == test_output_B + os.system(f'cp {input_path}/coord_swap/residue_connect_alt.csv residue_connect.csv') + os.system(f'cp {input_path}/coord_swap/residue_swap_map_alt.csv residue_swap_map.csv') + REXEE.default_coords_fn(f'{input_path}/coord_swap/sim_C/confout_backup.gro', f'{input_path}/coord_swap/sim_D/confout_backup.gro') # noqa: E501 + + true_output_C = open(f'{input_path}/coord_swap/output_C.gro', 'r').readlines() + test_output_C = open(f'{input_path}/coord_swap/sim_D/confout.gro', 'r').readlines() + true_output_D = open(f'{input_path}/coord_swap/output_D.gro', 'r').readlines() + test_output_D = open(f'{input_path}/coord_swap/sim_C/confout.gro', 'r').readlines() + + os.remove('residue_connect.csv') + os.remove('residue_swap_map.csv') + assert true_output_C == test_output_C + assert true_output_D == test_output_D + def test_process_top(self, params_dict): import pandas as pd diff --git a/ensemble_md/utils/coordinate_swap.py b/ensemble_md/utils/coordinate_swap.py index c4108e3..18c10a5 100644 --- a/ensemble_md/utils/coordinate_swap.py +++ b/ensemble_md/utils/coordinate_swap.py @@ -679,7 +679,7 @@ def add_atom(mol_new, resnum, resname, df, vel, atom_num): return atom_num -def dummy_real_swap(mol_new, resnum, resname, df, vel, atom_num, orig_coords): +def dummy_real_swap(mol_new, resnum, resname, df, vel, atom_num, orig_coords, name_new): """ Adds an atom to the file which is switching between dummy and real state or vice versa. @@ -699,6 +699,8 @@ def dummy_real_swap(mol_new, resnum, resname, df, vel, atom_num, orig_coords): The atom number to assign to the atom being added. orig_coords : list The XYZ coordinates for the atom being added. + name_new : str + The new name for the atom after the swap Returns ------- @@ -709,22 +711,6 @@ def dummy_real_swap(mol_new, resnum, resname, df, vel, atom_num, orig_coords): # Get the name for the atom we are writting name_orig = df['Name'].to_list()[0] - # Determine the new atom name based on whether the new atom should be real or dummy - final_state = df['Final Type'].to_list()[0] - if final_state == 'dummy': - element = name_orig.strip('0123456789') - atom_name_num = name_orig.strip('DCVH') - # Add D or C - if element == 'C': - name_new = f'D{element}{atom_name_num}' - else: - name_new = f'{element}V{atom_name_num}' - else: - if 'DC' in name_orig: - name_new = name_orig.replace('DC', 'C') - else: - name_new = name_orig.replace('HV', 'H') - # These may be added out of order so lets make sure we have the right coordinates line_num = df['File line'].to_list()[0] c = line_num - 2 @@ -887,7 +873,7 @@ def find_rotation_angle(initial_point, vertex, rotated_point, axis): return angle -def add_or_swap(df_select, file_new, resnum, resname, vel, atom_num, orig_coor, skip_line, R_o_D_num, pick): +def add_or_swap(df_select, file_new, resnum, resname, vel, atom_num, orig_coor, skip_line, name_new): """ Determine if the atom needs added or swapped between real and dummy state and then add the atom to the new file @@ -909,31 +895,26 @@ def add_or_swap(df_select, file_new, resnum, resname, vel, atom_num, orig_coor, The XYZ coordinates for the atom being added. skip_line : list A list of line numbers that should be skipped when we come across them while reading the file. - R_o_D_num : list of str - A list of atoms that need to be added. - pick : int - Index in the list to add now + name_new : str + The new name for the atom after the swap + Returns ------- skip_line : list Updated list of line numbers that should be skipped when we come across them while reading the file. - R_o_D_num_new : list - Updated list of atoms that need added. """ c = df_select.index.values.tolist() if df_select.loc[c[0], 'Direction'] == 'miss': # Add atom if missing add_atom(file_new, resnum, resname, df_select, vel, atom_num) else: # Swap from dummy to real - line = dummy_real_swap(file_new, resnum, resname, df_select, vel, atom_num, orig_coor) # Add the dummy atom from A as a real atom in B and save the line so it can be skipped later # noqa: E501 + line = dummy_real_swap(file_new, resnum, resname, df_select, vel, atom_num, orig_coor, name_new) # Add the dummy atom from A as a real atom in B and save the line so it can be skipped later # noqa: E501 skip_line.append(line) - R_o_D_num_new = np.delete(R_o_D_num, pick) # Atom no longer needs added + return skip_line - return skip_line, R_o_D_num_new - -def write_new_file(df_atom_swap, swap, r_swap, line_start, orig_file, new_file, old_res_name, new_res_name, orig_coords, miss): # noqa: E501 +def write_new_file(df_atom_swap, swap, r_swap, line_start, orig_file, new_file, old_res_name, new_res_name, orig_coords, miss, atom_order): # noqa: E501 """ Writes a new GRO file. @@ -959,21 +940,16 @@ def write_new_file(df_atom_swap, swap, r_swap, line_start, orig_file, new_file, Coordinates for all atoms in the system before the swap. miss : list Residues which are needed after the swap which are not present before the swap. + atom_order : list of str + List of the atom names in the order they appear in the GRO file """ # Add vero velocity to all atoms vel = ['0.000', '0.000', '0.000\n'] atom_num_A, atom_num_B = 0, 0 + res_interest_atom = 0 skip_line = [] df_interest = df_atom_swap[((df_atom_swap['Swap'] == swap) & ((df_atom_swap['Direction'] == 'R2D') | (df_atom_swap['Direction'] == 'D2R'))) | ((df_atom_swap['Swap'] == r_swap) & (df_atom_swap['Direction'] == 'miss'))] # noqa: E501 - R_C_num = df_interest[(df_interest['Element'] == 'C') & ((df_interest['Direction'] == 'D2R') | ((df_interest['Direction'] == 'miss') & (df_interest['Final Type'] == 'real')))]['Atom Name Number'].to_numpy(dtype=int) # noqa: E501 - D_C_num = df_interest[(df_interest['Element'] == 'C') & ((df_interest['Direction'] == 'R2D') | ((df_interest['Direction'] == 'miss') & (df_interest['Final Type'] == 'dummy')))]['Atom Name Number'].to_numpy(dtype=int) # noqa: E501 - R_H_num = df_interest[(df_interest['Element'] == 'H') & ((df_interest['Direction'] == 'D2R') | ((df_interest['Direction'] == 'miss') & (df_interest['Final Type'] == 'real')))]['Atom Name Number'].to_numpy(dtype=int) # noqa: E501 - D_H_num = df_interest[(df_interest['Element'] == 'H') & ((df_interest['Direction'] == 'R2D') | ((df_interest['Direction'] == 'miss') & (df_interest['Final Type'] == 'dummy')))]['Atom Name Number'].to_numpy(dtype=int) # noqa: E501 - R_C_num.sort() - D_C_num.sort() - R_H_num.sort() - D_H_num.sort() for i in range(line_start, len(orig_file)-1): # Some atoms are added out of order from file A and thus must be skipped when they come up if i in skip_line: @@ -1007,116 +983,64 @@ def write_new_file(df_atom_swap, swap, r_swap, line_start, orig_file, new_file, else: current_num = np.NaN current_element = line[1].strip('0123456789') + if 'V' in current_element: + current_element = current_element.strip('V') + if 'D' in current_element: + current_element = current_element.strip('D') if line[1] in miss: # Do not write coordinates if atoms are not present in B atom_num_B -= 1 atom_num_A += 1 continue - elif (current_element == 'C' and current_num in D_C_num) or (current_element == 'H' and current_num in D_H_num): # Skip this line for now as we will add it in later # noqa: E501 - atom_num_B -= 1 - atom_num_A += 1 - continue - elif (current_element == 'C' and (current_num > R_C_num).any()): # We need to add an atom or move an atom to a new line position # noqa: E501 - while (current_num > R_C_num).any(): - # Either add atom or perform swap - pick = np.where(current_num > R_C_num)[0][0] - df_select = df_interest[(df_interest['Atom Name Number'] == str(R_C_num[pick])) & (df_interest['Element'] == 'C')] # noqa: E501 - skip_line, R_C_num = add_or_swap(df_select, new_file, resnum, new_res_name, vel, atom_num_B, orig_coords, skip_line, R_C_num, pick) # noqa: E501 - atom_num_B += 1 - if i not in skip_line: - write_line(new_file, orig_file[i], line, atom_num_B, vel, orig_coords[atom_num_A], resnum, new_res_name) # Add this current line too # noqa: E501 - elif (len(R_C_num) != 0 and current_element == 'H'): # Add remaining real heavy atoms if all heavy atoms have been written # noqa: E501 - while len(R_C_num) != 0: - df_select = df_interest[(df_interest['Atom Name Number'] == str(R_C_num[0])) & (df_interest['Element'] == 'C')] # noqa: E501 - skip_line, R_C_num = add_or_swap(df_select, new_file, resnum, new_res_name, vel, atom_num_B, orig_coords, skip_line, R_C_num, 0) # noqa: E501 - atom_num_B += 1 - if i not in skip_line: - write_line(new_file, orig_file[i], line, atom_num_B, vel, orig_coords[atom_num_A], resnum, new_res_name) # Add this current line too # noqa: E501 - elif current_element == 'H' and (current_num > R_H_num).any(): # Add real Hs in proper order - while (current_num > R_H_num).any(): - # Either add atom or perform swap - pick = np.where(current_num > R_H_num)[0][0] - df_select = df_interest[(df_interest['Atom Name Number'] == str(R_H_num[pick])) & (df_interest['Element'] == 'H')] # noqa: E501 - skip_line, R_H_num = add_or_swap(df_select, new_file, resnum, new_res_name, vel, atom_num_B, orig_coords, skip_line, R_H_num, pick) # noqa: E501 - atom_num_B += 1 - if i not in skip_line: - write_line(new_file, orig_file[i], line, atom_num_B, vel, orig_coords[atom_num_A], resnum, new_res_name) # Add this current line too # noqa: E501 - elif len(R_H_num) != 0 and (current_element == 'DC' or current_element == 'HV'): # Add remaining real Hs if all H atoms have been written # noqa: E501 - while len(R_H_num) != 0: - df_select = df_interest[(df_interest['Atom Name Number'] == str(R_H_num[0])) & (df_interest['Element'] == 'H')] # noqa: E501 - skip_line, R_H_num = add_or_swap(df_select, new_file, resnum, new_res_name, vel, atom_num_B, orig_coords, skip_line, R_H_num, 0) # noqa: E501 - atom_num_B += 1 - if current_element == 'DC': - while (current_num > D_C_num).any(): - # Either add atom or perform swap - pick = np.where(current_num > D_C_num)[0][0] - df_select = df_interest[(df_interest['Atom Name Number'] == str(D_C_num[pick])) & (df_interest['Element'] == 'C')] # noqa: E501 - skip_line, D_C_num = add_or_swap(df_select, new_file, resnum, new_res_name, vel, atom_num_B, orig_coords, skip_line, D_C_num, pick) # noqa: E501 - atom_num_B += 1 - if i not in skip_line: - write_line(new_file, orig_file[i], line, atom_num_B, vel, orig_coords[atom_num_A], resnum, new_res_name) # Add this current line too # noqa: E501 - elif current_element == 'DC' and (current_num > D_C_num).any(): # Add dummy Cs in correct order # noqa: E501 - while (current_num > D_C_num).any(): - # Either add atom or perform swap - pick = np.where(current_num > D_C_num)[0][0] - df_select = df_interest[(df_interest['Atom Name Number'] == str(D_C_num[pick])) & (df_interest['Element'] == 'C')] # noqa: E501 - skip_line, D_C_num = add_or_swap(df_select, new_file, resnum, new_res_name, vel, atom_num_B, orig_coords, skip_line, D_C_num, pick) # noqa: E501 - atom_num_B += 1 - if i not in skip_line: - write_line(new_file, orig_file[i], line, atom_num_B, vel, orig_coords[atom_num_A], resnum, new_res_name) # Add this current line too # noqa: E501 - elif len(D_C_num) != 0 and current_element == 'HV': # Add remaining dummy Cs after real Hs - while len(D_C_num) != 0: - df_select = df_interest[(df_interest['Atom Name Number'] == str(D_C_num[0])) & (df_interest['Element'] == 'C')] # noqa: E501 - skip_line, D_C_num = add_or_swap(df_select, new_file, resnum, new_res_name, vel, atom_num_B, orig_coords, skip_line, D_C_num, 0) # noqa: E501 + elif line[1] == atom_order[res_interest_atom]: # Just change atom or residue number as needed since atom is in the right order + write_line(new_file, orig_file[i], line, atom_num_B, vel, orig_coords[atom_num_A], resnum, new_res_name) # noqa: E501 + res_interest_atom += 1 + elif (f'{current_element}{current_num}' == atom_order[res_interest_atom]) or (f'{current_element}V{current_num}' == atom_order[res_interest_atom]) or (f'D{current_element}{current_num}' == atom_order[res_interest_atom]): # Since atom is not in missing it must be a D2R flip # noqa: E501 + df_select = df_interest[df_interest['Name'] == line[1]] + skip_line = add_or_swap(df_select, new_file, resnum, new_res_name, vel, atom_num_B, orig_coords, skip_line, atom_order[res_interest_atom]) # noqa: E501 + res_interest_atom += 1 + elif line[1] in atom_order: # Atom is in the molecule, but there are other atoms before it + atom_pos = atom_order.index(line[1]) + for x in range(res_interest_atom, atom_pos): + df_select = df_interest[df_interest['Name'] == atom_order[x]] + skip_line = add_or_swap(df_select, new_file, resnum, new_res_name, vel, atom_num_B, orig_coords, skip_line, atom_order[x]) # noqa: E501 atom_num_B += 1 - if i not in skip_line: - write_line(new_file, orig_file[i], line, atom_num_B, vel, orig_coords[atom_num_A], resnum, new_res_name) # Add this current line too # noqa: E501 - elif current_element == 'HV' and (current_num > D_H_num).any(): # Add dummy Hs in proper order # noqa: E501 - while (current_num > D_H_num).any(): - # Either add atom or perform swap - pick = np.where(current_num > D_H_num)[0][0] - df_select = df_interest[(df_interest['Atom Name Number'] == str(D_H_num[0])) & (df_interest['Element'] == 'H')] # noqa: E501 - skip_line, D_H_num = add_or_swap(df_select, new_file, resnum, new_res_name, vel, atom_num_B, orig_coords, skip_line, D_H_num, 0) # noqa: E501 + res_interest_atom += 1 + write_line(new_file, orig_file[i], line, atom_num_B, vel, orig_coords[atom_num_A], resnum, new_res_name) # noqa: E501 + res_interest_atom += 1 + elif (f'{current_element}{current_num}' in atom_order) or (f'{current_element}V{current_num}' in atom_order) or (f'D{current_element}{current_num}' in atom_order): # Atom is in the molecule, but needs swapped AND there are other atoms before it # noqa: E501 + if (f'{current_element}{current_num}' in atom_order): + atom_pos = atom_order.index(f'{current_element}{current_num}') + elif (f'{current_element}V{current_num}' in atom_order): + atom_pos = atom_order.index(f'{current_element}V{current_num}') + elif (f'D{current_element}{current_num}' in atom_order): + atom_pos = atom_order.index(f'D{current_element}{current_num}') + + for x in range(res_interest_atom, atom_pos): + df_select = df_interest[df_interest['Name'] == atom_order[x]] + skip_line = add_or_swap(df_select, new_file, resnum, new_res_name, vel, atom_num_B, orig_coords, skip_line, atom_order[x]) # noqa: E501 atom_num_B += 1 - if i not in skip_line: - write_line(new_file, orig_file[i], line, atom_num_B, vel, orig_coords[atom_num_A], resnum, new_res_name) # Add this current line too # noqa: E501 + res_interest_atom += 1 + df_select = df_interest[df_interest['Name'] == line[1]] + skip_line = add_or_swap(df_select, new_file, resnum, new_res_name, vel, atom_num_B, orig_coords, skip_line, atom_order[res_interest_atom]) # noqa: E501 + res_interest_atom += 1 else: - write_line(new_file, orig_file[i], line, atom_num_B, vel, orig_coords[atom_num_A], resnum, new_res_name) # noqa: E501 + print(f'Warning {line} not written') elif resname != old_res_name and prev_resname == old_res_name: # Add dummy atoms at the end of the residue - while len(R_H_num) != 0: - df_select = df_interest[(df_interest['Atom Name Number'] == str(R_H_num[0])) & (df_interest['Element'] == 'H')] # noqa: E501 - skip_line, R_H_num = add_or_swap(df_select, new_file, int(resnum)-1, new_res_name, vel, atom_num_B, orig_coords, skip_line, R_H_num, 0) # noqa: E501 - atom_num_B += 1 - while len(D_C_num) != 0: - df_select = df_interest[(df_interest['Atom Name Number'] == str(D_C_num[0])) & (df_interest['Element'] == 'C')] # noqa: E501 - skip_line, D_C_num = add_or_swap(df_select, new_file, int(resnum)-1, new_res_name, vel, atom_num_B, orig_coords, skip_line, D_C_num, 0) # noqa: E501 + while res_interest_atom < len(atom_order): + df_select = df_interest[df_interest['Name'] == atom_order[res_interest_atom]] + skip_line = add_or_swap(df_select, new_file, resnum, new_res_name, vel, atom_num_B, orig_coords, skip_line, atom_order[res_interest_atom]) # noqa: E501 atom_num_B += 1 - while len(D_H_num) != 0: - df_select = df_interest[(df_interest['Atom Name Number'] == str(D_H_num[0])) & (df_interest['Element'] == 'H')] # noqa: E501 - skip_line, D_H_num = add_or_swap(df_select, new_file, int(resnum)-1, new_res_name, vel, atom_num_B, orig_coords, skip_line, D_H_num, 0) # noqa: E501 - atom_num_B += 1 - line, prev_line = process_line(orig_file, i) + res_interest_atom += 1 write_line(new_file, orig_file[i], line, atom_num_B, vel, orig_coords[atom_num_A]) else: print(f'Warning line {i+1} not written') atom_num_A += 1 - if len(R_H_num) != 0: # If we hit end of coordinates without adding Hs add them now - while len(R_H_num) != 0: - atom_num_B += 1 - df_select = df_interest[(df_interest['Atom Name Number'] == str(R_H_num[0])) & (df_interest['Element'] == 'H')] # noqa: E501 - skip_line, R_H_num = add_or_swap(df_select, new_file, resnum, new_res_name, vel, atom_num_B, orig_coords, skip_line, R_H_num, 0) # noqa: E501 - - if len(D_C_num) != 0: # If we hit end of coordinates without adding Cs add them now - while len(D_C_num) != 0: - atom_num_B += 1 - df_select = df_interest[(df_interest['Atom Name Number'] == str(D_C_num[0])) & (df_interest['Element'] == 'C')] # noqa: E501 - skip_line, D_C_num = add_or_swap(df_select, new_file, resnum, new_res_name, vel, atom_num_B, orig_coords, skip_line, D_C_num, 0) # noqa: E501 - - if len(D_H_num) != 0: # If we hit end of coordinates without adding Hs add them now - while len(D_H_num) != 0: - atom_num_B += 1 - df_select = df_interest[(df_interest['Atom Name Number'] == str(D_H_num[0])) & (df_interest['Element'] == 'H')] # noqa: E501 - skip_line, D_H_num = add_or_swap(df_select, new_file, resnum, new_res_name, vel, atom_num_B, orig_coords, skip_line, D_H_num, 0) # noqa: E501 + while res_interest_atom < len(atom_order): + df_select = df_interest[df_interest['Name'] == atom_order[res_interest_atom]] + skip_line = add_or_swap(df_select, new_file, resnum, new_res_name, vel, atom_num_B, orig_coords, skip_line, atom_order[res_interest_atom]) # noqa: E501 + atom_num_B += 1 + res_interest_atom += 1 # Add Box dimensions to file new_file.write(orig_file[-1]) diff --git a/ensemble_md/utils/gmx_parser.py b/ensemble_md/utils/gmx_parser.py index a2ffc4a..3fd49c5 100644 --- a/ensemble_md/utils/gmx_parser.py +++ b/ensemble_md/utils/gmx_parser.py @@ -408,3 +408,37 @@ def read_top(file_name, resname): if len(line_sep) > 4 and line_sep[3] == resname: return input_file raise Exception(f'Residue {resname} can not be found in {file_name}') + + +def deter_atom_order(mol_file, resname): + """ + Determine the order of atoms in the residue we will modify to ensure the output GRO is formatted properly + + Parameters + ---------- + file_name : list of str + Contains the contents of the original GRO file + resname : str + Name of the residue of interest we are searching for. + + Returns + ------- + atom_order : list of str + List of the atom names in the order they appear in the GRO file + + """ + from ensemble_md.utils import coordinate_swap + + atom_order = [] + for line in mol_file: + split_line = line.split(' ') + while '' in split_line: + split_line.remove('') + if resname in split_line[0]: + if len(split_line[1]) > 5: + split_line = coordinate_swap.sep_merge(split_line) + atom_order.append(split_line[1]) + elif len(atom_order) != 0: + break + + return atom_order