Skip to content

Commit

Permalink
halfway fixing domains_from_pae
Browse files Browse the repository at this point in the history
  • Loading branch information
terwill committed Jul 31, 2022
1 parent ea59614 commit 977c104
Showing 1 changed file with 29 additions and 10 deletions.
39 changes: 29 additions & 10 deletions mmtbx/domains_from_pae.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,31 +38,50 @@
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
"""

def parse_pae_file(pae_json_file):
    '''
    Read a predicted aligned error (PAE) JSON file and return it as a
    square numpy array of floats.

    Three layouts are recognised. Each may be the top-level JSON object or
    the first entry of a top-level list:

      * {'pae': [[...], ...]}
          Written by ColabFold 1.3.
      * {'residue1': [...], 'distance': [...], ...}
          Flattened layout; the matrix size is taken from max(residue1).
      * {'predicted_aligned_error': [[...], ...]}
          Full matrix layout. Zeros are replaced by 0.2 (see below).

    Arguments:
      * pae_json_file: path of the JSON file to read.

    Returns:
      * a two-dimensional numpy array of dtype float.

    Raises:
      * Sorry if the file cannot be read or parsed as JSON, or if its
        contents match none of the layouts above.
    '''
    import json, numpy

    try:
        with open(pae_json_file, 'rt') as f:
            data = json.load(f)
    except Exception:
        raise Sorry("Unable to read the json file %s" %(pae_json_file))

    # Some files wrap the payload in a one-element list; unwrap it so all
    # layouts are handled by the same key checks. An empty list is left
    # as-is and rejected just below.
    if isinstance(data, list) and data:
        data = data[0]
    if not isinstance(data, dict):
        raise Sorry("Data in %s is not in a recognised format" %(pae_json_file))

    if 'pae' in data:
        # ColabFold 1.3 produces a JSON file different from AlphaFold database.
        matrix = numpy.array(data['pae'], dtype=float)

    elif 'residue1' in data and 'distance' in data:
        # Flattened layout: 'distance' holds size*size values, where size is
        # the highest residue number listed in 'residue1'.
        size = max(data['residue1'])
        matrix = numpy.array(data['distance'], dtype=float).reshape(size, size)

    elif 'predicted_aligned_error' in data:
        # Force a float array: the values may be stored as integers, and
        # assigning 0.2 into an integer array would silently truncate to 0.
        matrix = numpy.array(data['predicted_aligned_error'], dtype=float)
        # New format has zeros, leading to divide-by-zero when weighting edges.
        # Replace with previous minimum. Could also change weight function
        # from PAE**-(pae_power) to exp(-PAE*pae_power), but that might
        # need some recalibration and testing.
        matrix[matrix == 0] = 0.2

    else:
        raise Sorry("Data in %s is not in a recognised format" %(pae_json_file))

    return matrix


def domains_from_pae_matrix_networkx(pae_matrix, pae_power=1,
pae_cutoff=5, graph_resolution=1, weight_by_ca_ca_distance=False,
distance_power=1, distance_model=None):
'''
Takes a predicted aligned error (PAE) matrix representing the predicted error in distances between each
pair of residues in a model, and uses a graph-based community clustering algorithm to partition the model
Takes a predicted aligned error (PAE) matrix representing the
predicted error in distances between each
pair of residues in a model, and uses a graph-based community
clustering algorithm to partition the model
into approximately rigid groups.
Arguments:
Expand Down

0 comments on commit 977c104

Please sign in to comment.