bio-structural-biology-alphafold-predictions
SKILL.md
AlphaFold Predictions
Download and analyze AlphaFold predicted protein structures from the AlphaFold Protein Structure Database.
Download Structures
Single Structure by UniProt ID
import requests
def download_alphafold(uniprot_id, output_dir='.', version=4, timeout=30):
    '''Download the AlphaFold PDB model for a UniProt accession.

    Args:
        uniprot_id: UniProt accession used in the AlphaFold DB file name
            (e.g. 'P04637').
        output_dir: Directory the file is written to; created if missing.
        version: Database release number embedded in the file name
            (``model_v{version}``). Defaults to 4.
        timeout: Seconds to wait for the server before requests gives up.

    Returns:
        Path of the saved PDB file, or None when the server does not
        answer with HTTP 200.
    '''
    import os

    file_name = f'AF-{uniprot_id}-F1-model_v{version}.pdb'
    pdb_url = f'https://alphafold.ebi.ac.uk/files/{file_name}'

    # Without a timeout a stalled connection would block indefinitely.
    response = requests.get(pdb_url, timeout=timeout)
    if response.status_code != 200:
        return None

    os.makedirs(output_dir, exist_ok=True)
    output_path = f'{output_dir}/{file_name}'
    # Write the raw bytes so the file is stored exactly as served, with no
    # decode/re-encode round trip or newline translation.
    with open(output_path, 'wb') as f:
        f.write(response.content)
    return output_path
# Example: fetch one model; pdb_file is the saved path, or None if the download failed.
pdb_file = download_alphafold('P04637') # Human p53
Check Availability
def check_alphafold_exists(uniprot_id, timeout=30):
    '''Check whether the AlphaFold DB prediction endpoint knows an accession.

    Args:
        uniprot_id: UniProt accession to look up.
        timeout: Seconds to wait for the server before requests gives up.

    Returns:
        True if the prediction endpoint answers with HTTP 200, else False.
    '''
    url = f'https://alphafold.ebi.ac.uk/api/prediction/{uniprot_id}'
    # Without a timeout a stalled connection would block indefinitely.
    response = requests.get(url, timeout=timeout)
    return response.status_code == 200
# Example: report availability for human p53.
p53_available = check_alphafold_exists('P04637')
if p53_available:
    print('AlphaFold structure available')
Get Metadata
def get_alphafold_info(uniprot_id, timeout=30):
    '''Fetch AlphaFold prediction metadata for a UniProt accession.

    Args:
        uniprot_id: UniProt accession to look up.
        timeout: Seconds to wait for the server before requests gives up.

    Returns:
        The first entry of the JSON list returned by the prediction
        endpoint, or None when the response is not HTTP 200 or the list
        is empty.
    '''
    url = f'https://alphafold.ebi.ac.uk/api/prediction/{uniprot_id}'
    # Without a timeout a stalled connection would block indefinitely.
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        return None

    entries = response.json()
    # An empty list would otherwise raise IndexError on [0].
    return entries[0] if entries else None
info = get_alphafold_info('P04637')
# get_alphafold_info returns None when the lookup fails, so check before indexing.
if info is None:
    print('No AlphaFold metadata found')
else:
    print(f"Gene: {info['gene']}")
    print(f"Organism: {info['organismScientificName']}")
    # NOTE(review): 'latestVersion' presumably reports the database release,
    # not the AlphaFold model version - confirm the label wording.
    print(f"Model version: {info['latestVersion']}")
File Types Available
Database version v4 (current as of 2025). The version number refers to the database release, not the AlphaFold model version.
| File | URL Pattern | Description |
|---|---|---|
| PDB | AF-{id}-F1-model_v4.pdb | Structure coordinates |
| mmCIF | AF-{id}-F1-model_v4.cif | Structure with metadata |
| PAE JSON | AF-{id}-F1-predicted_aligned_error_v4.json | Predicted aligned error |
def download_pae(uniprot_id, output_dir='.', version=4, timeout=30):
    '''Download the PAE (predicted aligned error) JSON for a UniProt accession.

    Args:
        uniprot_id: UniProt accession used in the AlphaFold DB file name.
        output_dir: Directory the file is written to; created if missing.
        version: Database release number embedded in the remote file name
            (``predicted_aligned_error_v{version}``). Defaults to 4.
        timeout: Seconds to wait for the server before requests gives up.

    Returns:
        Path of the saved file (``AF-{uniprot_id}-F1-pae.json`` inside
        output_dir), or None when the server does not answer with HTTP 200.
    '''
    import os

    url = (
        'https://alphafold.ebi.ac.uk/files/'
        f'AF-{uniprot_id}-F1-predicted_aligned_error_v{version}.json'
    )
    # Without a timeout a stalled connection would block indefinitely.
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        return None

    os.makedirs(output_dir, exist_ok=True)
    # The local name intentionally omits the release suffix, as before.
    output_path = f'{output_dir}/AF-{uniprot_id}-F1-pae.json'
    # Write the raw bytes so the JSON is stored exactly as served.
    with open(output_path, 'wb') as f:
        f.write(response.content)
    return output_path
Analyze pLDDT Confidence Scores
Extract from PDB B-factors
AlphaFold stores pLDDT scores in the B-factor column.
from Bio.PDB import PDBParser
def extract_plddt(pdb_file):
    '''Read per-residue pLDDT values from the B-factor column of a PDB file.

    Returns a dict mapping residue sequence number to the B-factor of the
    residue's CA atom (or of its first atom when there is no CA). Only
    residues whose hetero flag is blank are included. Keys are sequence
    numbers alone, so a number repeated in a later chain or model
    overwrites the earlier value.
    '''
    structure = PDBParser(QUIET=True).get_structure('protein', pdb_file)

    scores = {}
    # get_residues() walks models, then chains, then residues in file order.
    for res in structure.get_residues():
        hetero_flag, seq_number = res.id[0], res.id[1]
        if hetero_flag != ' ':
            continue  # skip anything that is not a standard residue
        if 'CA' in res:
            atom = res['CA']
        else:
            atom = list(res.get_atoms())[0]
        scores[seq_number] = atom.get_bfactor()
    return scores
plddt = extract_plddt('AF-P04637-F1-model_v4.pdb')
# Guard the mean: an empty result would otherwise divide by zero.
if plddt:
    avg_plddt = sum(plddt.values()) / len(plddt)
    print(f'Average pLDDT: {avg_plddt:.1f}')
else:
    print('No residues with pLDDT scores found')
Confidence Interpretation
| pLDDT | Confidence | Interpretation |
|---|---|---|
| >90 | Very high | High accuracy; often comparable to an experimental structure |
| 70-90 | Confident | Good backbone, may have sidechain errors |
| 50-70 | Low | Caution, may be disordered |
| <50 | Very low | Likely disordered or wrong |
Plot pLDDT per Residue
import matplotlib.pyplot as plt
def plot_plddt(plddt_dict, output='plddt_plot.png'):
    '''Save a per-residue pLDDT line plot to an image file.

    plddt_dict maps residue number to pLDDT; residues are drawn in
    ascending key order. Dashed reference lines mark the 70 and 90
    thresholds. The figure is written to `output` and then closed.
    '''
    positions = sorted(plddt_dict)
    values = [plddt_dict[pos] for pos in positions]

    fig, ax = plt.subplots(figsize=(12, 4))
    ax.fill_between(positions, values, alpha=0.3)
    ax.plot(positions, values)

    # Reference lines for the confidence bands.
    ax.axhline(y=70, color='orange', linestyle='--', label='Confident threshold')
    ax.axhline(y=90, color='green', linestyle='--', label='Very high threshold')

    ax.set_xlabel('Residue')
    ax.set_ylabel('pLDDT')
    ax.set_ylim(0, 100)
    ax.legend()

    fig.savefig(output)
    plt.close(fig)
# Example: plot the scores extracted above; writes plddt_plot.png by default.
plot_plddt(plddt)
Analyze PAE (Predicted Aligned Error)
import json
import numpy as np
import matplotlib.pyplot as plt
def load_pae(pae_file):
    '''Load a PAE matrix from an AlphaFold JSON file.

    Two layouts are accepted: a list whose first element is an object with
    a 'predicted_aligned_error' key, or a bare object with that key.

    Args:
        pae_file: Path to the PAE JSON file.

    Returns:
        numpy array built from the 'predicted_aligned_error' value.

    Raises:
        KeyError: if the 'predicted_aligned_error' key is missing.
        IndexError: if the file holds an empty list.
    '''
    with open(pae_file, encoding='utf-8') as f:
        data = json.load(f)

    # Decide on the container type first: indexing a dict with [0] would
    # raise KeyError before the bare-object layout could be handled.
    entry = data[0] if isinstance(data, list) else data
    return np.array(entry['predicted_aligned_error'])
def plot_pae(pae_matrix, output='pae_plot.png'):
    '''Save a heatmap of a PAE matrix to an image file.

    The matrix is drawn with the reversed green colormap on a fixed 0-30
    colour scale, so low values are dark and high values are light. The
    figure is written to `output` and then closed.
    '''
    fig, ax = plt.subplots(figsize=(8, 8))
    heatmap = ax.imshow(pae_matrix, cmap='Greens_r', vmin=0, vmax=30)
    fig.colorbar(heatmap, ax=ax, label='Expected position error (A)')

    ax.set_xlabel('Scored residue')
    ax.set_ylabel('Aligned residue')
    ax.set_title('Predicted Aligned Error')

    fig.savefig(output)
    plt.close(fig)
# Example: this file name is what download_pae('P04637') writes into the
# current directory, so run that download first.
pae = load_pae('AF-P04637-F1-pae.json')
# Writes pae_plot.png by default.
plot_pae(pae)
PAE Interpretation
- Low PAE (green): Residues have well-defined relative positions
- High PAE (white): Uncertain relative positions (e.g. flexible linkers, or the relative orientation of separate domains)
- Diagonal blocks: Distinct structural domains
Batch Download
def batch_download_alphafold(uniprot_ids, output_dir='.'):
    '''Download AlphaFold structures for several accessions.

    Creates output_dir if needed, calls download_alphafold for each ID in
    order, prints one status line per ID, and returns a dict mapping each
    ID to whatever download_alphafold returned (a path or None).
    '''
    import os

    os.makedirs(output_dir, exist_ok=True)

    downloaded = {}
    for accession in uniprot_ids:
        saved_path = download_alphafold(accession, output_dir)
        downloaded[accession] = saved_path
        status = 'Downloaded' if saved_path else 'Not found'
        print(f'{status}: {accession}')
    return downloaded
# NOTE(review): 'P53_HUMAN' looks like a UniProt entry name rather than an
# accession; the download URLs are built from the ID as given, so this one
# will presumably be reported as "Not found" - confirm whether that is intended.
ids = ['P04637', 'P53_HUMAN', 'Q9Y6K9']
# files maps each ID to its saved path, or None when the download failed.
files = batch_download_alphafold(ids, 'alphafold_structures')
Compare with Experimental Structure
from Bio.PDB import PDBParser, Superimposer
def compare_structures(alphafold_pdb, experimental_pdb):
    '''Calculate the CA RMSD between an AlphaFold model and an experimental structure.

    CA atoms are collected from every chain of the first model in each
    file. Both lists are truncated to the shorter length and atoms are
    paired by their position in those lists, not by residue number. The
    result is therefore only meaningful when both files list the same
    residues in the same order.

    Args:
        alphafold_pdb: Path to the AlphaFold PDB file (the moving structure).
        experimental_pdb: Path to the experimental PDB file (the fixed structure).

    Returns:
        The RMSD reported by Bio.PDB's Superimposer for the paired CA atoms.

    Raises:
        ValueError: if either structure has no CA atoms in its first model.
    '''
    parser = PDBParser(QUIET=True)
    af_struct = parser.get_structure('af', alphafold_pdb)
    exp_struct = parser.get_structure('exp', experimental_pdb)

    # CA atoms from all chains of the first model (get_residues() spans
    # every chain, not just the first one).
    af_atoms = [r['CA'] for r in af_struct[0].get_residues() if 'CA' in r]
    exp_atoms = [r['CA'] for r in exp_struct[0].get_residues() if 'CA' in r]

    # Pair by list position after truncating to the shorter list (simple approach).
    min_len = min(len(af_atoms), len(exp_atoms))
    if min_len == 0:
        # Fail with a clear message instead of superimposing empty atom lists.
        raise ValueError('No CA atoms available to superimpose')
    af_atoms = af_atoms[:min_len]
    exp_atoms = exp_atoms[:min_len]

    super_imposer = Superimposer()
    super_imposer.set_atoms(exp_atoms, af_atoms)
    return super_imposer.rms
Related Skills
- structural-biology/structure-io - Load and parse PDB/mmCIF files
- structural-biology/geometric-analysis - RMSD, superimposition
- database-access/uniprot-access - Get UniProt IDs for proteins
- structural-biology/structure-navigation - Navigate structure hierarchy
Weekly Installs
3
Repository
gptomics/bioskills
GitHub Stars
349
First Seen
Jan 24, 2026
Security Audits
Installed on
opencode: 2
codex: 2
claude-code: 2
windsurf: 1
openclaw: 1
trae: 1