bio-restriction-fragment-analysis

Installation

SKILL.md

Fragment Analysis

Get Fragment Sizes

from Bio import SeqIO
from Bio.Restriction import EcoRI

record = SeqIO.read('sequence.fasta', 'fasta')
seq = record.seq

# catalyze() returns tuple: (fragments_5prime, fragments_3prime)
# For standard use, take the first element
fragments = EcoRI.catalyze(seq)[0]

# fragments is tuple of Seq objects
sizes = [len(f) for f in fragments]
print(f'Fragment sizes: {sorted(sizes, reverse=True)}')

Linear vs Circular Digestion

from Bio.Restriction import EcoRI

# Linear DNA
fragments_linear = EcoRI.catalyze(seq, linear=True)[0]

# Circular DNA (plasmid)
fragments_circular = EcoRI.catalyze(seq, linear=False)[0]

# Circular produces one fewer fragment (ends join)
print(f'Linear: {len(fragments_linear)} fragments')
print(f'Circular: {len(fragments_circular)} fragments')

Get Fragment Sequences

from Bio.Restriction import EcoRI

fragments = EcoRI.catalyze(seq)[0]

for i, frag in enumerate(fragments, 1):
    print(f'Fragment {i}: {len(frag)} bp')
    print(f'  5\' end: {frag[:20]}...')
    print(f'  3\' end: ...{frag[-20:]}')

Double Digest

from Bio.Restriction import EcoRI, BamHI, RestrictionBatch

# Method 1: Sequential digestion
frags_ecori = EcoRI.catalyze(seq)[0]
final_fragments = []
for frag in frags_ecori:
    sub_frags = BamHI.catalyze(frag)[0]
    final_fragments.extend(sub_frags)

# Method 2: Using RestrictionBatch
batch = RestrictionBatch([EcoRI, BamHI])
# Note: RestrictionBatch doesn't have catalyze, use Analysis

# Method 3: Manual calculation from positions
ecori_sites = EcoRI.search(seq)
bamhi_sites = BamHI.search(seq)
all_sites = sorted(set(ecori_sites + bamhi_sites))

fragment_sizes = []
for i in range(len(all_sites) - 1):
    fragment_sizes.append(all_sites[i + 1] - all_sites[i])
# Add terminal fragments
fragment_sizes.insert(0, all_sites[0])
fragment_sizes.append(len(seq) - all_sites[-1])

Calculate Fragment Sizes from Positions

def fragments_from_positions(seq_len, cut_positions, linear=True):
    '''Calculate fragment sizes from cut positions'''
    if not cut_positions:
        return [seq_len]

    positions = sorted(cut_positions)
    fragments = []

    if linear:
        # First fragment: start to first cut
        fragments.append(positions[0])
        # Middle fragments
        for i in range(len(positions) - 1):
            fragments.append(positions[i + 1] - positions[i])
        # Last fragment: last cut to end
        fragments.append(seq_len - positions[-1])
    else:
        # Circular: all fragments between cuts
        for i in range(len(positions) - 1):
            fragments.append(positions[i + 1] - positions[i])
        # Wrap-around fragment
        fragments.append((seq_len - positions[-1]) + positions[0])

    return fragments

# Usage
sites = EcoRI.search(seq)
sizes = fragments_from_positions(len(seq), sites, linear=True)
print(f'Fragment sizes: {sorted(sizes, reverse=True)}')

Simulate Gel Pattern

def simulate_gel(fragment_sizes, ladder=None):
    '''Print a text-based gel simulation'''
    if ladder is None:
        ladder = [10000, 8000, 6000, 5000, 4000, 3000, 2000, 1500, 1000, 750, 500, 250]

    max_size = max(max(fragment_sizes), max(ladder))

    print('Ladder  |  Digest')
    print('-' * 30)

    for size in sorted(ladder + fragment_sizes, reverse=True):
        ladder_mark = f'{size:>6}' if size in ladder else '      '
        digest_mark = '====' if size in fragment_sizes else ''
        print(f'{ladder_mark}  |  {digest_mark}')

# Usage
sizes = [len(f) for f in EcoRI.catalyze(seq)[0]]
simulate_gel(sizes)

Detailed Fragment Report

from Bio.Restriction import EcoRI, BamHI

def fragment_report(seq, enzyme, linear=True):
    '''Generate detailed fragment analysis'''
    sites = enzyme.search(seq, linear=linear)
    fragments = enzyme.catalyze(seq, linear=linear)[0]

    print(f'Enzyme: {enzyme}')
    print(f'Recognition site: {enzyme.site}')
    print(f'Number of sites: {len(sites)}')
    print(f'Cut positions: {sites}')
    print(f'\nFragments ({len(fragments)}):')

    sizes = sorted([len(f) for f in fragments], reverse=True)
    total = sum(sizes)

    for i, size in enumerate(sizes, 1):
        pct = (size / total) * 100
        print(f'  {i}. {size:6d} bp ({pct:5.1f}%)')

    print(f'\nTotal: {total} bp')
    return sizes

# Usage
sizes = fragment_report(seq, EcoRI)

Compare Expected vs Observed Fragments

def compare_fragments(expected, observed, tolerance=50):
    '''Compare expected fragment sizes with observed (from gel)'''
    matched = []
    unmatched_exp = list(expected)
    unmatched_obs = list(observed)

    for exp in expected:
        for obs in observed:
            if abs(exp - obs) <= tolerance:
                matched.append((exp, obs))
                if exp in unmatched_exp:
                    unmatched_exp.remove(exp)
                if obs in unmatched_obs:
                    unmatched_obs.remove(obs)
                break

    print('Matched fragments:')
    for exp, obs in matched:
        print(f'  Expected: {exp}, Observed: {obs}')

    if unmatched_exp:
        print(f'\nMissing (expected but not observed): {unmatched_exp}')
    if unmatched_obs:
        print(f'\nExtra (observed but not expected): {unmatched_obs}')

# Usage
expected = [3000, 2000, 1500, 500]
observed = [3050, 2000, 1480, 510, 200]  # From gel
compare_fragments(expected, observed)

Fragment with Sequence Context

from Bio.Restriction import EcoRI

def annotated_fragments(seq, enzyme, context=50):
    '''Get fragments with surrounding sequence context'''
    sites = enzyme.search(seq)
    fragments = enzyme.catalyze(seq)[0]

    print(f'{enzyme} digest ({len(fragments)} fragments):')

    for i, (frag, site) in enumerate(zip(fragments, [0] + sites), 1):
        print(f'\nFragment {i}: {len(frag)} bp (starts at {site})')
        print(f"  5' sequence: {str(frag[:context])}...")
        print(f"  3' sequence: ...{str(frag[-context:])}")

# Usage
annotated_fragments(seq, EcoRI)

Notes

catalyze() returns tuple - use [0] to get 5' fragments
Fragment order - fragments returned in 5' to 3' order
Circular DNA - produces n fragments from n cuts (not n+1)
Double digest - combine cut positions, then calculate fragments

Related Skills

restriction-sites - Find cut positions for fragment calculation
restriction-mapping - Visualize fragment positions
enzyme-selection - Choose enzymes for desired fragments

Related skills

More from gptomics/bioskills

Installs

Repository

gptomics/bioskills

GitHub Stars

562

First Seen

Jan 24, 2026

Security Audits

Gen Agent Trust HubPass

SnykPass

bio-restriction-fragment-analysis

Fragment Analysis

Get Fragment Sizes

Linear vs Circular Digestion

Get Fragment Sequences

Double Digest

Calculate Fragment Sizes from Positions

Simulate Gel Pattern

Detailed Fragment Report

Compare Expected vs Observed Fragments

Fragment with Sequence Context

Notes

Related Skills

More from gptomics/bioskills

bioskills

bio-single-cell-batch-integration

bio-epitranscriptomics-merip-preprocessing

bio-data-visualization-multipanel-figures

bio-data-visualization-specialized-omics-plots

bio-read-qc-fastp-workflow