import unittest
import editdistance
import numpy as np
from wub.simulate import seq as sim_seq
from wub.util import seq as seq_util
class TestSimulateSeq(unittest.TestCase):
    """Test sequence simulation utilities."""

    def test_simulate_sequencing_errors(self):
        """Test function simulating sequencing errors.

        Simulates a 5000-base sequence, mutates it at a 10% error rate and
        checks that the edit distance between the original and the mutated
        sequence lies within three standard deviations of the expected
        number of errors.
        """
        error_rate = 0.1
        error_weights = {
            'substitution': 1.0 / 6,
            'insertion': 1.0 / 6,
            'deletion': 4.0 / 6,
        }
        sequence = sim_seq.simulate_sequence(5000)
        mutated_record = sim_seq.simulate_sequencing_errors(
            sequence, error_rate, error_weights)
        distance = editdistance.eval(sequence, mutated_record.seq)

        # The number of errors is modelled as binomial(n, error_rate):
        # mean n * p, standard deviation sqrt(n * p * (1 - p)).
        expected_errors = len(sequence) * error_rate
        errors_sd = np.sqrt(len(sequence) * error_rate * (1 - error_rate))
        lower_bound = expected_errors - errors_sd * 3
        upper_bound = expected_errors + errors_sd * 3

        # Should pass 0.9973 proportion of cases (three-sigma interval).
        # NOTE(review): the check is statistical, so it can fail
        # occasionally unless the simulator's random generator is seeded.
        self.assertTrue(
            lower_bound < distance < upper_bound,
            msg="expected: {} realised:{}".format(expected_errors, distance))

    def test_add_errors(self):
        """Test function adding sequencing errors.

        Adds 6 substitutions to a 12-base sequence and compares the
        alignment statistics against the expected tuple.
        """
        seq = "ATGCATGCATGC"
        mut_seq = sim_seq.add_errors(seq, 6, 'substitution')
        # Presumably (length, substitutions, insertions, deletions,
        # accuracy) -- confirm against seq_util.alignment_stats.
        expected_stats = (12, 6, 0, 0, 0.5)
        self.assertSequenceEqual(
            seq_util.alignment_stats(seq, mut_seq), expected_stats)

    def test_compress_raw_cigar_list(self):
        """Test compression of raw cigar lists.

        Consecutive entries with the same operation are expected to be
        merged into a single entry with the summed length.
        """
        cigar_list = [
            (1, 'M'), (1, 'M'), (1, 'M'),
            (1, 'D'), (1, 'D'),
            (1, 'M'),
            (1, 'I'),
            (1, 'M'),
        ]
        compressed = sim_seq.compress_raw_cigar_list(cigar_list)
        expected = [(3, 'M'), (2, 'D'), (1, 'M'), (1, 'I'), (1, 'M')]
        self.assertSequenceEqual(compressed, expected)

    def test_cigar_list_to_string(self):
        """Test formatting of cigar strings.

        Each (length, operation) pair is expected to be rendered as
        ``<length><operation>`` and the pieces concatenated in order.
        """
        cigar_list = [(3, 'M'), (2, 'D'), (1, 'M'), (1, 'I'), (1, 'M')]
        cigar_string = sim_seq.cigar_list_to_string(cigar_list)
        expected = "3M2D1M1I1M"
        self.assertEqual(cigar_string, expected)