Source code for labtools.adtools.finder

import re

def pull_AD(read, barcoded = False, ad_preceder = "GCTAGC", bc_preceder = "GGGCCCG", bc_anteceder = "GGAGAGAA", ad_length = 120, bclength = 11, **kwargs):
    """Find the activation domain (AD) tile, and optionally a barcode, in a read.

    The read is split once at the first match of ``ad_preceder``; the AD is
    the first ``ad_length`` characters that follow. When ``barcoded`` is
    true, the part of the read after those ``ad_length`` characters is then
    searched for a barcode, first with ``bc_preceder`` and, if that anchor
    is absent, with ``bc_anteceder``.

    All anchors are handed to ``re.split`` and are therefore interpreted as
    regular-expression patterns, not literal strings.

    Parameters
    ----------
    read : str
        The read sequence to search.
    barcoded : bool, default False
        Whether to look for a barcode in addition to the AD.
    ad_preceder : str, default "GCTAGC"
        Anchor pattern directly before the AD.
    bc_preceder : str, default "GGGCCCG"
        Anchor pattern directly before the barcode.
    bc_anteceder : str, default "GGAGAGAA"
        Anchor pattern directly after the barcode; only tried when
        ``bc_preceder`` is not found.
    ad_length : int, default 120
        Number of characters taken after ``ad_preceder`` as the AD.
    bclength : int, default 11
        Required length of the barcode.
    **kwargs
        Accepted and ignored.

    Returns
    -------
    AD : str or None
        Up to ``ad_length`` characters following ``ad_preceder`` (shorter if
        the read ends early), or None if ``ad_preceder`` was not found.
    barcode : str or None
        The barcode if ``barcoded`` is true and a candidate of exactly
        ``bclength`` characters was found, otherwise None.

    Examples
    --------
    >>> pull_AD("TTGCTAGCAAAACCCC", ad_length=4)
    ('AAAA', None)
    >>> pull_AD("TTGCTAGCAAAAGGGCCCGACGTTTT", barcoded=True, ad_length=4, bclength=4)
    ('AAAA', 'ACGT')
    """
    AD = None
    barcode = None

    searched_read = re.split(ad_preceder, read, maxsplit=1)
    if len(searched_read) != 2:
        # AD anchor not found: nothing can be located in this read.
        return AD, barcode

    roi = searched_read[1]
    AD = roi[:ad_length]

    if barcoded:
        # The barcode is only looked for downstream of the AD itself.
        after_ad = roi[ad_length:]
        searched_read = re.split(bc_preceder, after_ad, maxsplit=1)
        if len(searched_read) == 2:
            barcode = searched_read[1][:bclength]
        else:
            # Fall back to the anchor that follows the barcode and take the
            # characters immediately before it.
            searched_read = re.split(bc_anteceder, after_ad, maxsplit=1)
            if len(searched_read) == 2:
                barcode = searched_read[0][-bclength:]

        # Discard truncated candidates (e.g. the read ended mid-barcode).
        if barcode is None or len(barcode) != bclength:
            barcode = None

    return AD, barcode
def pull_barcode(read, bc_preceder="GGGCCCG", bc_anteceder="GGAGAGAA", bclength=11, **kwargs):
    """Find the barcode in a read.

    The read is split once at the first match of ``bc_preceder`` and the
    barcode is taken from the characters that follow. If that anchor is not
    found, the read is split at ``bc_anteceder`` instead and the barcode is
    taken from the characters just before it.

    Both anchors are handed to ``re.split`` and are therefore interpreted
    as regular-expression patterns, not literal strings.

    Parameters
    ----------
    read : str
        The read sequence to search.
    bc_preceder : str, default "GGGCCCG"
        Anchor pattern directly before the barcode.
    bc_anteceder : str, default "GGAGAGAA"
        Anchor pattern directly after the barcode; only tried when
        ``bc_preceder`` is not found.
    bclength : int, default 11
        Maximum number of characters taken as the barcode.
    **kwargs
        Accepted and ignored.

    Returns
    -------
    barcode : str or None
        The characters next to the matched anchor, or None when neither
        anchor is found. The length is not validated: the result is shorter
        than ``bclength`` (possibly empty) when the read has fewer
        characters on that side of the anchor.

    Examples
    --------
    >>> pull_barcode("AAAGGGCCCGACGTACGTACGTTT")
    'ACGTACGTACG'
    >>> pull_barcode("TTTTACGTACGTACGGGAGAGAACCC")
    'ACGTACGTACG'
    """
    # Preferred anchor: the sequence immediately upstream of the barcode.
    pieces = re.split(bc_preceder, read, maxsplit=1)
    if len(pieces) == 2:
        downstream = pieces[1]
        return downstream[:bclength]

    # Fallback anchor: the sequence immediately downstream of the barcode.
    pieces = re.split(bc_anteceder, read, maxsplit=1)
    if len(pieces) == 2:
        upstream = pieces[0]
        return upstream[-bclength:]

    return None