Source code for riboraptor.fasta

import os
import warnings

from pyfaidx import Fasta
from Bio.Seq import Seq
from Bio.Alphabet import generic_dna


[docs]class FastaReader(object): """Class for reading and querying fasta file.""" def __init__(self, fasta_location): """ Parameters --------- fasta_location : string Path to fasta file """ self.fasta_location = fasta_location try: self.fasta = Fasta( fasta_location, as_raw=True, sequence_always_upper=True) except Exception as e: raise Exception('Error reading fasta file {} : {}'.format( os.path.abspath(self.fasta_location), e))
[docs] def query(self, intervals): """ Query regions for sequence. Parameters ---------- intervals: list of Interval The intervals for fasta is one-based and full-closed Returns ------- sequences: list(str) An array containing scores for each Interval This function is agnostic of the strand information, the position in the scores is corresponding to the interval .. currentmodule:: .FastaReader .. autosummary:: .FastaReader """ sequences = [] chrom_lengths = self.chromosomes for i in intervals: if i.chrom not in list(chrom_lengths.keys()): warnings.warn( 'Chromosome {} does not appear in the fasta'.format( i.chrom), UserWarning) continue chrom_length = chrom_lengths[i.chrom] if i.start > chrom_length: raise Exception( 'Chromsome start point exceeds chromosome length: {}>{}'. format(i.start, chrom_length)) elif i.end > chrom_length: raise Exception( 'Chromsome end point exceeds chromosome length: {}>{}'. format(i.end, chrom_length)) seq = self.fasta.get_seq(i.chrom, i.start, i.end) sequences.append(seq) return sequences
[docs] def complement(self, seq): return str(Seq(seq, generic_dna).complement())
[docs] def reverse_complement(self, seq): return str(Seq(seq, generic_dna).reverse_complement())
@property def chromosomes(self): """Return list of chromsome and their sizes as in the fasta file. Returns ------- chroms : dict Dictionary with {"chr": "Length"} format .. currentmodule:: .FastaReader .. autosummary:: .FastaReader """ chroms = {} for chrom in self.fasta.keys(): chroms[chrom] = len(self.fasta[chrom]) return chroms