用 Python class 实现 FASTA 文件解析

没有对象(object)怎么办?当然是 new 一个啊!

class Sequence(object):
    """A single FASTA record.

    Attributes:
        name: Record identifier, written right after '>' in the header line.
        seq: The sequence itself, as one string without line breaks.
        descr: Optional description written after the name in the header
            line; defaults to the empty string.
    """

    # Number of sequence characters written per output line.
    _LINE_WIDTH = 80

    # Complement of every IUPAC nucleotide code (upper case); the lower-case
    # entries are derived below so that the case of the input is preserved.
    _COMPLEMENT = {
        'A': 'T', 'T': 'A', 'C': 'G', 'G': 'C', 'N': 'N',
        'R': 'Y', 'Y': 'R', 'S': 'S', 'W': 'W', 'K': 'M', 'M': 'K',
        'B': 'V', 'V': 'B', 'D': 'H', 'H': 'D',
    }
    _COMPLEMENT.update(
        {k.lower(): v.lower() for k, v in list(_COMPLEMENT.items())}
    )

    def __init__(self, name, seq, descr=''):
        """Initialise the three attributes of the record."""
        self.name = name
        self.seq = seq
        self.descr = descr

    def reverse_complement(self):
        """Return a new Sequence holding the reverse complement of this one.

        The name and description are carried over unchanged.

        Raises:
            KeyError: if the sequence contains a character that is not an
                IUPAC nucleotide code.
        """
        table = self._COMPLEMENT
        rc_seq = ''.join(table[base] for base in reversed(self.seq))
        return Sequence(self.name, rc_seq, self.descr)

    def write_to_fasta_file(self, file_handle):
        """Write this record in FASTA format to an open, writable text handle.

        The sequence is wrapped at 80 characters per line.
        """
        # Only add the separating space when there is a description, so a
        # record without one does not get a trailing blank in its header.
        if self.descr:
            header = '>{} {}\n'.format(self.name, self.descr)
        else:
            header = '>{}\n'.format(self.name)
        file_handle.write(header)

        width = self._LINE_WIDTH
        wrapped = ''.join(
            self.seq[start:start + width] + '\n'
            for start in range(0, len(self.seq), width)
        )
        file_handle.write(wrapped)

class FastaReader(object):
    """Iterate over the records of a FASTA file.

    Each iteration opens the file anew and yields one Sequence object per
    record, so the file is parsed lazily.

    Args:
        fastafile: Path of the FASTA file to read.
    """

    def __init__(self, fastafile):
        self.fasta = fastafile

    def __iter__(self):
        """Yield a Sequence for every record in the file.

        The header line is split at its first space into name and
        description; the following lines are joined into the sequence.

        Raises:
            ValueError: if sequence data appears before the first '>' header.
        """
        name = None  # None until the first header line has been seen
        descr = None
        chunks = []
        with open(self.fasta) as f:
            for line in f:
                # Strip only the line terminator; slicing off the last
                # character would lose data when the final line of the file
                # has no trailing newline.
                line = line.rstrip('\r\n')
                if line.startswith('>'):
                    # A new header closes the previous record, even when
                    # that record has an empty sequence.
                    if name is not None:
                        yield Sequence(name, ''.join(chunks), descr)
                    name, _, descr = line[1:].partition(' ')
                    chunks = []
                elif line:
                    if name is None:
                        raise ValueError(
                            "FASTA file does not start with '>'."
                        )
                    chunks.append(line)
        # Emit the last record, which has no following header to close it.
        if name is not None:
            yield Sequence(name, ''.join(chunks), descr)

# example
# Read every record of 'ref.fa' and write it to 'out.fa'.
with open('out.fa', 'w') as file_handle:
    for i in FastaReader('ref.fa'):
        # Usage: write the record out, then access its attributes.
        i.write_to_fasta_file(file_handle)
        i.seq
        i.name
        i.descr

懒癌发作了,就先写到这儿吧……

---------本文结束,感谢您的阅读---------