# Extract the DNA sequence within a specified position range from a FASTA file.
# M30931.1-SIV.fasta is the input file; the file must contain only one DNA sequence.
#==========================================
# This code was written by Sihua Peng, PhD.
#==========================================
from Bio import SeqIO
def extract_sequence(input_file, output_file, start: int, end: int) -> None:
    """Cut the region ``start..end`` out of each record in a FASTA file.

    Every record parsed from ``input_file`` is sliced to the same region and
    the sliced records are written to ``output_file`` in FASTA format.

    Args:
        input_file: FASTA file to read (passed to ``SeqIO.parse``).
        output_file: Path of the FASTA file to create or overwrite.
        start: 1-based position of the first base to keep.
        end: 1-based position of the last base to keep (inclusive).

    Raises:
        ValueError: If ``start`` is less than 1 or ``end`` is less than
            ``start``.

    Note:
        An ``end`` beyond the sequence length is not an error; the slice is
        simply truncated at the end of the sequence.
    """
    # Reject coordinates that would otherwise silently give wrong output:
    # start <= 0 makes ``start - 1`` negative, which slices from the END of
    # the sequence, and end < start yields an empty sequence.
    if start < 1:
        raise ValueError(
            "start must be >= 1 (positions are 1-based), got {}".format(start)
        )
    if end < start:
        raise ValueError(
            "end ({}) must be >= start ({})".format(end, start)
        )

    # Slice the record itself (not just record.seq) so the new record keeps
    # the original header. Convert 1-based inclusive coordinates to a Python
    # slice: subtract 1 from start, keep end as the exclusive upper bound.
    # Everything is read before the output is opened, so the input is fully
    # consumed even if both paths refer to the same file.
    sliced_records = [
        record[start - 1:end]
        for record in SeqIO.parse(input_file, "fasta")
    ]

    # Write the sliced records as a FASTA file.
    with open(output_file, "w") as f:
        SeqIO.write(sliced_records, f, "fasta")
# Input and output file names.
# NOTE(review): the original comment here also carried the range 1191..3770,
# presumably another region of interest -- confirm whether it is still needed.
input_file, output_file = "M30931.1-SIV.fasta", "M30931.1-SIV-jiequ.fasta"
# Start and end positions of the region to extract (5778..8411).
start_position, end_position = 5778, 8411
# Run the extraction and save the result.
extract_sequence(
    input_file=input_file,
    output_file=output_file,
    start=start_position,
    end=end_position,
)