# Extracts the DNA sequence in a specified position range from a FASTA file.
# M30931.1-SIV.fasta is the input file; a file may contain only one DNA sequence.

    
#==========================================
# This code was written by Sihua Peng, PhD.
#==========================================

from Bio import SeqIO

def extract_sequence(input_file: str, output_file: str, start: int, end: int) -> None:
    """Extract a position range from each record of a FASTA file.

    Every record parsed from ``input_file`` is sliced to the 1-based,
    inclusive range ``start..end`` and the sliced records are written to
    ``output_file`` in FASTA format. The record itself is sliced (not just
    its sequence) so that the new record carries the original title along
    with the extracted subsequence.

    Args:
        input_file: Path of the FASTA file to read.
        output_file: Path of the FASTA file to write (overwritten if present).
        start: First position to keep, counted from 1.
        end: Last position to keep, counted from 1 (inclusive).

    Raises:
        ValueError: If ``start`` is less than 1, or ``end`` is less than
            ``start``.
    """
    # Without this check, start <= 0 would make ``start - 1`` a negative
    # index, which Python slicing interprets as "count from the end" and
    # silently extracts the wrong region.
    if start < 1:
        raise ValueError(f"start must be >= 1 (1-based position), got {start}")
    if end < start:
        raise ValueError(f"end ({end}) must not be less than start ({start})")

    # Positions are 1-based and inclusive, Python slices are 0-based and
    # half-open, hence ``start - 1:end``. All records are read and sliced
    # before the output file is opened.
    new_records = [
        record[start - 1:end] for record in SeqIO.parse(input_file, "fasta")
    ]

    # Save the sliced records as a FASTA file.
    with open(output_file, "w") as f:
        SeqIO.write(new_records, f, "fasta")

# Region to extract, given as 1-based inclusive positions (5778..8411).
start_position, end_position = 5778, 8411

# Input and output FASTA file names.
# (The original note here also listed the range 1191..3770, which is not
# used by the call below.)
input_file = "M30931.1-SIV.fasta"
output_file = "M30931.1-SIV-jiequ.fasta"

# Run the extraction and save the result.
extract_sequence(
    input_file=input_file,
    output_file=output_file,
    start=start_position,
    end=end_position,
)