#==========================================#
# This code was written by Sihua Peng, PhD.
#==========================================#
from Bio import SeqIO
import csv
# Define a function that splits a DNA sequence into n segments
def split_sequence(dna_sequence, segment_length=250, num_segments=10):
    """Split a sequence into consecutive fixed-length segments.

    The first ``num_segments - 1`` segments each cover ``segment_length``
    positions; the final segment holds everything that remains, however
    long. With the defaults this yields the slices ``[0:250]``,
    ``[250:500]``, ..., ``[2000:2250]`` and ``[2250:]``.

    Args:
        dna_sequence: Any sliceable sequence (e.g. a ``str``).
        segment_length: Length of every segment except the last.
        num_segments: Total number of segments to return.

    Returns:
        A list of exactly ``num_segments`` slices. When the input is
        shorter than ``segment_length * (num_segments - 1)``, the trailing
        entries are empty (or truncated) slices rather than being omitted.

    Raises:
        ValueError: If ``segment_length`` or ``num_segments`` is not positive.
    """
    if segment_length <= 0:
        raise ValueError("segment_length must be a positive integer")
    if num_segments <= 0:
        raise ValueError("num_segments must be a positive integer")

    # Start offset of the open-ended final segment.
    last_start = segment_length * (num_segments - 1)
    segments = [
        dna_sequence[start:start + segment_length]
        for start in range(0, last_start, segment_length)
    ]
    # The last segment is deliberately unbounded so no trailing bases are lost.
    segments.append(dna_sequence[last_start:])
    return segments
# Empty the output FASTA files
def clear_fasta_files(output_files=None):
    """Create or truncate the per-segment output FASTA files.

    Args:
        output_files: Optional iterable of file paths to empty. When
            omitted, the ten default ``L-*-segment.fasta`` files in the
            current working directory are used.

    Each path is opened in write mode and closed immediately, which leaves
    an empty file whether or not one existed before.
    """
    if output_files is None:
        output_files = [
            'L-first-segment.fasta',
            'L-second-segment.fasta',
            'L-third-segment.fasta',
            'L-fourth-segment.fasta',
            'L-fifth-segment.fasta',
            'L-sixth-segment.fasta',
            'L-seventh-segment.fasta',
            'L-eighth-segment.fasta',
            'L-nineth-segment.fasta',
            'L-tenth-segment.fasta',
        ]
    for path in output_files:
        # Opening with 'w' truncates an existing file or creates a new one.
        with open(path, 'w'):
            pass
# Empty the output FASTA files so a rerun does not append to old results.
clear_fasta_files()

# One output file per segment position; segment i of every record is
# appended to output_files[i].
output_files = [
    'L-first-segment.fasta',
    'L-second-segment.fasta',
    'L-third-segment.fasta',
    'L-fourth-segment.fasta',
    'L-fifth-segment.fasta',
    'L-sixth-segment.fasta',
    'L-seventh-segment.fasta',
    'L-eighth-segment.fasta',
    'L-nineth-segment.fasta',
    'L-tenth-segment.fasta',
]

# Read single-4-L-samples.csv and process the FASTA file named in the first
# column of each row. newline='' is what the csv module expects for files
# passed to csv.reader.
with open('single-4-L-samples.csv', 'r', newline='') as csvfile:
    reader = csv.reader(csvfile)
    for row in reader:
        # Skip empty rows and rows whose first cell is blank.
        input_path = row[0].strip() if row else ""
        if not input_path:
            continue
        for record in SeqIO.parse(input_path, 'fasta'):
            segments = split_sequence(str(record.seq))
            # Write each segment to its corresponding FASTA file, reusing
            # the record id as the header line.
            for output_path, segment in zip(output_files, segments):
                with open(output_path, 'a') as out_handle:
                    out_handle.write(">{}\n".format(record.id))
                    out_handle.write(segment + "\n")