把比对后的m个fasta格式文件进行分段。输入文件是csv文件,每行给出一个fasta格式文件的文件名(共m行)。每条序列被分成n段,结果保存到n个fasta文件中:一段对应一个文件,每个文件中有m条DNA序列,同一文件中的每条DNA序列长度相同。

    
#==========================================#
# This code was written by Sihua Peng, PhD.
#==========================================#

from Bio import SeqIO
import csv

# Split a DNA sequence into n consecutive segments
def split_sequence(dna_sequence, segment_length=250, num_segments=10):
    """Split ``dna_sequence`` into ``num_segments`` consecutive pieces.

    The first ``num_segments - 1`` pieces are each at most
    ``segment_length`` characters long; the last piece holds everything
    that remains, so it may be longer or shorter than the others. Pieces
    that fall beyond the end of the input are empty strings, which keeps
    the returned list at exactly ``num_segments`` entries.

    With the default arguments the cut points are 0, 250, ..., 2250,
    giving nine 250-character windows followed by the tail from 2250.

    Args:
        dna_sequence: The sequence to slice (any sliceable string-like).
        segment_length: Width of every piece except the last.
        num_segments: Total number of pieces to return.

    Returns:
        A list of ``num_segments`` slices of ``dna_sequence``, in order.

    Raises:
        ValueError: If ``segment_length`` or ``num_segments`` is below 1.
    """
    if segment_length < 1 or num_segments < 1:
        raise ValueError("segment_length and num_segments must be >= 1")

    # Start offset of the final, open-ended piece.
    tail_start = (num_segments - 1) * segment_length
    segments = [
        dna_sequence[start:start + segment_length]
        for start in range(0, tail_start, segment_length)
    ]
    # The last piece is open-ended so no trailing bases are dropped.
    segments.append(dna_sequence[tail_start:])
    return segments

# Empty the per-segment FASTA output files
def clear_fasta_files():
    """Create or truncate each of the ten per-segment FASTA output files.

    Each file name is resolved relative to the current working directory.
    Opening in ``'w'`` mode creates a missing file and discards the
    contents of an existing one, so afterwards every file exists and is
    empty.
    """
    segment_paths = (
        'L-first-segment.fasta',
        'L-second-segment.fasta',
        'L-third-segment.fasta',
        'L-fourth-segment.fasta',
        'L-fifth-segment.fasta',
        'L-sixth-segment.fasta',
        'L-seventh-segment.fasta',
        'L-eighth-segment.fasta',
        'L-nineth-segment.fasta',
        'L-tenth-segment.fasta',
    )
    for segment_path in segment_paths:
        # Nothing is written: opening for writing is what empties the file.
        with open(segment_path, 'w'):
            pass

# Empty the output FASTA files first: records are appended below, so stale
# content from an earlier run would otherwise accumulate.
clear_fasta_files()

# One output file per segment position, in the order split_sequence returns
# the segments. Built once here instead of once per record.
output_files = [
    'L-first-segment.fasta',
    'L-second-segment.fasta',
    'L-third-segment.fasta',
    'L-fourth-segment.fasta',
    'L-fifth-segment.fasta',
    'L-sixth-segment.fasta',
    'L-seventh-segment.fasta',
    'L-eighth-segment.fasta',
    'L-nineth-segment.fasta',
    'L-tenth-segment.fasta'
]

# Read single-4-L-samples.csv, whose first column holds one FASTA file path
# per row, and split every record of every listed file.
# newline='' is how the csv module expects its input file to be opened.
with open('single-4-L-samples.csv', 'r', newline='') as csvfile:
    reader = csv.reader(csvfile)
    for row in reader:
        # Blank rows are skipped; stray whitespace around the path is dropped.
        fasta_path = row[0].strip() if row else ""
        if not fasta_path:
            continue

        for record in SeqIO.parse(fasta_path, 'fasta'):
            segments = split_sequence(str(record.seq))

            # Append the i-th segment to the i-th output file under the
            # record's original id, so each output file collects the same
            # window from every input sequence.
            for output_path, segment in zip(output_files, segments):
                # A separate name for the output handle keeps the input
                # path from being rebound while its records are being read.
                with open(output_path, 'a') as out_handle:
                    out_handle.write(">{}\n".format(record.id))
                    out_handle.write(segment + "\n")

    


segmentation-of-alignment-DNA