把比对后的m个fasta格式文件进行分段。输入是一个csv文件(DNA_file-5.csv),每行一个fasta格式文件的文件名(共m行)。每条序列被分成n段(本脚本中n=10:前9段每段250个碱基,最后一段为剩余部分),结果保存到n个csv文件,一段一个文件;每个文件中有m行DNA序列,同一文件中每行的DNA序列长度相同。例如,K-first-segment.csv中有m行DNA片段,供下一步随机选取。

    
#==========================================
# This code was written by Sihua Peng, PhD.
#==========================================


import csv
from contextlib import ExitStack

from Bio import SeqIO

# Split a DNA sequence into n consecutive segments.
def split_sequence(dna_sequence, segment_length=250, num_segments=10):
    """Split *dna_sequence* into ``num_segments`` consecutive pieces.

    The first ``num_segments - 1`` pieces are slices of ``segment_length``
    characters each; the last piece is everything that remains, so it may be
    longer or shorter than ``segment_length``. A sequence that is too short
    yields empty trailing pieces instead of raising an error.

    With the default arguments the cut points are 0, 250, 500, ..., 2250,
    i.e. ten segments.

    Args:
        dna_sequence: Any sliceable sequence (e.g. a ``str``).
        segment_length: Length of every segment except the last one.
        num_segments: Total number of segments to return.

    Returns:
        A list with exactly ``num_segments`` slices of ``dna_sequence``.

    Raises:
        ValueError: If ``segment_length`` or ``num_segments`` is less than 1.
    """
    if segment_length < 1 or num_segments < 1:
        raise ValueError("segment_length and num_segments must be >= 1")

    # Start offset of the final, open-ended segment.
    last_start = segment_length * (num_segments - 1)

    segments = [
        dna_sequence[start:start + segment_length]
        for start in range(0, last_start, segment_length)
    ]
    # The final segment absorbs whatever is left of the sequence.
    segments.append(dna_sequence[last_start:])
    return segments

# Empty the per-segment CSV output files.
def clear_csv_files():
    """Create or truncate the ten ``K-<ordinal>-segment.csv`` files.

    Each file is opened in write mode and closed again without writing
    anything, which leaves it empty. The files are created in the current
    working directory.
    """
    # The 'nineth' spelling is part of the on-disk file name; keep it as is.
    ordinals = (
        'first', 'second', 'third', 'fourth', 'fifth',
        'sixth', 'seventh', 'eighth', 'nineth', 'tenth',
    )
    for ordinal in ordinals:
        # Opening with mode 'w' is what truncates the file.
        with open(f'K-{ordinal}-segment.csv', 'w'):
            pass

# Start from empty output files so rows from a previous run are not kept.
clear_csv_files()

# Output files, in the same order as the segments returned by
# split_sequence(). The 'nineth' spelling is kept on purpose: it must match
# the file names emptied by clear_csv_files().
output_files = [
    'K-first-segment.csv',
    'K-second-segment.csv',
    'K-third-segment.csv',
    'K-fourth-segment.csv',
    'K-fifth-segment.csv',
    'K-sixth-segment.csv',
    'K-seventh-segment.csv',
    'K-eighth-segment.csv',
    'K-nineth-segment.csv',
    'K-tenth-segment.csv'
]

# Read DNA_file-5.csv (first column of each row = name of a FASTA file) and
# write every record's segments to the matching output files.
with ExitStack() as stack:
    # newline='' is what the csv module expects for files it reads or writes.
    list_file = stack.enter_context(open('DNA_file-5.csv', 'r', newline=''))

    # Open each output file once instead of once per record and segment;
    # ExitStack closes all of them, even if a FASTA file fails to parse.
    writers = [
        csv.writer(stack.enter_context(open(path, 'a', newline='')))
        for path in output_files
    ]

    for row in csv.reader(list_file):
        # Only the first column is used; blank rows are skipped.
        fasta_file = row[0].strip() if row else ""
        if not fasta_file:
            continue

        for record in SeqIO.parse(fasta_file, 'fasta'):
            segments = split_sequence(str(record.seq))

            # One single-column row per segment, appended to that segment's file.
            for writer, segment in zip(writers, segments):
                writer.writerow([segment])