# Download DNA sequences from NCBI by accession number.
#==========================================
# This code was written by Sihua Peng, PhD.
#==========================================
import os
import csv
import requests
import io
from Bio import Entrez, SeqIO
def download_fasta_by_accession(accession_file, output_folder):
    """Download one FASTA file per accession number listed in a CSV file.

    The first column of every non-empty row in ``accession_file`` is taken as
    an accession number. Each sequence is requested from the NCBI E-utilities
    ``efetch`` endpoint (``db=nucleotide``, ``rettype=fasta``) and written to
    ``<output_folder>/<record.id>.fasta``. A failure for one accession is
    reported with ``print`` and the remaining accessions are still processed.

    Args:
        accession_file: Path to a CSV file whose first column holds the
            accession numbers.
        output_folder: Directory that receives the FASTA files. It is created
            when it does not exist yet.

    Raises:
        OSError: If ``accession_file`` cannot be opened or an output file
            cannot be written.
    """
    Entrez.email = "sihuapeng@gmail.com"  # Replace with your own e-mail address.
    base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"

    # Create the destination up front so the first write cannot fail on a
    # missing directory. An empty path means "current directory" for
    # os.path.join, so there is nothing to create in that case.
    if output_folder:
        os.makedirs(output_folder, exist_ok=True)

    # newline="" is what the csv module expects for files it parses itself.
    with open(accession_file, "r", newline="") as csvfile:
        for row in csv.reader(csvfile):
            # csv.reader yields [] for blank lines; indexing it would raise.
            if not row:
                continue
            accession_number = row[0].strip()  # Drop newlines and spaces.
            if not accession_number:
                continue

            params = {
                "db": "nucleotide",
                "rettype": "fasta",
                "retmode": "text",
                "id": accession_number,
            }
            try:
                # Without a timeout a stalled connection blocks forever.
                response = requests.get(base_url, params=params, timeout=30)
            except requests.RequestException as exc:
                print(f"Error: Unable to fetch data for {accession_number}. {exc}")
                continue

            if response.status_code != 200:
                print(f"Error: Unable to fetch data for {accession_number}. Status code: {response.status_code}")
                continue

            try:
                # SeqIO.read requires exactly one record and raises
                # ValueError for an empty or multi-record response.
                record = SeqIO.read(io.StringIO(response.text), "fasta")
            except ValueError as exc:
                print(f"Error: Unable to parse FASTA for {accession_number}. {exc}")
                continue

            output_file = os.path.join(output_folder, f"{record.id}.fasta")
            with open(output_file, "w") as f:
                SeqIO.write(record, f, "fasta")
            print(f"Downloaded: {output_file}")
# Script entry point: read accession numbers from the CSV file and store the
# downloaded FASTA files in the output folder.
if __name__ == "__main__":
    download_fasta_by_accession(
        accession_file="accession_numbers.csv",
        output_folder="fasta_files",
    )