Shared by a reader from the discussion group.
Combining the official website with the code below, you can freely customize a download task to fit your own needs.
https://www.ncei.noaa.gov/data/cmorph-high-resolution-global-precipitation-estimates/access/
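Before launching a long batch job, it is worth probing one URL first to confirm that the path pattern still matches what the access page lists. A minimal sketch (the sample date 2021-06-01 00Z is only an illustration, not part of the original task; requests is assumed to be installed):

import requests

# A HEAD request only checks that the file exists on the server; nothing is downloaded
url = ("https://www.ncei.noaa.gov/data/cmorph-high-resolution-global-precipitation-estimates/access/"
       "30min/8km/2021/06/01/CMORPH_V1.0_ADJ_8km-30min_2021060100.nc")
resp = requests.head(url, timeout=30)
print(url, resp.status_code)  # 200 means the file is reachable

The full download script: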
#-*- coding: utf-8 -*-
import calendar
import os
import requests
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry

def getLegalUrl(year, mon, day, time):
    # Build the remote URL of one hourly file of the 30min/8km product
    base_url = "https://www.ncei.noaa.gov/data/cmorph-high-resolution-global-precipitation-estimates/access/30min/8km/"
    url_preletter = 'CMORPH_V1.0_ADJ_8km-30min_'
    url = (base_url + str(year) + '/' + str(mon).zfill(2) + '/' + str(day).zfill(2) + '/'
           + url_preletter + str(year) + str(mon).zfill(2) + str(day).zfill(2) + str(time).zfill(2) + '.nc')
    return url

def getfilename(year, mon, day, time):
    filename_preletter = 'CMORPH_V1.0_ADJ_8km-30min_'
    filename = filename_preletter + str(year) + str(mon).zfill(2) + str(day).zfill(2) + str(time).zfill(2) + '.nc'
    return filename
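
# Quick sanity check of the two helpers above (the date 2021-06-01 00Z is only an illustration):
#   getLegalUrl(2021, 6, 1, 0) -> ".../access/30min/8km/2021/06/01/CMORPH_V1.0_ADJ_8km-30min_2021060100.nc"
#   getfilename(2021, 6, 1, 0) -> "CMORPH_V1.0_ADJ_8km-30min_2021060100.nc"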

def download_file(url, save_path):
    session = requests.Session()
    # Retry transient server errors (502/503/504) with exponential backoff
    retries = Retry(total=5, backoff_factor=1, status_forcelist=[502, 503, 504])
    session.mount('https://', HTTPAdapter(max_retries=retries))
    try:
        response = session.get(url, timeout=60)
        if response.status_code == 200:
            with open(save_path, 'wb') as file:
                file.write(response.content)
            return True
        else:
            return False
    except requests.exceptions.RequestException as e:
        print(f"Network error: {e}")
        return False
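
# To test connectivity before running the full batch, try a single file first, e.g.
# (the target path below is an assumption; adjust it to your own disk layout):
#   download_file(getLegalUrl(2021, 6, 1, 0), 'D:\\CMORPH_data\\test.nc')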

if __name__ == '__main__':
    f = open("D:\\CMORPH_data\\all_file_url.txt", "a")
    failed_files = []
    success_count = 0
    fail_count = 0
    for year in [2021, 2022]:
        for mon in [6, 7, 8]:
            days = calendar.monthrange(year, mon)[1]  # number of days in this month
            save_path = 'D:\\CMORPH_data\\' + str(year) + '\\' + str(mon).zfill(2) + '\\'  # destination folder
            # Create the year/month directories if they do not exist yet
            current_dir = 'D:\\CMORPH_data'
            for sub_dir in [str(year), str(mon).zfill(2)]:
                current_dir = os.path.join(current_dir, sub_dir)
                if not os.path.exists(current_dir):
                    print(f"Directory {current_dir} does not exist, creating it...")
                    try:
                        os.mkdir(current_dir)
                        print(f"Created directory: {current_dir}")
                    except Exception as e:
                        print(f"Failed to create directory {current_dir}: {e}")
                        continue
            for day in range(1, days + 1):  # first download pass over every hour of the month
                for time in range(0, 24):
                    print(year, mon, day, time)
                    url = getLegalUrl(year, mon, day, time)
                    filename = getfilename(year, mon, day, time)
                    file_path = os.path.join(save_path, filename)
                    f.write(url + '\n')
                    if not os.path.exists(file_path):
                        if download_file(url, file_path):
                            print(f"File {filename} downloaded successfully")
                            success_count += 1
                        else:
                            print(f"File {filename} failed to download")
                            fail_count += 1
                            failed_files.append((url, file_path))  # keep the URL so retries do not rebuild it from stale loop variables
                    else:
                        print(f"File {filename} already exists, skipping download")
    # Retry the files that failed in the first pass
    retry_count = 0
    while failed_files and retry_count < 10:
        retry_count += 1
        print(f"Starting retry round {retry_count}...")
        for url, file_path in list(failed_files):  # iterate over a copy so items can be removed safely
            filename = os.path.basename(file_path)
            if download_file(url, file_path):
                print(f"File {filename} downloaded successfully")
                success_count += 1
                fail_count -= 1
                failed_files.remove((url, file_path))
            else:
                print(f"File {filename} failed to download")
    # If some files still failed after all retries, record their paths to a file
    if failed_files:
        with open("D:\\CMORPH_data\\failed_files.txt", "w") as fail_file:
            for url, file_path in failed_files:
                fail_file.write(file_path + '\n')
    f.close()
    print(f'Download finished: {success_count} files succeeded, {fail_count} failed.')
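
After a batch finishes, it can be useful to confirm that every downloaded NetCDF file actually opens, because an interrupted transfer may leave a truncated file behind. A minimal sketch, assuming the xarray package is installed and the same D:\CMORPH_data tree as above:

import os
import xarray as xr

root = 'D:\\CMORPH_data'
for dirpath, _, names in os.walk(root):
    for name in names:
        if not name.endswith('.nc'):
            continue
        path = os.path.join(dirpath, name)
        try:
            # Opening the dataset forces the NetCDF header to be parsed
            with xr.open_dataset(path) as ds:
                pass
        except Exception as e:
            print(f'Corrupt or incomplete file: {path} ({e})')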