前几天做了一下土地覆被二级分类转一级分类,操作就是把二级类的像元值的十位数提取出来,作为一级类的数值,这个可以在QGIS中完成,但是转换后的数据数据量极大,在ChatGPT的帮助下,我做了个Python GPU的计算,计算结果非常理想,计算速度快,数据量很小。
QGIS重分类
QGIS的重分类是我经常使用的工具,可以完成土地覆被重分类工作,但是经实验发现,QGIS转换速度很慢,转换后的文件数据量巨大,全国30米土地覆被一幅高大20GB,转换效果差。
Python设置代码如下:
processing.run("native:reclassifybytable", {'INPUT_RASTER':'G:/Geodata/CASLUCC/1980s.tif','RASTER_BAND':1,'TABLE':['10','12','1','20','24','2','30','33','3','40','46','4','50','53','5','60','67','6','90','99','9'],'NO_DATA':-9999,'RANGE_BOUNDARIES':0,'NODATA_FOR_MISSING':False,'DATA_TYPE':0,'OUTPUT':'I:/LUCC_CAS/L1_1980s.tif'})
Python CPU栅格计算
我用的服务器是至强E5双路CPU,256GB内存,运行下面的程序,居然显示内存不足了,ChatGPT表示并行计算建议将分块设置的小一点,下面代码默认是1024,我估计256可能会好一些吧,计算速度极慢,等了好半天,不想等了,突然想起GPU也是可以计算的嘛,干嘛不试试GPU计算,于是就有了后面第三部分的代码。
import os
import numpy as np
import rasterio
from concurrent.futures import ProcessPoolExecutor
from tqdm import tqdm # 导入tqdm库
# Define input and output directories
input_dir = r'G:\Geodata\CASLUCC'
output_dir = r'I:\LUCC_CAS'
# Create output directory if it doesn't exist
os.makedirs(output_dir, exist_ok=True)
# Get list of all .tif files in the input directory
tif_files = [f for f in os.listdir(input_dir) if f.endswith('.tif')]
# Function to process each raster file
def process_raster(input_path, output_path):
with rasterio.open(input_path) as src:
# Read metadata
meta = src.meta.copy()
# Initialize the processed data array
processed_data = np.zeros((src.height, src.width), dtype=np.uint8)
# Define block size (you can adjust this based on available memory)
block_size = 1024 # 建议改256,减小块减小内存使用
# Calculate total number of blocks
num_blocks_x = (src.width + block_size - 1) // block_size # Horizontal blocks
num_blocks_y = (src.height + block_size - 1) // block_size # Vertical blocks
# Process the raster in blocks to reduce memory usage
for i in tqdm(range(0, src.height, block_size), desc=f'Processing {os.path.basename(input_path)}', total=num_blocks_y):
for j in range(0, src.width, block_size):
window = rasterio.windows.Window(j, i, block_size, block_size)
data = src.read(1, window=window)
# Create a mask for valid data (values between 11 and 99)
mask = (data >= 11) & (data <= 99)
# Process data (compute tens digit)
processed_data[i:i+block_size, j:j+block_size] = np.where(mask, data // 10, 0)
# Update metadata for output
meta.update(dtype=rasterio.uint8, compress='LZW', tiled=True, bigtiff='YES')
# Write the processed data to the output file with high compression
with rasterio.open(output_path, 'w', **meta) as dst:
dst.write(processed_data, 1)
# Use ProcessPoolExecutor to process each .tif file in parallel
def parallel_processing():
with ProcessPoolExecutor() as executor:
# Map the process_raster function to each file in the list, and track progress with tqdm
futures = [
executor.submit(process_raster, os.path.join(input_dir, tif_file), os.path.join(output_dir, tif_file))
for tif_file in tif_files
]
# Use tqdm to track the overall progress of the futures
for future in tqdm(futures, desc="Overall progress", total=len(futures)):
future.result() # This will raise exceptions if there were any errors during processing
# Run the parallel processing
if __name__ == "__main__":
parallel_processing()
print("All files processed successfully.")
Python GPU栅格计算
这个效果最好,推荐使用,我是用的RTX3060显卡,12GB显存,计算速度非常快,一幅全国30米的影像2分半就可以算完,效率很高,而且处理结果数据量很小,只要400MB,非常理想。
要说唯一的不足,就是这个GPU计算的环境配置非常麻烦。两点要注意:
CUDA环境不要安装太高版本的,11.3版本比较好 使用conda forge来安装cupy,参考参考文献1
Conda安装cupy代码:
conda install -c conda-forge cupy
GPU栅格计算代码:
分块处理,block大小为1024 输出数据使用LZW压缩,INT8,NODATA值设为0
# -*- coding: utf-8 -*-
"""
Created on Mon Dec 30 22:06:12 2024
@author: Xu Yang
"""
import os
import cupy as cp
import rasterio
from tqdm import tqdm
import numpy as np
# Define input and output directories定义输入和输出路径
input_dir = r'G:\Geodata\CASLUCC'
output_dir = r'I:\LUCC_CAS'
# Create output directory if it doesn't exist
os.makedirs(output_dir, exist_ok=True)
# Get list of all .tif files in the input directory获取所有的tif文件
tif_files = [f for f in os.listdir(input_dir) if f.endswith('.tif')]
tif_files = tif_files[6:10]
# Function to process each raster file using GPU (with memory-efficient approach)
def process_raster(input_path, output_path):
with rasterio.open(input_path) as src:
# Read metadata
meta = src.meta.copy()
# Initialize the processed data array (on CPU)
processed_data = np.zeros((src.height, src.width), dtype=np.uint8)
# Define block size (you can adjust this based on available memory)
block_size = 1024 # You can modify this to a smaller block size if needed
# Calculate total number of blocks
num_blocks_x = (src.width + block_size - 1) // block_size # Horizontal blocks
num_blocks_y = (src.height + block_size - 1) // block_size # Vertical blocks
# Process the raster in blocks to reduce memory usage
for i in tqdm(range(0, src.height, block_size), desc=f'Processing {os.path.basename(input_path)}', total=num_blocks_y):
for j in range(0, src.width, block_size):
window = rasterio.windows.Window(j, i, block_size, block_size)
data = src.read(1, window=window)
# Transfer data to GPU using CuPy
data_gpu = cp.asarray(data) # Transfer data to GPU using CuPy
# Create a mask for valid data (values between 11 and 99)
mask = (data_gpu >= 11) & (data_gpu <= 99)
# Compute the tens digit (L1 level) on the GPU
processed_data_gpu = cp.zeros_like(data_gpu, dtype=cp.uint8)
processed_data_gpu[mask] = data_gpu[mask] // 10
# Transfer the result back to CPU (host memory) for writing
processed_data[i:i+block_size, j:j+block_size] = cp.asnumpy(processed_data_gpu) # Transfer to NumPy array
# Update metadata for output
meta.update(dtype=rasterio.uint8, nodata = 0, compress='LZW', tiled=True, bigtiff='YES')
# Write the processed data to the output file with high compression
with rasterio.open(output_path, 'w', **meta) as dst:
dst.write(processed_data, 1)
# Use tqdm to show progress during parallel processing显示进度条
def parallel_processing():
for tif_file in tqdm(tif_files, desc="Processing files"):
input_path = os.path.join(input_dir, tif_file)
output_path = os.path.join(output_dir, tif_file)
process_raster(input_path, output_path)
# Run the parallel processing并行处理
if __name__ == "__main__":
parallel_processing()
print("All files processed successfully.")
参考文献
https://docs.cupy.dev/en/stable/install.html