Python开发之二维数组空缺值的近邻填充
Python开发之二维数组空缺值的填充
- 1 实现一,任意位置填充
- 2 实现二,填充内部
- 3 实现三,只填充边缘,不包括四个角
前言:主要实现二维数据里面某一个数据的缺失,用缺失的近邻数据进行均值填充,可以应用在遥感图像中空缺值的填充等。
1 实现一,任意位置填充
对二维数据任意位置的空缺值进行近邻填充。
代码实现,其中指定空缺值,指定近邻的数量
import numpy as np# 定义一个函数来填充-23750为空值
def fill_empty_with_mean(arr, empty_value,min_neighbors):# 获取数组的行数和列数rows, cols = arr.shape# 遍历数组for i in range(rows):for j in range(cols):if arr[i, j] == empty_value:# 获取周围非空元素的均值neighbors = []for x in [-1, 0, 1]:for y in [-1, 0, 1]:if 0 <= i + x < rows and 0 <= j + y < cols and arr[i + x, j + y] != empty_value:neighbors.append(arr[i + x, j + y])# 计算均值并填充if len(neighbors) >=min_neighbors:# print(neighbors)# print(np.mean(neighbors))arr[i, j] = np.mean(neighbors)if __name__ == '__main__':# 创建一个包含-23750为空值的示例二维数组array = np.array([[1, -23750, 1, 1, 1],[1, -23750, 1, 1, -23750],[-23750, 2, 9, -1, 1.0],[-1, -1, -23750, 5, -23750]])# 使用函数填充-23750为空值empty_value = -23750min_neighbors = 3fill_empty_with_mean(array, empty_value,min_neighbors)# 输出填充后的数组print(array)
2 实现二,填充内部
对二维数据任意位置的空缺值进行近邻填充(去掉边缘)。
代码实现,其中指定空缺值,指定近邻的数量
import numpy as npdef fill_empty_with_mean(arr, empty_value, min_neighbors=5):# 获取数组的行数和列数rows, cols = arr.shape# 遍历数组for i in range(1, rows - 1): # 从第2行到倒数第2行for j in range(1, cols - 1): # 从第2列到倒数第2列if arr[i, j] == empty_value:# 获取周围非空元素的个数neighbors = [arr[i + x, j + y] for x in [-1, 0, 1] for y in [-1, 0, 1] ifarr[i + x, j + y] != empty_value]# 如果周围非空元素的个数大于等于min_neighbors,计算均值并填充if len(neighbors) >= min_neighbors:arr[i, j] = np.mean(neighbors)return arrif __name__ == '__main__':# 创建一个包含-23750为空值的示例二维数组array = np.array([[1, -23750, 1, 1, 1],[1, -23750, 1, 1, -23750],[-23750, 1, 7, -1, 1],[-1, -1, -23750, 5, 6]])# 输出填充后的数组# print(array)# 使用函数填充-23750为空值,只有周围至少有5个非空值时才填充empty_value = -23750min_neighbors = 5arr = fill_empty_with_mean(array, empty_value, min_neighbors)# 输出填充后的数组print(arr)
3 实现三,只填充边缘,不包括四个角
代码实现
import numpy as npdef fill_edge_empty_with_mean(arr, empty_value, min_neighbors=5):# 获取数组的行数和列数rows, cols = arr.shape# 遍历第一行for j in range(1, cols - 1):if arr[0, j] == empty_value:# 获取周围非空元素的个数neighbors = [arr[0 + x, j + y] for x in [0, 1] for y in [-1, 0, 1] if arr[0 + x, j + y] != empty_value]print(neighbors)# 如果周围非空元素的个数大于等于min_neighbors,计算均值并填充if len(neighbors) >= min_neighbors:arr[0, j] = np.mean(neighbors)print(float(np.mean(neighbors)))## 遍历最后一行for j in range(1, cols - 1):if arr[rows - 1, j] == empty_value:# 获取周围非空元素的个数neighbors = [arr[rows - 1 + x, j + y] for x in [-1, 0] for y in [-1, 0, 1] if arr[rows - 1 + x, j + y] != empty_value]print(neighbors)# 如果周围非空元素的个数大于等于min_neighbors,计算均值并填充if len(neighbors) >= min_neighbors:arr[rows - 1, j] = np.mean(neighbors)print(float(np.mean(neighbors)))## 遍历第一列for i in range(1, rows - 1):if arr[i, 0] == empty_value :neighbors = [arr[i + x, 0 + y] for x in [-1, 0, 1] for y in [0, 1] if arr[i + x, 0 + y] != empty_value]print(neighbors)# 如果周围非空元素的个数大于等于min_neighbors,计算均值并填充if len(neighbors) >= min_neighbors:arr[i, 0] = np.mean(neighbors)print(float(np.mean(neighbors)))# 遍历最后一列for i in range(1, rows - 1):if arr[i, cols - 1] == empty_value:# 获取周围非空元素的个数neighbors = [arr[i + x, cols-1 + y] for x in [-1, 0, 1] for y in [-1, 0] if arr[i + x, cols-1 + y] != empty_value]print(neighbors)# 如果周围非空元素的个数大于等于min_neighbors,计算均值并填充if len(neighbors) >= min_neighbors:arr[i, cols - 1] = np.mean(neighbors)print(float(np.mean(neighbors)))return arrif __name__ == '__main__':# 创建一个包含-23750为空值的示例二维数组array = np.array([[1, -23750, 1, 1, 1],[1, -1, 1, 1, -23750],[-23750, 1, 7, -1, 1],[-1, -1, -23750, 5, 6.0]])# 输出填充后的数组# print(array)# 使用函数填充-23750为空值,只有周围至少有5个非空值时才填充empty_value = -23750min_neighbors = 3arr = fill_edge_empty_with_mean(array, empty_value, min_neighbors)# 输出填充后的数组print(arr)