前提
多尺度表达
- 物体在不同的观测尺度下不同的表现方式
概念
高斯金字塔(Gaussian Pyramid)
主要应用于下采样,最下层为原始图像,越上层尺度越小
高斯核(Gaussian Kernel)
[g(sigma)=frac{1}{2 pi sigma^{2}} exp left(-frac{x^{2}+y^{2}}{2 sigma^{2}}
ight)
]
-
高斯核用作平滑图像时,粗糙尺度只是细尺度图像的简化表达,该特性被称为causality
-
Semi-group特性,多次卷积等于一次大尺度高斯滤波器平滑的结果
[gleft(sigma_{1} ight) * ldots * gleft(sigma_{n} ight) * f(x, y)=gleft(sigma_{1}+ldots+sigma_{n} ight) * f(x, y) ]
多尺度表达
(L(x,y,sigma))其中(x)和(y)为空间位置信息,(sigma)为尺度
[egin{array}{l}
L_{0}(x, y, 0)=f(x, y) \
L_{1}(x, y, sigma)=g(sigma) * L_{0}(x, y ; 0) \
L_{2}(x, y ; 2 sigma)=g(sigma) * L_{1}(x, y ; sigma) \
vdots \
L_{n}(x, y ; n sigma)=g(sigma) * L_{n-1}(x, y ;(n-1) sigma)
end{array}
]
高斯函数可分离性可得:
[egin{array}{l}
g(x ; sigma)=left[frac{1}{sqrt{2 pi} sigma} exp left(-frac{x^{2}}{2 sigma^{2}}
ight)
ight], quad g(x ; sigma)=left[frac{1}{sqrt{2 pi} sigma} exp left(-frac{y^{2}}{2 sigma^{2}}
ight)
ight] \
g(x, y ; sigma)=g(x ; sigma) imes g(y ; sigma)
end{array}
]
可以先沿x轴卷积再沿着y轴卷积
[egin{array}{l}
L_{x}(x, y ; sigma)=g(x ; sigma) * f(x, y) \
L(x, y ; sigma)=g(y ; sigma) * L_{x}(x, y ; sigma)
end{array}
]
(sigma)如何选取呢?(示例取1,即标准正态分布)
根据正态分布的曲线(sigma),标准差越小中心越凸,反之越平
例: 5*1的高斯核((sigma)=1),归一化后
g(x)=[0.05,0.25,0.4,0.25,0.05]
至少选取5个才能表达出一维高斯函数
DOG金字塔
即高斯金字塔当前层相邻尺度倍数sigma的图像之差(尺寸不变)
拉普拉斯金字塔(Laplacian Pyramid)
高斯金字塔当前层与上一层进行上采样之差
应用:图片融合
通过截图软件将上图截出右图:
左图取(256*256),右图也取(256*256)融合
-
构建需要融合两图的拉普拉斯金字塔(Laplacian Pyramid)——最后一层为高斯金字塔的最后一层
class GaussianPyramid(): ''' Gaussion Pyramid ''' def __init__(self): def calc_pyramid(self, origin: np.ndarray, size: int = 5): ''' calculate gaussian pyramid ''' pyramid = [np.copy(origin)] for s in range(size): # cv2 origin algorithm pyramid.append(cv2.pyrDown(pyramid[-1])) # cache self.pyramid: List[np.ndarray] = pyramid return self
def loadPyramid(source: str, prefix="first") -> Tuple[List[np.ndarray]]: global resource_path, DEBUG img = cv2.imread(path.join(resource_path, source)) if img.shape[0] < 256 or img.shape[1] < 256: raise "the shape of image must be greater than (256,256)" img = img[:256, :256].astype(np.float64) # separate from bgr channels # b, g, r = img[:, :, 0], img[:, :, 1], img[:, :, 2] # blue_gaussian = GaussianPyramid().calc_pyramid(b).pyramid # red_gaussian = GaussianPyramid().calc_pyramid(r).pyramid # green_pyramid = GaussianPyramid().calc_pyramid(g).pyramid gaussian = GaussianPyramid().calc_pyramid(img).pyramid showImgs(gaussian, title="{}_gaussian".format(prefix)) # showBgr(blue_gaussian, green_pyramid, red_gaussian, # title="{}_gaussian".format(prefix)) laplacian = LaplacianPyramid(gaussian).pyramid showImgs(laplacian, title="{}_laplacian".format(prefix)) return laplacian
# 主程序 pyramid = loadPyramid("fzu.jpeg") second_pyramid = loadPyramid("fzu2.png", "second")
-
创建融合掩膜(掩膜和它的补集),使用融合掩膜构造高斯金字塔(Gaussian Pyramid)
# create mask left 50% mask = np.ones((256, 128, 3), dtype=np.float64) mask = cv2.copyMakeBorder( mask, 0, 0, 0, 128, cv2.BORDER_CONSTANT, value=[0, 0, 0]) # create mask gaussian pyramid mask_pyramid = GaussianPyramid().calc_pyramid(mask).show("mask").pyramid
-
将1步的拉斯拉斯金字塔与掩膜的高斯金字塔对应相乘
for index in range(len(pyramid)): pyramid[index] = np.multiply(pyramid[index], mask_pyramid[index]) for i in range(len(second_pyramid)): second_pyramid[i] = np.multiply(second_pyramid[i], 1-mask_pyramid[i])
-
将两图的拉普拉斯金字塔相加形成混合拉普拉斯金字塔
blendPyramid = [] for i in range(len(pyramid)): blendPyramid.append(pyramid[i]+second_pyramid[i])
-
从混合拉普拉斯金字塔重构图像
def restructPyramid(pyramid: List[np.ndarray]) -> np.ndarray: img = pyramid[-1] for index in range(len(pyramid)-2, -1, -1): img = cv2.pyrUp(img, dstsize=tuple( pyramid[index].shape[:2]))+pyramid[index] return img.astype(np.uint8)
# show cv2.imshow("restruct",restructPyramid(blendPyramid)) cv2.waitKey(0)
实验结果:
Reference:
pyrDown/Up原理 https://zhuanlan.zhihu.com/p/92118785
Laplacian 融合图像 https://www.jianshu.com/p/3185cca3f082
PS:
注意:浮点数和整形转换可能造成全白
根据原理写的对于单通道的上采样和下采样实现
class GaussianFilter():
'''
Gaussian Filter
'''
def __init__(self, size: int = 5, sigma: int = 1):
if (size & 1) != 1 or size < 5:
raise "size must be odd number and greater than 5"
self.size = size # Kernel Size
self.sigma = sigma # Standard
self.kernel = self._calc_kernel() # Gaussian Kernel
def _gaussian_function(self, x: int) -> float:
'''
g(sigma) = 1/2pisqr(sigma^2)exp(-(x^2)/sqr(sigma))
'''
return 1/(2*np.pi*np.square(self.sigma))*np.exp(-(np.square(x))/(2*np.square(self.sigma)))
def _calc_kernel(self) -> np.ndarray:
'''
calculate gaussian kernel
'''
kernel = np.zeros(self.size, dtype=np.float64)
for x in range(self.size):
kernel[x] = self._gaussian_function(x-self.size//2)
return np.divide(kernel, np.sum(kernel))
def down(self, origin: np.ndarray) -> np.ndarray:
'''
apply gaussian filter
'''
origin = origin.astype(np.float64)
copy = np.copy(origin)
rows, cols = origin.shape[0], origin.shape[1]
length = self.size//2
# x axis
for row in range(rows):
for col in range(length, cols-length):
copy[row][col] = np.sum(np.multiply(
origin[row, col-length:col+length+1].flatten(), self.kernel))
origin = copy
copy = np.copy(copy)
# y axis
for row in range(length, rows-length):
for col in range(cols):
copy[row][col] = np.sum(np.multiply(
origin[row-length:row+length+1, col].flatten(), self.kernel))
return copy[::2, ::2].astype(np.uint8) # remove even pixels
def up(self, origin: np.ndarray) -> np.ndarray:
'''
apply gaussian filter
'''
origin = origin.astype(np.float64)
copy = np.copy(origin)
rows, cols = origin.shape[0], origin.shape[1]
# expand even row
for row in range(rows):
copy = np.insert(copy, row*2+1, values=0, axis=0)
# expand even col
for col in range(cols):
copy = np.insert(copy, col*2+1, values=0, axis=1)
origin = np.copy(copy)
rows, cols = origin.shape[0], origin.shape[1]
length = self.size//2
# x axis
for row in range(rows):
for col in range(length, cols-length):
copy[row][col] = np.sum(np.multiply(
origin[row, col-length:col+length+1].flatten(), self.kernel))
origin = copy
copy = np.copy(copy)
# y axis
for row in range(length, rows-length):
for col in range(cols):
copy[row][col] = np.sum(np.multiply(
origin[row-length:row+length+1, col].flatten(), self.kernel))
copy *= 4
return copy.astype(np.uint8)