diff --git a/Normalize.lua b/Normalize.lua
new file mode 100644
index 000000000..232888902
--- /dev/null
+++ b/Normalize.lua
@@ -0,0 +1,87 @@
+local Normalize, parent = torch.class('nn.Normalize', 'nn.Module')
+
+function Normalize:__init(p, eps)
+   parent.__init(self)
+   assert(p, 'p-norm not provided')
+   assert(p > 0, p .. '-norm not supported')
+   self.p = p
+   self.eps = eps or 1e-10
+end
+
+function Normalize:updateOutput(input)
+   assert(input:dim() <= 2, 'only 1d layer supported')
+   local is_batch = true
+   if input:dim() == 1 then
+      input = input:view(1, -1)
+      is_batch = false
+   end
+
+   self.output:resizeAs(input)
+
+   self.norm = self.norm or input.new()
+   self.normp = self.normp or input.new()
+   self.buffer = self.buffer or input.new()
+
+   if self.p % 2 ~= 0 then -- non-even p: abs() keeps pow() well-defined for negative inputs
+      self.buffer:abs(input):pow(self.p)
+   else
+      self.buffer:pow(input, self.p)
+   end
+   self.normp:sum(self.buffer, 2):add(self.eps) -- sum_i |x_i|^p + eps
+   self.norm:pow(self.normp, 1/self.p)          -- ||x||_p
+   self.output:cdiv(input, self.norm:view(-1, 1):expandAs(self.output))
+
+   if not is_batch then
+      self.output = self.output[1]
+   end
+   return self.output
+end
+
+function Normalize:updateGradInput(input, gradOutput)
+   assert(input:dim() <= 2, 'only 1d layer supported')
+   assert(gradOutput:dim() <= 2, 'only 1d layer supported')
+
+   local is_batch = true
+   if input:dim() == 1 then
+      input = input:view(1, -1)
+      is_batch = false
+   end
+
+   local n = input:size(1) -- batch size
+   local d = input:size(2) -- dimensionality of vectors
+   -- compute diagonal term: delta_ij * ||x||_p^p
+   self.eye = self.eye or torch.eye(d):typeAs(input):view(1, d, d)
+   local eyeExpand = self.eye:expand(n, d, d)
+   self.diag = self.diag or self.eye.new()
+   self.diag:cmul(eyeExpand, self.normp:view(n, 1, 1):expand(n, d, d))
+   -- compute cross term: sign(x_i) * |x_i|^(p-1) * x_j
+   self.buffer:abs(input):pow(self.p - 2):cmul(input)
+   local b1 = self.buffer:view(n, d, 1)
+   local b2 = input:view(n, 1, d)
+
+   self.diag:baddbmm(-1, b1, b2)
+   -- compute the local gradient of the Lp transformation
+   self.buffer:cmul(self.normp, self.norm) -- ||x||_p^(p+1)
+   self.diag:cdiv(self.buffer:view(n, 1, 1):expand(n, d, d))
+   -- chain the gradient
+   self.gradInput:resize(n, d, 1)
+   self.gradInput:bmm(self.diag, gradOutput:view(n, d, 1))
+   self.gradInput = self.gradInput:view(n, d)
+
+   if not is_batch then
+      self.gradInput = self.gradInput[1]
+   end
+
+   return self.gradInput
+end
+
+function Normalize:__tostring__()
+   local s
+   -- different prints if the norm is integer
+   if self.p % 1 == 0 then
+      s = '%s(%d)'
+   else
+      s = '%s(%f)'
+   end
+   return string.format(s, torch.type(self), self.p)
+end
diff --git a/doc/simple.md b/doc/simple.md
index 35446d7cf..586a830d8 100755
--- a/doc/simple.md
+++ b/doc/simple.md
@@ -28,6 +28,7 @@ Simple Modules are used for various tasks like adapting Tensor methods and provi
  * [Power](#nn.Power) : an element-wise [pow](https://github.com/torch/torch7/blob/master/doc/maths.md#res-torchpowres-x) operation ;
  * [Square](#nn.Square) : an element-wise square operation ;
  * [Sqrt](#nn.Sqrt) : an element-wise [sqrt](https://github.com/torch/torch7/blob/master/doc/maths.md#res-torchsqrtres-x) operation ;
+ * [Normalize](#nn.Normalize) : normalizes the input to have unit `L_p` norm ;
  * [MM](#nn.MM) : matrix-matrix multiplication (also supports batches of matrices) ;
 * Miscellaneous Modules :
   * [BatchNormalization](#nn.BatchNormalization) - mean/std normalization over the mini-batch inputs (with an optional affine transform) ;
@@ -886,6 +887,23 @@ gnuplot.grid(true)
 
 ![](image/power.png)
 
+
+## Normalize ##
+
+```lua
+module = nn.Normalize(p, [eps])
+```
+Normalizes the input Tensor to have unit `L_p` norm. The smoothing parameter `eps` prevents division by zero when the input contains only zeros (default: `1e-10`).
+
+The input can be 1D or 2D; a 2D input is treated as a batch of vectors, one per row.
+
+```lua
+A = torch.randn(3, 5)
+m = nn.Normalize(2)
+B = m:forward(A) -- B is also 3 x 5
+-- take the L2 norm over the second dimension:
+print(torch.norm(B, 2, 2)) -- each of the 3 row norms is 1
+```
 
 ## MM ##
 
diff --git a/init.lua b/init.lua
index 3659d4d6c..d5fd96fb9 100644
--- a/init.lua
+++ b/init.lua
@@ -46,6 +46,7 @@ include('WeightedEuclidean.lua')
 include('PairwiseDistance.lua')
 include('CosineDistance.lua')
 include('DotProduct.lua')
+include('Normalize.lua')
 
 include('Exp.lua')
 include('Log.lua')
diff --git a/test.lua b/test.lua
index 99d4e79ac..26711e732 100644
--- a/test.lua
+++ b/test.lua
@@ -412,6 +412,47 @@ function nntest.Power()
    mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ')
 end
 
+function nntest.Normalize()
+   -- compare the forward pass against a direct torch implementation,
+   -- and check the gradient with the Jacobian tester
+   for _, p in pairs({1, 2, 1.5}) do
+      local ini = math.random(3, 10)
+      local input = torch.randn(ini)
+      local module = nn.Normalize(p)
+      local out = module:forward(input)
+      local expected = torch.div(input, input:norm(p))
+      mytester:assertTensorEq(out, expected, 1e-7,
+         torch.typename(module) .. ' (' .. p .. ') - forward err ')
+
+      local err = jac.testJacobian(module, input, -2, 2)
+      mytester:assertlt(err, precision, 'error norm ' .. p .. ' on state ')
+   end
+
+   -- batch mode
+   for _, p in pairs({1, 2, torch.uniform() * math.random(1, 10)}) do
+      local ini = math.random(3, 5)
+      local inj = math.random(3, 5)
+      local input = torch.Tensor(inj, ini):zero()
+
+      local module = nn.Normalize(p)
+
+      local err = jac.testJacobian(module, input, -2, 2)
+      mytester:assertlt(err, precision, 'error norm ' .. p .. ' on state ')
+   end
+
+   -- test IO correctness
+   local ini = math.random(3, 5)
+   local inj = math.random(3, 5)
+   local input = torch.Tensor(inj, ini):zero()
+
+   local module = nn.Normalize(2)
+
+   local ferr, berr = jac.testIO(module, input, 0.1, 2)
+   mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ')
+   mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ')
+
+end
+
 function nntest.Square()
    local in1 = torch.rand(5,7)
    local module = nn.Square()
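For reference, `updateGradInput` above implements the Jacobian of `y = x / ||x||_p`, namely `dy_i/dx_j = (delta_ij * ||x||_p^p - x_i * sign(x_j) * |x_j|^(p-1)) / ||x||_p^(p+1)`: the `cmul` with `eyeExpand` builds the diagonal term, `baddbmm(-1, b1, b2)` subtracts the rank-one cross term, and the final `cdiv` applies the common `normp * norm` denominator. A minimal sketch (not part of the patch) that checks this against the closed form for `p = 2`; the tensor names `x`, `g` and the size `5` are illustrative:

```lua
-- Numerical check of the Jacobian implemented in updateGradInput,
-- specialized to p = 2 where dy_i/dx_j = (delta_ij * ||x||^2 - x_i * x_j) / ||x||^3.
require 'nn'

local x = torch.randn(5)
local g = torch.randn(5)               -- an arbitrary gradOutput

local m = nn.Normalize(2)
m:forward(x)                           -- populates the norm buffers
local gradInput = m:backward(x, g)

-- the same quantity from the closed-form Jacobian
local nrm = x:norm(2)
local J = (torch.eye(5) * nrm^2 - torch.ger(x, x)) / nrm^3
print((gradInput - J * g):abs():max()) -- ~0, up to the eps smoothing term
```

For `p = 2` the Jacobian is symmetric, so multiplying by `J` or its transpose gives the same result; for other values of `p` the `sign(x) * |x|^(p-1)` factor in `b1` makes the cross term asymmetric, which is why the module keeps `b1` and `b2` as distinct batch operands.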