#!/usr/bin/perl # Distancia de Mahalanobis # Autor: Hermano Pereira # www.hermano.com.br # 14/07/2011 # Kardi Teknomo # http://people.revoledu.com/kardi/tutorial/Similarity/MahalanobisDistance.html # Group 1 my @g1 = ([2,2],[2,5],[6,5],[7,3],[4,7],[6,4],[5,3],[4,6],[2,5],[1,3]); # Group 2 my @g2 = ([6,5],[7,4],[8,7],[5,6],[5,4]); &mahalanobis(\@g1,\@g2); #################################################################################### sub mahalanobis { my ($ref_a,$ref_b) = @_; my @a = @{$ref_a}; my @b = @{$ref_b}; print "\n\nValores de entrada para distancia de Mahalanobis:\n\n"; print "a = "; &mahalanobis_mprint(@a); print "\n\nb = "; &mahalanobis_mprint(@b); print "\n"; ; print "Subtraindo o valor de cada coluna com sua media aritmetica:\n\n"; print "a -> a2 = "; my @a2 = &mahalanobis_mean(@a); &mahalanobis_mprint(@a2); print "\n\nb -> b2 = "; my @b2 = &mahalanobis_mean(@b); &mahalanobis_mprint(@b2); print "\n"; ; print "Calculando a matriz de covariancia:\n\n"; print "a2 -> ac = "; my @ac = &mahalanobis_mcov(@a2); &mahalanobis_mprint(@ac); print "\n\nb2 -> bc = "; my @bc = &mahalanobis_mcov(@b2); &mahalanobis_mprint(@bc); print "\n"; ; print "Juncao (pooled) das matrizes de acordo com seus pesos:\n\n"; print "ac + bc -> mp = "; my @mp = &mahalanobis_mpooled(\@ac,\@bc,scalar(@a),scalar(@b)); &mahalanobis_mprint(@mp); print "\n"; ; print "Calcular matriz inversa:\n\n"; print "mp -> mpi = "; my @mpi = &mahalanobis_mpoolinv(@mp); &mahalanobis_mprint(@mpi); print "\n"; ; print "Calcular diferenca da media aritmetica entre as matrizes:\n\n"; print "a <-> b -> mmd = "; my @mmd = &mahalanobis_meandiff(\@a,\@b); &mahalanobis_mprint(@mmd); print "\n"; ; print "Transposta da matriz de diferencas de medias:\n\n"; print "mmd -> mmdt = "; my @mmdt = &mahalanobis_mtransp(@mmd); &mahalanobis_mprint(@mmdt); print "\n"; ; print "Multiplicar matriz transposta da diferencas de medias com a matriz inversa (pooled):\n\n"; print "mmdt * mpi -> mm1 = "; my @mm1 = &mahalanobis_mprod(\@mmdt,\@mpi); &mahalanobis_mprint(@mm1); print "\n"; ; print "Multiplicar matriz do resultado anterior com a matriz de diferenca de medias:\n\n"; print "mm1 * mmd -> mm2 = "; my @mm2 = &mahalanobis_mprod(\@mm1,\@mmd); &mahalanobis_mprint(@mm2); print "\n"; ; print "E com a raiz quadrada do resultado anterior:\n\n"; my $mahalanobis = sqrt($mm2[0][0]); print "Mahalanobis entre a e b = ".$mahalanobis."\n\n"; return $mahalanobis; } sub mahalanobis_mean { my @m = @_; my @sum; for (my $i = 0; $i < scalar(@m); $i++) { for (my $j = 0; $j < scalar(@{$m[$i]}); $j++) { $sum[$j] += $m[$i][$j]; } } my @avg; for (my $j = 0; $j < scalar(@{$m[0]}); $j++) { $avg[$j] = $sum[$j] / scalar(@m); } my @m2; for (my $i = 0; $i < scalar(@m); $i++) { for (my $j = 0; $j < scalar(@{$m[$i]}); $j++) { $m2[$i][$j] = $m[$i][$j] - $avg[$j]; } } return @m2; } sub mahalanobis_mprint { my @m = @_; print "("; for (my $i = 0; $i < scalar(@m); $i++) { print "["; for (my $j = 0; $j < scalar(@{$m[$i]}); $j++) { print $m[$i][$j]; print "," if (! ($j == scalar(@{$m[$i]}) -1)); } print "]"; print "," if (! ($i == scalar(@m) - 1)); } print ")"; } sub mahalanobis_mcov { my @m = @_; my $cols = scalar(@{$m[0]}); my @m2; for (my $i = 0; $i < $cols; $i++) { for (my $j = 0; $j < $cols; $j++) { $m2[$i][$j] = &mahalanobis_mcov_cov(\@m,$i,$j); } } return @m2; } sub mahalanobis_mcov_cov { my ($ref_m,$c1,$c2) = @_; my @m = @{$ref_m}; my $sprod = 0; for (my $i = 0; $i < scalar(@m); $i++) { $sprod += ($m[$i][$c1] * $m[$i][$c2]); } my $cov = $sprod / scalar(@m); return $cov; } sub mahalanobis_mpooled { my ($ref_m1,$ref_m2,$s1,$s2) = @_; my @m1 = @{$ref_m1}; my @m2 = @{$ref_m2}; my @m3; for (my $i = 0; $i < scalar(@m1); $i++) { for (my $j = 0; $j < scalar(@m2); $j++) { $m3[$i][$j] = ($s1/($s1+$s2) * $m1[$i][$j]) + ($s2/($s1+$s2) * $m2[$i][$j]); } } return @m3; } sub mahalanobis_mpoolinv { my @m = @_; # append for (my $i = 0; $i < scalar(@m); $i++) { for (my $j = 0; $j < scalar(@m); $j++) { if ($i == $j) { $m[$i][$j + scalar(@m)] = 1; } else { $m[$i][$j + scalar(@m)] = 0; } } } # inverse for (my $i = 0; $i < scalar(@m); $i++) { my $p = 1 / $m[$i][$i]; for (my $j = $i; $j <= scalar(@m) + $i; $j++) { $m[$i][$j] *= $p; } for (my $i2 = 0; $i2 < scalar(@m); $i2++) { if ($i != $i2) { for (my $j = scalar(@m) + $i; $j >= $i; $j--) { $m[$i2][$j] += (($m[$i2][$i] * -1) * $m[$i][$j]); } } } } # result my @m2; for (my $i = 0; $i < scalar(@m); $i++) { for (my $j = scalar(@{$m[$i]}) / 2; $j < scalar(@{$m[$i]}); $j++) { $m2[$i][$j - scalar(@{$m[$i]}) / 2] = $m[$i][$j]; } } return @m2; } sub mahalanobis_meandiff { my ($ref_a,$ref_b) = @_; my @a = @{$ref_a}; my @b = @{$ref_b}; my @a_avg = &mahalanobis_meandiff_avg(@a); my @b_avg = &mahalanobis_meandiff_avg(@b); my @m; for (my $i = 0; $i < scalar(@a_avg); $i++) { $m[$i][0] = $a_avg[$i] - $b_avg[$i]; } return @m; } sub mahalanobis_meandiff_avg { my @m = @_; my @sum; for (my $i = 0; $i < scalar(@m); $i++) { for (my $j = 0; $j < scalar(@{$m[$i]}); $j++) { $sum[$j] += $m[$i][$j]; } } my @avg; for (my $j = 0; $j < scalar(@{$m[0]}); $j++) { $avg[$j] = $sum[$j] / scalar(@m); } return @avg; } sub mahalanobis_mtransp { my @m = @_; my @m2; for (my $i = 0; $i < scalar(@m); $i++) { for (my $j = 0; $j < scalar(@{$m[$i]}); $j++) { $m2[$j][$i] = $m[$i][$j]; } } return @m2; } sub mahalanobis_mprod { my ($ref_a,$ref_b) = @_; my @a = @{$ref_a}; my @b = @{$ref_b}; my @m; for (my $i = 0; $i < scalar(@a); $i++) { for (my $j = 0; $j < scalar(@{$b[0]}); $j++) { for (my $k = 0; $k < scalar(@b); $k++) { $m[$i][$j] += $a[$i][$k] * $b[$k][$j]; } } } return @m; } ####################################################################################