3.144.16.40@hermano.com.br:~$ ls ./estudos/medidas_de_distancia/mahalanobis
.. 00-leia.txt mahalanobis.txt
3.144.16.40@hermano.com.br:~$ cat ./estudos/medidas_de_distancia/mahalanobis/mahalanobis.txt
#!/usr/bin/perl
# Distancia de Mahalanobis
# Autor: Hermano Pereira
# www.hermano.com.br
# 14/07/2011
# Kardi Teknomo
# http://people.revoledu.com/kardi/tutorial/Similarity/MahalanobisDistance.html
# Group 1
my @g1 = ([2,2],[2,5],[6,5],[7,3],[4,7],[6,4],[5,3],[4,6],[2,5],[1,3]);
# Group 2
my @g2 = ([6,5],[7,4],[8,7],[5,6],[5,4]);
&mahalanobis(\@g1,\@g2);
####################################################################################
sub mahalanobis {
my ($ref_a,$ref_b) = @_;
my @a = @{$ref_a};
my @b = @{$ref_b};
print "\n\nValores de entrada para distancia de Mahalanobis:\n\n";
print "a = ";
&mahalanobis_mprint(@a);
print "\n\nb = ";
&mahalanobis_mprint(@b);
print "\n";
<STDIN>;
print "Subtraindo o valor de cada coluna com sua media aritmetica:\n\n";
print "a -> a2 = ";
my @a2 = &mahalanobis_mean(@a);
&mahalanobis_mprint(@a2);
print "\n\nb -> b2 = ";
my @b2 = &mahalanobis_mean(@b);
&mahalanobis_mprint(@b2);
print "\n";
<STDIN>;
print "Calculando a matriz de covariancia:\n\n";
print "a2 -> ac = ";
my @ac = &mahalanobis_mcov(@a2);
&mahalanobis_mprint(@ac);
print "\n\nb2 -> bc = ";
my @bc = &mahalanobis_mcov(@b2);
&mahalanobis_mprint(@bc);
print "\n";
<STDIN>;
print "Juncao (pooled) das matrizes de acordo com seus pesos:\n\n";
print "ac + bc -> mp = ";
my @mp = &mahalanobis_mpooled(\@ac,\@bc,scalar(@a),scalar(@b));
&mahalanobis_mprint(@mp);
print "\n";
<STDIN>;
print "Calcular matriz inversa:\n\n";
print "mp -> mpi = ";
my @mpi = &mahalanobis_mpoolinv(@mp);
&mahalanobis_mprint(@mpi);
print "\n";
<STDIN>;
print "Calcular diferenca da media aritmetica entre as matrizes:\n\n";
print "a <-> b -> mmd = ";
my @mmd = &mahalanobis_meandiff(\@a,\@b);
&mahalanobis_mprint(@mmd);
print "\n";
<STDIN>;
print "Transposta da matriz de diferencas de medias:\n\n";
print "mmd -> mmdt = ";
my @mmdt = &mahalanobis_mtransp(@mmd);
&mahalanobis_mprint(@mmdt);
print "\n";
<STDIN>;
print "Multiplicar matriz transposta da diferencas de medias com a matriz inversa (pooled):\n\n";
print "mmdt * mpi -> mm1 = ";
my @mm1 = &mahalanobis_mprod(\@mmdt,\@mpi);
&mahalanobis_mprint(@mm1);
print "\n";
<STDIN>;
print "Multiplicar matriz do resultado anterior com a matriz de diferenca de medias:\n\n";
print "mm1 * mmd -> mm2 = ";
my @mm2 = &mahalanobis_mprod(\@mm1,\@mmd);
&mahalanobis_mprint(@mm2);
print "\n";
<STDIN>;
print "E com a raiz quadrada do resultado anterior:\n\n";
my $mahalanobis = sqrt($mm2[0][0]);
print "Mahalanobis entre a e b = ".$mahalanobis."\n\n";
return $mahalanobis;
}
sub mahalanobis_mean {
my @m = @_;
my @sum;
for (my $i = 0; $i < scalar(@m); $i++) {
for (my $j = 0; $j < scalar(@{$m[$i]}); $j++) {
$sum[$j] += $m[$i][$j];
}
}
my @avg;
for (my $j = 0; $j < scalar(@{$m[0]}); $j++) {
$avg[$j] = $sum[$j] / scalar(@m);
}
my @m2;
for (my $i = 0; $i < scalar(@m); $i++) {
for (my $j = 0; $j < scalar(@{$m[$i]}); $j++) {
$m2[$i][$j] = $m[$i][$j] - $avg[$j];
}
}
return @m2;
}
sub mahalanobis_mprint {
my @m = @_;
print "(";
for (my $i = 0; $i < scalar(@m); $i++) {
print "[";
for (my $j = 0; $j < scalar(@{$m[$i]}); $j++) {
print $m[$i][$j];
print "," if (! ($j == scalar(@{$m[$i]}) -1));
}
print "]";
print "," if (! ($i == scalar(@m) - 1));
}
print ")";
}
sub mahalanobis_mcov {
my @m = @_;
my $cols = scalar(@{$m[0]});
my @m2;
for (my $i = 0; $i < $cols; $i++) {
for (my $j = 0; $j < $cols; $j++) {
$m2[$i][$j] = &mahalanobis_mcov_cov(\@m,$i,$j);
}
}
return @m2;
}
sub mahalanobis_mcov_cov {
my ($ref_m,$c1,$c2) = @_;
my @m = @{$ref_m};
my $sprod = 0;
for (my $i = 0; $i < scalar(@m); $i++) {
$sprod += ($m[$i][$c1] * $m[$i][$c2]);
}
my $cov = $sprod / scalar(@m);
return $cov;
}
sub mahalanobis_mpooled {
my ($ref_m1,$ref_m2,$s1,$s2) = @_;
my @m1 = @{$ref_m1};
my @m2 = @{$ref_m2};
my @m3;
for (my $i = 0; $i < scalar(@m1); $i++) {
for (my $j = 0; $j < scalar(@m2); $j++) {
$m3[$i][$j] = ($s1/($s1+$s2) * $m1[$i][$j]) + ($s2/($s1+$s2) * $m2[$i][$j]);
}
}
return @m3;
}
sub mahalanobis_mpoolinv {
my @m = @_;
# append
for (my $i = 0; $i < scalar(@m); $i++) {
for (my $j = 0; $j < scalar(@m); $j++) {
if ($i == $j) {
$m[$i][$j + scalar(@m)] = 1;
} else {
$m[$i][$j + scalar(@m)] = 0;
}
}
}
# inverse
for (my $i = 0; $i < scalar(@m); $i++) {
my $p = 1 / $m[$i][$i];
for (my $j = $i; $j <= scalar(@m) + $i; $j++) {
$m[$i][$j] *= $p;
}
for (my $i2 = 0; $i2 < scalar(@m); $i2++) {
if ($i != $i2) {
for (my $j = scalar(@m) + $i; $j >= $i; $j--) {
$m[$i2][$j] += (($m[$i2][$i] * -1) * $m[$i][$j]);
}
}
}
}
# result
my @m2;
for (my $i = 0; $i < scalar(@m); $i++) {
for (my $j = scalar(@{$m[$i]}) / 2; $j < scalar(@{$m[$i]}); $j++) {
$m2[$i][$j - scalar(@{$m[$i]}) / 2] = $m[$i][$j];
}
}
return @m2;
}
sub mahalanobis_meandiff {
my ($ref_a,$ref_b) = @_;
my @a = @{$ref_a};
my @b = @{$ref_b};
my @a_avg = &mahalanobis_meandiff_avg(@a);
my @b_avg = &mahalanobis_meandiff_avg(@b);
my @m;
for (my $i = 0; $i < scalar(@a_avg); $i++) {
$m[$i][0] = $a_avg[$i] - $b_avg[$i];
}
return @m;
}
sub mahalanobis_meandiff_avg {
my @m = @_;
my @sum;
for (my $i = 0; $i < scalar(@m); $i++) {
for (my $j = 0; $j < scalar(@{$m[$i]}); $j++) {
$sum[$j] += $m[$i][$j];
}
}
my @avg;
for (my $j = 0; $j < scalar(@{$m[0]}); $j++) {
$avg[$j] = $sum[$j] / scalar(@m);
}
return @avg;
}
sub mahalanobis_mtransp {
my @m = @_;
my @m2;
for (my $i = 0; $i < scalar(@m); $i++) {
for (my $j = 0; $j < scalar(@{$m[$i]}); $j++) {
$m2[$j][$i] = $m[$i][$j];
}
}
return @m2;
}
sub mahalanobis_mprod {
my ($ref_a,$ref_b) = @_;
my @a = @{$ref_a};
my @b = @{$ref_b};
my @m;
for (my $i = 0; $i < scalar(@a); $i++) {
for (my $j = 0; $j < scalar(@{$b[0]}); $j++) {
for (my $k = 0; $k < scalar(@b); $k++) {
$m[$i][$j] += $a[$i][$k] * $b[$k][$j];
}
}
}
return @m;
}
####################################################################################
3.144.16.40@hermano.com.br:~$ clear_