Skip to content

Commit

Permalink
releasing source code
Browse files Browse the repository at this point in the history
  • Loading branch information
bobular committed Jun 19, 2012
1 parent 11cb416 commit c3f1980
Show file tree
Hide file tree
Showing 13 changed files with 3,112 additions and 0 deletions.
576 changes: 576 additions & 0 deletions Algorithm.pm

Large diffs are not rendered by default.

430 changes: 430 additions & 0 deletions Grammar.pm

Large diffs are not rendered by default.

878 changes: 878 additions & 0 deletions Individual.pm

Large diffs are not rendered by default.

262 changes: 262 additions & 0 deletions PDL/Kohonen.pm
Original file line number Diff line number Diff line change
@@ -0,0 +1,262 @@
# -*- mode: cperl -*-
package PDL::Kohonen;

use PDL;
use PDL::IO::FastRaw;

@ISA = qw(PDL);

### TO DO: load+save methods (simply with fraw)


# data is always D x N
# where D is the dimensionality of the data
# and there are N data points
# maps are always D x W x H

sub initialize {
my $class = shift;
my $self = {
PDL => null, # used to store PDL object
};
bless $self, $class;
}

##
# init method
##
# usage: $map->init($data, 10, 6, 4);
# initialises a 10x6x4 map randomly based on the min/max
# of the data in $data piddle
# or: $map->init($perlarrayref, 8, 7)
# initialises a 8x7 map as above but uses a reference
# to a perl array of individual data point piddles
# [note: arrays of many piddles seem to take up memory]
##
sub init {
my ($self, $data, @mdims) = @_;
die "init() arguments: data, map_dimensions\n"
unless (defined $data && @mdims > 0);

if (ref($data) eq 'ARRAY') {
$data = cat(@$data);
}

my $max = $data->mv(-1,0)->maximum;
my $min = $data->mv(-1,0)->minimum;

my @ddims = $data->dims;
my $n = pop @ddims;
$self->{mapdims} = \@mdims; # map dimensions, e.g. 6 x 4
$self->{nmapdims} = scalar @mdims;
$self->{mapvolume} = ones(@mdims)->nelem;
$self->{datadims} = \@ddims; # data dimensions, e.g. 20 x 15
$self->{ndatadims} = scalar @ddims;
($self->{sdim}) = sort { $a<=>$b } @mdims; # smallest map dimension size
$self->{PDL} = random(@ddims, @mdims);
$self->{PDL} *= ($max-$min)+$min;
return $self;
}

sub train {
my ($self, $data, $p) = @_;

if (ref($data) eq 'ARRAY') {
$data = cat(@$data);
}

my $n = $data->dim(-1);
my $dta = $data->mv(-1, 0);

die "you must init() the map first\n" unless ($self->{mapdims});

my $alpha = defined $p->{alpha} ? $p->{alpha} : 0.1;
my $radius = defined $p->{radius} ? $p->{radius} : $self->{sdim}/2;

$p->{epochs} = 1 unless (defined $p->{epochs});
$p->{order} = 'random' unless (defined $p->{order}); # also: linear

$p->{ramp} = 'linear' unless (defined $p->{ramp}); # also: off

my $winnerfunc = $p->{winnerfunc} || \&euclidean_winner;
$p->{progress} = 'on' unless (defined $p->{progress});

my ($dalpha, $dradius) = (0, 0);
if ($p->{ramp} =~ /linear/i) {
$dalpha = $alpha/$p->{epochs};
$dradius = $radius/$p->{epochs};
}

my $progformat = $p->{progress} =~ /on/i ?
"\rradius %2d alpha %6.4f data %5d of %5d epoch %3d of %3d" : '';

my $ordertype = 0;
$ordertype = 1 if ($p->{order} =~ /linear/i);

if ($p->{progress} =~ /on/) {
printf "training map (%s) with %d data points (%s) for %d epochs...\n",
join("x", @{$self->{mapdims}}), $n,
join("x", @{$self->{datadims}}), $p->{epochs};
}
local $| = 1;

for (my $e=0; $e<$p->{epochs}; $e++) {
for (my $i=0; $i<$n; $i++) {
printf $progformat, $radius, $alpha, $i+1, $n, $e+1, $p->{epochs};
my $d = $ordertype ? $i : int(rand($n));

my $vec = $dta->slice("($d)");

my @w = $self->$winnerfunc($vec);

for (my $r=$radius; $r>=0; $r--) {
my $hood = $self->hood($r, @w);
$hood -= ($hood-$vec)*$alpha;
}
}
$alpha -= $dalpha;
$radius -= $dradius;
}
print "\n" if ($progformat);
}


# runs your data through a trained map
# returns two piddles
# - winning node coordinates (ushort M x N)
# - quantisation error (double N)
sub apply {
my ($self, $data, $p) = @_;
my $winnerfunc = $p->{winnerfunc} || \&euclidean_winner;
$p->{progress} = 'on' unless (defined $p->{progress});

if (ref($data) eq 'ARRAY') {
$data = cat(@$data);
}

my $n = $data->dim(-1);
my $dta = $data->mv(-1, 0);

my $mds = join("x", @{$self->{mapdims}});
my $progformat = $p->{progress} =~ /on/i ?
"\rapplying map ($mds) to data point %5d of %5d" : '';

local $| = 1;

my $winvecs = zeroes ushort, $n, $self->{nmapdims};
my $errors = zeroes $n;

my $error = 0;
for (my $i=0; $i<$n; $i++) {
printf $progformat, $i+1, $n;
my $vec = $dta->slice("($i)");
$winvecs->slice("($i)") .= ushort($self->$winnerfunc($vec, \$error));
set($errors, $i, $error);
}
print "\n" if ($progformat);

return ($winvecs->mv(0, -1), $errors);
}

sub euclidean_winner {
my ($self, $vec, $qref) = @_;

my $d = $vec - $self;
$d *= $d;
while ($d->ndims > $self->{nmapdims}) {
$d = $d->sumover();
}
my @d = $d->dims();
my ($i) = $d->flat->qsorti->list;

# pass the error back through a reference, if given
if ($qref && ref($qref)) {
$$qref = sqrt($d->flat->at($i));
}

return $self->unflattenindex($i);
}

sub unflattenindex {
my ($self, $i) = @_;
my @result;
my $volume = $self->{mapvolume};
foreach my $dim (reverse @{$self->{mapdims}}) {
$volume = $volume/$dim;
my $index = int($i/$volume);
unshift @result, $index;
$i -= $index*$volume;
}
return @result;
}

sub hood {
my ($self, $radius, @coords) = @_;
$radius = int($radius);

my $slice = ',' x ($self->{ndatadims}-1);

if ($radius == 0) {
return $self->slice(join ',', $slice, @coords);
}

for (my $i=0; $i<@coords; $i++) {
my ($left, $right) = ($coords[$i]-$radius, $coords[$i]+$radius);
$left = 0 if ($left < 0);
$right = $self->{mapdims}->[$i] - 1 if ($right >= $self->{mapdims}->[$i]);
$slice .= ",$left:$right";
}
return $self->slice($slice);
}

sub save {
my ($self, $filename) = @_;
$self->writefraw($filename);
open(HDR, ">>$filename.hdr") || die "can't append to $filename.hdr";
foreach $key (qw(nmapdims ndatadims sdim mapvolume)) {
print HDR "PDL::Kohonen $key $self->{$key}\n";
}
close(HDR);
}

sub load {
my ($self, $filename) = @_;
$self->{PDL} = readfraw($filename);

open(HDR, "$filename.hdr") || die "can't open $filename.hdr";
while (<HDR>) {
if (/^PDL::Kohonen/) {
chomp;
my ($dum, $key, $val) = split ' ', $_, 3;
$self->{$key} = $val;
}
}
close(HDR);

die "couldn't find header information while loading map\n"
unless ($self->{nmapdims} && $self->{ndatadims} && $self->{sdim});

my @dims = $self->dims;
my @mdims = splice @dims, -$self->{nmapdims};
$self->{mapdims} = \@mdims;
$self->{datadims} = \@dims;
}



sub quantiseOLDCODE {
my ($map, $data, $distfunc) = @_;
$map = $map->clump(1,2);
my $mapsize = $map->dim(1);
my $dupdata = $data->dummy(1, $mapsize);
my $d = $dupdata - $map;
$d *= $d;
$d = $d->sumover();
my $i = $d->qsorti->slice("(0),:"); # get the map index of smallest dists
for (my $dim = 0; $dim<$map->dim(0); $dim++) {
$data->slice("($dim)") .= $map->slice("($dim)")->index($i);
}
}


1;
117 changes: 117 additions & 0 deletions PDL/ReadAudioSoundFile.pm
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
package PDL::ReadAudioSoundFile;

use PDL;
use PDL::Audio;

#### these are not needed any more - see _OLD subs below
#use Audio::SoundFile;
#use Audio::SoundFile::Header;

#no longer exported due to strange AUTOLOAD problem in perl 5.8.8
#require Exporter;
#@EXPORT = qw(readaudiosoundfile writeaudiosoundfile);

$BUFSIZE = 8**7;

sub readaudiosoundfile {
my ($file) = @_;

$^W = 0;
my $pdl = raudio($file);
$^W = 1;

# raudio reads stereo files the other way round
if ($pdl->ndims == 2) {
my $hdr = $pdl->gethdr;
$pdl = $pdl->mv(-1,0)->sever;
$pdl->sethdr($hdr);
}
return $pdl;
}

## default WAV file output only
sub writeaudiosoundfile {
my ($pdl, $file) = @_;
$pdl = $pdl->scale2short unless ($pdl->type == short);
$^W = 0;
$pdl->mv(-1,0)->waudio( path => $file, filetype => FILE_RIFF,
format => FORMAT_16_LINEAR_LITTLE_ENDIAN );
$^W = 1;
}


sub readaudiosoundfile_OLD {
my ($file, $debug) = @_;

my $header;
my $reader = new Audio::SoundFile::Reader($file, \$header);
my $channels = $header->{channels};
my $samples = $header->{samples};
my $samplerate = $header->{samplerate};
if ($debug) {
foreach $key (keys %$header) {
warn "$key => $header->{$key}\n";
}
}
my %hdr = (path=>$file,
rate=>$samplerate);

my $pdl;
my $remaining = $samples*$channels;
my ($buf, $got);
while (($got = $reader->bread_pdl(\$buf, $remaining > $BUFSIZE ? $BUFSIZE : $remaining)) > 0) {
$remaining -= $got;
if (defined $pdl) {
$pdl = $pdl->append($buf);
$pdl->sever;
} else {
$pdl = $buf->copy();
}
}
$reader->close();
# if stereo, the piddle from bread_pdl has the two channels
# 'interleaved' and these need to be separated.
if ($channels == 2) {
my $left = $pdl->slice("0:-1:2");
my $right = $pdl->slice("1:-1:2");
my $both = cat $left, $right;
$both->sethdr(\%hdr);
return $both;
} elsif ($channels == 1) {
$pdl->sethdr(\%hdr);
return $pdl;
} else {
die "can't handle $channels channels";
}
}


sub writeaudiosoundfile_OLD {
my ($pdl, $file) = @_;

my $header = new Audio::SoundFile::Header(
samplerate => $pdl->rate() || 44100,
channels => $pdl->dim(1),
pcmbitwidth => 16,
format => SF_FORMAT_WAV | SF_FORMAT_PCM,
);

my $writer = new Audio::SoundFile::Writer($file, $header);

$pdl = $pdl->scale2short unless ($pdl->type == short);

# fold stereo sample into a single piddle
if ($pdl->dim(1) == 2) {
$pdl = $pdl->xchg(0,1)->flat;
}

for (my $i=0; $i<$pdl->dim(0); $i+=$BUFSIZE) {
my $end = $i+$BUFSIZE-1;
$end = -1 if ($end >= $pdl->dim(0));

my $buf = $pdl->slice("$i:$end")->sever;
my $wrote = $writer->bwrite_pdl($buf);
}
$writer->close;
}
1;
Loading

0 comments on commit c3f1980

Please sign in to comment.