From 59259bfb998f099681613e7367aafb9f8cd6adfd Mon Sep 17 00:00:00 2001
From: drowe67 <david@rowetel.com>
Date: Fri, 3 Sep 2021 18:23:42 +0930
Subject: [PATCH 01/20] first pass at Octave and C implementations of the
 Psuedo-Gray binary switching algorithms

---
 misc/CMakeLists.txt       |   3 +
 misc/vq_binary_switch.c   | 222 ++++++++++++++++++++++++++++++++++++++
 octave/vq_binary_switch.m | 158 +++++++++++++++++++++++++++
 3 files changed, 383 insertions(+)
 create mode 100644 misc/vq_binary_switch.c
 create mode 100644 octave/vq_binary_switch.m

diff --git a/misc/CMakeLists.txt b/misc/CMakeLists.txt
index 4db39a80f..55728b427 100644
--- a/misc/CMakeLists.txt
+++ b/misc/CMakeLists.txt
@@ -28,5 +28,8 @@ target_link_libraries(timpulse m)
 add_executable(vq_mbest vq_mbest.c)
 target_link_libraries(vq_mbest codec2)
 
+add_executable(vq_binary_switch vq_binary_switch.c)
+target_link_libraries(vq_binary_switch m)
+
 add_executable(pre pre.c)
 target_link_libraries(pre codec2)
diff --git a/misc/vq_binary_switch.c b/misc/vq_binary_switch.c
new file mode 100644
index 000000000..6b3e138e1
--- /dev/null
+++ b/misc/vq_binary_switch.c
@@ -0,0 +1,222 @@
+/*
+  vq_binary_switch.c
+  David Rowe Dec 2021
+
+  C implementation of [1], that re-arranges VQ indexes so they are robust to single
+  bit errors.
+
+  [1] Psuedo Gray Coding, Zeger & Gersho 1990 
+*/
+
+#include <assert.h>
+#include <getopt.h>
+#include <math.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <limits.h>
+#include "mbest.h"
+
+#define MAX_DIM     20
+#define MAX_ENTRIES 4096
+
+// equation (33) of [1], total cost of all hamming distance 1 vectors of vq index k
+float cost_of_distance_one(float *vq, int n, int dim, float *prob, int k, int verbose) {
+  int log2N = log2(n);
+  float c = 0.0;
+  for (int b=0; b<log2N; b++) {
+    unsigned int index_neighbour = k ^ (1<<b);
+    float dist = 0.0;
+    for(int i=0; i<dim; i++)
+      dist += pow(vq[k*dim+i] - vq[index_neighbour*dim+i], 2.0);
+    c += prob[k]*dist;
+    if (verbose)
+      printf("k: %d b: %d index_neighbour: %d dist: %f prob: %f c: %f \n", k, b, index_neighbour, dist, prob[k], c);
+  }
+  return c;
+}
+
+// equation (39) of [1]
+float distortion_of_current_mapping(float *vq, int n, int dim, float *prob) {
+  float d = 0.0;
+  for(int k=0; k<n; k++)
+    d += cost_of_distance_one(vq, n, dim, prob, k, 0);
+  return d;
+}
+ 
+// we sort the cost array c[], returning the indexes of sorted elements
+float c[MAX_ENTRIES];
+
+/* Note how the compare function compares the values of the
+ * array to be sorted. The passed value to this function
+ * by `qsort' are actually the `idx' array elements.
+ */
+int compare_increase (const void * a, const void * b) {
+    int aa = *((int *) a), bb = *((int *) b);
+    if (c[aa] < c[bb]) {
+        return 1;
+    } else if (c[aa] == c[bb]) {
+        return 0;
+    } else {
+        return -1;
+     }
+}
+
+void sort_c(int *idx, const size_t n) {
+    for (size_t i=0; i<n; i++) idx[i] = i;
+    qsort(idx, n, sizeof(int), compare_increase);
+}
+
+void swap(float *vq, int dim, int index1, int index2) {
+  float tmp[dim];
+  for(int i=0; i<dim; i++) tmp[i] = vq[index1*dim+i];
+  for(int i=0; i<dim; i++) vq[index1*dim+i] = vq[index2*dim+i];
+  for(int i=0; i<dim; i++) vq[index2*dim+i] = tmp[i];
+}
+
+int main(int argc, char *argv[]) {
+    float vq[MAX_DIM*MAX_ENTRIES];
+    int   dim = MAX_DIM;
+    int   max_iter = 200;
+    int   st = -1;
+    int   en = -1;
+    int   verbose = 0;
+    
+    int o = 0; int opt_idx = 0;
+    while (o != -1) {
+       static struct option long_opts[] = {
+           {"st",      required_argument, 0, 't'},
+           {"en",      required_argument, 0, 'e'},
+	   {0, 0, 0, 0}
+        };
+        o = getopt_long(argc,argv,"hd:m:vt:e:",long_opts,&opt_idx);
+        switch (o) {
+	case 'd':
+	    dim = atoi(optarg);
+	    assert(dim <= MAX_DIM);
+	    break; 
+        case 'm':
+            max_iter = atoi(optarg);
+            break;
+        case 't':
+            st = atoi(optarg);
+            break;
+        case 'e':
+            en = atoi(optarg);
+            break;
+        case 'v':
+            verbose = 1;
+            break;
+	help:
+	    fprintf(stderr, "\n");
+            fprintf(stderr, "usage: %s -d dimension [-m max_iterations -v --st Kst --en Ken] vq_in.f32 vq_out.f32\n", argv[0]);
+	    fprintf(stderr, "\n");
+            fprintf(stderr, "--st Kst    Start vector element for error calculation (default 0)\n");
+            fprintf(stderr, "--en Ken    End vector element for error calculation (default K-1)\n");
+            fprintf(stderr, "-v          verbose\n");
+            exit(1);
+        }
+    }
+
+    int dx = optind;
+    if ((argc - dx) < 2) {
+        fprintf(stderr, "Too few arguments\n");
+	goto help;
+    }
+    if (dim == 0) goto help;
+
+    /* default to measuring error on entire vector */
+    if (st == -1) st = 0; 
+    if (en == -1) en = dim-1;
+
+    /* load VQ quantiser file --------------------*/
+
+    fprintf(stderr, "loading %s ... ", argv[dx]);
+    FILE *fq=fopen(argv[dx], "rb");
+    if (fq == NULL) {
+      fprintf(stderr, "Couldn't open: %s\n", argv[dx]);
+      exit(1);
+    }
+
+    /* count how many entries m of dimension k are in this VQ file */
+    int n = 0;
+    float dummy[dim];
+    while (fread(dummy, sizeof(float), dim, fq) == (size_t)dim)
+      n++;
+    assert(n <= MAX_ENTRIES);
+    fprintf(stderr, "%d entries of vectors width %d\n", n, dim);
+
+    /* now load VQ into memory */
+    rewind(fq);                       
+    int rd = fread(vq, sizeof(float), n*dim, fq);
+    assert(rd == n*dim);
+    fclose(fq);
+
+    /* set probability of each vector to 1.0 for now */
+    float prob[n];
+    for(int l=0; l<n; l++) prob[l] = 1.0;
+    
+    int iteration = 0;
+    int i = 0;
+    int finished = 0;
+    int switches = 0;
+    float distortion0 = distortion_of_current_mapping(vq, n, dim, prob);
+    fprintf(stderr, "distortion0: %f\n", distortion0);
+    
+    while(!finished) {
+
+      // generate a list A(i) of which vectors have the largest cost of bit errors
+      for(int k=0; k<n; k++) {
+	c[k] = cost_of_distance_one(vq, n, dim, prob, k, 0);
+	//fprintf(stderr, "c[%d] = %f\n", k, c[k]);
+      }
+      
+      int A[n];
+      sort_c(A, n);
+      for(int k=0; k<n; k++) {
+	//fprintf(stderr, "A[%d] = %d\n", k, A[k]);
+      }
+      
+      // Try switching each vector with A(i)
+      float best_delta = 0; int best_j = 0;
+      for(int j=1; j<n; j++) {
+	// we can't switch with ourself
+	if (j != A[i]) {
+	  float distortion1 = distortion_of_current_mapping(vq, n, dim, prob);
+	  // switch vq entries A(i) and j
+	  swap(vq, dim, A[i], j);
+	  float distortion2 = distortion_of_current_mapping(vq, n, dim, prob);
+	  float delta = distortion2 - distortion1;
+	  if (delta < 0.0) {
+	    if (fabs(delta) > best_delta) {
+	      best_delta = fabs(delta);
+	      best_j = j;
+	    }
+	  }
+	  // unswitch
+	  swap(vq, dim, A[i], j);
+	}
+      } //next j
+
+      // printf("best_delta: %f best_j: %d\n", best_delta, best_j);
+      if (best_delta == 0.0) {
+	// Hmm, no improvement, lets try the next vector in the sorted cost list
+	if (i == n-1) finished = 1; else i++;
+      } else {
+	// OK keep the switch that minimised the distortion
+	swap(vq, dim, A[i], best_j);
+	switches++;
+
+	// set up for next iteration
+	iteration++;
+	float distortion = distortion_of_current_mapping(vq, n, dim, prob);
+	printf("it: %3d dist: %f %3.2f i: %3d sw: %3d\n", iteration, distortion,
+	       distortion/distortion0, i, switches);
+	if (iteration >= max_iter) finished = 1;
+	i = 0;
+      }
+    }
+    
+    return 0;
+}
+
diff --git a/octave/vq_binary_switch.m b/octave/vq_binary_switch.m
new file mode 100644
index 000000000..ad57e30ec
--- /dev/null
+++ b/octave/vq_binary_switch.m
@@ -0,0 +1,158 @@
+% vq_binary_switch.m
+% David Rowe Sep 2021
+%
+% Experiments in making VQs robust to bit errors, this is an Octave
+% implementation of [1].
+%
+% [1] Psuedo Gray Coding, Zeger & Gersho 1990
+
+1;
+
+% equation (33) of [1], for hamming distance 1
+function c = cost_of_distance_one(vq, prob, k, verbose=0)
+  [N K] = size(vq);
+  log2N = log2(N);
+  c = 0;
+  for b=0:log2N-1
+    index_neighbour = bitxor(k-1,2.^b) + 1;
+    diff = vq(k,:) - vq(index_neighbour, :);
+    dist = sum(diff*diff');
+    c += prob(k)*dist;
+    if verbose
+      printf("k: %d b: %d index_neighbour: %d dist: %f prob: %f c: %f \n", k, b, index_neighbour, dist, prob(k), c);
+    end
+  end
+endfunction
+
+% equation (39) of [1]
+function d = distortion_of_current_mapping(vq, prob)
+  [N K] = size(vq);
+  
+  d = 0;
+  for k=1:N
+    d += cost_of_distance_one(vq, prob, k);
+  end
+endfunction
+
+function [vq distortion] = binary_switching(vq, prob, max_iteration)
+  [N K] = size(vq);
+  iteration = 0;
+  i = 1;
+  finished = 0;
+  switches = 0;
+  distortion0 = distortion_of_current_mapping(vq, prob)
+  
+  while !finished
+
+    % generate a list A(i) of which vectors have the largest cost of bit errors
+    c = zeros(1,N);
+    for k=1:N
+      c(k) = cost_of_distance_one(vq, prob, k);
+    end
+    [tmp A] = sort(c,"descend");
+    
+    % Try switching each vector with A(i)
+    best_delta = 0;
+    for j=2:N
+      % we can't switch with ourself
+      if j != A(i)
+        distortion1 = distortion_of_current_mapping(vq, prob);
+        % switch vq entries A(i) and j
+	tmp = vq(A(i),:);
+	vq(A(i),:) = vq(j,:);
+	vq(j,:) = tmp;
+	
+	distortion2 = distortion_of_current_mapping(vq, prob);
+        delta = distortion2 - distortion1;
+	if delta < 0
+	  if abs(delta) > best_delta
+	    best_delta = abs(delta);
+	    best_j = j;
+	  end
+	end
+	% unswitch
+	tmp = vq(A(i),:);
+	vq(A(i),:) = vq(j,:);
+	vq(j,:) = tmp;
+      end
+    end % next j
+
+    % printf("best_delta: %f best_j: %d\n", best_delta, best_j);
+    if best_delta == 0
+      % Hmm, no improvement, lets try the next vector in the sorted cost list
+      if i == N
+        finished = 1;
+      else
+        i++;
+      end
+    else
+      % OK keep the switch that minimised the distortion
+ 
+      tmp = vq(A(i),:);
+      vq(A(i),:) = vq(best_j,:);
+      vq(best_j,:) = tmp;
+      switches++;
+
+      % set up for next iteration
+      iteration++;
+      distortion = distortion_of_current_mapping(vq, prob);
+      printf("it: %3d dist: %f %3.2f i: %3d sw: %3d\n", iteration, distortion,
+             distortion/distortion0, i, switches);
+      if iteration >= max_iteration, finished = 1, end
+      i = 1;
+     end
+
+  end
+
+endfunction
+
+function test_binary_switch
+  vq1 = [1 1; -1 1; -1 -1; 1 -1];
+  %f=fopen("vq1.f32","wb"); fwrite(f, vq1, 'float32'); fclose(f);
+  [vq2 distortion] = binary_switching(vq1, ones(1,4), 10);
+  % algorithm should put hamming distance 1 neighbours in adjacent quadrants
+  distance_to_closest_neighbours = 2;
+  % there are two hamming distance 1 neighbours
+  target_distortion = 2^2*distance_to_closest_neighbours*length(vq1);
+  assert(target_distortion == distortion);
+  printf("test_binary_switch OK!\n");
+endfunction
+
+% return indexes of hamming distance one vectors
+function ind = neighbour_indexes(vq, k)
+  [N K] = size(vq);
+  log2N = log2(N);
+  ind = [];
+  for b=0:log2N-1
+    index_neighbour = bitxor(k-1,2.^b) + 1;
+    ind = [ind index_neighbour];
+  end
+endfunction
+
+%test_binary_switch;
+
+randn('seed',1);
+N=16;     % Number of VQ codebook vectors
+K=2;      % Vector length
+
+Ntrain=10000;
+
+training_data = randn(Ntrain,K);
+[idx vq1] = kmeans(training_data, N);
+[vq2 distortion] = binary_switching(vq1, [1 ones(1,N-1)], 1000);
+
+figure(1); clf; plot(training_data(:,1), training_data(:,2),'+');
+hold on;
+plot(vq1(:,1), vq1(:,2),'og','linewidth', 2);
+plot(vq2(:,1), vq2(:,2),'or','linewidth', 2);
+
+% plot hamming distance 1 neighbours
+k = 1;
+ind = neighbour_indexes(vq2, k);
+for i=1:length(ind)
+  plot([vq2(k,1) vq2(ind(i),1)],[vq2(k,2) vq2(ind(i),2)],'r-','linewidth', 2);
+end
+
+hold off;
+
+

From e1a0599538444dbbbd0bc1d8b00128f2af460f81 Mon Sep 17 00:00:00 2001
From: drowe67 <david@rowetel.com>
Date: Fri, 3 Sep 2021 19:07:18 +0930
Subject: [PATCH 02/20] added a few options

---
 misc/vq_binary_switch.c | 47 +++++++++++++++++++++++------------------
 1 file changed, 27 insertions(+), 20 deletions(-)

diff --git a/misc/vq_binary_switch.c b/misc/vq_binary_switch.c
index 6b3e138e1..bd1bfe6ef 100644
--- a/misc/vq_binary_switch.c
+++ b/misc/vq_binary_switch.c
@@ -21,13 +21,13 @@
 #define MAX_ENTRIES 4096
 
 // equation (33) of [1], total cost of all hamming distance 1 vectors of vq index k
-float cost_of_distance_one(float *vq, int n, int dim, float *prob, int k, int verbose) {
+float cost_of_distance_one(float *vq, int n, int dim, float *prob, int k, int st, int en, int verbose) {
   int log2N = log2(n);
   float c = 0.0;
   for (int b=0; b<log2N; b++) {
     unsigned int index_neighbour = k ^ (1<<b);
     float dist = 0.0;
-    for(int i=0; i<dim; i++)
+    for(int i=st; i<=en; i++)
       dist += pow(vq[k*dim+i] - vq[index_neighbour*dim+i], 2.0);
     c += prob[k]*dist;
     if (verbose)
@@ -37,10 +37,10 @@ float cost_of_distance_one(float *vq, int n, int dim, float *prob, int k, int ve
 }
 
 // equation (39) of [1]
-float distortion_of_current_mapping(float *vq, int n, int dim, float *prob) {
+float distortion_of_current_mapping(float *vq, int n, int dim, float *prob, int st, int en) {
   float d = 0.0;
   for(int k=0; k<n; k++)
-    d += cost_of_distance_one(vq, n, dim, prob, k, 0);
+    d += cost_of_distance_one(vq, n, dim, prob, k, st, en, 0);
   return d;
 }
  
@@ -81,6 +81,7 @@ int main(int argc, char *argv[]) {
     int   st = -1;
     int   en = -1;
     int   verbose = 0;
+    int   n = 0;
     
     int o = 0; int opt_idx = 0;
     while (o != -1) {
@@ -89,7 +90,7 @@ int main(int argc, char *argv[]) {
            {"en",      required_argument, 0, 'e'},
 	   {0, 0, 0, 0}
         };
-        o = getopt_long(argc,argv,"hd:m:vt:e:",long_opts,&opt_idx);
+        o = getopt_long(argc,argv,"hd:m:vt:e:n:",long_opts,&opt_idx);
         switch (o) {
 	case 'd':
 	    dim = atoi(optarg);
@@ -104,13 +105,17 @@ int main(int argc, char *argv[]) {
         case 'e':
             en = atoi(optarg);
             break;
+        case 'n':
+            n = atoi(optarg);
+            break;
         case 'v':
             verbose = 1;
             break;
 	help:
 	    fprintf(stderr, "\n");
-            fprintf(stderr, "usage: %s -d dimension [-m max_iterations -v --st Kst --en Ken] vq_in.f32 vq_out.f32\n", argv[0]);
+            fprintf(stderr, "usage: %s -d dimension [-m max_iterations -v --st Kst --en Ken -n nVQ] vq_in.f32 vq_out.f32\n", argv[0]);
 	    fprintf(stderr, "\n");
+            fprintf(stderr, "-n nVQ      Run with just the first nVQ entries of the VQ\n");
             fprintf(stderr, "--st Kst    Start vector element for error calculation (default 0)\n");
             fprintf(stderr, "--en Ken    End vector element for error calculation (default K-1)\n");
             fprintf(stderr, "-v          verbose\n");
@@ -138,16 +143,18 @@ int main(int argc, char *argv[]) {
       exit(1);
     }
 
-    /* count how many entries m of dimension k are in this VQ file */
-    int n = 0;
-    float dummy[dim];
-    while (fread(dummy, sizeof(float), dim, fq) == (size_t)dim)
-      n++;
-    assert(n <= MAX_ENTRIES);
-    fprintf(stderr, "%d entries of vectors width %d\n", n, dim);
+    if (n) {
+      /* count how many entries m of dimension k are in this VQ file */
+      int n = 0;
+      float dummy[dim];
+      while (fread(dummy, sizeof(float), dim, fq) == (size_t)dim)
+	n++;
+      assert(n <= MAX_ENTRIES);
+      fprintf(stderr, "%d entries of vectors width %d\n", n, dim);
 
-    /* now load VQ into memory */
-    rewind(fq);                       
+      rewind(fq);
+    }
+    /* load VQ into memory */
     int rd = fread(vq, sizeof(float), n*dim, fq);
     assert(rd == n*dim);
     fclose(fq);
@@ -160,14 +167,14 @@ int main(int argc, char *argv[]) {
     int i = 0;
     int finished = 0;
     int switches = 0;
-    float distortion0 = distortion_of_current_mapping(vq, n, dim, prob);
+    float distortion0 = distortion_of_current_mapping(vq, n, dim, prob, st, en);
     fprintf(stderr, "distortion0: %f\n", distortion0);
     
     while(!finished) {
 
       // generate a list A(i) of which vectors have the largest cost of bit errors
       for(int k=0; k<n; k++) {
-	c[k] = cost_of_distance_one(vq, n, dim, prob, k, 0);
+	c[k] = cost_of_distance_one(vq, n, dim, prob, k, st, en, verbose);
 	//fprintf(stderr, "c[%d] = %f\n", k, c[k]);
       }
       
@@ -182,10 +189,10 @@ int main(int argc, char *argv[]) {
       for(int j=1; j<n; j++) {
 	// we can't switch with ourself
 	if (j != A[i]) {
-	  float distortion1 = distortion_of_current_mapping(vq, n, dim, prob);
+	  float distortion1 = distortion_of_current_mapping(vq, n, dim, prob, st, en);
 	  // switch vq entries A(i) and j
 	  swap(vq, dim, A[i], j);
-	  float distortion2 = distortion_of_current_mapping(vq, n, dim, prob);
+	  float distortion2 = distortion_of_current_mapping(vq, n, dim, prob, st, en);
 	  float delta = distortion2 - distortion1;
 	  if (delta < 0.0) {
 	    if (fabs(delta) > best_delta) {
@@ -209,7 +216,7 @@ int main(int argc, char *argv[]) {
 
 	// set up for next iteration
 	iteration++;
-	float distortion = distortion_of_current_mapping(vq, n, dim, prob);
+	float distortion = distortion_of_current_mapping(vq, n, dim, prob, st, en);
 	printf("it: %3d dist: %f %3.2f i: %3d sw: %3d\n", iteration, distortion,
 	       distortion/distortion0, i, switches);
 	if (iteration >= max_iter) finished = 1;

From 3313c17b289830013263238f78bfb5d5815f52aa Mon Sep 17 00:00:00 2001
From: David <david@rowetel.com>
Date: Sat, 4 Sep 2021 07:52:46 +0930
Subject: [PATCH 03/20] saving output VQ

---
 misc/vq_binary_switch.c | 33 +++++++++++++++++++--------------
 1 file changed, 19 insertions(+), 14 deletions(-)

diff --git a/misc/vq_binary_switch.c b/misc/vq_binary_switch.c
index bd1bfe6ef..34adde0f2 100644
--- a/misc/vq_binary_switch.c
+++ b/misc/vq_binary_switch.c
@@ -143,9 +143,8 @@ int main(int argc, char *argv[]) {
       exit(1);
     }
 
-    if (n) {
+    if (n==0) {
       /* count how many entries m of dimension k are in this VQ file */
-      int n = 0;
       float dummy[dim];
       while (fread(dummy, sizeof(float), dim, fq) == (size_t)dim)
 	n++;
@@ -154,11 +153,12 @@ int main(int argc, char *argv[]) {
 
       rewind(fq);
     }
+
     /* load VQ into memory */
-    int rd = fread(vq, sizeof(float), n*dim, fq);
-    assert(rd == n*dim);
+    int nrd = fread(vq, sizeof(float), n*dim, fq);
+    assert(nrd == n*dim);
     fclose(fq);
-
+   
     /* set probability of each vector to 1.0 for now */
     float prob[n];
     for(int l=0; l<n; l++) prob[l] = 1.0;
@@ -169,20 +169,15 @@ int main(int argc, char *argv[]) {
     int switches = 0;
     float distortion0 = distortion_of_current_mapping(vq, n, dim, prob, st, en);
     fprintf(stderr, "distortion0: %f\n", distortion0);
-    
+
     while(!finished) {
 
       // generate a list A(i) of which vectors have the largest cost of bit errors
       for(int k=0; k<n; k++) {
 	c[k] = cost_of_distance_one(vq, n, dim, prob, k, st, en, verbose);
-	//fprintf(stderr, "c[%d] = %f\n", k, c[k]);
-      }
-      
+      }      
       int A[n];
       sort_c(A, n);
-      for(int k=0; k<n; k++) {
-	//fprintf(stderr, "A[%d] = %d\n", k, A[k]);
-      }
       
       // Try switching each vector with A(i)
       float best_delta = 0; int best_j = 0;
@@ -214,11 +209,21 @@ int main(int argc, char *argv[]) {
 	swap(vq, dim, A[i], best_j);
 	switches++;
 
+	// save results
+	FILE *fq=fopen(argv[dx+1], "wb");
+	if (fq == NULL) {
+	    fprintf(stderr, "Couldn't open: %s\n", argv[dx+1]);
+	    exit(1);
+	}
+	int nwr = fwrite(vq, sizeof(float), n*dim, fq);
+	assert(nwr == n*dim);
+	fclose(fq);
+	
 	// set up for next iteration
 	iteration++;
 	float distortion = distortion_of_current_mapping(vq, n, dim, prob, st, en);
-	printf("it: %3d dist: %f %3.2f i: %3d sw: %3d\n", iteration, distortion,
-	       distortion/distortion0, i, switches);
+	fprintf(stderr, "it: %3d dist: %f %3.2f i: %3d sw: %3d\n", iteration, distortion,
+	                distortion/distortion0, i, switches);
 	if (iteration >= max_iter) finished = 1;
 	i = 0;
       }

From 23a4d7b50f9de5705f1b510192288e8c745aa765 Mon Sep 17 00:00:00 2001
From: drowe67 <david@rowetel.com>
Date: Sat, 4 Sep 2021 10:39:19 +0930
Subject: [PATCH 04/20] prototyped more efficient version in Octave

---
 octave/vq_binary_switch.m | 125 ++++++++++++++++++++++++++------------
 1 file changed, 86 insertions(+), 39 deletions(-)

diff --git a/octave/vq_binary_switch.m b/octave/vq_binary_switch.m
index ad57e30ec..575c8ace1 100644
--- a/octave/vq_binary_switch.m
+++ b/octave/vq_binary_switch.m
@@ -8,6 +8,16 @@
 
 1;
 
+% returns indexes of hamming distance 1 neighbours
+function index_neighbours = distance_one_neighbours(N,k)
+  log2N = log2(N);
+  index_neighbours = [];
+  for b=0:log2N-1
+    index_neighbour = bitxor(k-1,2.^b) + 1;
+    index_neighbours = [index_neighbours index_neighbour];
+  end
+end
+
 % equation (33) of [1], for hamming distance 1
 function c = cost_of_distance_one(vq, prob, k, verbose=0)
   [N K] = size(vq);
@@ -25,16 +35,20 @@
 endfunction
 
 % equation (39) of [1]
-function d = distortion_of_current_mapping(vq, prob)
+function d = distortion_of_current_mapping(vq, prob, verbose=0)
   [N K] = size(vq);
   
   d = 0;
   for k=1:N
-    d += cost_of_distance_one(vq, prob, k);
+    c = cost_of_distance_one(vq, prob, k);
+    d += c;
+    if verbose
+      printf("k: %2d c: %f d: %f\n", k, c, d);
+    end
   end
 endfunction
 
-function [vq distortion] = binary_switching(vq, prob, max_iteration)
+function [vq distortion] = binary_switching(vq, prob, max_iteration, fast_en=1)
   [N K] = size(vq);
   iteration = 0;
   i = 1;
@@ -56,20 +70,40 @@
     for j=2:N
       % we can't switch with ourself
       if j != A(i)
-        distortion1 = distortion_of_current_mapping(vq, prob);
+        if fast_en
+          delta = -cost_of_distance_one(vq, prob, A(i)) - cost_of_distance_one(vq, prob, j);
+	  n1 = [distance_one_neighbours(N,A(i)) distance_one_neighbours(N,j)];
+	  n1(n1 == A(i)) = [];
+	  n1(n1 == j) = [];
+	  for l=1:length(n1)
+	    delta -= cost_of_distance_one(vq, prob, n1(l));
+	  end
+        else
+	  distortion1 = distortion_of_current_mapping(vq, prob);
+	end
+	
         % switch vq entries A(i) and j
 	tmp = vq(A(i),:);
 	vq(A(i),:) = vq(j,:);
 	vq(j,:) = tmp;
+		
+        if fast_en
+	  delta += cost_of_distance_one(vq, prob, A(i)) + cost_of_distance_one(vq, prob, j);
+	  for l=1:length(n1)
+	    delta += cost_of_distance_one(vq, prob, n1(l));
+	  end
+	else
+	  distortion2 = distortion_of_current_mapping(vq, prob);
+          delta = distortion2 - distortion1;
+	end
 	
-	distortion2 = distortion_of_current_mapping(vq, prob);
-        delta = distortion2 - distortion1;
 	if delta < 0
 	  if abs(delta) > best_delta
 	    best_delta = abs(delta);
 	    best_j = j;
 	  end
 	end
+	
 	% unswitch
 	tmp = vq(A(i),:);
 	vq(A(i),:) = vq(j,:);
@@ -106,6 +140,17 @@
 
 endfunction
 
+% return indexes of hamming distance one vectors
+function ind = neighbour_indexes(vq, k)
+  [N K] = size(vq);
+  log2N = log2(N);
+  ind = [];
+  for b=0:log2N-1
+    index_neighbour = bitxor(k-1,2.^b) + 1;
+    ind = [ind index_neighbour];
+  end
+endfunction
+
 function test_binary_switch
   vq1 = [1 1; -1 1; -1 -1; 1 -1];
   %f=fopen("vq1.f32","wb"); fwrite(f, vq1, 'float32'); fclose(f);
@@ -118,41 +163,43 @@
   printf("test_binary_switch OK!\n");
 endfunction
 
-% return indexes of hamming distance one vectors
-function ind = neighbour_indexes(vq, k)
-  [N K] = size(vq);
-  log2N = log2(N);
-  ind = [];
-  for b=0:log2N-1
-    index_neighbour = bitxor(k-1,2.^b) + 1;
-    ind = [ind index_neighbour];
+function test_fast
+  N=16;     % Number of VQ codebook vectors
+  K=2;      % Vector length
+  Ntrain=10000;
+
+  training_data = randn(Ntrain,K);
+  [idx vq1] = kmeans(training_data, N);
+  [vq2 distortion] = binary_switching(vq1, [1 ones(1,N-1)], 1000, fast_en = 0);
+  [vq3 distortion] = binary_switching(vq1, [1 ones(1,N-1)], 1000, fast_en = 1);
+  assert(vq2 == vq3);  
+  printf("test_fast OK!\n");
+endfunction
+
+function demo
+  N=16;     % Number of VQ codebook vectors
+  K=2;      % Vector length
+  Ntrain=10000;
+  training_data = randn(Ntrain,K);
+  [idx vq1] = kmeans(training_data, N);
+  [vq2 distortion] = binary_switching(vq1, [1 ones(1,N-1)], 1000, 1);
+
+  figure(1); clf; plot(training_data(:,1), training_data(:,2),'+');
+  hold on;
+  plot(vq1(:,1), vq1(:,2),'og','linewidth', 2);
+  plot(vq2(:,1), vq2(:,2),'or','linewidth', 2);
+
+  % plot hamming distance 1 neighbours
+  k = 1;
+  ind = neighbour_indexes(vq2, k);
+  for i=1:length(ind)
+    plot([vq2(k,1) vq2(ind(i),1)],[vq2(k,2) vq2(ind(i),2)],'r-','linewidth', 2);
   end
+  hold off;
 endfunction
 
+pkg load statistics
 %test_binary_switch;
-
-randn('seed',1);
-N=16;     % Number of VQ codebook vectors
-K=2;      % Vector length
-
-Ntrain=10000;
-
-training_data = randn(Ntrain,K);
-[idx vq1] = kmeans(training_data, N);
-[vq2 distortion] = binary_switching(vq1, [1 ones(1,N-1)], 1000);
-
-figure(1); clf; plot(training_data(:,1), training_data(:,2),'+');
-hold on;
-plot(vq1(:,1), vq1(:,2),'og','linewidth', 2);
-plot(vq2(:,1), vq2(:,2),'or','linewidth', 2);
-
-% plot hamming distance 1 neighbours
-k = 1;
-ind = neighbour_indexes(vq2, k);
-for i=1:length(ind)
-  plot([vq2(k,1) vq2(ind(i),1)],[vq2(k,2) vq2(ind(i),2)],'r-','linewidth', 2);
-end
-
-hold off;
-
+test_fast;
+%demo
 

From 30694b1b433341dd675f6e4be2ba08d36ef66ed0 Mon Sep 17 00:00:00 2001
From: drowe67 <david@rowetel.com>
Date: Sat, 4 Sep 2021 11:07:32 +0930
Subject: [PATCH 05/20] test for fast version in Octave, fast version option in
 C

---
 misc/vq_binary_switch.c   | 49 +++++++++++++++++++++++++++++++++++----
 octave/vq_binary_switch.m |  5 ++++
 2 files changed, 50 insertions(+), 4 deletions(-)

diff --git a/misc/vq_binary_switch.c b/misc/vq_binary_switch.c
index 34adde0f2..cc8d5478b 100644
--- a/misc/vq_binary_switch.c
+++ b/misc/vq_binary_switch.c
@@ -82,6 +82,7 @@ int main(int argc, char *argv[]) {
     int   en = -1;
     int   verbose = 0;
     int   n = 0;
+    int   fast_en = 0;
     
     int o = 0; int opt_idx = 0;
     while (o != -1) {
@@ -90,7 +91,7 @@ int main(int argc, char *argv[]) {
            {"en",      required_argument, 0, 'e'},
 	   {0, 0, 0, 0}
         };
-        o = getopt_long(argc,argv,"hd:m:vt:e:n:",long_opts,&opt_idx);
+        o = getopt_long(argc,argv,"hd:m:vt:e:n:f",long_opts,&opt_idx);
         switch (o) {
 	case 'd':
 	    dim = atoi(optarg);
@@ -105,6 +106,9 @@ int main(int argc, char *argv[]) {
         case 'e':
             en = atoi(optarg);
             break;
+        case 'f':
+	    fast_en = 1;
+            break;
         case 'n':
             n = atoi(optarg);
             break;
@@ -167,6 +171,7 @@ int main(int argc, char *argv[]) {
     int i = 0;
     int finished = 0;
     int switches = 0;
+    int log2N = log2(n);
     float distortion0 = distortion_of_current_mapping(vq, n, dim, prob, st, en);
     fprintf(stderr, "distortion0: %f\n", distortion0);
 
@@ -182,13 +187,49 @@ int main(int argc, char *argv[]) {
       // Try switching each vector with A(i)
       float best_delta = 0; int best_j = 0;
       for(int j=1; j<n; j++) {
+	float distortion1, distortion2, delta = 0.0;
+	
 	// we can't switch with ourself
 	if (j != A[i]) {
-	  float distortion1 = distortion_of_current_mapping(vq, n, dim, prob, st, en);
+	  if (fast_en) {
+	    // subtract just those contributions to delta that will change
+	    delta -= cost_of_distance_one(vq, n, dim, prob, A[i], st, en, verbose);
+	    delta -= cost_of_distance_one(vq, n, dim, prob, j, st, en, verbose);
+	    for (int b=0; b<log2N; b++) {
+	      unsigned int index_neighbour;
+	      index_neighbour = A[i] ^ (1<<b);
+	      if ((index_neighbour != j) && (index_neighbour != A[i]))
+		   delta -= cost_of_distance_one(vq, n, dim, prob, index_neighbour, st, en, verbose);
+	      index_neighbour = j ^ (1<<b);
+	      if ((index_neighbour != j) && (index_neighbour != A[i]))
+		   delta -= cost_of_distance_one(vq, n, dim, prob, index_neighbour, st, en, verbose);
+	    }
+	  }
+	  else
+	    distortion1 = distortion_of_current_mapping(vq, n, dim, prob, st, en);
+
 	  // switch vq entries A(i) and j
 	  swap(vq, dim, A[i], j);
-	  float distortion2 = distortion_of_current_mapping(vq, n, dim, prob, st, en);
-	  float delta = distortion2 - distortion1;
+
+	  if (fast_en) {
+	    // add just those contributions to delta that will change
+	    delta += cost_of_distance_one(vq, n, dim, prob, A[i], st, en, verbose);
+	    delta += cost_of_distance_one(vq, n, dim, prob, j, st, en, verbose);
+	    for (int b=0; b<log2N; b++) {
+	      unsigned int index_neighbour;
+	      index_neighbour = A[i] ^ (1<<b);
+	      if ((index_neighbour != j) && (index_neighbour != A[i]))
+		   delta += cost_of_distance_one(vq, n, dim, prob, index_neighbour, st, en, verbose);
+	      index_neighbour = j ^ (1<<b);
+	      if ((index_neighbour != j) && (index_neighbour != A[i]))
+		   delta += cost_of_distance_one(vq, n, dim, prob, index_neighbour, st, en, verbose);
+	    }
+	  }
+	  else {
+	    distortion2 = distortion_of_current_mapping(vq, n, dim, prob, st, en);
+	    delta = distortion2 - distortion1;
+	  }
+
 	  if (delta < 0.0) {
 	    if (fabs(delta) > best_delta) {
 	      best_delta = fabs(delta);
diff --git a/octave/vq_binary_switch.m b/octave/vq_binary_switch.m
index 575c8ace1..1a63de53e 100644
--- a/octave/vq_binary_switch.m
+++ b/octave/vq_binary_switch.m
@@ -170,6 +170,11 @@
 
   training_data = randn(Ntrain,K);
   [idx vq1] = kmeans(training_data, N);
+  f=fopen("vq1.f32","wb");
+  for r=1:rows(vq1)
+    fwrite(f,vq1(r,:),"float32");
+  end
+  fclose(f);
   [vq2 distortion] = binary_switching(vq1, [1 ones(1,N-1)], 1000, fast_en = 0);
   [vq3 distortion] = binary_switching(vq1, [1 ones(1,N-1)], 1000, fast_en = 1);
   assert(vq2 == vq3);  

From a6b07d4b47c4ddb81316bb4914195fedad4da9aa Mon Sep 17 00:00:00 2001
From: David <david@rowetel.com>
Date: Sat, 4 Sep 2021 12:49:20 +0930
Subject: [PATCH 06/20] inf iterations as default

---
 misc/vq_binary_switch.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/misc/vq_binary_switch.c b/misc/vq_binary_switch.c
index cc8d5478b..c8a4f58df 100644
--- a/misc/vq_binary_switch.c
+++ b/misc/vq_binary_switch.c
@@ -77,7 +77,7 @@ void swap(float *vq, int dim, int index1, int index2) {
 int main(int argc, char *argv[]) {
     float vq[MAX_DIM*MAX_ENTRIES];
     int   dim = MAX_DIM;
-    int   max_iter = 200;
+    int   max_iter = INT_MAX;
     int   st = -1;
     int   en = -1;
     int   verbose = 0;

From 8d3abaaec867cd96bd818e4a8e88ab34b6a57e83 Mon Sep 17 00:00:00 2001
From: drowe67 <david@rowetel.com>
Date: Sat, 4 Sep 2021 12:50:07 +0930
Subject: [PATCH 07/20] Octave script to simulate use of optimised VQ and plot
 results

---
 octave/vq_compare.m | 194 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 194 insertions(+)
 create mode 100644 octave/vq_compare.m

diff --git a/octave/vq_compare.m b/octave/vq_compare.m
new file mode 100644
index 000000000..9b5755e3e
--- /dev/null
+++ b/octave/vq_compare.m
@@ -0,0 +1,194 @@
+% vq_compare.m
+% David Rowe Sep 2021
+%
+% Compare the Eb/No performance of Vector Quantisers (robustness to bit errors) using
+% Spectral Distortion (SD) measure.
+
+#{
+  usage:
+
+  1. Generate the initial VQ (vq_stage1.f32) and input test vector file (all_speech_8k_lim.f32):
+ 
+    cd codec2/build_linux
+    ../script/train_trellis.sh
+
+  2. Run the Psuedo-Gray binary switch tool to optimise the VQ against single bit errors:
+
+    ./misc/vq_binary_switch -d 20 vq_stage1.f32 vq_stage1_bs001.f32 -m 5000 --st 2 --en 16 -f
+
+    This can take a while, but if you ctrl-C at any time it will have saved the most recent codebook.
+   
+  3. Run this script to compare the two VQs:
+
+    octave:34> vq_compare
+1;
+
+% converts a decimal value to a soft dec binary value
+function c = dec2sd(dec, nbits)
+    
+    % convert to binary
+
+    c = zeros(1,nbits);
+    for j=0:nbits-1
+      mask = 2.^j;
+      if bitand(dec,mask)
+        c(nbits-j) = 1;
+      end
+    end
+
+    % map to +/- 1
+
+    c = -1 + 2*c;
+endfunction
+
+% fast version of vector quantiser
+function [indexes target_] = vector_quantiser_fast(vq, target, verbose=1)
+  [vq_size K] = size(vq);
+  [ntarget tmp] = size(target);
+  target_ = zeros(ntarget,K);
+  indexes = zeros(1,ntarget);
+
+  % pre-compute energy of each VQ vector
+  vqsq = zeros(vq_size,1);
+  for i=1:vq_size
+    vqsq(i) = vq(i,:)*vq(i,:)';
+  end
+
+  % use efficient matrix multiplies to search for best match to target
+  for i=1:ntarget
+    best_e = 1E32;
+    e = vqsq - 2*(vq * target(i,:)');
+    [best_e best_ind] = min(e);
+    if verbose printf("best_e: %f best_ind: %d\n", best_e, best_ind), end;
+    target_(i,:) = vq(best_ind,:); indexes(i) = best_ind;
+  end
+endfunction
+
+
+% VQ a target sequence of frames then run a test using vanilla uncoded/trellis decoder
+function results = run_test(target, vq, EbNo, verbose)
+  [frames tmp]      = size(target);
+  [vq_length tmp]   = size(vq);
+  nbits             = log2(vq_length);
+  nerrors           = 0;
+  tbits             = 0;
+  nframes           = 0;
+  nper              = 0;
+  
+  % Vector Quantise target vectors sequence
+  [tx_indexes target_ ] = vector_quantiser_fast(vq, target, verbose);
+  % use convention of indexes starting from 0
+  tx_indexes -= 1; 
+  %  mean SD of VQ with no errors
+  diff = target - target_;
+  mse_noerrors = mean(diff(:).^2);
+  
+  % construct tx symbol codewords from VQ indexes
+  tx_codewords = zeros(frames, nbits);
+  for f=1:frames
+    tx_codewords(f,:) = dec2sd(tx_indexes(f), nbits);
+  end
+
+  rx_codewords = tx_codewords + randn(frames, nbits)*sqrt(1/(2*EbNo));
+  rx_indexes = zeros(1,frames);
+
+  for f=1:frames
+    tx_bits        = tx_codewords(f,:) > 0;
+    rx_bits        = rx_codewords(f,:) > 0;
+    rx_indexes(f)  = sum(rx_bits .* 2.^(nbits-1:-1:0));
+    errors         = sum(xor(tx_bits, rx_bits));
+    nerrors       += errors;
+    if errors nper++;, end
+    tbits += nbits;
+    nframes++;
+  end
+
+  EbNodB = 10*log10(EbNo);
+  target_ = vq(rx_indexes+1,:);
+  diff = target - target_;
+  mse = mean(diff(:).^2);
+  printf("Eb/No: %3.2f dB nframes: %2d nerrors: %3d BER: %4.3f PER: %3.2f mse: %3.2f %3.2f\n", 
+         EbNodB, nframes, nerrors, nerrors/tbits, nper/nframes, mse_noerrors, mse);
+  results.ber = nerrors/tbits;	 
+  results.per = nper/nframes;	 
+  results.mse_noerrors = mse_noerrors;
+  results.mse = mse;
+  results.tx_indexes = tx_indexes;
+  results.rx_indexes = rx_indexes;
+endfunction
+
+% Simulations ---------------------------------------------------------------------
+
+% top level function to set up and run a test with a specific vq
+function results = run_test_vq(vq_fn, nframes=100, dec=1, EbNodB=3, verbose=0)
+  K = 20; K_st=2+1; K_en=16+1;
+  target_fn = "../build_linux/all_speech_8k_lim.f32";
+  
+  % load VQ
+  vq = load_f32(vq_fn, K);
+  [vq_size tmp] = size(vq);
+  vq = vq(:,K_st:K_en);
+  
+  % load sequence of target vectors we wish to VQ
+  target = load_f32(target_fn, K);
+
+  % limit test to the first nframes vectors
+  target = target(1:dec:dec*nframes,K_st:K_en);
+  
+  % run a test
+  EbNo=10^(EbNodB/10);
+  results = run_test(target, vq, EbNo, verbose);
+  if verbose
+    for f=2:nframes-1
+      printf("f: %03d tx_index: %04d rx_index: %04d\n", f,  results.tx_indexes(f), results.rx_indexes(f));
+    end
+  end  
+endfunction
+
+% generate sets of curves
+function run_curves(frames=100, dec=1)
+  results1_log = [];
+  EbNodB = 0:5;
+  for i=1:length(EbNodB)
+    results = run_test_vq("../build_linux/vq_stage1.f32", frames, dec, EbNodB(i), verbose=0);
+    results1_log = [results1_log results];
+  end
+  results2_log = [];
+  for i=1:length(EbNodB)
+    results = run_test_vq("../build_linux/vq_stage1_bs001.f32", frames, dec, EbNodB(i), verbose=0);
+    results2_log = [results2_log results];
+  end
+  for i=1:length(results1_log)
+    ber(i) = results1_log(i).ber;
+    per(i) = results1_log(i).per;
+    mse_noerrors(i) = sqrt(results1_log(i).mse_noerrors);
+    mse_vq1(i) = sqrt(results1_log(i).mse);
+    mse_vq2(i) = sqrt(results2_log(i).mse);
+  end
+
+  figure(1); clf;
+  semilogy(EbNodB, ber, 'g+-;ber;','linewidth', 2); hold on;
+  semilogy(EbNodB, per, 'b+-;per;','linewidth', 2);
+  grid; xlabel('Eb/No(dB)');
+  % grid minor is busted
+  for y=2:9
+    semilogy([min(EbNodB) max(EbNodB)],[0.001*y 0.001*y],'--k');
+    semilogy([min(EbNodB) max(EbNodB)],[0.01*y 0.01*y],'--k');
+    semilogy([min(EbNodB) max(EbNodB)-1],[0.1*y 0.1*y],'--k');
+  end
+  hold off;
+  
+  figure(2); clf;
+  plot(EbNodB, mse_noerrors, "b+-;no errors;"); hold on;
+  plot(EbNodB, mse_vq1, "g+-;vq1;");
+  plot(EbNodB, mse_vq2, "r+-;vq2;");
+  hold off; grid; title("RMS SD (dB)"); xlabel('Eb/No(dB)');
+endfunction
+
+% -------------------------------------------------------------------
+
+graphics_toolkit ("gnuplot");
+more off;
+randn('state',1);
+
+run_curves(600,4)

From b51215bc62d93df82c336a62ff3e36f628a7d673 Mon Sep 17 00:00:00 2001
From: drowe67 <david@rowetel.com>
Date: Thu, 9 Sep 2021 18:11:56 +0930
Subject: [PATCH 08/20] correct var calc with using st and en

---
 misc/vqtrain.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/misc/vqtrain.c b/misc/vqtrain.c
index 9659ce412..547ba30b6 100644
--- a/misc/vqtrain.c
+++ b/misc/vqtrain.c
@@ -187,7 +187,7 @@ int main(int argc, char *argv[]) {
         assert(ret == k);
         quantise(cb, vec, k, 1, st, en, &e, &se);
     }
-    var = se/(J*k);
+    var = se/(J*(en-st+1));
     printf("\r  It: 0, var: %f sd: %f\n", var, sqrt(var));
 
     /* set up initial codebook state from samples of training set */
@@ -224,11 +224,11 @@ int main(int argc, char *argv[]) {
 	    acc(&cent[ind*k], vec, k);
             //if (i < 100)
             //    printf("e: %f sqrt(e/k): %f sd: %f noutliers: %ld\n", e, sqrt(e/k), sd, noutliers[0]);
-            if (sqrt(e/k) > 1.0) noutliers[0]++;
-            if (sqrt(e/k) > 2.0) noutliers[1]++;
-            if (sqrt(e/k) > 3.0) noutliers[2]++;
+            if (sqrt(e/(en-st+1)) > 1.0) noutliers[0]++;
+            if (sqrt(e/(en-st+1)) > 2.0) noutliers[1]++;
+            if (sqrt(e/(en-st+1)) > 3.0) noutliers[2]++;
 	}
-	var = se/(J*k);
+	var = se/(J*(en-st+1));
 	delta = (var_1-var)/var;
         int n_min = J;
         int n_max = 0;

From 877e5ac83372b471179f4da00a54ab8bba8c93d9 Mon Sep 17 00:00:00 2001
From: drowe67 <david@rowetel.com>
Date: Thu, 9 Sep 2021 18:52:41 +0930
Subject: [PATCH 09/20] script built up to point we can synthesis a file

---
 octave/save_f32.m       | 12 ++++++
 octave/vq_700c_eq.m     |  8 ----
 octave/vq_compare.m     | 55 +++++++++++++++++-------
 script/train_trellis.sh | 94 ++++++++++++++++++++++++++++++++++++++---
 4 files changed, 141 insertions(+), 28 deletions(-)
 create mode 100644 octave/save_f32.m

diff --git a/octave/save_f32.m b/octave/save_f32.m
new file mode 100644
index 000000000..62214d47a
--- /dev/null
+++ b/octave/save_f32.m
@@ -0,0 +1,12 @@
+% save_f32.m
+% David Rowe Sep 2021
+%
+% save a matrix to .f32 binary files in row-major order
+
+function save_f32(fn, m)
+  f=fopen(fn,"wb");
+  [r c] = size(m);
+  mlinear = reshape(m', 1, r*c);
+  fwrite(f, mlinear, 'float32');
+  fclose(f);
+endfunction
diff --git a/octave/vq_700c_eq.m b/octave/vq_700c_eq.m
index 6da4b8447..3e57b9afa 100644
--- a/octave/vq_700c_eq.m
+++ b/octave/vq_700c_eq.m
@@ -164,14 +164,6 @@ function vq_700c_plots(fn_array)
   eq2 /= (ntargets*nvq);
 endfunction
 
-function save_f32(fn, m)
-  f=fopen(fn,"wb");
-  [r c] = size(m);
-  mlinear = reshape(m', 1, r*c);
-  fwrite(f, mlinear, 'float32');
-  fclose(f);
-endfunction
-
 function [targets e] = load_targets(fn_target_f32)
   nb_features = 41;
   K = 20;
diff --git a/octave/vq_compare.m b/octave/vq_compare.m
index 9b5755e3e..b27f8906f 100644
--- a/octave/vq_compare.m
+++ b/octave/vq_compare.m
@@ -16,12 +16,29 @@
 
     ./misc/vq_binary_switch -d 20 vq_stage1.f32 vq_stage1_bs001.f32 -m 5000 --st 2 --en 16 -f
 
-    This can take a while, but if you ctrl-C at any time it will have saved the most recent codebook.
+    This can take a while, but if you ctrl-C at any time it will have saved the most recent optimised VQ.
    
   3. Run this script to compare the two VQs:
 
     octave:34> vq_compare
-1;
+#}
+
+
+function vq_compare(action="run_curves", vq_fn, EbNodB=3, in_fn, out_fn)
+  graphics_toolkit ("gnuplot");
+  more off;
+  randn('state',1);
+
+  if strcmp(action, "run_curves")
+    run_curves(600);
+  end
+  if strcmp(action, "vq_file")
+    vq_file(vq_fn, EbNodB, in_fn, out_fn)
+  end
+endfunction
+
+
+% -------------------------------------------------------------------
 
 % converts a decimal value to a soft dec binary value
 function c = dec2sd(dec, nbits)
@@ -120,42 +137,50 @@
 % Simulations ---------------------------------------------------------------------
 
 % top level function to set up and run a test with a specific vq
-function results = run_test_vq(vq_fn, nframes=100, dec=1, EbNodB=3, verbose=0)
+function [results target_] = run_test_vq(vq_fn, target_fn, nframes=100, dec=1, EbNodB=3, verbose=0)
   K = 20; K_st=2+1; K_en=16+1;
-  target_fn = "../build_linux/all_speech_8k_lim.f32";
   
   % load VQ
   vq = load_f32(vq_fn, K);
   [vq_size tmp] = size(vq);
-  vq = vq(:,K_st:K_en);
+  vqsub = vq(:,K_st:K_en);
   
   % load sequence of target vectors we wish to VQ
   target = load_f32(target_fn, K);
 
   % limit test to the first nframes vectors
-  target = target(1:dec:dec*nframes,K_st:K_en);
+  if nframes != -1
+    last = dec*nframes
+  else
+    last = length(target);
+  end
+  target = target(1:dec:last, K_st:K_en);
   
   % run a test
   EbNo=10^(EbNodB/10);
-  results = run_test(target, vq, EbNo, verbose);
+  results = run_test(target, vqsub, EbNo, verbose);
   if verbose
     for f=2:nframes-1
       printf("f: %03d tx_index: %04d rx_index: %04d\n", f,  results.tx_indexes(f), results.rx_indexes(f));
     end
-  end  
+  end
+
+  % return full band vq-ed vectors
+  target_ = vq(results.rx_indexes+1,:);
 endfunction
 
 % generate sets of curves
 function run_curves(frames=100, dec=1)
+  target_fn = "../build_linux/all_speech_8k_lim.f32";
   results1_log = [];
   EbNodB = 0:5;
   for i=1:length(EbNodB)
-    results = run_test_vq("../build_linux/vq_stage1.f32", frames, dec, EbNodB(i), verbose=0);
+    results = run_test_vq("../build_linux/vq_stage1.f32", target_fn, frames, dec, EbNodB(i), verbose=0);
     results1_log = [results1_log results];
   end
   results2_log = [];
   for i=1:length(EbNodB)
-    results = run_test_vq("../build_linux/vq_stage1_bs001.f32", frames, dec, EbNodB(i), verbose=0);
+    results = run_test_vq("../build_linux/vq_stage1_bs001.f32", target_fn, frames, dec, EbNodB(i), verbose=0);
     results2_log = [results2_log results];
   end
   for i=1:length(results1_log)
@@ -185,10 +210,10 @@ function run_curves(frames=100, dec=1)
   hold off; grid; title("RMS SD (dB)"); xlabel('Eb/No(dB)');
 endfunction
 
-% -------------------------------------------------------------------
 
-graphics_toolkit ("gnuplot");
-more off;
-randn('state',1);
+function vq_file(vq_fn, EbNodB, in_fn, out_fn)
+  [results target_] = run_test_vq(vq_fn, in_fn, nframes=-1, dec=1, EbNodB, verbose=0);
+  save_f32(out_fn, target_);
+endfunction
+
 
-run_curves(600,4)
diff --git a/script/train_trellis.sh b/script/train_trellis.sh
index e2bd77740..338da3579 100755
--- a/script/train_trellis.sh
+++ b/script/train_trellis.sh
@@ -1,8 +1,10 @@
-#!/bin/bash -x
+#!/bin/bash
 # train_trellis.sh
 # David Rowe August 2021
 #
-# Training Trellis Vector Quantiser for Codec 2 newamp1, supports octave/trellis.m
+# Script to support:
+# 1. Training Trellis Vector Quantiser for Codec 2 newamp1, supports octave/trellis.m
+# 2. VQ sorting/optimisation experiments, octave/vq_compare.m
 
 TRAIN=~/Downloads/all_speech_8k.sw
 CODEC2_PATH=$HOME/codec2
@@ -21,9 +23,91 @@ function train() {
   c2sim $fullfile --rateK --rateKout ${filename}.f32
   echo "ratek=load_f32('../build_linux/${filename}.f32',20); vq_700c_eq; ratek_lim=limit_vec(ratek, 0, 40); save_f32('../build_linux/${filename}_lim.f32', ratek_lim); quit" | \
   octave -p ${CODEC2_PATH}/octave -qf
-  #vqtrain ${filename}_lim.f32 $K 4096 vq_stage1.f32 -s 1e-3 --st $Kst --en $Ken
+  vqtrain ${filename}_lim.f32 $K 4096 vq_stage1.f32 -s 1e-3 --st $Kst --en $Ken
+
+  # VQ the training file
   cat ${filename}_lim.f32 | vq_mbest --st $Kst --en $Ken -k $K -q vq_stage1.f32 > ${filename}_test.f32
 }
 
-# choose which function to run here
-train
+function listen() {
+  fullfile=$1
+  filename=$(basename -- "$fullfile")
+  extension="${filename##*.}"
+  filename="${filename%.*}"
+
+  fullfile_out=$2
+  vq_fn=$3
+
+  sox $fullfile -t raw - | c2sim - --rateK --rateKout ${filename}.f32
+
+  echo "ratek=load_f32('../build_linux/${filename}.f32',20); vq_700c_eq; ratek_lim=limit_vec(ratek, 0, 40); save_f32('../build_linux/${filename}_lim.f32', ratek_lim); quit" | \
+  octave -p ${CODEC2_PATH}/octave -qf
+
+  echo "pkg load statistics; vq_compare(action='vq_file', '${vq_fn}', EbNodB=100, '${filename}_lim.f32', '${filename}_test.f32'); quit" \ |
+  octave -p ${CODEC2_PATH}/octave -qf
+      
+  sox $fullfile -t raw - | c2sim - --rateK --rateKin ${filename}_test.f32 -o - | sox -t .s16 -r 8000 -c 1 - ${fullfile_out}
+}
+
+function print_help {
+    echo
+    echo "Trellis/VQ optimisation support script"
+    echo
+    echo "  usage ./train_trellis.sh [-d] [-t] [-v in.wav out.wav vq.f32]"
+    echo
+    echo "    -d        debug mode; trace script execution"
+    echo "    -t        train VQ and generate a fully quantised version of training vectors"
+    echo "    -v        synthesis an output file out.wav from in.raw, using the VQ vq.f32"
+    echo
+    exit
+}
+
+# command line arguments to select function
+
+if [ $# -lt 1 ]; then
+    print_help
+fi
+
+do_train=0
+do_vq=0
+POSITIONAL=()
+while [[ $# -gt 0 ]]
+do
+key="$1"
+case $key in
+    -d)
+        set -x	
+        shift
+    ;;
+    -t)
+        do_train=1
+	shift
+    ;;
+    -v)
+        do_vq=1
+	in_wav="$2"
+	out_wav="$3"
+	vq_fn="$4"
+        shift
+	shift
+	shift
+	shift
+    ;;
+    -h)
+        print_help	
+    ;;
+    *)
+    POSITIONAL+=("$1") # save it in an array for later
+    shift
+    ;;
+esac
+done
+set -- "${POSITIONAL[@]}" # restore positional parameters
+
+if [ $do_train -eq 1 ]; then
+    train
+fi
+
+if [ $do_vq -eq 1 ]; then
+  listen ${in_wav} ${out_wav} ${vq_fn}
+fi

From a4a0a5b724739f56b1c728231ee4db0167b3742f Mon Sep 17 00:00:00 2001
From: drowe67 <david@rowetel.com>
Date: Thu, 9 Sep 2021 19:47:02 +0930
Subject: [PATCH 10/20] ability to insert errors

---
 script/train_trellis.sh | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/script/train_trellis.sh b/script/train_trellis.sh
index 338da3579..4fd0f1005 100755
--- a/script/train_trellis.sh
+++ b/script/train_trellis.sh
@@ -37,13 +37,14 @@ function listen() {
 
   fullfile_out=$2
   vq_fn=$3
-
+  EbNodB=$4
+  
   sox $fullfile -t raw - | c2sim - --rateK --rateKout ${filename}.f32
 
   echo "ratek=load_f32('../build_linux/${filename}.f32',20); vq_700c_eq; ratek_lim=limit_vec(ratek, 0, 40); save_f32('../build_linux/${filename}_lim.f32', ratek_lim); quit" | \
   octave -p ${CODEC2_PATH}/octave -qf
 
-  echo "pkg load statistics; vq_compare(action='vq_file', '${vq_fn}', EbNodB=100, '${filename}_lim.f32', '${filename}_test.f32'); quit" \ |
+  echo "pkg load statistics; vq_compare(action='vq_file', '${vq_fn}', EbNodB=${EbNodB}, '${filename}_lim.f32', '${filename}_test.f32'); quit" \ |
   octave -p ${CODEC2_PATH}/octave -qf
       
   sox $fullfile -t raw - | c2sim - --rateK --rateKin ${filename}_test.f32 -o - | sox -t .s16 -r 8000 -c 1 - ${fullfile_out}
@@ -53,7 +54,7 @@ function print_help {
     echo
     echo "Trellis/VQ optimisation support script"
     echo
-    echo "  usage ./train_trellis.sh [-d] [-t] [-v in.wav out.wav vq.f32]"
+    echo "  usage ./train_trellis.sh [-d] [-t] [-v in.wav out.wav vq.f32 EbNodB]"
     echo
     echo "    -d        debug mode; trace script execution"
     echo "    -t        train VQ and generate a fully quantised version of training vectors"
@@ -88,10 +89,12 @@ case $key in
 	in_wav="$2"
 	out_wav="$3"
 	vq_fn="$4"
+	EbNodB="$5"
         shift
 	shift
 	shift
 	shift
+	shift
     ;;
     -h)
         print_help	
@@ -109,5 +112,5 @@ if [ $do_train -eq 1 ]; then
 fi
 
 if [ $do_vq -eq 1 ]; then
-  listen ${in_wav} ${out_wav} ${vq_fn}
+  listen ${in_wav} ${out_wav} ${vq_fn} ${EbNodB}
 fi

From cc7fc80ae05b68a1da1e9191a46beebeaa84b810 Mon Sep 17 00:00:00 2001
From: drowe67 <david@rowetel.com>
Date: Fri, 10 Sep 2021 13:51:48 +0930
Subject: [PATCH 11/20] ability to set decimation, EbNo, and sound device from
 command line

---
 octave/vq_compare.m     | 23 +++++++++++-----
 script/train_trellis.sh | 59 +++++++++++++++++++++++++++--------------
 2 files changed, 56 insertions(+), 26 deletions(-)

diff --git a/octave/vq_compare.m b/octave/vq_compare.m
index b27f8906f..4d87d5c9d 100644
--- a/octave/vq_compare.m
+++ b/octave/vq_compare.m
@@ -24,7 +24,7 @@
 #}
 
 
-function vq_compare(action="run_curves", vq_fn, EbNodB=3, in_fn, out_fn)
+function vq_compare(action="run_curves", vq_fn, dec=1, EbNodB=3, in_fn, out_fn)
   graphics_toolkit ("gnuplot");
   more off;
   randn('state',1);
@@ -33,7 +33,7 @@ function vq_compare(action="run_curves", vq_fn, EbNodB=3, in_fn, out_fn)
     run_curves(600);
   end
   if strcmp(action, "vq_file")
-    vq_file(vq_fn, EbNodB, in_fn, out_fn)
+    vq_file(vq_fn, dec, EbNodB, in_fn, out_fn)
   end
 endfunction
 
@@ -150,7 +150,7 @@ function vq_compare(action="run_curves", vq_fn, EbNodB=3, in_fn, out_fn)
 
   % limit test to the first nframes vectors
   if nframes != -1
-    last = dec*nframes
+    last = nframes
   else
     last = length(target);
   end
@@ -166,7 +166,18 @@ function vq_compare(action="run_curves", vq_fn, EbNodB=3, in_fn, out_fn)
   end
 
   % return full band vq-ed vectors
-  target_ = vq(results.rx_indexes+1,:);
+  target_ = zeros(last,K);
+  target_(1:dec:last,:) = vq(results.rx_indexes+1,:);
+  
+  % use linear interpolation to restore original frame rate
+  for f=1:dec:last-dec
+    prev = f; next = f + dec;
+    for g=prev+1:next-1
+      cnext = (g-prev)/dec; cprev = 1 - cnext;
+      target_(g,:) = cprev*target_(prev,:) + cnext*target_(next,:);
+      %printf("f: %d g: %d cprev: %f cnext: %f\n", f, g, cprev, cnext);
+    end
+  end
 endfunction
 
 % generate sets of curves
@@ -211,8 +222,8 @@ function run_curves(frames=100, dec=1)
 endfunction
 
 
-function vq_file(vq_fn, EbNodB, in_fn, out_fn)
-  [results target_] = run_test_vq(vq_fn, in_fn, nframes=-1, dec=1, EbNodB, verbose=0);
+function vq_file(vq_fn, dec, EbNodB, in_fn, out_fn)
+  [results target_] = run_test_vq(vq_fn, in_fn, nframes=-1, dec, EbNodB, verbose=0);
   save_f32(out_fn, target_);
 endfunction
 
diff --git a/script/train_trellis.sh b/script/train_trellis.sh
index 4fd0f1005..26cf098a1 100755
--- a/script/train_trellis.sh
+++ b/script/train_trellis.sh
@@ -30,35 +30,44 @@ function train() {
 }
 
 function listen() {
-  fullfile=$1
+  vq_fn=$1
+  dec=$2
+  EbNodB=$3
+  fullfile=$4
   filename=$(basename -- "$fullfile")
   extension="${filename##*.}"
   filename="${filename%.*}"
-
-  fullfile_out=$2
-  vq_fn=$3
-  EbNodB=$4
   
-  sox $fullfile -t raw - | c2sim - --rateK --rateKout ${filename}.f32
-
+  fullfile_out=$5
+  sox_options='-t raw -e signed-integer -b 16' 
+  sox $fullfile $sox_options - | c2sim - --rateK --rateKout ${filename}.f32
+  
   echo "ratek=load_f32('../build_linux/${filename}.f32',20); vq_700c_eq; ratek_lim=limit_vec(ratek, 0, 40); save_f32('../build_linux/${filename}_lim.f32', ratek_lim); quit" | \
   octave -p ${CODEC2_PATH}/octave -qf
 
-  echo "pkg load statistics; vq_compare(action='vq_file', '${vq_fn}', EbNodB=${EbNodB}, '${filename}_lim.f32', '${filename}_test.f32'); quit" \ |
+  echo "pkg load statistics; vq_compare(action='vq_file', '${vq_fn}', ${dec}, ${EbNodB}, '${filename}_lim.f32', '${filename}_test.f32'); quit" \ |
   octave -p ${CODEC2_PATH}/octave -qf
       
-  sox $fullfile -t raw - | c2sim - --rateK --rateKin ${filename}_test.f32 -o - | sox -t .s16 -r 8000 -c 1 - ${fullfile_out}
+  if [ "$fullfile_out" = "aplay" ]; then
+     sox $fullfile $sox_options - | c2sim - --rateK --rateKin ${filename}_test.f32 -o - | aplay -f S16_LE
+  else
+     sox $fullfile $sox_options - | c2sim - --rateK --rateKin ${filename}_test.f32 -o - | sox -t .s16 -r 8000 -c 1 - ${fullfile_out}
+  fi
+     
 }
 
 function print_help {
     echo
     echo "Trellis/VQ optimisation support script"
     echo
-    echo "  usage ./train_trellis.sh [-d] [-t] [-v in.wav out.wav vq.f32 EbNodB]"
+    echo "  usage ./train_trellis.sh [-x] [-t] [-v vq.f32 in.wav out.wav] [-e EbNodB] [-d dec]"
     echo
-    echo "    -d        debug mode; trace script execution"
-    echo "    -t        train VQ and generate a fully quantised version of training vectors"
-    echo "    -v        synthesis an output file out.wav from in.raw, using the VQ vq.f32"
+    echo "    -x                         debug mode; trace script execution"
+    echo "    -t                         train VQ and generate a fully quantised version of training vectors"
+    echo "    -v  in.wav out.wav vq.f32  synthesise an output file out.wav from in.raw, using the VQ vq.f32"
+    echo "    -v  in.wav aplay vq.f32    synthesise output, play immediately using aplay, using the VQ vq.f32"
+    echo "    -e  EbNodB                 Eb/No in dB for AWGn channel simulation (error insertion)"
+    echo "    -d  dec                    decimation/interpolation rate"
     echo
     exit
 }
@@ -71,12 +80,14 @@ fi
 
 do_train=0
 do_vq=0
+EbNodB=100
+dec=1
 POSITIONAL=()
 while [[ $# -gt 0 ]]
 do
 key="$1"
 case $key in
-    -d)
+    -x)
         set -x	
         shift
     ;;
@@ -86,16 +97,24 @@ case $key in
     ;;
     -v)
         do_vq=1
-	in_wav="$2"
-	out_wav="$3"
-	vq_fn="$4"
-	EbNodB="$5"
-        shift
+	vq_fn="$2"
+	in_wav="$3"
+	out_wav="$4"
 	shift
 	shift
 	shift
 	shift
     ;;
+    -d)
+	dec="$2"
+	shift
+	shift	
+	;;
+    -e)
+	EbNodB="$2"
+	shift
+	shift	
+	;;
     -h)
         print_help	
     ;;
@@ -112,5 +131,5 @@ if [ $do_train -eq 1 ]; then
 fi
 
 if [ $do_vq -eq 1 ]; then
-  listen ${in_wav} ${out_wav} ${vq_fn} ${EbNodB}
+  listen ${vq_fn} ${dec} ${EbNodB} ${in_wav} ${out_wav}
 fi

From c191d31b91fb8eee2fc7e11f0b130a43b83f28e0 Mon Sep 17 00:00:00 2001
From: drowe67 <david@rowetel.com>
Date: Sat, 11 Sep 2021 09:16:35 +0930
Subject: [PATCH 12/20] added support for adding probability of vector use to
 optimisation.  SD results suggest 0.5dB gain

---
 misc/vq_binary_switch.c | 41 +++++++++++++++++++++++++++++++----------
 misc/vq_mbest.c         | 41 ++++++++++++++++++++++++++++-------------
 misc/vqtrain.c          | 14 +++++---------
 octave/vq_compare.m     | 15 +++++++++++----
 4 files changed, 75 insertions(+), 36 deletions(-)

diff --git a/misc/vq_binary_switch.c b/misc/vq_binary_switch.c
index c8a4f58df..e61aa0996 100644
--- a/misc/vq_binary_switch.c
+++ b/misc/vq_binary_switch.c
@@ -67,11 +67,15 @@ void sort_c(int *idx, const size_t n) {
     qsort(idx, n, sizeof(int), compare_increase);
 }
 
-void swap(float *vq, int dim, int index1, int index2) {
+void swap(float *vq, int dim, float *prob, int index1, int index2) {
   float tmp[dim];
   for(int i=0; i<dim; i++) tmp[i] = vq[index1*dim+i];
   for(int i=0; i<dim; i++) vq[index1*dim+i] = vq[index2*dim+i];
   for(int i=0; i<dim; i++) vq[index2*dim+i] = tmp[i];
+
+  tmp[0] = prob[index1];
+  prob[index1] = prob[index2];
+  prob[index2] = tmp[0];
 }
 
 int main(int argc, char *argv[]) {
@@ -83,15 +87,17 @@ int main(int argc, char *argv[]) {
     int   verbose = 0;
     int   n = 0;
     int   fast_en = 0;
+    char  prob_fn[80]="";
     
     int o = 0; int opt_idx = 0;
     while (o != -1) {
        static struct option long_opts[] = {
+           {"prob",    required_argument, 0, 'p'},
            {"st",      required_argument, 0, 't'},
            {"en",      required_argument, 0, 'e'},
 	   {0, 0, 0, 0}
         };
-        o = getopt_long(argc,argv,"hd:m:vt:e:n:f",long_opts,&opt_idx);
+        o = getopt_long(argc,argv,"hd:m:vt:e:n:fp:",long_opts,&opt_idx);
         switch (o) {
 	case 'd':
 	    dim = atoi(optarg);
@@ -112,6 +118,9 @@ int main(int argc, char *argv[]) {
         case 'n':
             n = atoi(optarg);
             break;
+        case 'p':
+	    strcpy(prob_fn,optarg);
+            break;	    
         case 'v':
             verbose = 1;
             break;
@@ -119,10 +128,11 @@ int main(int argc, char *argv[]) {
 	    fprintf(stderr, "\n");
             fprintf(stderr, "usage: %s -d dimension [-m max_iterations -v --st Kst --en Ken -n nVQ] vq_in.f32 vq_out.f32\n", argv[0]);
 	    fprintf(stderr, "\n");
-            fprintf(stderr, "-n nVQ      Run with just the first nVQ entries of the VQ\n");
-            fprintf(stderr, "--st Kst    Start vector element for error calculation (default 0)\n");
-            fprintf(stderr, "--en Ken    End vector element for error calculation (default K-1)\n");
-            fprintf(stderr, "-v          verbose\n");
+            fprintf(stderr, "-n nVQ           Run with just the first nVQ entries of the VQ\n");
+            fprintf(stderr, "--st Kst         Start vector element for error calculation (default 0)\n");
+            fprintf(stderr, "--en Ken         End vector element for error calculation (default K-1)\n");
+            fprintf(stderr, "--prob probFile  f32 file of probabilities for each VQ element (default 1.0)\n");
+            fprintf(stderr, "-v               verbose\n");
             exit(1);
         }
     }
@@ -163,9 +173,20 @@ int main(int argc, char *argv[]) {
     assert(nrd == n*dim);
     fclose(fq);
    
-    /* set probability of each vector to 1.0 for now */
+    /* set probability of each vector to 1.0 as default */
     float prob[n];
     for(int l=0; l<n; l++) prob[l] = 1.0;
+    if (strlen(prob_fn)) {
+      fprintf(stderr, "Reading probability file: %s\n", prob_fn);
+      FILE *fp = fopen(prob_fn,"rb");
+      assert(fp != NULL);
+      int nrd = fread(prob, sizeof(float), n, fp);
+      assert(nrd == n);
+      fclose(fp);
+      float sum = 0.0;
+      for(int l=0; l<n; l++) sum += prob[l];
+      fprintf(stderr, "sum = %f\n", sum);
+    }
     
     int iteration = 0;
     int i = 0;
@@ -209,7 +230,7 @@ int main(int argc, char *argv[]) {
 	    distortion1 = distortion_of_current_mapping(vq, n, dim, prob, st, en);
 
 	  // switch vq entries A(i) and j
-	  swap(vq, dim, A[i], j);
+	  swap(vq, dim, prob, A[i], j);
 
 	  if (fast_en) {
 	    // add just those contributions to delta that will change
@@ -237,7 +258,7 @@ int main(int argc, char *argv[]) {
 	    }
 	  }
 	  // unswitch
-	  swap(vq, dim, A[i], j);
+	  swap(vq, dim, prob, A[i], j);
 	}
       } //next j
 
@@ -247,7 +268,7 @@ int main(int argc, char *argv[]) {
 	if (i == n-1) finished = 1; else i++;
       } else {
 	// OK keep the switch that minimised the distortion
-	swap(vq, dim, A[i], best_j);
+	swap(vq, dim, prob, A[i], best_j);
 	switches++;
 
 	// save results
diff --git a/misc/vq_mbest.c b/misc/vq_mbest.c
index bef2e19d2..5df2440ab 100644
--- a/misc/vq_mbest.c
+++ b/misc/vq_mbest.c
@@ -39,22 +39,24 @@ int main(int argc, char *argv[]) {
     int   st = -1;
     int   en = -1;
     int   num = INT_MAX;
+    int   output_vec_usage = 0;
     
     int o = 0; int opt_idx = 0;
     while (o != -1) {
        static struct option long_opts[] = {
-	   {"k",       required_argument, 0, 'k'},
-	   {"quant",   required_argument, 0, 'q'},
-	   {"mbest",   required_argument, 0, 'm'},
-	   {"lower",   required_argument, 0, 'l'},
-	   {"verbose", required_argument, 0, 'v'},
-           {"st",      required_argument, 0, 't'},
-           {"en",      required_argument, 0, 'e'},
-           {"num",     required_argument, 0, 'n'},
+	   {"k",         required_argument, 0, 'k'},
+	   {"quant",     required_argument, 0, 'q'},
+	   {"mbest",     required_argument, 0, 'm'},
+	   {"lower",     required_argument, 0, 'l'},
+	   {"verbose",   required_argument, 0, 'v'},
+           {"st",        required_argument, 0, 't'},
+           {"en",        required_argument, 0, 'e'},
+           {"num",       required_argument, 0, 'n'},
+           {"vec_usage", no_argument, 0, 'u'},
 	   {0, 0, 0, 0}
         };
 
-        o = getopt_long(argc,argv,"hk:q:m:vt:e:n:",long_opts,&opt_idx);
+        o = getopt_long(argc,argv,"hk:q:m:vt:e:n:u",long_opts,&opt_idx);
         switch (o) {
 	case 'k':
 	    k = atoi(optarg);
@@ -111,7 +113,10 @@ int main(int argc, char *argv[]) {
         case 'e':
             en = atoi(optarg);
             break;
-        case 'v':
+        case 'u':
+            output_vec_usage = 1;
+            break;
+	case 'v':
             verbose = 1;
             break;
 	help:
@@ -125,7 +130,8 @@ int main(int argc, char *argv[]) {
             fprintf(stderr, "--st    Kst              start vector element for error calculation (default 0)\n");
             fprintf(stderr, "--en    Ken              end vector element for error calculation (default K-1)\n");
 	    fprintf(stderr, "--num   numToProcess     number of vectors to quantise (default to EOF)\n");
-	    fprintf(stderr, "-v                       Verbose\n");
+	    fprintf(stderr, "--vec_usage              Output a record of how many times each vector is used\n");
+ 	    fprintf(stderr, "-v                       Verbose\n");
             exit(1);
         }
     }
@@ -137,7 +143,8 @@ int main(int argc, char *argv[]) {
     if (st == -1) st = 0; 
     if (en == -1) en = k-1;
 
-    int   indexes[num_stages], nvecs = 0;
+    int   indexes[num_stages], nvecs = 0; int vec_usage[m[0]];
+    for(int i=0; i<m[0]; i++) vec_usage[i] = 0;
     float target[k], quantised[k];
     float sqe = 0.0;
     while(fread(&target, sizeof(float), k, stdin) && (nvecs < num)) {
@@ -161,8 +168,17 @@ int main(int argc, char *argv[]) {
 	}
 	fwrite(&quantised, sizeof(float), k, stdout);
 	nvecs++;
+	// count number f time each vector is used (just for first stage)
+	vec_usage[indexes[0]]++;
     }
+
     fprintf(stderr, "%4.2f\n", sqe/(nvecs*(en-st+1)));
+
+    if (output_vec_usage) {
+      for(int i=0; i<m[0]; i++)
+	fprintf(stderr, "%d\n", vec_usage[i]);
+    }
+
     return 0;
 }
 
@@ -200,7 +216,6 @@ void quant_pred_mbest(float vec_out[],
         index[i] = 0;
     }
 
-    
     for(i=0; i<st; i++)
         w[i] = 0.0;
     for(i=st; i<=en; i++)
diff --git a/misc/vqtrain.c b/misc/vqtrain.c
index 547ba30b6..7060f12b7 100644
--- a/misc/vqtrain.c
+++ b/misc/vqtrain.c
@@ -76,7 +76,7 @@ int main(int argc, char *argv[]) {
     long   J;		/* number of vectors in training set		*/
     long   i,j;
     long   ind;	     	/* index of current vector			*/
-    float  e;           /* sqaured error for current vector             */
+    float  e;           /* squared error for current vector             */
     float  se;		/* squared error for this iteration		*/
     float  var,var_1;	/* current and previous iterations distortion	*/
     float  delta;	/* improvement in distortion 			*/
@@ -131,12 +131,11 @@ int main(int argc, char *argv[]) {
     }
     int dx = optind;
 
-    //fprintf(stderr, "argc: %d dx: %d\n", argc, dx);
     if ((argc - dx) < 4) {
         fprintf(stderr, "Too few arguments\n");
     helpmsg:
         fprintf(stderr, "usage: %s [Options] TrainFile.f32 K(dimension) M(codebook size) VQFile.f32\n", argv[0]);
-        fprintf(stderr, "  -r --residual VQResidualErrorFile.f32usage\n");
+        fprintf(stderr, "  -r --residual VQResidualErrorFile.f32\n");
         fprintf(stderr, "  -s --stop StopDelta\n");
         fprintf(stderr, "     --st   Kst        start vector element for error calculation (default 0)\n");
         fprintf(stderr, "     --en   Ken        end vector element for error calculation (default K-1)\n");
@@ -164,7 +163,7 @@ int main(int argc, char *argv[]) {
     cb = (float*)malloc(sizeof(float)*k*m);
     cent = (float*)malloc(sizeof(float)*k*m);
     n = (long*)malloc(sizeof(long)*m);
-    if (cb == NULL || cb == NULL || cent == NULL || vec == NULL) {
+    if (vec == NULL || cb == NULL || cent == NULL || n == NULL) {
 	printf("Error in malloc.\n");
 	exit(1);
     }
@@ -222,8 +221,6 @@ int main(int argc, char *argv[]) {
 	    ind = quantise(cb, vec, k, m, st, en, &e, &se);
 	    n[ind]++;
 	    acc(&cent[ind*k], vec, k);
-            //if (i < 100)
-            //    printf("e: %f sqrt(e/k): %f sd: %f noutliers: %ld\n", e, sqrt(e/k), sd, noutliers[0]);
             if (sqrt(e/(en-st+1)) > 1.0) noutliers[0]++;
             if (sqrt(e/(en-st+1)) > 2.0) noutliers[1]++;
             if (sqrt(e/(en-st+1)) > 3.0) noutliers[2]++;
@@ -289,7 +286,7 @@ int main(int argc, char *argv[]) {
 
 \*-----------------------------------------------------------------------*/
 
-/*---------------------------------------------------------------------------*\
+/*-----------------------------------------------------------------------*\
 
 	FUNCTION....: zero()
 
@@ -298,7 +295,7 @@ int main(int argc, char *argv[]) {
 
 	Zeros a vector of length k.
 
-\*---------------------------------------------------------------------------*/
+\*-----------------------------------------------------------------------*/
 
 void zero(float v[], int k)
 /*  float  v[];		ptr to start of vector		*/
@@ -358,7 +355,6 @@ void norm(float v[], int k, long n)
 /*---------------------------------------------------------------------------*\
 
 	FUNCTION....: quantise()
-
 	AUTHOR......: David Rowe
 	DATE CREATED: 23/2/95
 
diff --git a/octave/vq_compare.m b/octave/vq_compare.m
index 4d87d5c9d..f47b1d5e5 100644
--- a/octave/vq_compare.m
+++ b/octave/vq_compare.m
@@ -30,7 +30,7 @@ function vq_compare(action="run_curves", vq_fn, dec=1, EbNodB=3, in_fn, out_fn)
   randn('state',1);
 
   if strcmp(action, "run_curves")
-    run_curves(600);
+    run_curves(30*100);
   end
   if strcmp(action, "vq_file")
     vq_file(vq_fn, dec, EbNodB, in_fn, out_fn)
@@ -150,7 +150,7 @@ function vq_compare(action="run_curves", vq_fn, dec=1, EbNodB=3, in_fn, out_fn)
 
   % limit test to the first nframes vectors
   if nframes != -1
-    last = nframes
+    last = nframes;
   else
     last = length(target);
   end
@@ -194,12 +194,18 @@ function run_curves(frames=100, dec=1)
     results = run_test_vq("../build_linux/vq_stage1_bs001.f32", target_fn, frames, dec, EbNodB(i), verbose=0);
     results2_log = [results2_log results];
   end
+  results4_log = [];
+  for i=1:length(EbNodB)
+    results = run_test_vq("../build_linux/vq_stage1_bs004.f32", target_fn, frames, dec, EbNodB(i), verbose=0);
+    results4_log = [results4_log results];
+  end
   for i=1:length(results1_log)
     ber(i) = results1_log(i).ber;
     per(i) = results1_log(i).per;
     mse_noerrors(i) = sqrt(results1_log(i).mse_noerrors);
     mse_vq1(i) = sqrt(results1_log(i).mse);
     mse_vq2(i) = sqrt(results2_log(i).mse);
+    mse_vq4(i) = sqrt(results4_log(i).mse);
   end
 
   figure(1); clf;
@@ -216,8 +222,9 @@ function run_curves(frames=100, dec=1)
   
   figure(2); clf;
   plot(EbNodB, mse_noerrors, "b+-;no errors;"); hold on;
-  plot(EbNodB, mse_vq1, "g+-;vq1;");
-  plot(EbNodB, mse_vq2, "r+-;vq2;");
+  plot(EbNodB, mse_vq1, "g+-;vanilla;");
+  plot(EbNodB, mse_vq2, "r+-;binary switch;");
+  plot(EbNodB, mse_vq4, "b+-;binary switch with prob;");
   hold off; grid; title("RMS SD (dB)"); xlabel('Eb/No(dB)');
 endfunction
 

From ad4e42b467e293e28390b33030144af02804b148 Mon Sep 17 00:00:00 2001
From: drowe67 <david@rowetel.com>
Date: Sat, 11 Sep 2021 11:08:10 +0930
Subject: [PATCH 13/20] renamed tool and comparing bs with prob

---
 octave/vq_compare.m |  12 +---
 script/subsetvq.sh  | 136 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 139 insertions(+), 9 deletions(-)
 create mode 100755 script/subsetvq.sh

diff --git a/octave/vq_compare.m b/octave/vq_compare.m
index f47b1d5e5..f08284730 100644
--- a/octave/vq_compare.m
+++ b/octave/vq_compare.m
@@ -25,7 +25,6 @@
 
 
 function vq_compare(action="run_curves", vq_fn, dec=1, EbNodB=3, in_fn, out_fn)
-  graphics_toolkit ("gnuplot");
   more off;
   randn('state',1);
 
@@ -211,14 +210,9 @@ function run_curves(frames=100, dec=1)
   figure(1); clf;
   semilogy(EbNodB, ber, 'g+-;ber;','linewidth', 2); hold on;
   semilogy(EbNodB, per, 'b+-;per;','linewidth', 2);
-  grid; xlabel('Eb/No(dB)');
-  % grid minor is busted
-  for y=2:9
-    semilogy([min(EbNodB) max(EbNodB)],[0.001*y 0.001*y],'--k');
-    semilogy([min(EbNodB) max(EbNodB)],[0.01*y 0.01*y],'--k');
-    semilogy([min(EbNodB) max(EbNodB)-1],[0.1*y 0.1*y],'--k');
-  end
-  hold off;
+  grid('minor'); xlabel('Eb/No(dB)');
+
+hold off;
   
   figure(2); clf;
   plot(EbNodB, mse_noerrors, "b+-;no errors;"); hold on;
diff --git a/script/subsetvq.sh b/script/subsetvq.sh
new file mode 100755
index 000000000..25663a174
--- /dev/null
+++ b/script/subsetvq.sh
@@ -0,0 +1,136 @@
+#!/bin/bash
+# subsetvq.sh
+# David Rowe August 2021
+#
+# Script to support:
+# 1. Subset VQ training and listening
+# 1. Training Trellis Vector Quantiser for Codec 2 newamp1, supports octave/trellis.m
+# 2. VQ sorting/optimisation experiments, octave/vq_compare.m
+
+TRAIN=~/Downloads/all_speech_8k.sw
+CODEC2_PATH=$HOME/codec2
+PATH=$PATH:$CODEC2_PATH/build_linux/src:$CODEC2_PATH/build_linux/misc
+K=20
+Kst=2
+Ken=16
+
+# train a new VQ and generate quantised training material
+function train() {
+  fullfile=$TRAIN
+  filename=$(basename -- "$fullfile")
+  extension="${filename##*.}"
+  filename="${filename%.*}"
+  
+  c2sim $fullfile --rateK --rateKout ${filename}.f32
+  echo "ratek=load_f32('../build_linux/${filename}.f32',20); vq_700c_eq; ratek_lim=limit_vec(ratek, 0, 40); save_f32('../build_linux/${filename}_lim.f32', ratek_lim); quit" | \
+  octave -p ${CODEC2_PATH}/octave -qf
+  vqtrain ${filename}_lim.f32 $K 4096 vq_stage1.f32 -s 1e-3 --st $Kst --en $Ken
+
+  # VQ the training file
+  cat ${filename}_lim.f32 | vq_mbest --st $Kst --en $Ken -k $K -q vq_stage1.f32 > ${filename}_test.f32
+}
+
+function listen() {
+  vq_fn=$1
+  dec=$2
+  EbNodB=$3
+  fullfile=$4
+  filename=$(basename -- "$fullfile")
+  extension="${filename##*.}"
+  filename="${filename%.*}"
+  
+  fullfile_out=$5
+  sox_options='-t raw -e signed-integer -b 16' 
+  sox $fullfile $sox_options - | c2sim - --rateK --rateKout ${filename}.f32
+  
+  echo "ratek=load_f32('../build_linux/${filename}.f32',20); vq_700c_eq; ratek_lim=limit_vec(ratek, 0, 40); save_f32('../build_linux/${filename}_lim.f32', ratek_lim); quit" | \
+  octave -p ${CODEC2_PATH}/octave -qf
+
+  echo "pkg load statistics; vq_compare(action='vq_file', '${vq_fn}', ${dec}, ${EbNodB}, '${filename}_lim.f32', '${filename}_test.f32'); quit" \ |
+  octave -p ${CODEC2_PATH}/octave -qf
+      
+  if [ "$fullfile_out" = "aplay" ]; then
+     sox $fullfile $sox_options - | c2sim - --rateK --rateKin ${filename}_test.f32 -o - | aplay -f S16_LE
+  else
+     sox $fullfile $sox_options - | c2sim - --rateK --rateKin ${filename}_test.f32 -o - | sox -t .s16 -r 8000 -c 1 - ${fullfile_out}
+  fi
+     
+}
+
+function print_help {
+    echo
+    echo "Trellis/VQ optimisation support script"
+    echo
+    echo "  usage ./train_trellis.sh [-x] [-t] [-v vq.f32 in.wav out.wav] [-e EbNodB] [-d dec]"
+    echo
+    echo "    -x                         debug mode; trace script execution"
+    echo "    -t                         train VQ and generate a fully quantised version of training vectors"
+    echo "    -v  in.wav out.wav vq.f32  synthesise an output file out.wav from in.raw, using the VQ vq.f32"
+    echo "    -v  in.wav aplay vq.f32    synthesise output, play immediately using aplay, using the VQ vq.f32"
+    echo "    -e  EbNodB                 Eb/No in dB for AWGn channel simulation (error insertion)"
+    echo "    -d  dec                    decimation/interpolation rate"
+    echo
+    exit
+}
+
+# command line arguments to select function
+
+if [ $# -lt 1 ]; then
+    print_help
+fi
+
+do_train=0
+do_vq=0
+EbNodB=100
+dec=1
+POSITIONAL=()
+while [[ $# -gt 0 ]]
+do
+key="$1"
+case $key in
+    -x)
+        set -x	
+        shift
+    ;;
+    -t)
+        do_train=1
+	shift
+    ;;
+    -v)
+        do_vq=1
+	vq_fn="$2"
+	in_wav="$3"
+	out_wav="$4"
+	shift
+	shift
+	shift
+	shift
+    ;;
+    -d)
+	dec="$2"
+	shift
+	shift	
+	;;
+    -e)
+	EbNodB="$2"
+	shift
+	shift	
+	;;
+    -h)
+        print_help	
+    ;;
+    *)
+    POSITIONAL+=("$1") # save it in an array for later
+    shift
+    ;;
+esac
+done
+set -- "${POSITIONAL[@]}" # restore positional parameters
+
+if [ $do_train -eq 1 ]; then
+    train
+fi
+
+if [ $do_vq -eq 1 ]; then
+  listen ${vq_fn} ${dec} ${EbNodB} ${in_wav} ${out_wav}
+fi

From d9e0e1727502e226ce95675a48d4da3cbdd6bcf5 Mon Sep 17 00:00:00 2001
From: drowe67 <david@rowetel.com>
Date: Sat, 11 Sep 2021 18:29:39 +0930
Subject: [PATCH 14/20] try binary switching VQ

---
 octave/trellis.m | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/octave/trellis.m b/octave/trellis.m
index a11c0bd56..c29d1a125 100644
--- a/octave/trellis.m
+++ b/octave/trellis.m
@@ -399,7 +399,7 @@
 % top level function to set up and run a test
 function results = test_trellis(nframes=100, dec=1, ntxcw=8, nstages=3, EbNodB=3, verbose=0)
   K = 20; K_st=2+1; K_en=16+1;
-  vq_fn = "../build_linux/vq_stage1.f32";
+  vq_fn = "../build_linux/vq_stage1_bs004.f32";
   vq_output_fn = "../build_linux/all_speech_8k_test.f32";
   target_fn = "../build_linux/all_speech_8k_lim.f32";
   
@@ -511,11 +511,11 @@ function test_vq(vq_fn)
 endfunction
 
 % generate sets of curves
-function run_curves(frames=100, dec=1)
+function run_curves(frames=100, dec=1, nstages=5)
   results_log = [];
-  EbNodB = [1 2 3 4];
+  EbNodB = [0 1 2 3 4 5];
   for i=1:length(EbNodB)
-    results = test_trellis(frames, dec, ntxcw=8, nstages=3, EbNodB(i), verbose=0);
+    results = test_trellis(frames, dec, ntxcw=8, nstages, EbNodB(i), verbose=0);
     results_log = [results_log results];
   end
   for i=1:length(results_log)
@@ -530,24 +530,23 @@ function run_curves(frames=100, dec=1)
 
   figure(1); clf; semilogy(EbNodB, ber_vanilla, "r+-;uncoded;"); hold on;
   semilogy(EbNodB, ber, "g+-;trellis;"); hold off;
-  grid; title(sprintf("BER dec=%d nstages=%d",dec,nstages));
+  grid('minor'); title(sprintf("BER dec=%d nstages=%d",dec,nstages));
   print("-dpng", sprintf("trellis_dec_%d_ber.png",dec));
   
   figure(2); clf; semilogy(EbNodB, per_vanilla, "r+-;uncoded;"); hold on;
   semilogy(EbNodB, per, "g+-;trellis;");
-  grid; title(sprintf("PER dec=%d nstages=%d",dec,nstages));
+  grid('minor'); title(sprintf("PER dec=%d nstages=%d",dec,nstages));
   print("-dpng", sprintf("trellis_dec_%d_per.png",dec));
 
   figure(3); clf; plot(EbNodB, mse_noerrors, "b+-;no errors;"); hold on;
   plot(EbNodB, mse_vanilla, "r+-;uncoded;");
   plot(EbNodB, mse, "g+-;trellis;"); hold off;
-  grid; title(sprintf("RMS SD dec=%d nstages=%d",dec,nstages));
+  grid('minor'); title(sprintf("RMS SD dec=%d nstages=%d",dec,nstages));
   print("-dpng", sprintf("trellis_dec_%d_rms_sd.png",dec));
 endfunction
 
 % -------------------------------------------------------------------
 
-graphics_toolkit ("gnuplot");
 more off;
 randn('state',1);
 
@@ -557,9 +556,10 @@ function run_curves(frames=100, dec=1)
 %test_trellis(nframes=600, dec=1, ntxcw=8, nstages=3, EbNodB=3, verbose=0);
 %test_trellis(nframes=600, dec=4, ntxcw=8, nstages=3, EbNodB=3, verbose=0);
 
-run_curves(600,1)
-run_curves(600,2)
-run_curves(600,4)
+%run_curves(600,1)
+%run_curves(600,2)
+%run_curves(600,4)
+run_curves(600,3,5)
 
 %test_trellis(nframes=200, dec=1, ntxcw=1, nstages=3, EbNodB=3, verbose=0);
 %test_trellis(nframes=100, dec=2, ntxcw=8, nstages=3, EbNodB=3, verbose=0);

From 3932fd8f15dc9113f27b14b931b3473eec198796 Mon Sep 17 00:00:00 2001
From: drowe67 <david@rowetel.com>
Date: Mon, 13 Sep 2021 07:25:03 +0930
Subject: [PATCH 15/20] output of trellis sim for convenience

---
 octave/trellis_dec3_nstage3.txt | 15 +++++++++++++++
 1 file changed, 15 insertions(+)
 create mode 100644 octave/trellis_dec3_nstage3.txt

diff --git a/octave/trellis_dec3_nstage3.txt b/octave/trellis_dec3_nstage3.txt
new file mode 100644
index 000000000..b4d8e98e5
--- /dev/null
+++ b/octave/trellis_dec3_nstage3.txt
@@ -0,0 +1,15 @@
+# Created by Octave 5.2.0, Sun Sep 12 12:26:33 2021 ACST <david@boomer>
+# name: EbNodB
+# type: matrix
+# rows: 1
+# columns: 6
+ 0 1 2 3 4 5
+
+
+# name: rms_sd
+# type: matrix
+# rows: 1
+# columns: 6
+ 7.7412523956816406 6.4841339000579836 5.8017966072531637 4.9470616511274006 4.3575512816148612 3.5924646693565796
+
+

From 529cadae3bc29a45b9041329c91a106e86fb2a64 Mon Sep 17 00:00:00 2001
From: drowe67 <david@rowetel.com>
Date: Mon, 13 Sep 2021 07:28:39 +0930
Subject: [PATCH 16/20] unused variable

---
 octave/ldpcut.m | 1 -
 1 file changed, 1 deletion(-)

diff --git a/octave/ldpcut.m b/octave/ldpcut.m
index b05c803f8..e83642f51 100644
--- a/octave/ldpcut.m
+++ b/octave/ldpcut.m
@@ -71,7 +71,6 @@
 
     tx_bits = [];
     tx_symbols = [];
-    rx_symbols = [];
 
     % Encode a bunch of frames
 

From be2cc03ad787524e738960920f3e60380f46b78f Mon Sep 17 00:00:00 2001
From: drowe67 <david@rowetel.com>
Date: Mon, 13 Sep 2021 07:29:19 +0930
Subject: [PATCH 17/20] export test results for use on vq_compare curves

---
 octave/trellis.m | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/octave/trellis.m b/octave/trellis.m
index c29d1a125..02b8b3efa 100644
--- a/octave/trellis.m
+++ b/octave/trellis.m
@@ -511,7 +511,7 @@ function test_vq(vq_fn)
 endfunction
 
 % generate sets of curves
-function run_curves(frames=100, dec=1, nstages=5)
+function [EbNodB rms_sd] = run_curves(frames=100, dec=1, nstages=5)
   results_log = [];
   EbNodB = [0 1 2 3 4 5];
   for i=1:length(EbNodB)
@@ -523,9 +523,9 @@ function run_curves(frames=100, dec=1, nstages=5)
     ber_vanilla(i) = results_log(i).ber_vanilla;
     per(i) = results_log(i).per;
     per_vanilla(i) = results_log(i).per_vanilla;
-    mse_noerrors(i) = sqrt(results_log(i).mse_noerrors);
-    mse(i) = sqrt(results_log(i).mse);
-    mse_vanilla(i) = sqrt(results_log(i).mse_vanilla);
+    rms_sd_noerrors(i) = sqrt(results_log(i).mse_noerrors);
+    rms_sd(i) = sqrt(results_log(i).mse);
+    rms_sd_vanilla(i) = sqrt(results_log(i).mse_vanilla);
   end
 
   figure(1); clf; semilogy(EbNodB, ber_vanilla, "r+-;uncoded;"); hold on;
@@ -538,9 +538,9 @@ function run_curves(frames=100, dec=1, nstages=5)
   grid('minor'); title(sprintf("PER dec=%d nstages=%d",dec,nstages));
   print("-dpng", sprintf("trellis_dec_%d_per.png",dec));
 
-  figure(3); clf; plot(EbNodB, mse_noerrors, "b+-;no errors;"); hold on;
-  plot(EbNodB, mse_vanilla, "r+-;uncoded;");
-  plot(EbNodB, mse, "g+-;trellis;"); hold off;
+  figure(3); clf; plot(EbNodB, rms_sd_noerrors, "b+-;no errors;"); hold on;
+  plot(EbNodB, rms_sd_vanilla, "r+-;uncoded;");
+  plot(EbNodB, rms_sd, "g+-;trellis;"); hold off;
   grid('minor'); title(sprintf("RMS SD dec=%d nstages=%d",dec,nstages));
   print("-dpng", sprintf("trellis_dec_%d_rms_sd.png",dec));
 endfunction
@@ -559,7 +559,7 @@ function run_curves(frames=100, dec=1, nstages=5)
 %run_curves(600,1)
 %run_curves(600,2)
 %run_curves(600,4)
-run_curves(600,3,5)
+[EbNodB rms_sd] = run_curves(30*100,3,3)
 
 %test_trellis(nframes=200, dec=1, ntxcw=1, nstages=3, EbNodB=3, verbose=0);
 %test_trellis(nframes=100, dec=2, ntxcw=8, nstages=3, EbNodB=3, verbose=0);

From d148c0ce7b3194d65a667695f29d0a95fd97d3d7 Mon Sep 17 00:00:00 2001
From: drowe67 <david@rowetel.com>
Date: Mon, 13 Sep 2021 08:33:37 +0930
Subject: [PATCH 18/20] added LDPC curve for comparison, and imported trellis
 results

---
 script/train_trellis.sh | 135 ----------------------------------------
 1 file changed, 135 deletions(-)
 delete mode 100755 script/train_trellis.sh

diff --git a/script/train_trellis.sh b/script/train_trellis.sh
deleted file mode 100755
index 26cf098a1..000000000
--- a/script/train_trellis.sh
+++ /dev/null
@@ -1,135 +0,0 @@
-#!/bin/bash
-# train_trellis.sh
-# David Rowe August 2021
-#
-# Script to support:
-# 1. Training Trellis Vector Quantiser for Codec 2 newamp1, supports octave/trellis.m
-# 2. VQ sorting/optimisation experiments, octave/vq_compare.m
-
-TRAIN=~/Downloads/all_speech_8k.sw
-CODEC2_PATH=$HOME/codec2
-PATH=$PATH:$CODEC2_PATH/build_linux/src:$CODEC2_PATH/build_linux/misc
-K=20
-Kst=2
-Ken=16
-
-# train a new VQ and generate quantised training material
-function train() {
-  fullfile=$TRAIN
-  filename=$(basename -- "$fullfile")
-  extension="${filename##*.}"
-  filename="${filename%.*}"
-  
-  c2sim $fullfile --rateK --rateKout ${filename}.f32
-  echo "ratek=load_f32('../build_linux/${filename}.f32',20); vq_700c_eq; ratek_lim=limit_vec(ratek, 0, 40); save_f32('../build_linux/${filename}_lim.f32', ratek_lim); quit" | \
-  octave -p ${CODEC2_PATH}/octave -qf
-  vqtrain ${filename}_lim.f32 $K 4096 vq_stage1.f32 -s 1e-3 --st $Kst --en $Ken
-
-  # VQ the training file
-  cat ${filename}_lim.f32 | vq_mbest --st $Kst --en $Ken -k $K -q vq_stage1.f32 > ${filename}_test.f32
-}
-
-function listen() {
-  vq_fn=$1
-  dec=$2
-  EbNodB=$3
-  fullfile=$4
-  filename=$(basename -- "$fullfile")
-  extension="${filename##*.}"
-  filename="${filename%.*}"
-  
-  fullfile_out=$5
-  sox_options='-t raw -e signed-integer -b 16' 
-  sox $fullfile $sox_options - | c2sim - --rateK --rateKout ${filename}.f32
-  
-  echo "ratek=load_f32('../build_linux/${filename}.f32',20); vq_700c_eq; ratek_lim=limit_vec(ratek, 0, 40); save_f32('../build_linux/${filename}_lim.f32', ratek_lim); quit" | \
-  octave -p ${CODEC2_PATH}/octave -qf
-
-  echo "pkg load statistics; vq_compare(action='vq_file', '${vq_fn}', ${dec}, ${EbNodB}, '${filename}_lim.f32', '${filename}_test.f32'); quit" \ |
-  octave -p ${CODEC2_PATH}/octave -qf
-      
-  if [ "$fullfile_out" = "aplay" ]; then
-     sox $fullfile $sox_options - | c2sim - --rateK --rateKin ${filename}_test.f32 -o - | aplay -f S16_LE
-  else
-     sox $fullfile $sox_options - | c2sim - --rateK --rateKin ${filename}_test.f32 -o - | sox -t .s16 -r 8000 -c 1 - ${fullfile_out}
-  fi
-     
-}
-
-function print_help {
-    echo
-    echo "Trellis/VQ optimisation support script"
-    echo
-    echo "  usage ./train_trellis.sh [-x] [-t] [-v vq.f32 in.wav out.wav] [-e EbNodB] [-d dec]"
-    echo
-    echo "    -x                         debug mode; trace script execution"
-    echo "    -t                         train VQ and generate a fully quantised version of training vectors"
-    echo "    -v  in.wav out.wav vq.f32  synthesise an output file out.wav from in.raw, using the VQ vq.f32"
-    echo "    -v  in.wav aplay vq.f32    synthesise output, play immediately using aplay, using the VQ vq.f32"
-    echo "    -e  EbNodB                 Eb/No in dB for AWGn channel simulation (error insertion)"
-    echo "    -d  dec                    decimation/interpolation rate"
-    echo
-    exit
-}
-
-# command line arguments to select function
-
-if [ $# -lt 1 ]; then
-    print_help
-fi
-
-do_train=0
-do_vq=0
-EbNodB=100
-dec=1
-POSITIONAL=()
-while [[ $# -gt 0 ]]
-do
-key="$1"
-case $key in
-    -x)
-        set -x	
-        shift
-    ;;
-    -t)
-        do_train=1
-	shift
-    ;;
-    -v)
-        do_vq=1
-	vq_fn="$2"
-	in_wav="$3"
-	out_wav="$4"
-	shift
-	shift
-	shift
-	shift
-    ;;
-    -d)
-	dec="$2"
-	shift
-	shift	
-	;;
-    -e)
-	EbNodB="$2"
-	shift
-	shift	
-	;;
-    -h)
-        print_help	
-    ;;
-    *)
-    POSITIONAL+=("$1") # save it in an array for later
-    shift
-    ;;
-esac
-done
-set -- "${POSITIONAL[@]}" # restore positional parameters
-
-if [ $do_train -eq 1 ]; then
-    train
-fi
-
-if [ $do_vq -eq 1 ]; then
-  listen ${vq_fn} ${dec} ${EbNodB} ${in_wav} ${out_wav}
-fi

From 2acee25ec12f5d2596fc1022982a4c98b1bf76e0 Mon Sep 17 00:00:00 2001
From: drowe67 <david@rowetel.com>
Date: Mon, 13 Sep 2021 18:20:28 +0930
Subject: [PATCH 19/20] added LDPC curve for comparison, and imported trellis
 results

---
 octave/vq_compare.m | 152 +++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 135 insertions(+), 17 deletions(-)

diff --git a/octave/vq_compare.m b/octave/vq_compare.m
index f08284730..5527b3c2d 100644
--- a/octave/vq_compare.m
+++ b/octave/vq_compare.m
@@ -27,7 +27,8 @@
 function vq_compare(action="run_curves", vq_fn, dec=1, EbNodB=3, in_fn, out_fn)
   more off;
   randn('state',1);
-
+  graphics_toolkit("gnuplot");
+  
   if strcmp(action, "run_curves")
     run_curves(30*100);
   end
@@ -123,7 +124,7 @@ function vq_compare(action="run_curves", vq_fn, dec=1, EbNodB=3, in_fn, out_fn)
   target_ = vq(rx_indexes+1,:);
   diff = target - target_;
   mse = mean(diff(:).^2);
-  printf("Eb/No: %3.2f dB nframes: %2d nerrors: %3d BER: %4.3f PER: %3.2f mse: %3.2f %3.2f\n", 
+  printf("Eb/No: %3.2f dB nframes: %3d nerrors: %4d BER: %4.3f PER: %3.2f mse: %3.2f %3.2f\n", 
          EbNodB, nframes, nerrors, nerrors/tbits, nper/nframes, mse_noerrors, mse);
   results.ber = nerrors/tbits;	 
   results.per = nper/nframes;	 
@@ -133,10 +134,113 @@ function vq_compare(action="run_curves", vq_fn, dec=1, EbNodB=3, in_fn, out_fn)
   results.rx_indexes = rx_indexes;
 endfunction
 
+% VQ a target sequence of frames then run a test using a LDPC code
+function results = run_test_ldpc(target, vq, EbNo, verbose)
+  [frames tmp]      = size(target);
+  [vq_length tmp]   = size(vq);
+  nbits             = log2(vq_length);
+  nerrors           = 0;
+  tbits             = 0;
+  nframes           = 0;
+  nper              = 0;
+  
+  % init LDPC code
+  mod_order = 4; bps = 2;
+  modulation = 'QPSK';
+  mapping = 'gray';
+  max_iterations = 100; demod_type = 0; decoder_type = 0;
+  ldpc; init_cml('~/cml/');
+  tempStruct = load("HRA_56_56.txt");
+  b = fieldnames(tempStruct);
+  ldpcArrayName = b{1,1};
+  % extract the array from the struct
+  HRA = tempStruct.(ldpcArrayName);
+  [code_param framesize rate] = ldpc_init_user(HRA, modulation, mod_order, mapping);
+
+  % set up noise
+  EbNodB = 10*log10(EbNo);
+  EsNodB = EbNodB + 10*log10(rate) + 10*log10(bps);
+  EsNo = 10^(EsNodB/10);
+  variance = 1/EsNo;
+
+  % Vector Quantise target vectors sequence
+  [tx_indexes target_ ] = vector_quantiser_fast(vq, target, verbose);
+  % use convention of indexes starting from 0
+  tx_indexes -= 1; 
+  %  mean SD of VQ with no errors
+  diff = target - target_;
+  mse_noerrors = mean(diff(:).^2);
+
+  % construct tx frames x nbit matrix using VQ indexes
+  tx_bits = zeros(frames, nbits);
+  for f=1:frames
+    tx_bits(f,:) = dec2sd(tx_indexes(f), nbits) > 0;
+  end
+
+  % find a superframe size, that has an integer number of nbits and data_bits_per_frame frames
+  bits_per_superframe = nbits;
+  while mod(bits_per_superframe,nbits) || mod(bits_per_superframe,code_param.data_bits_per_frame)
+    bits_per_superframe += nbits;
+  end
+
+  Nsuperframes = floor(frames*nbits/bits_per_superframe);
+  Nldpc_codewords = Nsuperframes*bits_per_superframe/code_param.data_bits_per_frame;
+  frames = Nsuperframes*bits_per_superframe/nbits;
+  %printf("bits_per_superframe: %d Nldpc_codewords: %d frames: %d\n", bits_per_superframe, Nldpc_codewords, frames);
+  
+  % reshape tx_bits matrix into Nldpc_codewords x data_bits_per_frame
+  tx_bits = tx_bits(1:frames,:);
+  tx_bits_ldpc = reshape(tx_bits',code_param.data_bits_per_frame, Nldpc_codewords)';
+
+  % modulate tx symbols
+  tx_symbols = [];
+  for nn=1:Nldpc_codewords
+    [tx_codeword atx_symbols] = ldpc_enc(tx_bits_ldpc(nn,:), code_param);
+    tx_symbols = [tx_symbols atx_symbols];
+  end
+  
+  noise = sqrt(variance*0.5)*(randn(1,length(tx_symbols)) + j*randn(1,length(tx_symbols)));
+  rx_symbols = tx_symbols+noise;
+  
+  % LDPC decode  
+  for nn = 1:Nldpc_codewords
+    st = (nn-1)*code_param.coded_syms_per_frame + 1;
+    en = (nn)*code_param.coded_syms_per_frame;
+
+    arx_codeword = ldpc_dec(code_param, max_iterations, demod_type, decoder_type, rx_symbols(st:en), EsNo, ones(1,code_param.coded_syms_per_frame));
+    rx_bits_ldpc(nn,:) = arx_codeword(1:code_param.data_bits_per_frame);
+  end
+  
+  % reshape rx_bits_ldpc matrix into frames x nbits
+  rx_bits = reshape(rx_bits_ldpc',nbits,frames)';
+
+  rx_indexes = tx_indexes;
+  for f=1:frames
+    rx_indexes(f)  = sum(rx_bits(f,:) .* 2.^(nbits-1:-1:0));
+    errors         = sum(xor(tx_bits(f,:), rx_bits(f,:)));
+    nerrors       += errors;
+    if errors nper++;, end
+    tbits += nbits;
+    nframes++;
+  end
+
+  EbNodB = 10*log10(EbNo);
+  target_ = vq(rx_indexes+1,:);
+  diff = target - target_;
+  mse = mean(diff(:).^2);
+  printf("Eb/No: %3.2f dB nframes: %4d nerrors: %4d BER: %4.3f PER: %3.2f mse: %3.2f %3.2f\n", 
+         EbNodB, nframes, nerrors, nerrors/tbits, nper/nframes, mse_noerrors, mse);
+  results.ber = nerrors/tbits;	 
+  results.per = nper/nframes;	 
+  results.mse = mse;
+  results.tx_indexes = tx_indexes;
+  results.rx_indexes = rx_indexes;
+endfunction
+
 % Simulations ---------------------------------------------------------------------
 
 % top level function to set up and run a test with a specific vq
-function [results target_] = run_test_vq(vq_fn, target_fn, nframes=100, dec=1, EbNodB=3, verbose=0)
+function [results target_] = run_test_vq(vq_fn, target_fn, nframes=100, dec=1, EbNodB=3, ldpc_en=0, verbose=0)
   K = 20; K_st=2+1; K_en=16+1;
   
   % load VQ
@@ -157,7 +261,11 @@ function vq_compare(action="run_curves", vq_fn, dec=1, EbNodB=3, in_fn, out_fn)
   
   % run a test
   EbNo=10^(EbNodB/10);
-  results = run_test(target, vqsub, EbNo, verbose);
+  if ldpc_en
+    results = run_test_ldpc(target, vqsub, EbNo, verbose);
+  else
+    results = run_test(target, vqsub, EbNo, verbose);
+  end
   if verbose
     for f=2:nframes-1
       printf("f: %03d tx_index: %04d rx_index: %04d\n", f,  results.tx_indexes(f), results.rx_indexes(f));
@@ -182,20 +290,27 @@ function vq_compare(action="run_curves", vq_fn, dec=1, EbNodB=3, in_fn, out_fn)
 % generate sets of curves
 function run_curves(frames=100, dec=1)
   target_fn = "../build_linux/all_speech_8k_lim.f32";
-  results1_log = [];
   EbNodB = 0:5;
+
+  results1_ldpc_log = [];
   for i=1:length(EbNodB)
-    results = run_test_vq("../build_linux/vq_stage1.f32", target_fn, frames, dec, EbNodB(i), verbose=0);
-    results1_log = [results1_log results];
+    results = run_test_vq("../build_linux/vq_stage1.f32", target_fn, frames, dec, EbNodB(i), ldpc_en=1, verbose=0);
+    results1_ldpc_log = [results1_ldpc_log results];
   end
-  results2_log = [];
+  results4_ldpc_log = [];
   for i=1:length(EbNodB)
-    results = run_test_vq("../build_linux/vq_stage1_bs001.f32", target_fn, frames, dec, EbNodB(i), verbose=0);
-    results2_log = [results2_log results];
+    results = run_test_vq("../build_linux/vq_stage1_bs004.f32", target_fn, frames, dec, EbNodB(i), ldpc_en=1, verbose=0);
+    results4_ldpc_log = [results4_ldpc_log results];
+  end
+
+  results1_log = [];
+  for i=1:length(EbNodB)
+    results = run_test_vq("../build_linux/vq_stage1.f32", target_fn, frames, dec, EbNodB(i), ldpc_en=0, verbose=0);
+    results1_log = [results1_log results];
   end
   results4_log = [];
   for i=1:length(EbNodB)
-    results = run_test_vq("../build_linux/vq_stage1_bs004.f32", target_fn, frames, dec, EbNodB(i), verbose=0);
+    results = run_test_vq("../build_linux/vq_stage1_bs004.f32", target_fn, frames, dec, EbNodB(i), ldpc_en=0, verbose=0);
     results4_log = [results4_log results];
   end
   for i=1:length(results1_log)
@@ -203,22 +318,25 @@ function run_curves(frames=100, dec=1)
     per(i) = results1_log(i).per;
     mse_noerrors(i) = sqrt(results1_log(i).mse_noerrors);
     mse_vq1(i) = sqrt(results1_log(i).mse);
-    mse_vq2(i) = sqrt(results2_log(i).mse);
     mse_vq4(i) = sqrt(results4_log(i).mse);
+    mse_vq1_ldpc(i) = sqrt(results1_ldpc_log(i).mse);
+    mse_vq4_ldpc(i) = sqrt(results4_ldpc_log(i).mse);
   end
 
   figure(1); clf;
   semilogy(EbNodB, ber, 'g+-;ber;','linewidth', 2); hold on;
   semilogy(EbNodB, per, 'b+-;per;','linewidth', 2);
   grid('minor'); xlabel('Eb/No(dB)');
-
-hold off;
+  hold off;
   
   figure(2); clf;
   plot(EbNodB, mse_noerrors, "b+-;no errors;"); hold on;
-  plot(EbNodB, mse_vq1, "g+-;vanilla;");
-  plot(EbNodB, mse_vq2, "r+-;binary switch;");
-  plot(EbNodB, mse_vq4, "b+-;binary switch with prob;");
+  plot(EbNodB, mse_vq1, "g+-;vanilla AWGN;");
+  plot(EbNodB, mse_vq4, "b+-;binary switch;");
+  plot(EbNodB, mse_vq1_ldpc, "r+-;ldpc (112,56);");
+  plot(EbNodB, mse_vq4_ldpc, "k+-;binary switch ldpc (112,56);");
+  load trellis_dec3_nstage3.txt
+  plot(EbNodB, rms_sd, "c+-;binary switch trellis dec3;");  
   hold off; grid; title("RMS SD (dB)"); xlabel('Eb/No(dB)');
 endfunction
 

From 838439873a0843657aff3e06083a33b8520a49b7 Mon Sep 17 00:00:00 2001
From: drowe67 <david@rowetel.com>
Date: Mon, 13 Sep 2021 18:21:10 +0930
Subject: [PATCH 20/20] support for listening to trellis decoded speech

---
 octave/trellis.m   | 49 ++++++++++++++++++++++++++++++++++------------
 script/subsetvq.sh | 26 +++++++++++++++++-------
 2 files changed, 56 insertions(+), 19 deletions(-)

diff --git a/octave/trellis.m b/octave/trellis.m
index 02b8b3efa..501e728eb 100644
--- a/octave/trellis.m
+++ b/octave/trellis.m
@@ -397,16 +397,15 @@
 % Simulations ---------------------------------------------------------------------
 
 % top level function to set up and run a test
-function results = test_trellis(nframes=100, dec=1, ntxcw=8, nstages=3, EbNodB=3, verbose=0)
+function [results target_] = test_trellis(target_fn, nframes=100, dec=1, ntxcw=8, nstages=3, EbNodB=3, verbose=0)
   K = 20; K_st=2+1; K_en=16+1;
   vq_fn = "../build_linux/vq_stage1_bs004.f32";
   vq_output_fn = "../build_linux/all_speech_8k_test.f32";
-  target_fn = "../build_linux/all_speech_8k_lim.f32";
   
   % load VQ
   vq = load_f32(vq_fn, K);
   [vq_size tmp] = size(vq);
-  vq = vq(:,K_st:K_en);
+  vqsub = vq(:,K_st:K_en);
   
   % load file of VQ-ed vectors to train up SD PDF estimator
   [sd_table h_table] = vq_hist(vq_output_fn, dec);
@@ -415,16 +414,35 @@
   target = load_f32(target_fn, K);
 
   % limit test to the first nframes vectors
-  target = target(1:dec:dec*nframes,K_st:K_en);
+  if nframes != -1
+    last = nframes;
+  else
+    last = length(target);
+  end
+  target = target(1:dec:last,K_st:K_en);
   
   % run a test
   EbNo=10^(EbNodB/10);
-  results = run_test(target, vq, sd_table, h_table, ntxcw, nstages, EbNo, verbose);
+  results = run_test(target, vqsub, sd_table, h_table, ntxcw, nstages, EbNo, verbose);
   if verbose
     for f=2:nframes-1
       printf("f: %03d tx_index: %04d rx_index: %04d\n", f,  results.tx_indexes(f), results.rx_indexes(f));
     end
-  end  
+  end
+
+  % return full band vq-ed vectors
+  target_ = zeros(last,K);
+  target_(1:dec:last,:) = vq(results.rx_indexes+1,:);
+  
+  % use linear interpolation to restore original frame rate
+  for f=1:dec:last-dec
+    prev = f; next = f + dec;
+    for g=prev+1:next-1
+      cnext = (g-prev)/dec; cprev = 1 - cnext;
+      target_(g,:) = cprev*target_(prev,:) + cnext*target_(next,:);
+      %printf("f: %d g: %d cprev: %f cnext: %f\n", f, g, cprev, cnext);
+    end
+  end
 endfunction
 
 % Plot histograms of SD at different decimations in time
@@ -514,8 +532,10 @@ function test_vq(vq_fn)
 function [EbNodB rms_sd] = run_curves(frames=100, dec=1, nstages=5)
   results_log = [];
   EbNodB = [0 1 2 3 4 5];
+  target_fn = "../build_linux/all_speech_8k_lim.f32";
+
   for i=1:length(EbNodB)
-    results = test_trellis(frames, dec, ntxcw=8, nstages, EbNodB(i), verbose=0);
+    results = test_trellis(target_fn, frames, dec, ntxcw=8, nstages, EbNodB(i), verbose=0);
     results_log = [results_log results];
   end
   for i=1:length(results_log)
@@ -545,6 +565,11 @@ function test_vq(vq_fn)
   print("-dpng", sprintf("trellis_dec_%d_rms_sd.png",dec));
 endfunction
 
+function vq_file(vq_fn, dec, EbNodB, in_fn, out_fn)
+  [results target_] = test_trellis(in_fn, nframes=-1, dec, ntxcw=8, nstages=3, EbNodB, verbose=0);
+  save_f32(out_fn, target_);
+endfunction
+
 % -------------------------------------------------------------------
 
 more off;
@@ -553,16 +578,16 @@ function test_vq(vq_fn)
 % uncomment one of the below to run a test or simulation
 
 % These two tests show where we are at:
-%test_trellis(nframes=600, dec=1, ntxcw=8, nstages=3, EbNodB=3, verbose=0);
-%test_trellis(nframes=600, dec=4, ntxcw=8, nstages=3, EbNodB=3, verbose=0);
+%test_trellis(target_fn, nframes=600, dec=1, ntxcw=8, nstages=3, EbNodB=3, verbose=0);
+%test_trellis(target_fn, nframes=600, dec=4, ntxcw=8, nstages=3, EbNodB=3, verbose=0);
 
 %run_curves(600,1)
 %run_curves(600,2)
 %run_curves(600,4)
-[EbNodB rms_sd] = run_curves(30*100,3,3)
+%[EbNodB rms_sd] = run_curves(30*100,3,3)
 
-%test_trellis(nframes=200, dec=1, ntxcw=1, nstages=3, EbNodB=3, verbose=0);
-%test_trellis(nframes=100, dec=2, ntxcw=8, nstages=3, EbNodB=3, verbose=0);
+%test_trellis(target_fn, nframes=200, dec=1, ntxcw=1, nstages=3, EbNodB=3, verbose=0);
+%test_trellis(target_fn, nframes=100, dec=2, ntxcw=8, nstages=3, EbNodB=3, verbose=0);
 %test_vq("../build_linux/vq_stage1.f32");
 %vq_hist_dec("../build_linux/all_speech_8k_test.f32");
 %test_single
diff --git a/script/subsetvq.sh b/script/subsetvq.sh
index 25663a174..562a4e821 100755
--- a/script/subsetvq.sh
+++ b/script/subsetvq.sh
@@ -30,7 +30,7 @@ function train() {
   cat ${filename}_lim.f32 | vq_mbest --st $Kst --en $Ken -k $K -q vq_stage1.f32 > ${filename}_test.f32
 }
 
-function listen() {
+function listen_vq() {
   vq_fn=$1
   dec=$2
   EbNodB=$3
@@ -40,15 +40,21 @@ function listen() {
   filename="${filename%.*}"
   
   fullfile_out=$5
+  do_trellis=$6
   sox_options='-t raw -e signed-integer -b 16' 
   sox $fullfile $sox_options - | c2sim - --rateK --rateKout ${filename}.f32
   
   echo "ratek=load_f32('../build_linux/${filename}.f32',20); vq_700c_eq; ratek_lim=limit_vec(ratek, 0, 40); save_f32('../build_linux/${filename}_lim.f32', ratek_lim); quit" | \
   octave -p ${CODEC2_PATH}/octave -qf
 
-  echo "pkg load statistics; vq_compare(action='vq_file', '${vq_fn}', ${dec}, ${EbNodB}, '${filename}_lim.f32', '${filename}_test.f32'); quit" \ |
-  octave -p ${CODEC2_PATH}/octave -qf
-      
+  if [ "$do_trellis" -eq 0 ]; then
+     echo "pkg load statistics; vq_compare(action='vq_file', '${vq_fn}', ${dec}, ${EbNodB}, '${filename}_lim.f32', '${filename}_test.f32'); quit" \ |
+     octave -p ${CODEC2_PATH}/octave -qf
+  else
+     echo "pkg load statistics; trellis; vq_file('${vq_fn}', ${dec}, ${EbNodB}, '${filename}_lim.f32', '${filename}_test.f32'); quit" \ |
+     octave -p ${CODEC2_PATH}/octave -qf
+  fi
+  
   if [ "$fullfile_out" = "aplay" ]; then
      sox $fullfile $sox_options - | c2sim - --rateK --rateKin ${filename}_test.f32 -o - | aplay -f S16_LE
   else
@@ -65,10 +71,11 @@ function print_help {
     echo
     echo "    -x                         debug mode; trace script execution"
     echo "    -t                         train VQ and generate a fully quantised version of training vectors"
-    echo "    -v  in.wav out.wav vq.f32  synthesise an output file out.wav from in.raw, using the VQ vq.f32"
-    echo "    -v  in.wav aplay vq.f32    synthesise output, play immediately using aplay, using the VQ vq.f32"
+    echo "    -v  vq.f32 in.wav out.wav  synthesise an output file out.wav from in.raw, using the VQ vq.f32"
+    echo "    -v  vq.f32 in.wav aplay    synthesise output, play immediately using aplay, using the VQ vq.f32"
     echo "    -e  EbNodB                 Eb/No in dB for AWGn channel simulation (error insertion)"
     echo "    -d  dec                    decimation/interpolation rate"
+    echo "    -r                         use trellis decoder"
     echo
     exit
 }
@@ -81,6 +88,7 @@ fi
 
 do_train=0
 do_vq=0
+do_trellis=0
 EbNodB=100
 dec=1
 POSITIONAL=()
@@ -106,6 +114,10 @@ case $key in
 	shift
 	shift
     ;;
+    -r)
+        do_trellis=1
+	shift
+    ;;
     -d)
 	dec="$2"
 	shift
@@ -132,5 +144,5 @@ if [ $do_train -eq 1 ]; then
 fi
 
 if [ $do_vq -eq 1 ]; then
-  listen ${vq_fn} ${dec} ${EbNodB} ${in_wav} ${out_wav}
+  listen_vq ${vq_fn} ${dec} ${EbNodB} ${in_wav} ${out_wav} ${do_trellis}
 fi