From e2ee009e4aab5e9eed570e43ae58863e1aa23de3 Mon Sep 17 00:00:00 2001
From: Thomas Christensen <tchr@mit.edu>
Date: Wed, 14 Aug 2019 20:29:20 +0200
Subject: [PATCH 01/16] WIP: Initial commit for calculating symmetry
 transformed overlaps between Bloch states of H

---
 mpb/Makefile.am |   2 +-
 mpb/mpb.scm.in  |   3 +-
 mpb/transform.c | 137 ++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 140 insertions(+), 2 deletions(-)
 create mode 100644 mpb/transform.c

diff --git a/mpb/Makefile.am b/mpb/Makefile.am
index 27cf00ea..b8d85c54 100644
--- a/mpb/Makefile.am
+++ b/mpb/Makefile.am
@@ -20,7 +20,7 @@ EXTRA_DIST = mpb.scm.in epsilon.c mu.c
 
 nodist_pkgdata_DATA = $(SPECIFICATION_FILE)
 
-MY_SOURCES = medium.c epsilon_file.c field-smob.c fields.c	\
+MY_SOURCES = transform.c medium.c epsilon_file.c field-smob.c fields.c	\
 material_grid.c material_grid_opt.c matrix-smob.c mpb.c field-smob.h matrix-smob.h mpb.h my-smob.h
 
 MY_LIBS = $(top_builddir)/src/matrixio/libmatrixio.a $(top_builddir)/src/libmpb@MPB_SUFFIX@.la $(NLOPT_LIB) -lctl $(GUILE_LIBS)
diff --git a/mpb/mpb.scm.in b/mpb/mpb.scm.in
index b4a951bd..db9cb4d4 100644
--- a/mpb/mpb.scm.in
+++ b/mpb/mpb.scm.in
@@ -210,7 +210,8 @@
   'number 'function)
 (define-external-function compute-energy-in-object-list false false
   'number (make-list-type 'geometric-object))
-
+(define-external-function transformed-overlap false false 
+  'cnumber 'matrix3x3 'vector3)
 (define-external-function output-field-to-file false false
   no-return-value 'integer 'string)  
 
diff --git a/mpb/transform.c b/mpb/transform.c
new file mode 100644
index 00000000..40ef33bd
--- /dev/null
+++ b/mpb/transform.c
@@ -0,0 +1,137 @@
+/* Copyright (C) 1999-2014 Massachusetts Institute of Technology.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+
+#include "config.h"
+#include <mpiglue.h>
+#include <mpi_utils.h>
+#include <check.h>
+
+#include "mpb.h"
+#include <ctl-io.h>
+#include <xyz_loop.h>
+#include <maxwell.h>
+
+
+
+/* compute and return adjoint(cv1)*cv2*/
+static cnumber cvector3_cdot(cvector3 cv1, cvector3 cv2)
+{
+    cnumber dest;
+    dest.re = (cv1.x.re*cv2.x.re + cv1.y.re*cv2.y.re + cv1.z.re*cv2.z.re +
+               cv1.x.im*cv2.x.im + cv1.y.im*cv2.y.im + cv1.z.im*cv2.z.im);
+    dest.im = (cv1.x.re*cv2.x.im + cv1.y.re*cv2.y.im + cv1.z.re*cv2.z.im -
+               cv1.x.im*cv2.x.re - cv1.y.im*cv2.y.re - cv1.z.im*cv2.z.re);
+    return dest;
+}
+
+
+/* Right now, this assumes that the user loads the hfield via (get-hfield band) beforehand. 
+   Probably, this ought to be implemented instead by just computing this internally, here,
+   using maxwell_compute_h_from_H; this would also be more natural for eventually supporting
+   magnetic materials. Similarly, it would generalize to D and E fields. */
+cnumber transformed_overlap(matrix3x3 W, vector3 w)
+{
+    #ifdef HAVE_MPI
+        int local_n2, local_y_start;
+    #endif
+    int n1, n2, n3;
+    real s1, s2, s3, c1, c2, c3;
+    cnumber integral = {0,0};
+    matrix3x3 invW;
+    number detW;
+
+    if (!curfield || !strchr("hb", curfield_type)) {
+        mpi_one_fprintf(stderr, "The real-space H (or B) field must be loaded first.\n");
+        return integral;
+    }
+    #ifdef HAVE_MPI
+        CHECK(0, "transformed_overlap(..) not yet implemented for MPI!");
+    #endif
+
+    /* should also check and error if SCALAR_COMPLEX not defined, or if mu_inv != NULL */
+
+    n1 = mdata->nx; n2 = mdata->ny; n3 = mdata->nz;
+
+    s1 = geometry_lattice.size.x / n1;
+    s2 = geometry_lattice.size.y / n2;
+    s3 = geometry_lattice.size.z / n3;
+    c1 = n1 <= 1 ? 0 : geometry_lattice.size.x * 0.5;
+    c2 = n2 <= 1 ? 0 : geometry_lattice.size.y * 0.5;
+    c3 = n3 <= 1 ? 0 : geometry_lattice.size.z * 0.5;
+
+    invW = matrix3x3_inverse(W);
+    detW = matrix3x3_determinant(W);
+
+    LOOP_XYZ(mdata) {
+        vector3 p, pt;
+        cvector3 F, Ft;
+        cnumber integrand;
+
+        p.x = i1 * s1 - c1; 
+        p.y = i2 * s2 - c2; 
+        p.z = i3 * s3 - c3;
+
+        /* Field value at current point p */
+        F.x = cscalar2cnumber(curfield[3*xyz_index+0]); /* since F.x is a cnumber and curfield is a */
+        F.y = cscalar2cnumber(curfield[3*xyz_index+1]); /* scalar_complex; we have to convert here  */
+        F.z = cscalar2cnumber(curfield[3*xyz_index+2]); 
+
+        /* First, obtain new transformed coordinate pt = invW*p - w */
+        pt = vector3_minus(matrix3x3_vector3_mult(invW, p), w);
+
+        /* Next, obtain field value at transformed coordinate pt; interpolation is 
+           needed to ensure generality in the case of fractional translations. 
+           Unfortunately, that is _NOT_ compatible with MPI, since get_val is not
+           implemented for MPI */
+        Ft = get_bloch_field_point(pt); 
+
+        /* Transform the components of Ft by W; this is a bit tedious to do, since 
+           there are no matrix3x3*cvector3 routines, nor any CASSIGN_CVECTOR_RE.
+           Instead, we do manually what is done in matrix3x3_vector3_mult, twice  */
+        Ft.x.re = W.c0.x * Ft.x.re + W.c1.x * Ft.y.re + W.c2.x * Ft.z.re;
+        Ft.y.re = W.c0.y * Ft.x.re + W.c1.y * Ft.y.re + W.c2.y * Ft.z.re;
+        Ft.z.re = W.c0.z * Ft.x.re + W.c1.z * Ft.y.re + W.c2.z * Ft.z.re;
+        Ft.x.im = W.c0.x * Ft.x.im + W.c1.x * Ft.y.im + W.c2.x * Ft.z.im;
+        Ft.y.im = W.c0.y * Ft.x.im + W.c1.y * Ft.y.im + W.c2.y * Ft.z.im;
+        Ft.z.im = W.c0.z * Ft.x.im + W.c1.z * Ft.y.im + W.c2.z * Ft.z.im;
+
+        integrand = cvector3_cdot(F, Ft);
+        integral.re += integrand.re;
+        integral.im += integrand.im;
+    }}}
+
+    integral.re *= Vol / H.N;
+    integral.im *= Vol / H.N;
+
+    { /* C89 requires new declarations to exist only in blocks; hence the {...} here */
+    /* this isn't really needed, since we don't support MPI anyway; still works
+       in serial though, so might as well keep it around */
+    cnumber integral_sum;
+    mpi_allreduce(&integral, &integral_sum, 2, number,
+                  MPI_DOUBLE, MPI_SUM, mpb_comm);
+    integral_sum.re *= detW;
+    integral_sum.im *= detW;
+
+    return integral_sum;
+    }
+}
+

From 3cf12c7a9ee0c401bf983f92c13ca18e637aca42 Mon Sep 17 00:00:00 2001
From: Thomas Christensen <tchr@mit.edu>
Date: Thu, 15 Aug 2019 00:27:15 +0200
Subject: [PATCH 02/16] Symmetry operations act on the full wave, not just the
 Bloch part; fix that

---
 mpb/transform.c | 78 +++++++++++++++++++++++++++++++------------------
 1 file changed, 49 insertions(+), 29 deletions(-)

diff --git a/mpb/transform.c b/mpb/transform.c
index 40ef33bd..919eabea 100644
--- a/mpb/transform.c
+++ b/mpb/transform.c
@@ -56,6 +56,7 @@ cnumber transformed_overlap(matrix3x3 W, vector3 w)
     int n1, n2, n3;
     real s1, s2, s3, c1, c2, c3;
     cnumber integral = {0,0};
+    vector3 kvector = cur_kvector;
     matrix3x3 invW;
     number detW;
 
@@ -66,9 +67,9 @@ cnumber transformed_overlap(matrix3x3 W, vector3 w)
     #ifdef HAVE_MPI
         CHECK(0, "transformed_overlap(..) not yet implemented for MPI!");
     #endif
-
     /* should also check and error if SCALAR_COMPLEX not defined, or if mu_inv != NULL */
-
+    
+    /* prepare before looping ... */
     n1 = mdata->nx; n2 = mdata->ny; n3 = mdata->nz;
 
     s1 = geometry_lattice.size.x / n1;
@@ -79,59 +80,78 @@ cnumber transformed_overlap(matrix3x3 W, vector3 w)
     c3 = n3 <= 1 ? 0 : geometry_lattice.size.z * 0.5;
 
     invW = matrix3x3_inverse(W);
-    detW = matrix3x3_determinant(W);
+    detW = matrix3x3_determinant(W); /* ought to check that |detW| = 1; otherwise, not a valid symmetry operation */
+
+    kvector.x *= TWOPI/geometry_lattice.size.x; /* hoist these rescalings outside the  */
+    kvector.y *= TWOPI/geometry_lattice.size.y; /* loop; might be that licm takes care */
+    kvector.z *= TWOPI/geometry_lattice.size.z; /* of it - but better to be sure       */
 
-    LOOP_XYZ(mdata) {
+
+    /* loop over coordinates */
+    LOOP_XYZ(mdata) { /* implies two opening braces '{{' */
         vector3 p, pt;
         cvector3 F, Ft;
         cnumber integrand;
+        double deltaphi;
+        scalar_complex phase;
 
-        p.x = i1 * s1 - c1; 
-        p.y = i2 * s2 - c2; 
+        /* Current lattice position in loop */
+        p.x = i1 * s1 - c1;
+        p.y = i2 * s2 - c2;
         p.z = i3 * s3 - c3;
 
-        /* Field value at current point p */
+        /* Bloch field value at current point p (exludes exp(ikr) factor) */
         F.x = cscalar2cnumber(curfield[3*xyz_index+0]); /* since F.x is a cnumber and curfield is a */
         F.y = cscalar2cnumber(curfield[3*xyz_index+1]); /* scalar_complex; we have to convert here  */
         F.z = cscalar2cnumber(curfield[3*xyz_index+2]); 
 
-        /* First, obtain new transformed coordinate pt = invW*p - w */
+        /* Transformed coordinate pt = invW*p - w */
         pt = vector3_minus(matrix3x3_vector3_mult(invW, p), w);
 
-        /* Next, obtain field value at transformed coordinate pt; interpolation is 
-           needed to ensure generality in the case of fractional translations. 
-           Unfortunately, that is _NOT_ compatible with MPI, since get_val is not
-           implemented for MPI */
-        Ft = get_bloch_field_point(pt); 
+        /* Field value at transformed coordinate pt; interpolation is needed to ensure
+           generality in the case of fractional translations. Unfortunately, this
+           precludes compatible with MPI, since get_val is not implemented for MPI */
+        Ft = get_bloch_field_point(pt); /* excludes exp(ikr) factor */
 
-        /* Transform the components of Ft by W; this is a bit tedious to do, since 
-           there are no matrix3x3*cvector3 routines, nor any CASSIGN_CVECTOR_RE.
+        /* Transform the vector components of Ft by W; a bit tedious to do, since 
+           there are no matrix3x3*cvector3 routines, nor any CASSIGN_CVECTOR_RE/IM.
            Instead, we do manually what is done in matrix3x3_vector3_mult, twice  */
-        Ft.x.re = W.c0.x * Ft.x.re + W.c1.x * Ft.y.re + W.c2.x * Ft.z.re;
-        Ft.y.re = W.c0.y * Ft.x.re + W.c1.y * Ft.y.re + W.c2.y * Ft.z.re;
-        Ft.z.re = W.c0.z * Ft.x.re + W.c1.z * Ft.y.re + W.c2.z * Ft.z.re;
-        Ft.x.im = W.c0.x * Ft.x.im + W.c1.x * Ft.y.im + W.c2.x * Ft.z.im;
-        Ft.y.im = W.c0.y * Ft.x.im + W.c1.y * Ft.y.im + W.c2.y * Ft.z.im;
-        Ft.z.im = W.c0.z * Ft.x.im + W.c1.z * Ft.y.im + W.c2.z * Ft.z.im;
-
+        Ft.x.re = W.c0.x*Ft.x.re + W.c1.x*Ft.y.re + W.c2.x*Ft.z.re;
+        Ft.y.re = W.c0.y*Ft.x.re + W.c1.y*Ft.y.re + W.c2.y*Ft.z.re;
+        Ft.z.re = W.c0.z*Ft.x.re + W.c1.z*Ft.y.re + W.c2.z*Ft.z.re;
+        Ft.x.im = W.c0.x*Ft.x.im + W.c1.x*Ft.y.im + W.c2.x*Ft.z.im;
+        Ft.y.im = W.c0.y*Ft.x.im + W.c1.y*Ft.y.im + W.c2.y*Ft.z.im;
+        Ft.z.im = W.c0.z*Ft.x.im + W.c1.z*Ft.y.im + W.c2.z*Ft.z.im;
+
+        /* Inner product of H and OH (for O = {W|w}), in Bloch form */
         integrand = cvector3_cdot(F, Ft);
-        integral.re += integrand.re;
-        integral.im += integrand.im;
+
+        /* So far, we have excluded the Bloch phases; they must be included, however.
+           It saves two trigonometric operations if we do them just jointly for p and pt.
+           Note that rescaling by TWOPI/geometry_lattice.xyz is hoised outside loop      */
+        deltaphi = (kvector.x*(-p.x+pt.x) + kvector.y*(-p.y+pt.y) + kvector.z*(-p.z+pt.z));
+        CASSIGN_SCALAR(phase, cos(deltaphi), sin(deltaphi));
+
+        /* Add integrand-contribution to integral, keeping Bloch phases in mind */
+        integral.re += integrand.re*phase.re - integrand.im*phase.im;
+        integral.im += integrand.re*phase.im + integrand.im*phase.re;
     }}}
 
     integral.re *= Vol / H.N;
     integral.im *= Vol / H.N;
 
-    { /* C89 requires new declarations to exist only in blocks; hence the {...} here */
-    /* this isn't really needed, since we don't support MPI anyway; still works
-       in serial though, so might as well keep it around */
+    /* C89 requires new type declarations to occur only at the start of a blocks; 
+       hence {...} here */
+    { 
+    /* Also, we don't really need to do this, since we don't support MPI anyway; 
+       still works in serial though, so might as well keep it around */
     cnumber integral_sum;
     mpi_allreduce(&integral, &integral_sum, 2, number,
                   MPI_DOUBLE, MPI_SUM, mpb_comm);
-    integral_sum.re *= detW;
+
+    integral_sum.re *= detW; /* H & B are pseudovectors => transform includes det(W) */
     integral_sum.im *= detW;
 
     return integral_sum;
     }
 }
-

From 600909a4866cb6b0f319bf01e7aba48eec1022bc Mon Sep 17 00:00:00 2001
From: tchr <tchr@mit.edu>
Date: Mon, 3 Feb 2020 23:33:53 -0500
Subject: [PATCH 03/16] =?UTF-8?q?Minor=20comment=20improvements=20and=20an?=
 =?UTF-8?q?=20error=20message=20for=20detW=20=E2=89=A0=201?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 mpb/transform.c | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/mpb/transform.c b/mpb/transform.c
index 919eabea..42e3a10d 100644
--- a/mpb/transform.c
+++ b/mpb/transform.c
@@ -32,7 +32,7 @@
 
 
 
-/* compute and return adjoint(cv1)*cv2*/
+/* compute and return adjoint(cv1)*cv2 */
 static cnumber cvector3_cdot(cvector3 cv1, cvector3 cv2)
 {
     cnumber dest;
@@ -67,27 +67,31 @@ cnumber transformed_overlap(matrix3x3 W, vector3 w)
     #ifdef HAVE_MPI
         CHECK(0, "transformed_overlap(..) not yet implemented for MPI!");
     #endif
-    /* should also check and error if SCALAR_COMPLEX not defined, or if mu_inv != NULL */
+    /* TODO: should also check and error if SCALAR_COMPLEX not defined, or if mu_inv != NULL */
     
     /* prepare before looping ... */
     n1 = mdata->nx; n2 = mdata->ny; n3 = mdata->nz;
 
-    s1 = geometry_lattice.size.x / n1;
+    s1 = geometry_lattice.size.x / n1; /* pixel spacings */
     s2 = geometry_lattice.size.y / n2;
     s3 = geometry_lattice.size.z / n3;
-    c1 = n1 <= 1 ? 0 : geometry_lattice.size.x * 0.5;
+    c1 = n1 <= 1 ? 0 : geometry_lattice.size.x * 0.5; /* offsets (negative) */
     c2 = n2 <= 1 ? 0 : geometry_lattice.size.y * 0.5;
     c3 = n3 <= 1 ? 0 : geometry_lattice.size.z * 0.5;
 
     invW = matrix3x3_inverse(W);
-    detW = matrix3x3_determinant(W); /* ought to check that |detW| = 1; otherwise, not a valid symmetry operation */
+    detW = matrix3x3_determinant(W);
+    if (fabs(fabs(detW) - 1.0) > 1e-12) {
+        mpi_one_fprintf(stderr, "Valid symmetry operations {W|w} must have |det(W)| = 1.0\n");
+        return integral;
+    }
 
     kvector.x *= TWOPI/geometry_lattice.size.x; /* hoist these rescalings outside the  */
     kvector.y *= TWOPI/geometry_lattice.size.y; /* loop; might be that licm takes care */
     kvector.z *= TWOPI/geometry_lattice.size.z; /* of it - but better to be sure       */
 
 
-    /* loop over coordinates */
+    /* Loop over coordinates (introduces int vars i1, i2, i3, & xyz_index) */
     LOOP_XYZ(mdata) { /* implies two opening braces '{{' */
         vector3 p, pt;
         cvector3 F, Ft;
@@ -110,7 +114,7 @@ cnumber transformed_overlap(matrix3x3 W, vector3 w)
 
         /* Field value at transformed coordinate pt; interpolation is needed to ensure
            generality in the case of fractional translations. Unfortunately, this
-           precludes compatible with MPI, since get_val is not implemented for MPI */
+           precludes compatibility with MPI, since get_val is not implemented for MPI */
         Ft = get_bloch_field_point(pt); /* excludes exp(ikr) factor */
 
         /* Transform the vector components of Ft by W; a bit tedious to do, since 
@@ -128,7 +132,7 @@ cnumber transformed_overlap(matrix3x3 W, vector3 w)
 
         /* So far, we have excluded the Bloch phases; they must be included, however.
            It saves two trigonometric operations if we do them just jointly for p and pt.
-           Note that rescaling by TWOPI/geometry_lattice.xyz is hoised outside loop      */
+           Note that rescaling by TWOPI/geometry_lattice.xyz is hoisted outside loop      */
         deltaphi = (kvector.x*(-p.x+pt.x) + kvector.y*(-p.y+pt.y) + kvector.z*(-p.z+pt.z));
         CASSIGN_SCALAR(phase, cos(deltaphi), sin(deltaphi));
 

From 89e8ed59382490080eb305af20422102980b9e46 Mon Sep 17 00:00:00 2001
From: tchr <tchr@mit.edu>
Date: Mon, 10 Feb 2020 17:15:17 -0500
Subject: [PATCH 04/16] generalize so that nontrivial mu can be used, and also
 allow passing in the d-field instead of the b-field

---
 mpb/fields.c    | 28 +++++++++++++--------------
 mpb/transform.c | 50 ++++++++++++++++++++++++++++++++++++-------------
 2 files changed, 50 insertions(+), 28 deletions(-)

diff --git a/mpb/fields.c b/mpb/fields.c
index bc6c2359..4561a595 100644
--- a/mpb/fields.c
+++ b/mpb/fields.c
@@ -704,19 +704,23 @@ number get_energy_point(vector3 p)
      return f_interp_val(p, mdata, (real *) curfield, 1, 0);
 }
 
-cvector3 get_bloch_field_point(vector3 p)
+void get_bloch_field_point_(scalar_complex *field, vector3 p)
 {
-     scalar_complex field[3];
-     cvector3 F;
-
      CHECK(curfield && strchr("dhbecv", curfield_type),
 	   "field must be must be loaded before get-*field*-point");
      field[0] = f_interp_cval(p, mdata, &curfield[0].re, 6);
      field[1] = f_interp_cval(p, mdata, &curfield[1].re, 6);
      field[2] = f_interp_cval(p, mdata, &curfield[2].re, 6);
-     F.x = cscalar2cnumber(field[0]);
-     F.y = cscalar2cnumber(field[1]);
-     F.z = cscalar2cnumber(field[2]);
+}
+
+cvector3 get_bloch_field_point(vector3 p)
+{
+     scalar_complex field[3];
+     cvector3 F;
+
+	 get_bloch_field_point_(field, p);
+
+	 F = cscalar32cvector3(field);
      return F;
 }
 
@@ -725,11 +729,7 @@ cvector3 get_field_point(vector3 p)
      scalar_complex field[3], phase;
      cvector3 F;
 
-     CHECK(curfield && strchr("dhbecv", curfield_type),
-	   "field must be must be loaded before get-*field*-point");
-     field[0] = f_interp_cval(p, mdata, &curfield[0].re, 6);
-     field[1] = f_interp_cval(p, mdata, &curfield[1].re, 6);
-     field[2] = f_interp_cval(p, mdata, &curfield[2].re, 6);
+     get_bloch_field_point_(field, p);
 
      if (curfield_type != 'v') {
 	  double phase_phi = TWOPI *
@@ -742,9 +742,7 @@ cvector3 get_field_point(vector3 p)
 	  CASSIGN_MULT(field[2], field[2], phase);
      }
 
-     F.x = cscalar2cnumber(field[0]);
-     F.y = cscalar2cnumber(field[1]);
-     F.z = cscalar2cnumber(field[2]);
+	 F = cscalar32cvector3(field);
      return F;
 }
 
diff --git a/mpb/transform.c b/mpb/transform.c
index 42e3a10d..8d4cc70c 100644
--- a/mpb/transform.c
+++ b/mpb/transform.c
@@ -43,13 +43,14 @@ static cnumber cvector3_cdot(cvector3 cv1, cvector3 cv2)
     return dest;
 }
 
-
 /* Right now, this assumes that the user loads the hfield via (get-hfield band) beforehand. 
    Probably, this ought to be implemented instead by just computing this internally, here,
    using maxwell_compute_h_from_H; this would also be more natural for eventually supporting
    magnetic materials. Similarly, it would generalize to D and E fields. */
 cnumber transformed_overlap(matrix3x3 W, vector3 w)
 {
+    /* TODO: Could we just call get_dfield or get_hfield here instead of having the user do it? 
+             then the input would have a choice between e or d type input */
     #ifdef HAVE_MPI
         int local_n2, local_y_start;
     #endif
@@ -60,14 +61,13 @@ cnumber transformed_overlap(matrix3x3 W, vector3 w)
     matrix3x3 invW;
     number detW;
 
-    if (!curfield || !strchr("hb", curfield_type)) {
-        mpi_one_fprintf(stderr, "The real-space H (or B) field must be loaded first.\n");
+    if (!curfield || !strchr("db", curfield_type)) {
+        mpi_one_fprintf(stderr, "a real-space H or D-field must be loaded beforehand.\n");
         return integral;
     }
     #ifdef HAVE_MPI
-        CHECK(0, "transformed_overlap(..) not yet implemented for MPI!");
+        CHECK(0, "transformed_overlap(..) not yet implemented for MPI!"); /* TODO: Remove if PR#112 is merged */
     #endif
-    /* TODO: should also check and error if SCALAR_COMPLEX not defined, or if mu_inv != NULL */
     
     /* prepare before looping ... */
     n1 = mdata->nx; n2 = mdata->ny; n3 = mdata->nz;
@@ -82,7 +82,7 @@ cnumber transformed_overlap(matrix3x3 W, vector3 w)
     invW = matrix3x3_inverse(W);
     detW = matrix3x3_determinant(W);
     if (fabs(fabs(detW) - 1.0) > 1e-12) {
-        mpi_one_fprintf(stderr, "Valid symmetry operations {W|w} must have |det(W)| = 1.0\n");
+        mpi_one_fprintf(stderr, "valid symmetry operations {W|w} must have |det(W)| = 1.0\n");
         return integral;
     }
 
@@ -91,8 +91,9 @@ cnumber transformed_overlap(matrix3x3 W, vector3 w)
     kvector.z *= TWOPI/geometry_lattice.size.z; /* of it - but better to be sure       */
 
 
-    /* Loop over coordinates (introduces int vars i1, i2, i3, & xyz_index) */
+    /* Loop over coordinates (introduces int vars i1, i2, i3, xyz_index) */
     LOOP_XYZ(mdata) { /* implies two opening braces '{{' */
+
         vector3 p, pt;
         cvector3 F, Ft;
         cnumber integrand;
@@ -105,9 +106,9 @@ cnumber transformed_overlap(matrix3x3 W, vector3 w)
         p.z = i3 * s3 - c3;
 
         /* Bloch field value at current point p (exludes exp(ikr) factor) */
-        F.x = cscalar2cnumber(curfield[3*xyz_index+0]); /* since F.x is a cnumber and curfield is a */
-        F.y = cscalar2cnumber(curfield[3*xyz_index+1]); /* scalar_complex; we have to convert here  */
-        F.z = cscalar2cnumber(curfield[3*xyz_index+2]); 
+        F = cscalar32cvector3(curfield+3*xyz_index); /* each "point" in curfield is a scalar_complex 
+                                                        triad while F is a cvector3 (w/ cnumber entries);
+                                                        convert via cscalar32cvector3 */
 
         /* Transformed coordinate pt = invW*p - w */
         pt = vector3_minus(matrix3x3_vector3_mult(invW, p), w);
@@ -115,6 +116,7 @@ cnumber transformed_overlap(matrix3x3 W, vector3 w)
         /* Field value at transformed coordinate pt; interpolation is needed to ensure
            generality in the case of fractional translations. Unfortunately, this
            precludes compatibility with MPI, since get_val is not implemented for MPI */
+        /* TODO: Remove disclaimer above if PR#112 is merged */
         Ft = get_bloch_field_point(pt); /* excludes exp(ikr) factor */
 
         /* Transform the vector components of Ft by W; a bit tedious to do, since 
@@ -127,7 +129,26 @@ cnumber transformed_overlap(matrix3x3 W, vector3 w)
         Ft.y.im = W.c0.y*Ft.x.im + W.c1.y*Ft.y.im + W.c2.y*Ft.z.im;
         Ft.z.im = W.c0.z*Ft.x.im + W.c1.z*Ft.y.im + W.c2.z*Ft.z.im;
 
-        /* Inner product of H and OH (for O = {W|w}), in Bloch form */
+        /* Multiplying F (either B or D-field) with μ⁻¹ or ε⁻¹ to get H- or E-fields,
+           since the overlap we need to calculate is ⟨F|Ft⟩ = (H|Bt) or (E|Dt),
+           with t-postscript denoting a field transformed by {W|w}. Here, we essentially
+           adapt some boiler-plate code from compute_field_energy_internal in fields.c     */
+        scalar_complex field[3];
+        if (curfield_type == 'd') {
+            assign_symmatrix_vector(field, mdata->eps_inv[xyz_index], curfield+3*xyz_index);
+            F = cscalar32cvector3(field);
+        }
+        else if (curfield_type == 'b' && mdata->mu_inv != NULL) {
+            assign_symmatrix_vector(field, mdata->mu_inv[xyz_index], curfield+3*xyz_index);
+            F = cscalar32cvector3(field);
+        }
+        /* else {
+            field[0] =   curfield[3*i];
+            field[1] = curfield[3*i+1];
+            field[2] = curfield[3*i+2];
+        } */
+        
+        /* Inner product of F and Ft={W|w}F in Bloch form */
         integrand = cvector3_cdot(F, Ft);
 
         /* So far, we have excluded the Bloch phases; they must be included, however.
@@ -149,12 +170,15 @@ cnumber transformed_overlap(matrix3x3 W, vector3 w)
     { 
     /* Also, we don't really need to do this, since we don't support MPI anyway; 
        still works in serial though, so might as well keep it around */
+    /* TODO: Remove disclaimer above if PR#112 is merged */
     cnumber integral_sum;
     mpi_allreduce(&integral, &integral_sum, 2, number,
                   MPI_DOUBLE, MPI_SUM, mpb_comm);
 
-    integral_sum.re *= detW; /* H & B are pseudovectors => transform includes det(W) */
-    integral_sum.im *= detW;
+    if (curfield_type == 'b') {  /* H & B are pseudovectors => transform includes det(W) */
+        integral_sum.re *= detW; 
+        integral_sum.im *= detW;
+    }
 
     return integral_sum;
     }

From f00ea00ab989b07ade7bdb19ad1bf1dc699e18a8 Mon Sep 17 00:00:00 2001
From: tchr <tchr@mit.edu>
Date: Mon, 10 Feb 2020 18:47:10 -0500
Subject: [PATCH 05/16] swap a few cnumbers for scalar_complex to hopefully
 avoid copying things unnecessarily

---
 mpb/mpb.h             |  1 +
 mpb/transform.c       | 90 ++++++++++++++++---------------------------
 src/matrices/scalar.h |  8 ++++
 3 files changed, 43 insertions(+), 56 deletions(-)

diff --git a/mpb/mpb.h b/mpb/mpb.h
index ce47e88e..2e2187c8 100644
--- a/mpb/mpb.h
+++ b/mpb/mpb.h
@@ -74,6 +74,7 @@ extern int kpoint_index;
 extern void compute_field_squared(void);
 void get_efield(integer which_band);
 real mean_medium_from_matrix(const symmetric_matrix *eps_inv);
+void get_bloch_field_point_(scalar_complex *field, vector3 p);
 
 /**************************************************************************/
 
diff --git a/mpb/transform.c b/mpb/transform.c
index 8d4cc70c..1e794031 100644
--- a/mpb/transform.c
+++ b/mpb/transform.c
@@ -32,34 +32,20 @@
 
 
 
-/* compute and return adjoint(cv1)*cv2 */
-static cnumber cvector3_cdot(cvector3 cv1, cvector3 cv2)
-{
-    cnumber dest;
-    dest.re = (cv1.x.re*cv2.x.re + cv1.y.re*cv2.y.re + cv1.z.re*cv2.z.re +
-               cv1.x.im*cv2.x.im + cv1.y.im*cv2.y.im + cv1.z.im*cv2.z.im);
-    dest.im = (cv1.x.re*cv2.x.im + cv1.y.re*cv2.y.im + cv1.z.re*cv2.z.im -
-               cv1.x.im*cv2.x.re - cv1.y.im*cv2.y.re - cv1.z.im*cv2.z.re);
-    return dest;
-}
-
-/* Right now, this assumes that the user loads the hfield via (get-hfield band) beforehand. 
-   Probably, this ought to be implemented instead by just computing this internally, here,
-   using maxwell_compute_h_from_H; this would also be more natural for eventually supporting
-   magnetic materials. Similarly, it would generalize to D and E fields. */
+/* Right now, this assumes that the user loads the bfield or dfield via (get-hfield band) or
+   (get-field band) beforehand. 
+   Instead, this could be implemented by just computing this here, e.g. by having us calling
+   get_hfield or get_bfield; that seems to achieve the same (i.e. setting curfield(_type)) */
 cnumber transformed_overlap(matrix3x3 W, vector3 w)
 {
     /* TODO: Could we just call get_dfield or get_hfield here instead of having the user do it? 
              then the input would have a choice between e or d type input */
-    #ifdef HAVE_MPI
-        int local_n2, local_y_start;
-    #endif
     int n1, n2, n3;
     real s1, s2, s3, c1, c2, c3;
     cnumber integral = {0,0};
+    number detW;
     vector3 kvector = cur_kvector;
     matrix3x3 invW;
-    number detW;
 
     if (!curfield || !strchr("db", curfield_type)) {
         mpi_one_fprintf(stderr, "a real-space H or D-field must be loaded beforehand.\n");
@@ -93,73 +79,65 @@ cnumber transformed_overlap(matrix3x3 W, vector3 w)
 
     /* Loop over coordinates (introduces int vars i1, i2, i3, xyz_index) */
     LOOP_XYZ(mdata) { /* implies two opening braces '{{' */
-
         vector3 p, pt;
-        cvector3 F, Ft;
-        cnumber integrand;
+        scalar_complex F[3], Ft[3], integrand, phase;
         double deltaphi;
-        scalar_complex phase;
 
         /* Current lattice position in loop */
         p.x = i1 * s1 - c1;
         p.y = i2 * s2 - c2;
         p.z = i3 * s3 - c3;
 
-        /* Bloch field value at current point p (exludes exp(ikr) factor) */
-        F = cscalar32cvector3(curfield+3*xyz_index); /* each "point" in curfield is a scalar_complex 
-                                                        triad while F is a cvector3 (w/ cnumber entries);
-                                                        convert via cscalar32cvector3 */
-
         /* Transformed coordinate pt = invW*p - w */
         pt = vector3_minus(matrix3x3_vector3_mult(invW, p), w);
 
-        /* Field value at transformed coordinate pt; interpolation is needed to ensure
-           generality in the case of fractional translations. Unfortunately, this
+        /* Bloch field value at transformed coordinate pt: interpolation is needed to ensure
+           generality in the case of fractional translations. Unfortunately, this currently
            precludes compatibility with MPI, since get_val is not implemented for MPI */
         /* TODO: Remove disclaimer above if PR#112 is merged */
-        Ft = get_bloch_field_point(pt); /* excludes exp(ikr) factor */
+        get_bloch_field_point_(Ft, pt); /* sets Ft to F at p [excludes exp(ikr) factor] */
 
         /* Transform the vector components of Ft by W; a bit tedious to do, since 
            there are no matrix3x3*cvector3 routines, nor any CASSIGN_CVECTOR_RE/IM.
            Instead, we do manually what is done in matrix3x3_vector3_mult, twice  */
-        Ft.x.re = W.c0.x*Ft.x.re + W.c1.x*Ft.y.re + W.c2.x*Ft.z.re;
-        Ft.y.re = W.c0.y*Ft.x.re + W.c1.y*Ft.y.re + W.c2.y*Ft.z.re;
-        Ft.z.re = W.c0.z*Ft.x.re + W.c1.z*Ft.y.re + W.c2.z*Ft.z.re;
-        Ft.x.im = W.c0.x*Ft.x.im + W.c1.x*Ft.y.im + W.c2.x*Ft.z.im;
-        Ft.y.im = W.c0.y*Ft.x.im + W.c1.y*Ft.y.im + W.c2.y*Ft.z.im;
-        Ft.z.im = W.c0.z*Ft.x.im + W.c1.z*Ft.y.im + W.c2.z*Ft.z.im;
-
-        /* Multiplying F (either B or D-field) with μ⁻¹ or ε⁻¹ to get H- or E-fields,
-           since the overlap we need to calculate is ⟨F|Ft⟩ = (H|Bt) or (E|Dt),
-           with t-postscript denoting a field transformed by {W|w}. Here, we essentially
-           adapt some boiler-plate code from compute_field_energy_internal in fields.c     */
-        scalar_complex field[3];
+        Ft[0].re = W.c0.x*Ft[0].re + W.c1.x*Ft[1].re + W.c2.x*Ft[2].re;
+        Ft[0].im = W.c0.x*Ft[0].im + W.c1.x*Ft[1].im + W.c2.x*Ft[2].im;
+        Ft[1].re = W.c0.y*Ft[0].re + W.c1.y*Ft[1].re + W.c2.y*Ft[2].re;
+        Ft[1].im = W.c0.y*Ft[0].im + W.c1.y*Ft[1].im + W.c2.y*Ft[2].im;
+        Ft[2].re = W.c0.z*Ft[0].re + W.c1.z*Ft[1].re + W.c2.z*Ft[2].re;
+        Ft[2].im = W.c0.z*Ft[0].im + W.c1.z*Ft[1].im + W.c2.z*Ft[2].im;
+        
+        /* Get the Bloch field value at current point p [exludes exp(ikr) factor].
+           We multiply the input field F (either B or D-field) with μ⁻¹ or ε⁻¹ to get
+           H- or E-fields, as the relevant overlap is ⟨F|Ft⟩ = (H|Bt) or (E|Dt), with
+           t-postscript denoting a field transformed by {W|w}. Here, we essentially
+           adapt some boiler-plate code from compute_field_energy_internal in fields.c   */
         if (curfield_type == 'd') {
-            assign_symmatrix_vector(field, mdata->eps_inv[xyz_index], curfield+3*xyz_index);
-            F = cscalar32cvector3(field);
+            assign_symmatrix_vector(F, mdata->eps_inv[xyz_index], curfield+3*xyz_index);
         }
         else if (curfield_type == 'b' && mdata->mu_inv != NULL) {
-            assign_symmatrix_vector(field, mdata->mu_inv[xyz_index], curfield+3*xyz_index);
-            F = cscalar32cvector3(field);
+            assign_symmatrix_vector(F, mdata->mu_inv[xyz_index],  curfield+3*xyz_index);
+        }
+        else {
+            F[0] = curfield[3*xyz_index];
+            F[1] = curfield[3*xyz_index+1];
+            F[2] = curfield[3*xyz_index+2];
         }
-        /* else {
-            field[0] =   curfield[3*i];
-            field[1] = curfield[3*i+1];
-            field[2] = curfield[3*i+2];
-        } */
         
         /* Inner product of F and Ft={W|w}F in Bloch form */
-        integrand = cvector3_cdot(F, Ft);
+        CASSIGN_CONJ_MULT(integrand, F[0], Ft[0]);          /* add adjoint(F)*Ft to integrand */
+        CACCUMULATE_SUM_CONJ_MULT(integrand, F[1], Ft[1]);
+        CACCUMULATE_SUM_CONJ_MULT(integrand, F[2], Ft[2]);
 
         /* So far, we have excluded the Bloch phases; they must be included, however.
            It saves two trigonometric operations if we do them just jointly for p and pt.
            Note that rescaling by TWOPI/geometry_lattice.xyz is hoisted outside loop      */
-        deltaphi = (kvector.x*(-p.x+pt.x) + kvector.y*(-p.y+pt.y) + kvector.z*(-p.z+pt.z));
+        deltaphi = (kvector.x*(pt.x-p.x) + kvector.y*(pt.y-p.y) + kvector.z*(pt.z-p.z));
         CASSIGN_SCALAR(phase, cos(deltaphi), sin(deltaphi));
 
         /* Add integrand-contribution to integral, keeping Bloch phases in mind */
-        integral.re += integrand.re*phase.re - integrand.im*phase.im;
-        integral.im += integrand.re*phase.im + integrand.im*phase.re;
+        integral.re += CSCALAR_MULT_RE(integrand, phase);
+        integral.im += CSCALAR_MULT_IM(integrand, phase); 
     }}}
 
     integral.re *= Vol / H.N;
diff --git a/src/matrices/scalar.h b/src/matrices/scalar.h
index dbe21d06..9749453a 100644
--- a/src/matrices/scalar.h
+++ b/src/matrices/scalar.h
@@ -63,6 +63,14 @@ typedef struct {
 		    bbbb_re * cccc_im + bbbb_im * cccc_re); \
 }
 
+/* a = conj(b) * c */
+#define CASSIGN_CONJ_MULT(a, b, c) { \
+     real bbbb_re = (b).re, bbbb_im = (b).im; \
+     real cccc_re = (c).re, cccc_im = (c).im; \
+     CASSIGN_SCALAR(a, bbbb_re * cccc_re + bbbb_im * cccc_im, \
+              bbbb_re * cccc_im - bbbb_im * cccc_re); \
+}
+
 /* a = b / c = b * conj(c) / |c|^2 */
 #define CASSIGN_DIV(a, b, c) { \
      scalar_complex aaaa_tmp; real aaaa_tmp_norm; \

From 6a386f64e43df202c964f9141787a014ec3111eb Mon Sep 17 00:00:00 2001
From: tchr <tchr@mit.edu>
Date: Wed, 12 Feb 2020 15:18:08 -0500
Subject: [PATCH 06/16] Fix bug in vector transformation and account for the
 fact that vector fields are in a Cartesian basis

---
 mpb/transform.c | 35 ++++++++++++++++++++---------------
 1 file changed, 20 insertions(+), 15 deletions(-)

diff --git a/mpb/transform.c b/mpb/transform.c
index 1e794031..af282882 100644
--- a/mpb/transform.c
+++ b/mpb/transform.c
@@ -45,7 +45,7 @@ cnumber transformed_overlap(matrix3x3 W, vector3 w)
     cnumber integral = {0,0};
     number detW;
     vector3 kvector = cur_kvector;
-    matrix3x3 invW;
+    matrix3x3 invW, Wc;
 
     if (!curfield || !strchr("db", curfield_type)) {
         mpi_one_fprintf(stderr, "a real-space H or D-field must be loaded beforehand.\n");
@@ -65,22 +65,27 @@ cnumber transformed_overlap(matrix3x3 W, vector3 w)
     c2 = n2 <= 1 ? 0 : geometry_lattice.size.y * 0.5;
     c3 = n3 <= 1 ? 0 : geometry_lattice.size.z * 0.5;
 
-    invW = matrix3x3_inverse(W);
     detW = matrix3x3_determinant(W);
     if (fabs(fabs(detW) - 1.0) > 1e-12) {
         mpi_one_fprintf(stderr, "valid symmetry operations {W|w} must have |det(W)| = 1.0\n");
         return integral;
     }
+    invW = matrix3x3_inverse(W);
+    /* W is specified in the lattice basis, but field *vectors* evaluated in real-space are
+       in a Cartesian basis: when we transform the vector components, we must account for this
+       difference. We transform W to a Cartesian basis Wc=RWR⁻¹ (R=geometry_lattice.basis, a
+       matrix w/ columns of Cartesian basis vectors) and then use Wc to transform vector fields */
+    Wc = matrix3x3_mult(matrix3x3_mult(geometry_lattice.basis, W),
+                        matrix3x3_inverse(geometry_lattice.basis));
 
     kvector.x *= TWOPI/geometry_lattice.size.x; /* hoist these rescalings outside the  */
     kvector.y *= TWOPI/geometry_lattice.size.y; /* loop; might be that licm takes care */
     kvector.z *= TWOPI/geometry_lattice.size.z; /* of it - but better to be sure       */
 
-
     /* Loop over coordinates (introduces int vars i1, i2, i3, xyz_index) */
     LOOP_XYZ(mdata) { /* implies two opening braces '{{' */
         vector3 p, pt;
-        scalar_complex F[3], Ft[3], integrand, phase;
+        scalar_complex F[3], Ft[3], Ftemp[3], integrand, phase;
         double deltaphi;
 
         /* Current lattice position in loop */
@@ -95,17 +100,17 @@ cnumber transformed_overlap(matrix3x3 W, vector3 w)
            generality in the case of fractional translations. Unfortunately, this currently
            precludes compatibility with MPI, since get_val is not implemented for MPI */
         /* TODO: Remove disclaimer above if PR#112 is merged */
-        get_bloch_field_point_(Ft, pt); /* sets Ft to F at p [excludes exp(ikr) factor] */
-
-        /* Transform the vector components of Ft by W; a bit tedious to do, since 
-           there are no matrix3x3*cvector3 routines, nor any CASSIGN_CVECTOR_RE/IM.
-           Instead, we do manually what is done in matrix3x3_vector3_mult, twice  */
-        Ft[0].re = W.c0.x*Ft[0].re + W.c1.x*Ft[1].re + W.c2.x*Ft[2].re;
-        Ft[0].im = W.c0.x*Ft[0].im + W.c1.x*Ft[1].im + W.c2.x*Ft[2].im;
-        Ft[1].re = W.c0.y*Ft[0].re + W.c1.y*Ft[1].re + W.c2.y*Ft[2].re;
-        Ft[1].im = W.c0.y*Ft[0].im + W.c1.y*Ft[1].im + W.c2.y*Ft[2].im;
-        Ft[2].re = W.c0.z*Ft[0].re + W.c1.z*Ft[1].re + W.c2.z*Ft[2].re;
-        Ft[2].im = W.c0.z*Ft[0].im + W.c1.z*Ft[1].im + W.c2.z*Ft[2].im;
+        get_bloch_field_point_(Ftemp, pt); /* assigns Ftemp to field at p [excludes exp(ikr) factor] */
+
+        /* Transform the vector components of Ftemp by W to obtain Ft; a bit repetitious but 
+           we just write out the matrix-product manually here, for real and imag components.
+           Since F, Ftemp, and Ft are in a Cartesian basis, we use Wc instead of W.          */
+        Ft[0].re = Wc.c0.x*Ftemp[0].re + Wc.c1.x*Ftemp[1].re + Wc.c2.x*Ftemp[2].re;
+        Ft[0].im = Wc.c0.x*Ftemp[0].im + Wc.c1.x*Ftemp[1].im + Wc.c2.x*Ftemp[2].im;
+        Ft[1].re = Wc.c0.y*Ftemp[0].re + Wc.c1.y*Ftemp[1].re + Wc.c2.y*Ftemp[2].re;
+        Ft[1].im = Wc.c0.y*Ftemp[0].im + Wc.c1.y*Ftemp[1].im + Wc.c2.y*Ftemp[2].im;
+        Ft[2].re = Wc.c0.z*Ftemp[0].re + Wc.c1.z*Ftemp[1].re + Wc.c2.z*Ftemp[2].re;
+        Ft[2].im = Wc.c0.z*Ftemp[0].im + Wc.c1.z*Ftemp[1].im + Wc.c2.z*Ftemp[2].im;
         
         /* Get the Bloch field value at current point p [exludes exp(ikr) factor].
            We multiply the input field F (either B or D-field) with μ⁻¹ or ε⁻¹ to get

From 636474b8f5bdaa3db8d75e60ce232741eb9afd68 Mon Sep 17 00:00:00 2001
From: tchr <tchr@mit.edu>
Date: Wed, 12 Feb 2020 15:21:54 -0500
Subject: [PATCH 07/16] Add band-index function interfaces to
 transformed_overlap without invoking get_bfield/get_dfield and add a clearer
 method description: - compute_symmetry (band function) - compute_symmetries
 (all bands)

Also minor revisions to comments and micro-refactoring.
---
 mpb/mpb.scm.in  |  4 +++
 mpb/transform.c | 70 ++++++++++++++++++++++++++++++++-----------------
 2 files changed, 50 insertions(+), 24 deletions(-)

diff --git a/mpb/mpb.scm.in b/mpb/mpb.scm.in
index db9cb4d4..546393c5 100644
--- a/mpb/mpb.scm.in
+++ b/mpb/mpb.scm.in
@@ -212,6 +212,10 @@
   'number (make-list-type 'geometric-object))
 (define-external-function transformed-overlap false false 
   'cnumber 'matrix3x3 'vector3)
+(define-external-function compute-symmetries false false
+  (make-list-type 'cnumber) 'matrix3x3 'vector3)
+(define-external-function compute-symmetry false false
+  'cnumber 'integer 'matrix3x3 'vector3)
 (define-external-function output-field-to-file false false
   no-return-value 'integer 'string)  
 
diff --git a/mpb/transform.c b/mpb/transform.c
index af282882..1f2c114a 100644
--- a/mpb/transform.c
+++ b/mpb/transform.c
@@ -32,28 +32,36 @@
 
 
 
-/* Right now, this assumes that the user loads the bfield or dfield via (get-hfield band) or
-   (get-field band) beforehand. 
-   Instead, this could be implemented by just computing this here, e.g. by having us calling
-   get_hfield or get_bfield; that seems to achieve the same (i.e. setting curfield(_type)) */
+/* If curfield is set to the real-space D-field (of band-index i), computes the overlap 
+         ∫ Eᵢ(r){W|w}Dᵢ(r) dr  =  ∫ Eᵢ(r)(WDᵢ)({W|w}⁻¹r) dr,
+   for a symmetry operation {W|w} with point-group part W and translation part w; each 
+   specified in the lattice basis. The vector fields Eᵢ and Dᵢ include Bloch phases.
+   If instead curfield is set to the real-space B-field, the overlap
+         ∫ Hᵢ(r){W|v}Bᵢ(r) dr  =  det(W) ∫ Eᵢ(r)(WDᵢ)({W|w}⁻¹r) dr,
+   is computed instead. Note that a factor det(W) is then included since B & H are
+   pseudovectors. As a result, the computed symmetry expectation values are independent
+   of whether the D- or B-field is used.
+   No other choices for curfield are allowed: to set curfield to the real-space B- or D-
+   field call get-bfield/dfield in Scheme, or get_bfield/dfield in C.                   
+   Usually, it will be more convenient to use the accessor compute_symmetry(i, W, w) 
+   which defaults to the B-field (since μ = 1 usually) and instead takes a band-index i .*/
 cnumber transformed_overlap(matrix3x3 W, vector3 w)
 {
-    /* TODO: Could we just call get_dfield or get_hfield here instead of having the user do it? 
-             then the input would have a choice between e or d type input */
     int n1, n2, n3;
     real s1, s2, s3, c1, c2, c3;
-    cnumber integral = {0,0};
+    cnumber integral = {0,0}, integral_sum;
     number detW;
     vector3 kvector = cur_kvector;
     matrix3x3 invW, Wc;
 
     if (!curfield || !strchr("db", curfield_type)) {
-        mpi_one_fprintf(stderr, "a real-space H or D-field must be loaded beforehand.\n");
+        mpi_one_fprintf(stderr, "a real-space H- or D-field must be loaded beforehand\n");
         return integral;
     }
     #ifdef HAVE_MPI
         CHECK(0, "transformed_overlap(..) not yet implemented for MPI!"); /* TODO: Remove if PR#112 is merged */
     #endif
+    /* TODO: Is special-casing for #ifndef SCALAR_COMPLEX needed? */
     
     /* prepare before looping ... */
     n1 = mdata->nx; n2 = mdata->ny; n3 = mdata->nz;
@@ -88,17 +96,17 @@ cnumber transformed_overlap(matrix3x3 W, vector3 w)
         scalar_complex F[3], Ft[3], Ftemp[3], integrand, phase;
         double deltaphi;
 
-        /* Current lattice position in loop */
+        /* Current lattice coordinate */
         p.x = i1 * s1 - c1;
         p.y = i2 * s2 - c2;
         p.z = i3 * s3 - c3;
 
-        /* Transformed coordinate pt = invW*p - w */
+        /* Transformed coordinate pt = {W|w}⁻¹p = W⁻¹p - w */
         pt = vector3_minus(matrix3x3_vector3_mult(invW, p), w);
 
         /* Bloch field value at transformed coordinate pt: interpolation is needed to ensure
            generality in the case of fractional translations. Unfortunately, this currently
-           precludes compatibility with MPI, since get_val is not implemented for MPI */
+           precludes compatibility with MPI, since get_val is not implemented for MPI        */
         /* TODO: Remove disclaimer above if PR#112 is merged */
         get_bloch_field_point_(Ftemp, pt); /* assigns Ftemp to field at p [excludes exp(ikr) factor] */
 
@@ -111,12 +119,12 @@ cnumber transformed_overlap(matrix3x3 W, vector3 w)
         Ft[1].im = Wc.c0.y*Ftemp[0].im + Wc.c1.y*Ftemp[1].im + Wc.c2.y*Ftemp[2].im;
         Ft[2].re = Wc.c0.z*Ftemp[0].re + Wc.c1.z*Ftemp[1].re + Wc.c2.z*Ftemp[2].re;
         Ft[2].im = Wc.c0.z*Ftemp[0].im + Wc.c1.z*Ftemp[1].im + Wc.c2.z*Ftemp[2].im;
-        
+
         /* Get the Bloch field value at current point p [exludes exp(ikr) factor].
            We multiply the input field F (either B or D-field) with μ⁻¹ or ε⁻¹ to get
            H- or E-fields, as the relevant overlap is ⟨F|Ft⟩ = (H|Bt) or (E|Dt), with
            t-postscript denoting a field transformed by {W|w}. Here, we essentially
-           adapt some boiler-plate code from compute_field_energy_internal in fields.c   */
+           adapt some boiler-plate code from compute_field_energy_internal in fields.c       */
         if (curfield_type == 'd') {
             assign_symmatrix_vector(F, mdata->eps_inv[xyz_index], curfield+3*xyz_index);
         }
@@ -130,7 +138,7 @@ cnumber transformed_overlap(matrix3x3 W, vector3 w)
         }
         
         /* Inner product of F and Ft={W|w}F in Bloch form */
-        CASSIGN_CONJ_MULT(integrand, F[0], Ft[0]);          /* add adjoint(F)*Ft to integrand */
+        CASSIGN_CONJ_MULT(integrand, F[0], Ft[0]);  /* add adjoint(F)*Ft to integrand */
         CACCUMULATE_SUM_CONJ_MULT(integrand, F[1], Ft[1]);
         CACCUMULATE_SUM_CONJ_MULT(integrand, F[2], Ft[2]);
 
@@ -142,27 +150,41 @@ cnumber transformed_overlap(matrix3x3 W, vector3 w)
 
         /* Add integrand-contribution to integral, keeping Bloch phases in mind */
         integral.re += CSCALAR_MULT_RE(integrand, phase);
-        integral.im += CSCALAR_MULT_IM(integrand, phase); 
+        integral.im += CSCALAR_MULT_IM(integrand, phase);
     }}}
 
     integral.re *= Vol / H.N;
     integral.im *= Vol / H.N;
 
-    /* C89 requires new type declarations to occur only at the start of a blocks; 
-       hence {...} here */
-    { 
-    /* Also, we don't really need to do this, since we don't support MPI anyway; 
-       still works in serial though, so might as well keep it around */
-    /* TODO: Remove disclaimer above if PR#112 is merged */
-    cnumber integral_sum;
     mpi_allreduce(&integral, &integral_sum, 2, number,
                   MPI_DOUBLE, MPI_SUM, mpb_comm);
 
     if (curfield_type == 'b') {  /* H & B are pseudovectors => transform includes det(W) */
-        integral_sum.re *= detW; 
+        integral_sum.re *= detW;
         integral_sum.im *= detW;
     }
 
     return integral_sum;
-    }
 }
+
+cnumber compute_symmetry(int which_band, matrix3x3 W, vector3 w)
+{
+    cnumber symval;
+    get_bfield(which_band);
+    symval = transformed_overlap(W, w);
+
+    return symval;
+}
+
+cnumber_list compute_symmetries(matrix3x3 W, vector3 w)
+{
+    cnumber_list symvals;
+    int ib;
+    symvals.num_items = num_bands;
+    CHK_MALLOC(symvals.items, cnumber, num_bands);
+
+    for (ib = 0; ib < num_bands; ++ib){
+        symvals.items[ib] = compute_symmetry(ib+1, W, w);
+    }
+    return symvals;
+}
\ No newline at end of file

From c33a6012922c79c46aa09c8756be0df79eed8f12 Mon Sep 17 00:00:00 2001
From: tchr <tchr@mit.edu>
Date: Wed, 19 Feb 2020 11:59:40 -0500
Subject: [PATCH 08/16] Fix erroneous operation of {W|w}^-1 on coordinate p

- The translation component of {W|w}^-1 contains a factor of W^-1
- Previously, nonsymmorphic operations were consequently wrongly implemented
---
 mpb/transform.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/mpb/transform.c b/mpb/transform.c
index 1f2c114a..c45b13f2 100644
--- a/mpb/transform.c
+++ b/mpb/transform.c
@@ -101,8 +101,8 @@ cnumber transformed_overlap(matrix3x3 W, vector3 w)
         p.y = i2 * s2 - c2;
         p.z = i3 * s3 - c3;
 
-        /* Transformed coordinate pt = {W|w}⁻¹p = W⁻¹p - w */
-        pt = vector3_minus(matrix3x3_vector3_mult(invW, p), w);
+        /* Transformed coordinate pt = {W|w}⁻¹p = W⁻¹(p-w) since {W|w}⁻¹={W⁻¹|-W⁻¹w} */
+        pt = matrix3x3_vector3_mult(invW, vector3_minus(p, w));
 
         /* Bloch field value at transformed coordinate pt: interpolation is needed to ensure
            generality in the case of fractional translations. Unfortunately, this currently

From 07066e084b90b48f03f44ecaa97beb29d40b589b Mon Sep 17 00:00:00 2001
From: tchr <tchr@mit.edu>
Date: Tue, 28 Jul 2020 00:03:38 -0400
Subject: [PATCH 09/16] Remove MPI guards/warnings (#112 has been merged)

---
 mpb/transform.c | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/mpb/transform.c b/mpb/transform.c
index c45b13f2..3057ebc3 100644
--- a/mpb/transform.c
+++ b/mpb/transform.c
@@ -37,7 +37,7 @@
    for a symmetry operation {W|w} with point-group part W and translation part w; each 
    specified in the lattice basis. The vector fields Eᵢ and Dᵢ include Bloch phases.
    If instead curfield is set to the real-space B-field, the overlap
-         ∫ Hᵢ(r){W|v}Bᵢ(r) dr  =  det(W) ∫ Eᵢ(r)(WDᵢ)({W|w}⁻¹r) dr,
+         ∫ Hᵢ(r){W|v}Bᵢ(r) dr  =  det(W) ∫ Hᵢ(r)(WBᵢ)({W|w}⁻¹r) dr,
    is computed instead. Note that a factor det(W) is then included since B & H are
    pseudovectors. As a result, the computed symmetry expectation values are independent
    of whether the D- or B-field is used.
@@ -58,10 +58,8 @@ cnumber transformed_overlap(matrix3x3 W, vector3 w)
         mpi_one_fprintf(stderr, "a real-space H- or D-field must be loaded beforehand\n");
         return integral;
     }
-    #ifdef HAVE_MPI
-        CHECK(0, "transformed_overlap(..) not yet implemented for MPI!"); /* TODO: Remove if PR#112 is merged */
-    #endif
     /* TODO: Is special-casing for #ifndef SCALAR_COMPLEX needed? */
+    /* TODO: Doesn't seem to work with mpbi */
     
     /* prepare before looping ... */
     n1 = mdata->nx; n2 = mdata->ny; n3 = mdata->nz;
@@ -105,9 +103,7 @@ cnumber transformed_overlap(matrix3x3 W, vector3 w)
         pt = matrix3x3_vector3_mult(invW, vector3_minus(p, w));
 
         /* Bloch field value at transformed coordinate pt: interpolation is needed to ensure
-           generality in the case of fractional translations. Unfortunately, this currently
-           precludes compatibility with MPI, since get_val is not implemented for MPI        */
-        /* TODO: Remove disclaimer above if PR#112 is merged */
+           generality in the case of fractional translations.                                */
         get_bloch_field_point_(Ftemp, pt); /* assigns Ftemp to field at p [excludes exp(ikr) factor] */
 
         /* Transform the vector components of Ftemp by W to obtain Ft; a bit repetitious but 

From 08cf53a8482b4d900fc61f1067919b51e05aa8fc Mon Sep 17 00:00:00 2001
From: Thomas Christensen <tchr@mit.edu>
Date: Wed, 2 Sep 2020 10:24:26 -0400
Subject: [PATCH 10/16] throw when used with mpbi

- also add a comment to remind ourselves why the current implementation
doesn't work with mpbi + a likely explanation for the origin of the
issue
---
 mpb/transform.c | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/mpb/transform.c b/mpb/transform.c
index 3057ebc3..944d6bbd 100644
--- a/mpb/transform.c
+++ b/mpb/transform.c
@@ -58,8 +58,17 @@ cnumber transformed_overlap(matrix3x3 W, vector3 w)
         mpi_one_fprintf(stderr, "a real-space H- or D-field must be loaded beforehand\n");
         return integral;
     }
-    /* TODO: Is special-casing for #ifndef SCALAR_COMPLEX needed? */
-    /* TODO: Doesn't seem to work with mpbi */
+    #ifndef SCALAR_COMPLEX
+        CHECK(0, "transformed_overlap(..) is not yet implemented for mpbi");
+        /* NOTE: Not completely sure why the current implementation does not work for mpbi
+           (i.e for assumed-inversion): one clue is that running this with mpbi and the
+           error-check off gives symmetry eigenvalues whose norm are ≈(ni÷2+1)/ni (where
+           ni=n1=n2=n3) instead of near-unity (as they should be). This suggests we are not
+           counting the number of grid points correctly somehow: I think the root issue is
+           that we use the LOOP_XYZ macro, which has special handling for mbpi (i.e. only
+           "visits" the "inversion-reduced" part of the unitcell): but here, we actually
+           really want to (or at least assume to) visit _all_ the points in the unitcell. */
+    #endif
     
     /* prepare before looping ... */
     n1 = mdata->nx; n2 = mdata->ny; n3 = mdata->nz;

From 5664c26c1c36da62b541c743a2e14a48d75799d8 Mon Sep 17 00:00:00 2001
From: Thomas Christensen <tchr@mit.edu>
Date: Thu, 10 Sep 2020 15:12:23 -0400
Subject: [PATCH 11/16] put guard-rails back on MPI: doesn't seem to work as
 is.

- add a note describing the problem
---
 mpb/transform.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/mpb/transform.c b/mpb/transform.c
index 944d6bbd..dcfa782e 100644
--- a/mpb/transform.c
+++ b/mpb/transform.c
@@ -58,6 +58,7 @@ cnumber transformed_overlap(matrix3x3 W, vector3 w)
         mpi_one_fprintf(stderr, "a real-space H- or D-field must be loaded beforehand\n");
         return integral;
     }
+
     #ifndef SCALAR_COMPLEX
         CHECK(0, "transformed_overlap(..) is not yet implemented for mpbi");
         /* NOTE: Not completely sure why the current implementation does not work for mpbi
@@ -69,6 +70,16 @@ cnumber transformed_overlap(matrix3x3 W, vector3 w)
            "visits" the "inversion-reduced" part of the unitcell): but here, we actually
            really want to (or at least assume to) visit _all_ the points in the unitcell. */
     #endif
+    #ifdef HAVE_MPI
+        /* NOTE: It seems there's some racey stuff going on with the MPI implementation,
+           unfortunately, so it doesn't end giving consistent (or even meaningful) results.
+           The issue could be that both `LOOP_XYZ` is distributed over workers _and_ that
+           `get_bloch_field_point_` also is (via the interpolation). I'm imagining that such
+           a naive "nested parallelism" doesn't jive here.
+           Long story short: disable it for now.
+        */
+        CHECK(0, "transformed_overlap(..) is not yet implemented for MPI");
+    #endif
     
     /* prepare before looping ... */
     n1 = mdata->nx; n2 = mdata->ny; n3 = mdata->nz;

From 816caa1e7dab42ec86613b169fb9d6904ea07568 Mon Sep 17 00:00:00 2001
From: Thomas Christensen <tchr@mit.edu>
Date: Tue, 6 Apr 2021 10:58:18 -0400
Subject: [PATCH 12/16] improve consistency of comments

---
 mpb/fields.c    |   7 ++-
 mpb/mpb.scm.in  |   2 +
 mpb/transform.c | 114 +++++++++++++++++++++++++-----------------------
 3 files changed, 64 insertions(+), 59 deletions(-)

diff --git a/mpb/fields.c b/mpb/fields.c
index 6bcb7cb5..df9b50d6 100644
--- a/mpb/fields.c
+++ b/mpb/fields.c
@@ -720,10 +720,9 @@ cvector3 get_bloch_field_point(vector3 p)
      scalar_complex field[3];
      cvector3 F;
 
-	 get_bloch_field_point_(field, p);
+     get_bloch_field_point_(field, p);
 
-	 F = cscalar32cvector3(field);
-     return F;
+     return cscalar32cvector3(field);
 }
 
 cvector3 get_field_point(vector3 p)
@@ -745,7 +744,7 @@ cvector3 get_field_point(vector3 p)
      }
 
 	 F = cscalar32cvector3(field);
-     return F;
+	 return F;
 }
 
 cnumber get_bloch_cscalar_point(vector3 p)
diff --git a/mpb/mpb.scm.in b/mpb/mpb.scm.in
index 546393c5..364c45e0 100644
--- a/mpb/mpb.scm.in
+++ b/mpb/mpb.scm.in
@@ -210,12 +210,14 @@
   'number 'function)
 (define-external-function compute-energy-in-object-list false false
   'number (make-list-type 'geometric-object))
+
 (define-external-function transformed-overlap false false 
   'cnumber 'matrix3x3 'vector3)
 (define-external-function compute-symmetries false false
   (make-list-type 'cnumber) 'matrix3x3 'vector3)
 (define-external-function compute-symmetry false false
   'cnumber 'integer 'matrix3x3 'vector3)
+
 (define-external-function output-field-to-file false false
   no-return-value 'integer 'string)  
 
diff --git a/mpb/transform.c b/mpb/transform.c
index dcfa782e..ca6b4a37 100644
--- a/mpb/transform.c
+++ b/mpb/transform.c
@@ -32,19 +32,21 @@
 
 
 
-/* If curfield is set to the real-space D-field (of band-index i), computes the overlap 
-         ∫ Eᵢ(r){W|w}Dᵢ(r) dr  =  ∫ Eᵢ(r)(WDᵢ)({W|w}⁻¹r) dr,
-   for a symmetry operation {W|w} with point-group part W and translation part w; each 
-   specified in the lattice basis. The vector fields Eᵢ and Dᵢ include Bloch phases.
-   If instead curfield is set to the real-space B-field, the overlap
-         ∫ Hᵢ(r){W|v}Bᵢ(r) dr  =  det(W) ∫ Hᵢ(r)(WBᵢ)({W|w}⁻¹r) dr,
-   is computed instead. Note that a factor det(W) is then included since B & H are
-   pseudovectors. As a result, the computed symmetry expectation values are independent
-   of whether the D- or B-field is used.
-   No other choices for curfield are allowed: to set curfield to the real-space B- or D-
-   field call get-bfield/dfield in Scheme, or get_bfield/dfield in C.                   
-   Usually, it will be more convenient to use the accessor compute_symmetry(i, W, w) 
-   which defaults to the B-field (since μ = 1 usually) and instead takes a band-index i .*/
+/* If `curfield` is the real-space D-field (of band-index i), computes the overlap 
+ *       ∫ Eᵢ†(r){W|w}Dᵢ(r) dr  =  ∫ Eᵢ†(r)(WDᵢ)({W|w}⁻¹r) dr,
+ * for a symmetry operation {W|w} with rotation W and translation w; each specified 
+ * in the lattice basis. The vector fields Eᵢ and Dᵢ include Bloch phases.
+ * If `curfield` is the real-space B-field, the overlap
+ *       ∫ Hᵢ(r){W|v}Bᵢ(r) dr  =  det(W) ∫ Hᵢ(r)(WBᵢ)({W|w}⁻¹r) dr,
+ * is computed instead. Note that a factor det(W) is then included since B & H are
+ * pseudovectors. As a result, the computed symmetry expectation values are
+ * independent of whether the D- or B-field is used.
+ * No other choices for `curfield` are allowed: to set `curfield` to the real-space
+ * B- or D-field use the `get-bfield` and `get-dfield` Scheme functions or the
+ * `get_bfield` and `get_dfield` in C functions.                   
+ * Usually, it will be more convenient to use the wrapper `compute_symmetry(i, W, w)`
+ * which defaults to the B-field (since μ = 1 usually) and takes a band-index `i`.
+ */
 cnumber transformed_overlap(matrix3x3 W, vector3 w)
 {
     int n1, n2, n3;
@@ -55,30 +57,31 @@ cnumber transformed_overlap(matrix3x3 W, vector3 w)
     matrix3x3 invW, Wc;
 
     if (!curfield || !strchr("db", curfield_type)) {
-        mpi_one_fprintf(stderr, "a real-space H- or D-field must be loaded beforehand\n");
+        mpi_one_fprintf(stderr, "curfield must refer to either a B- or D-field\n");
         return integral;
     }
 
     #ifndef SCALAR_COMPLEX
         CHECK(0, "transformed_overlap(..) is not yet implemented for mpbi");
         /* NOTE: Not completely sure why the current implementation does not work for mpbi
-           (i.e for assumed-inversion): one clue is that running this with mpbi and the
-           error-check off gives symmetry eigenvalues whose norm are ≈(ni÷2+1)/ni (where
-           ni=n1=n2=n3) instead of near-unity (as they should be). This suggests we are not
-           counting the number of grid points correctly somehow: I think the root issue is
-           that we use the LOOP_XYZ macro, which has special handling for mbpi (i.e. only
-           "visits" the "inversion-reduced" part of the unitcell): but here, we actually
-           really want to (or at least assume to) visit _all_ the points in the unitcell. */
+         * (i.e for assumed-inversion): one clue is that running this with mpbi and the
+         * error-check off gives symmetry eigenvalues whose norm are ≈(ni÷2+1)/ni (where
+         * ni=n1=n2=n3) instead of near-unity (as they should be). This suggests we are not
+         * counting the number of grid points correctly somehow: I think the root issue is
+         * that we use the LOOP_XYZ macro, which has special handling for mbpi (i.e. only
+         * "visits" the "inversion-reduced" part of the unitcell): but here, we actually
+         * really want to (or at least assume to) visit _all_ the points in the unitcell. 
+         */
     #endif
     #ifdef HAVE_MPI
-        /* NOTE: It seems there's some racey stuff going on with the MPI implementation,
-           unfortunately, so it doesn't end giving consistent (or even meaningful) results.
-           The issue could be that both `LOOP_XYZ` is distributed over workers _and_ that
-           `get_bloch_field_point_` also is (via the interpolation). I'm imagining that such
-           a naive "nested parallelism" doesn't jive here.
-           Long story short: disable it for now.
-        */
         CHECK(0, "transformed_overlap(..) is not yet implemented for MPI");
+        /* NOTE: It seems there's some racey stuff going on with the MPI implementation,
+         * unfortunately, so it doesn't end giving consistent (or even meaningful) results.
+         * The issue could be that both `LOOP_XYZ` is distributed over workers _and_ that
+         * `get_bloch_field_point_` also is (via the interpolation). I'm imagining that such
+         * a naive "nested parallelism" doesn't jive here.
+         * Long story short: disable it for now.
+         */
     #endif
     
     /* prepare before looping ... */
@@ -97,38 +100,40 @@ cnumber transformed_overlap(matrix3x3 W, vector3 w)
         return integral;
     }
     invW = matrix3x3_inverse(W);
-    /* W is specified in the lattice basis, but field *vectors* evaluated in real-space are
-       in a Cartesian basis: when we transform the vector components, we must account for this
-       difference. We transform W to a Cartesian basis Wc=RWR⁻¹ (R=geometry_lattice.basis, a
-       matrix w/ columns of Cartesian basis vectors) and then use Wc to transform vector fields */
+    /* W is specified in the lattice basis, but field *vectors* evaluated in real-space
+     * are in a Cartesian basis: when we transform the vector components, we must account
+     * for this difference. We transform W to a Cartesian basis Wc = RWR⁻¹ (with
+     * R = geometry_lattice.basis, a matrix w/ columns of Cartesian basis vectors) and
+     * then use Wc to transform vector fields.
+     */
     Wc = matrix3x3_mult(matrix3x3_mult(geometry_lattice.basis, W),
                         matrix3x3_inverse(geometry_lattice.basis));
 
-    kvector.x *= TWOPI/geometry_lattice.size.x; /* hoist these rescalings outside the  */
-    kvector.y *= TWOPI/geometry_lattice.size.y; /* loop; might be that licm takes care */
-    kvector.z *= TWOPI/geometry_lattice.size.z; /* of it - but better to be sure       */
+    /* hoist rescalings outside the loop (maybe licm takes care of it, but do it anyway) */
+    kvector.x *= TWOPI/geometry_lattice.size.x;
+    kvector.y *= TWOPI/geometry_lattice.size.y;
+    kvector.z *= TWOPI/geometry_lattice.size.z;
 
-    /* Loop over coordinates (introduces int vars i1, i2, i3, xyz_index) */
-    LOOP_XYZ(mdata) { /* implies two opening braces '{{' */
+    /* loop over coordinates (introduces int vars `i1`, `i2`, `i3`, `xyz_index`) */
+    LOOP_XYZ(mdata) { /* implies two opening braces `{{` */
         vector3 p, pt;
         scalar_complex F[3], Ft[3], Ftemp[3], integrand, phase;
         double deltaphi;
 
-        /* Current lattice coordinate */
+        /* current lattice coordinate */
         p.x = i1 * s1 - c1;
         p.y = i2 * s2 - c2;
         p.z = i3 * s3 - c3;
 
-        /* Transformed coordinate pt = {W|w}⁻¹p = W⁻¹(p-w) since {W|w}⁻¹={W⁻¹|-W⁻¹w} */
+        /* transformed coordinate pt = {W|w}⁻¹p = W⁻¹(p-w) since {W|w}⁻¹={W⁻¹|-W⁻¹w} */
         pt = matrix3x3_vector3_mult(invW, vector3_minus(p, w));
 
-        /* Bloch field value at transformed coordinate pt: interpolation is needed to ensure
-           generality in the case of fractional translations.                                */
-        get_bloch_field_point_(Ftemp, pt); /* assigns Ftemp to field at p [excludes exp(ikr) factor] */
+        /* Bloch field value at transformed coordinate pt: interpolation is needed to 
+         * ensure generality in the case of fractional translations.                  */
+        get_bloch_field_point_(Ftemp, pt); /* assign `Ftemp` to field at `p` (without eⁱᵏʳ factor) */
 
-        /* Transform the vector components of Ftemp by W to obtain Ft; a bit repetitious but 
-           we just write out the matrix-product manually here, for real and imag components.
-           Since F, Ftemp, and Ft are in a Cartesian basis, we use Wc instead of W.          */
+        /* define `Ft` as the vector components of `Ftemp` transformed by `Wc`; we just
+         * write out the matrix-product manually here, for both real & imag parts       */
         Ft[0].re = Wc.c0.x*Ftemp[0].re + Wc.c1.x*Ftemp[1].re + Wc.c2.x*Ftemp[2].re;
         Ft[0].im = Wc.c0.x*Ftemp[0].im + Wc.c1.x*Ftemp[1].im + Wc.c2.x*Ftemp[2].im;
         Ft[1].re = Wc.c0.y*Ftemp[0].re + Wc.c1.y*Ftemp[1].re + Wc.c2.y*Ftemp[2].re;
@@ -136,11 +141,11 @@ cnumber transformed_overlap(matrix3x3 W, vector3 w)
         Ft[2].re = Wc.c0.z*Ftemp[0].re + Wc.c1.z*Ftemp[1].re + Wc.c2.z*Ftemp[2].re;
         Ft[2].im = Wc.c0.z*Ftemp[0].im + Wc.c1.z*Ftemp[1].im + Wc.c2.z*Ftemp[2].im;
 
-        /* Get the Bloch field value at current point p [exludes exp(ikr) factor].
-           We multiply the input field F (either B or D-field) with μ⁻¹ or ε⁻¹ to get
-           H- or E-fields, as the relevant overlap is ⟨F|Ft⟩ = (H|Bt) or (E|Dt), with
-           t-postscript denoting a field transformed by {W|w}. Here, we essentially
-           adapt some boiler-plate code from compute_field_energy_internal in fields.c       */
+        /* get the Bloch field value at current point `p` (without eⁱᵏʳ factor).
+         * We multiply the input field `F` (either B or D-field) with μ⁻¹ or ε⁻¹ to get
+         * H- or E-fields, as the relevant overlap is ⟨F|Ft⟩ = ⟨H|Bt⟩ or ⟨E|Dt⟩, with
+         * t-postscript denoting a field transformed by {W|w}. Here, we essentially
+         * adapt some boiler-plate code from compute_field_energy_internal in fields.c   */
         if (curfield_type == 'd') {
             assign_symmatrix_vector(F, mdata->eps_inv[xyz_index], curfield+3*xyz_index);
         }
@@ -153,18 +158,17 @@ cnumber transformed_overlap(matrix3x3 W, vector3 w)
             F[2] = curfield[3*xyz_index+2];
         }
         
-        /* Inner product of F and Ft={W|w}F in Bloch form */
+        /* inner product of F and Ft={W|w}F in Bloch form */
         CASSIGN_CONJ_MULT(integrand, F[0], Ft[0]);  /* add adjoint(F)*Ft to integrand */
         CACCUMULATE_SUM_CONJ_MULT(integrand, F[1], Ft[1]);
         CACCUMULATE_SUM_CONJ_MULT(integrand, F[2], Ft[2]);
 
-        /* So far, we have excluded the Bloch phases; they must be included, however.
-           It saves two trigonometric operations if we do them just jointly for p and pt.
-           Note that rescaling by TWOPI/geometry_lattice.xyz is hoisted outside loop      */
+        /* so far, Bloch phases have been excluded; we include them here. It saves two
+         * trigonometric operations if we do them just jointly for `p` and `pt`.        */
         deltaphi = (kvector.x*(pt.x-p.x) + kvector.y*(pt.y-p.y) + kvector.z*(pt.z-p.z));
         CASSIGN_SCALAR(phase, cos(deltaphi), sin(deltaphi));
 
-        /* Add integrand-contribution to integral, keeping Bloch phases in mind */
+        /* finally, add integrand-contribution to integral, with Bloch phases included */
         integral.re += CSCALAR_MULT_RE(integrand, phase);
         integral.im += CSCALAR_MULT_IM(integrand, phase);
     }}}

From 91b8180168df31a03182d07571ad8c4a3f1565fd Mon Sep 17 00:00:00 2001
From: Thomas Christensen <tchr@mit.edu>
Date: Tue, 6 Apr 2021 11:00:12 -0400
Subject: [PATCH 13/16] add Scheme documentation

---
 doc/docs/Scheme_User_Interface.md | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/doc/docs/Scheme_User_Interface.md b/doc/docs/Scheme_User_Interface.md
index 10d44909..1b19fa5c 100644
--- a/doc/docs/Scheme_User_Interface.md
+++ b/doc/docs/Scheme_User_Interface.md
@@ -571,6 +571,31 @@ Returns a list of the group-velocity components (units of *c*) in the given *dir
 &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
 As above, but returns the group velocity or component thereof only for band `which-band`.
 
+### Band symmetry
+
+MPB can compute the "characters" of a symmetry operation $g$ applied to a field at a particular point, i.e. MPB can compute the overlap integrals:
+
+```math
+(\mathbf{F}|g\mathbf{F}') = \int \mathbf{F}(\mathbf{r})^\dagger [g\mathbf{F}'(\mathbf{r})] \, \mathrm{d}\mathbf{r} = \int \mathbf{F}(\mathbf{r})^\dagger (g\mathbf{F}')(g^{-1}\mathbf{r}) \, \mathrm{d}\mathbf{r}
+```
+
+where **F** denotes either the **E**- or **H**-field and **F**′ denotes the associated **D**- or **B**-field (including Bloch phases) and integration is over the unit cell.
+The space group operation $g$ may include both rotational ($W$) and translational ($w$) parts, and are specified by the associated matrix-column pair $g = \{W,w\}$ acting on points as $\{W,w\}\mathbf{r} = W\mathbf{r} + w$.
+$W$ and $w$ should be supplied as `matrix3x3` and `vector3` variables in the basis of the lattice (listings of space group operations are available e.g. on the [Bilbao Crystallographic Server](https://www.cryst.ehu.es/)).
+(Note that $g$ transforms both the vectorial components of **F**′ as well as its coordinates: if **F**′ refers to a **B**-field, an additional factor of $\mathop{\mathrm{det}}W$ is incorporated to account for its pseudovectorial nature.)
+
+Given the character tables associated with the little groups of a considered photonic crystal, this functionality can e.g. be used to infer which irreducible representation (irrep) a given band (or band grouping) transforms as across the Brillouin zone.
+
+**`(compute-symmetry which_band W w)`**
+Compute the $(\mathbf{H}|g\mathbf{B})$ character for `which-band` under a symmetry operation with rotation `W` and translation `w`, returning a complex number.
+
+**`(compute-symmetries W w)`**
+Equivalent to `compute-symmetry`, but returns characters for *all* computed bands, returning a list of complex numbers.
+
+**`(transformed_overlap W w)`**
+Sets **F**′ to `curfield` (must be either a **D**- or **B**-field) and computes the associated character under the symmetry operation specified by `W` and `w`, returning a complex number.
+Usually, it will be more convenient to call `compute-symmetry` (which wraps `transformed_overlap` with an initialization of `curfield` via `get-bfield`).
+
 Field Manipulation
 ------------------
 

From b26b0ed4dfd42b93e33287d6ee042b6a71954864 Mon Sep 17 00:00:00 2001
From: Thomas Christensen <tchr@mit.edu>
Date: Sun, 30 May 2021 23:36:12 -0400
Subject: [PATCH 14/16] comment nits

---
 mpb/transform.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/mpb/transform.c b/mpb/transform.c
index ca6b4a37..a84f605d 100644
--- a/mpb/transform.c
+++ b/mpb/transform.c
@@ -37,7 +37,7 @@
  * for a symmetry operation {W|w} with rotation W and translation w; each specified 
  * in the lattice basis. The vector fields Eᵢ and Dᵢ include Bloch phases.
  * If `curfield` is the real-space B-field, the overlap
- *       ∫ Hᵢ(r){W|v}Bᵢ(r) dr  =  det(W) ∫ Hᵢ(r)(WBᵢ)({W|w}⁻¹r) dr,
+ *       ∫ Hᵢ†(r){W|w}Bᵢ(r) dr  =  det(W) ∫ Hᵢ†(r)(WBᵢ)({W|w}⁻¹r) dr,
  * is computed instead. Note that a factor det(W) is then included since B & H are
  * pseudovectors. As a result, the computed symmetry expectation values are
  * independent of whether the D- or B-field is used.
@@ -130,7 +130,7 @@ cnumber transformed_overlap(matrix3x3 W, vector3 w)
 
         /* Bloch field value at transformed coordinate pt: interpolation is needed to 
          * ensure generality in the case of fractional translations.                  */
-        get_bloch_field_point_(Ftemp, pt); /* assign `Ftemp` to field at `p` (without eⁱᵏʳ factor) */
+        get_bloch_field_point_(Ftemp, pt); /* assign `Ftemp` to field at `pt` (without eⁱᵏʳ factor) */
 
         /* define `Ft` as the vector components of `Ftemp` transformed by `Wc`; we just
          * write out the matrix-product manually here, for both real & imag parts       */

From beabf8d004c0330b5a056064ea91fee484a73292 Mon Sep 17 00:00:00 2001
From: Thomas Christensen <tchr@mit.edu>
Date: Fri, 4 Jun 2021 10:58:51 -0400
Subject: [PATCH 15/16] add tests and improve `get_bloch_field_point_` function
 parameter signature

---
 examples/check.ctl | 41 +++++++++++++++++++++++++++++++++++++++++
 mpb/fields.c       |  2 +-
 2 files changed, 42 insertions(+), 1 deletion(-)

diff --git a/examples/check.ctl b/examples/check.ctl
index b60948aa..4a6c789a 100644
--- a/examples/check.ctl
+++ b/examples/check.ctl
@@ -303,6 +303,47 @@
 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
+(print
+ "**************************************************************************\n"
+ " Test case: symmetry transformed overlaps & inversion/mirror eigenvalues.\n"
+ "**************************************************************************\n"
+)
+
+(set! resolution 16)
+(set! num-bands 6)
+(set! default-material air)
+; define a simple geometry with inversion and z-mirror
+(set! geometry-lattice
+  (make lattice (size 1 1 1) (basis1 1 0 0) (basis2 0 1 0) (basis3 0 0 1)))
+(set! geometry 
+  (list (make sphere (center 0 0 0) (radius 0.25) (material (make dielectric (epsilon 13) )))))
+
+; compare inversion eigenvalues at k = [1,1,1]/2 against precomputed values
+(set! k-points (list (vector3 0.5 0.5 0.5)))                              ; little group includes inversion
+(define W (matrix3x3 (vector3 -1 0 0) (vector3 0 -1 0) (vector3 0 0 -1))) ; inversion as operation {W|w}
+(define w (vector3 0 0 0))
+
+(run)
+(define symeigs-inv (compute-symmetries W w))
+(check-almost-equal (map real-part symeigs-inv) '(+1 +1 +1 -1 -1 -1))
+(check-almost-equal (map imag-part symeigs-inv) '( 0  0  0  0  0  0))
+
+; compare with run-zeven and run-zodd at k = 0 [must be 0 due to https://github.com/NanoComp/mpb/issues/114]
+(set! k-points (list (vector3 0 0 0)))
+(define mz (matrix3x3 (vector3 1 0 0)  (vector3 0 1 0)  (vector3 0 0 -1)))
+
+(run-zeven) ; even z-parity check
+(define symeigs-zeven (compute-symmetries mz w))
+(check-almost-equal (map real-part symeigs-zeven) '(+1 +1 +1 +1 +1 +1))
+(check-almost-equal (map imag-part symeigs-zeven) '( 0  0  0  0  0  0))
+
+(run-zodd) ; odd z-parity check
+(define symeigs-zodd (compute-symmetries mz w))
+(check-almost-equal (map real-part symeigs-zodd) '(-1 -1 -1 -1 -1 -1))
+(check-almost-equal (map imag-part symeigs-zodd) '( 0  0  0  0  0  0))
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
 (display-eigensolver-stats)
 (print "Relative error ranged from " min-err " to " max-err
 	      ", with a mean of " (/ sum-err num-err) "\n")
diff --git a/mpb/fields.c b/mpb/fields.c
index df9b50d6..945f427b 100644
--- a/mpb/fields.c
+++ b/mpb/fields.c
@@ -706,7 +706,7 @@ number get_energy_point(vector3 p)
      return f_interp_val(p, mdata, (real *) curfield, 1, 0);
 }
 
-void get_bloch_field_point_(scalar_complex *field, vector3 p)
+void get_bloch_field_point_(scalar_complex field[static 3], vector3 p)
 {
      CHECK(curfield && strchr("dhbecv", curfield_type),
 	   "field must be must be loaded before get-*field*-point");

From c017a86b6a03d66577a2eb46b6a34a6e4e155416 Mon Sep 17 00:00:00 2001
From: Thomas Christensen <tchr@mit.edu>
Date: Sat, 5 Jun 2021 09:13:08 -0400
Subject: [PATCH 16/16] Update mpb/fields.c

Revert use of `static` for C89 compatibility.

Co-authored-by: Steven G. Johnson <stevenj@mit.edu>
---
 mpb/fields.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mpb/fields.c b/mpb/fields.c
index 945f427b..c154f11c 100644
--- a/mpb/fields.c
+++ b/mpb/fields.c
@@ -706,7 +706,7 @@ number get_energy_point(vector3 p)
      return f_interp_val(p, mdata, (real *) curfield, 1, 0);
 }
 
-void get_bloch_field_point_(scalar_complex field[static 3], vector3 p)
+void get_bloch_field_point_(scalar_complex field[3], vector3 p)
 {
      CHECK(curfield && strchr("dhbecv", curfield_type),
 	   "field must be must be loaded before get-*field*-point");