From 5d57155c2b8a63fdd26a994151e5a4bf08823f7c Mon Sep 17 00:00:00 2001
From: Hal Finkel <hfinkel@anl.gov>
Date: Mon, 25 Sep 2017 16:15:39 -0500
Subject: [PATCH] Add support for float4 (and other homogeneous aggregates)

---
 GenericIO.cxx               |  25 +++++--
 GenericIO.h                 | 127 +++++++++++++++++++++++++++++++++---
 GenericIOBenchmarkRead.cxx  |  79 +++++++++++++++-------
 GenericIOBenchmarkWrite.cxx | 110 +++++++++++++++++++++++--------
 GenericIOPrint.cxx          |  43 ++++++++++--
 5 files changed, 310 insertions(+), 74 deletions(-)

diff --git a/GenericIO.cxx b/GenericIO.cxx
index 7f7c590..0d31bae 100644
--- a/GenericIO.cxx
+++ b/GenericIO.cxx
@@ -321,6 +321,7 @@ struct VariableHeader {
   char Name[NameSize];
   endian_specific_value<uint64_t, IsBigEndian> Flags;
   endian_specific_value<uint64_t, IsBigEndian> Size;
+  endian_specific_value<uint64_t, IsBigEndian> ElementSize;
 };
 
 template <bool IsBigEndian>
@@ -582,6 +583,7 @@ nocomp:
       if (Vars[i].MaybePhysGhost) VFlags |= ValueMaybePhysGhost;
       VH->Flags = VFlags;
       RecordSize += VH->Size = Vars[i].Size;
+      VH->ElementSize = Vars[i].ElementSize;
     }
 
     MPI_Gather(&RHLocal, sizeof(RHLocal), MPI_BYTE,
@@ -1378,6 +1380,10 @@ void GenericIO::readData(int EffRank, size_t RowOffset, int Rank,
         continue;
       }
 
+      size_t ElementSize = VH->Size;
+      if (offsetof_safe(VH, ElementSize) < GH->VarsSize)
+        ElementSize = VH->ElementSize;
+
       VarFound = true;
       bool IsFloat = (bool) (VH->Flags & FloatValue),
            IsSigned = (bool) (VH->Flags & SignedValue);
@@ -1387,6 +1393,12 @@ void GenericIO::readData(int EffRank, size_t RowOffset, int Rank,
               " in: " << OpenFileName << ": current: " << Vars[i].Size <<
               ", file: " << VH->Size;
         throw runtime_error(ss.str());
+      } else if (ElementSize != Vars[i].ElementSize) {
+        stringstream ss;
+        ss << "Element size mismatch for variable " << Vars[i].Name <<
+              " in: " << OpenFileName << ": current: " << Vars[i].ElementSize <<
+              ", file: " << ElementSize;
+        throw runtime_error(ss.str());
       } else if (IsFloat != Vars[i].IsFloat) {
         string Float("float"), Int("integer");
         stringstream ss;
@@ -1564,9 +1576,10 @@ void GenericIO::readData(int EffRank, size_t RowOffset, int Rank,
 
       // Byte swap the data if necessary.
       if (IsBigEndian != isBigEndian())
-        for (size_t j = 0; j < RH->NElems; ++j) {
-          char *Offset = ((char *) VarData) + j*Vars[i].Size;
-          bswap(Offset, Vars[i].Size);
+        for (size_t j = 0;
+             j < RH->NElems*(Vars[i].Size/Vars[i].ElementSize); ++j) {
+          char *Offset = ((char *) VarData) + j*Vars[i].ElementSize;
+          bswap(Offset, Vars[i].ElementSize);
         }
 
       break;
@@ -1625,6 +1638,10 @@ void GenericIO::getVariableInfo(vector<VariableInfo> &VI) {
     if (VNameNull < NameSize)
       VName.resize(VNameNull);
 
+    size_t ElementSize = VH->Size;
+    if (offsetof_safe(VH, ElementSize) < GH->VarsSize)
+      ElementSize = VH->ElementSize;
+
     bool IsFloat = (bool) (VH->Flags & FloatValue),
          IsSigned = (bool) (VH->Flags & SignedValue),
          IsPhysCoordX = (bool) (VH->Flags & ValueIsPhysCoordX),
@@ -1633,7 +1650,7 @@ void GenericIO::getVariableInfo(vector<VariableInfo> &VI) {
          MaybePhysGhost = (bool) (VH->Flags & ValueMaybePhysGhost);
     VI.push_back(VariableInfo(VName, (size_t) VH->Size, IsFloat, IsSigned,
                               IsPhysCoordX, IsPhysCoordY, IsPhysCoordZ,
-                              MaybePhysGhost));
+                              MaybePhysGhost, ElementSize));
   }
 }
 
diff --git a/GenericIO.h b/GenericIO.h
index b3cedaf..817e701 100644
--- a/GenericIO.h
+++ b/GenericIO.h
@@ -117,6 +117,50 @@ protected:
   int FH;
 };
 
+namespace detail {
+// A standard enable_if idiom (we include our own here for pre-C++11 support).
+template <bool B, typename T = void>
+struct enable_if {};
+
+template <typename T>
+struct enable_if<true, T> { typedef T type; };
+
+// A SFINAE-based trait to detect whether a type has a member named x. This is
+// designed to work both with structs/classes and also with OpenCL-style vector
+// types.
+template <typename T>
+class has_x {
+  typedef char yes[1];
+  typedef char no[2];
+
+  template <typename C>
+  static yes &test(char(*)[sizeof((*((C *) 0)).x)]);
+
+  template <typename C>
+  static no &test(...);
+
+public:
+  enum { value = sizeof(test<T>(0)) == sizeof(yes) };
+};
+
+// A SFINAE-based trait to detect whether a type is array-like (i.e. supports
+// the [] operator).
+template <typename T>
+class is_array {
+  typedef char yes[1];
+  typedef char no[2];
+
+  template <typename C>
+  static yes &test(char(*)[sizeof((*((C *) 0))[0])]);
+
+  template <typename C>
+  static no &test(...);
+
+public:
+  enum { value = sizeof(test<T>(0)) == sizeof(yes) };
+};
+} // namespace detail
+
 class GenericIO {
 public:
   enum VariableFlags {
@@ -132,10 +176,10 @@ public:
 
   struct VariableInfo {
     VariableInfo(const std::string &N, std::size_t S, bool IF, bool IS,
-                 bool PCX, bool PCY, bool PCZ, bool PG)
+                 bool PCX, bool PCY, bool PCZ, bool PG, std::size_t ES = 0)
       : Name(N), Size(S), IsFloat(IF), IsSigned(IS),
         IsPhysCoordX(PCX), IsPhysCoordY(PCY), IsPhysCoordZ(PCZ),
-        MaybePhysGhost(PG) {}
+        MaybePhysGhost(PG), ElementSize(ES ? ES : S) {}
 
     std::string Name;
     std::size_t Size;
@@ -143,20 +187,70 @@ public:
     bool IsSigned;
     bool IsPhysCoordX, IsPhysCoordY, IsPhysCoordZ;
     bool MaybePhysGhost;
+    std::size_t ElementSize;
   };
 
 public:
-  struct Variable {
+  class Variable {
+  private:
+    template <typename ET>
+    void deduceTypeInfoFromElement(ET *) {
+      ElementSize = sizeof(ET);
+      IsFloat = !std::numeric_limits<ET>::is_integer;
+      IsSigned = std::numeric_limits<ET>::is_signed;
+    }
+
+    // There are specializations here to handle array types
+    // (e.g. typedef float float4[4];), struct types
+    // (e.g. struct float4 { float x, y, z, w; };), and scalar types.
+    // Builtin vector types
+    // (e.g. typedef float float4 __attribute__((ext_vector_type(4)));) should
+    // also work.
+    template <typename T>
+    typename detail::enable_if<detail::is_array<T>::value, void>::type
+    deduceTypeInfo(T *D) {
+      Size = sizeof(T);
+      deduceTypeInfoFromElement(&(*D)[0]);
+    }
+
+    template <typename T>
+    typename detail::enable_if<detail::has_x<T>::value &&
+                               !detail::is_array<T>::value, void>::type
+    deduceTypeInfo(T *D) {
+      Size = sizeof(T);
+      deduceTypeInfoFromElement(&(*D).x);
+    }
+
+    template <typename T>
+    typename detail::enable_if<!detail::has_x<T>::value &&
+                               !detail::is_array<T>::value, void>::type
+    deduceTypeInfo(T *D) {
+      Size = sizeof(T);
+      deduceTypeInfoFromElement(D);
+    }
+
+  public:
     template <typename T>
     Variable(const std::string &N, T* D, unsigned Flags = 0)
-      : Name(N), Size(sizeof(T)),
-        IsFloat(!std::numeric_limits<T>::is_integer),
-        IsSigned(std::numeric_limits<T>::is_signed),
-        Data((void *) D), HasExtraSpace(Flags & VarHasExtraSpace),
+      : Name(N), Data((void *) D), HasExtraSpace(Flags & VarHasExtraSpace),
         IsPhysCoordX(Flags & VarIsPhysCoordX),
         IsPhysCoordY(Flags & VarIsPhysCoordY),
         IsPhysCoordZ(Flags & VarIsPhysCoordZ),
-        MaybePhysGhost(Flags & VarMaybePhysGhost) {}
+        MaybePhysGhost(Flags & VarMaybePhysGhost) {
+      deduceTypeInfo(D);
+    }
+
+    template <typename T>
+    Variable(const std::string &N, std::size_t NumElements, T* D,
+             unsigned Flags = 0)
+      : Name(N), Data((void *) D), HasExtraSpace(Flags & VarHasExtraSpace),
+        IsPhysCoordX(Flags & VarIsPhysCoordX),
+        IsPhysCoordY(Flags & VarIsPhysCoordY),
+        IsPhysCoordZ(Flags & VarIsPhysCoordZ),
+        MaybePhysGhost(Flags & VarMaybePhysGhost) {
+      deduceTypeInfoFromElement(D);
+      Size = ElementSize*NumElements;
+    }
 
     Variable(const VariableInfo &VI, void *D, unsigned Flags = 0)
       : Name(VI.Name), Size(VI.Size), IsFloat(VI.IsFloat),
@@ -165,7 +259,8 @@ public:
         IsPhysCoordX((Flags & VarIsPhysCoordX) || VI.IsPhysCoordX),
         IsPhysCoordY((Flags & VarIsPhysCoordY) || VI.IsPhysCoordY),
         IsPhysCoordZ((Flags & VarIsPhysCoordZ) || VI.IsPhysCoordZ),
-        MaybePhysGhost((Flags & VarMaybePhysGhost) || VI.MaybePhysGhost) {}
+        MaybePhysGhost((Flags & VarMaybePhysGhost) || VI.MaybePhysGhost),
+        ElementSize(VI.ElementSize) {}
 
     std::string Name;
     std::size_t Size;
@@ -175,6 +270,7 @@ public:
     bool HasExtraSpace;
     bool IsPhysCoordX, IsPhysCoordY, IsPhysCoordZ;
     bool MaybePhysGhost;
+    std::size_t ElementSize;
   };
 
 public:
@@ -260,6 +356,19 @@ public:
     Vars.push_back(Variable(VI, Data, Flags));
   }
 
+  template <typename T>
+  void addScalarizedVariable(const std::string &Name, T *Data,
+                             std::size_t NumElements, unsigned Flags = 0) {
+    Vars.push_back(Variable(Name, NumElements, Data, Flags));
+  }
+
+  template <typename T, typename A>
+  void addScalarizedVariable(const std::string &Name, std::vector<T, A> &Data,
+                             std::size_t NumElements, unsigned Flags = 0) {
+    T *D = Data.empty() ? 0 : &Data[0];
+    addScalarizedVariable(Name, D, NumElements, Flags);
+  }
+
 #ifndef GENERICIO_NO_MPI
   // Writing
   void write();
diff --git a/GenericIOBenchmarkRead.cxx b/GenericIOBenchmarkRead.cxx
index 2d1f5d1..43fb99f 100644
--- a/GenericIOBenchmarkRead.cxx
+++ b/GenericIOBenchmarkRead.cxx
@@ -53,6 +53,10 @@
 #define ID_T int64_t
 #define MASK_T uint16_t
 
+struct pos_t {
+  POSVEL_T x, y, z, w;
+};
+
 using namespace std;
 using namespace gio;
 
@@ -63,17 +67,27 @@ int main(int argc, char *argv[]) {
   MPI_Comm_rank(MPI_COMM_WORLD, &commRank);
   MPI_Comm_size(MPI_COMM_WORLD, &commRanks);
 
+  bool UseAOS = false;
+  int a = 1;
+  if (argc > 1 && string(argv[a]) == "-a") {
+    UseAOS = true;
+    --argc;
+    ++a;
+  }
+
   if(argc != 2) {
-    fprintf(stderr,"USAGE: %s <mpiioName>\n", argv[0]);
+    fprintf(stderr,"USAGE: %s [-a] <mpiioName>\n", argv[0]);
     exit(-1);
   }
 
-  char *mpiioName = argv[1];
+  char *mpiioName = argv[a];
 
   vector<POSVEL_T> xx, yy, zz, vx, vy, vz, phi;
   vector<ID_T> id;
   vector<MASK_T> mask;
 
+  vector<pos_t> pos, vel;
+
   assert(sizeof(ID_T) == 8);
 
   size_t Np = 0;
@@ -92,36 +106,51 @@ int main(int argc, char *argv[]) {
 
   Np = GIO.readNumElems();
 
-  xx.resize(Np + GIO.requestedExtraSpace()/sizeof(POSVEL_T));
-  yy.resize(Np + GIO.requestedExtraSpace()/sizeof(POSVEL_T));
-  zz.resize(Np + GIO.requestedExtraSpace()/sizeof(POSVEL_T));
-  vx.resize(Np + GIO.requestedExtraSpace()/sizeof(POSVEL_T));
-  vy.resize(Np + GIO.requestedExtraSpace()/sizeof(POSVEL_T));
-  vz.resize(Np + GIO.requestedExtraSpace()/sizeof(POSVEL_T));
-  phi.resize(Np + GIO.requestedExtraSpace()/sizeof(POSVEL_T));
+  if (UseAOS) {
+    pos.resize(Np + (GIO.requestedExtraSpace() + sizeof(pos_t) - 1)/sizeof(pos_t));
+    vel.resize(Np + (GIO.requestedExtraSpace() + sizeof(pos_t) - 1)/sizeof(pos_t));
+  } else {
+    xx.resize(Np + GIO.requestedExtraSpace()/sizeof(POSVEL_T));
+    yy.resize(Np + GIO.requestedExtraSpace()/sizeof(POSVEL_T));
+    zz.resize(Np + GIO.requestedExtraSpace()/sizeof(POSVEL_T));
+    vx.resize(Np + GIO.requestedExtraSpace()/sizeof(POSVEL_T));
+    vy.resize(Np + GIO.requestedExtraSpace()/sizeof(POSVEL_T));
+    vz.resize(Np + GIO.requestedExtraSpace()/sizeof(POSVEL_T));
+    phi.resize(Np + GIO.requestedExtraSpace()/sizeof(POSVEL_T));
+  }
   id.resize(Np + GIO.requestedExtraSpace()/sizeof(ID_T));
   mask.resize(Np + GIO.requestedExtraSpace()/sizeof(MASK_T));
 
-  GIO.addVariable("x", xx, true);
-  GIO.addVariable("y", yy, true);
-  GIO.addVariable("z", zz, true);
-  GIO.addVariable("vx", vx, true);
-  GIO.addVariable("vy", vy, true);
-  GIO.addVariable("vz", vz, true);
-  GIO.addVariable("phi", phi, true);
-  GIO.addVariable("id", id, true);
-  GIO.addVariable("mask", mask, true);
+  if (UseAOS) {
+    GIO.addVariable("pos", pos, GenericIO::VarHasExtraSpace);
+    GIO.addVariable("vel", vel, GenericIO::VarHasExtraSpace);
+  } else {
+    GIO.addVariable("x", xx, GenericIO::VarHasExtraSpace);
+    GIO.addVariable("y", yy, GenericIO::VarHasExtraSpace);
+    GIO.addVariable("z", zz, GenericIO::VarHasExtraSpace);
+    GIO.addVariable("vx", vx, GenericIO::VarHasExtraSpace);
+    GIO.addVariable("vy", vy, GenericIO::VarHasExtraSpace);
+    GIO.addVariable("vz", vz, GenericIO::VarHasExtraSpace);
+    GIO.addVariable("phi", phi, GenericIO::VarHasExtraSpace);
+  }
+  GIO.addVariable("id", id, GenericIO::VarHasExtraSpace);
+  GIO.addVariable("mask", mask, GenericIO::VarHasExtraSpace);
 
   GIO.readData();
   } // destroy GIO prior to calling MPI_Finalize
 
-  xx.resize(Np);
-  yy.resize(Np);
-  zz.resize(Np);
-  vx.resize(Np);
-  vy.resize(Np);
-  vz.resize(Np);
-  phi.resize(Np);
+  if (UseAOS) {
+    pos.resize(Np);
+    vel.resize(Np);
+  } else {
+    xx.resize(Np);
+    yy.resize(Np);
+    zz.resize(Np);
+    vx.resize(Np);
+    vy.resize(Np);
+    vz.resize(Np);
+    phi.resize(Np);
+  }
   id.resize(Np);
   mask.resize(Np);
 
diff --git a/GenericIOBenchmarkWrite.cxx b/GenericIOBenchmarkWrite.cxx
index 3134efd..37e9882 100644
--- a/GenericIOBenchmarkWrite.cxx
+++ b/GenericIOBenchmarkWrite.cxx
@@ -69,6 +69,28 @@ struct Generator {
     T inc;
 };
 
+struct pos_t {
+  POSVEL_T x, y, z, w;
+};
+
+template <>
+struct Generator<pos_t> {
+  Generator(POSVEL_T start, POSVEL_T inc) :
+    GenX(start, inc), GenY(start, inc), GenZ(start, inc), GenW(start, inc) {}
+
+  pos_t operator()() {
+    pos_t v;
+    v.x = GenX();
+    v.y = GenY();
+    v.z = GenZ();
+    v.w = GenW();
+
+    return v;
+  }
+
+  Generator<POSVEL_T> GenX, GenY, GenZ, GenW;
+};
+
 int main(int argc, char *argv[]) {
   MPI_Init(&argc, &argv);
 
@@ -76,7 +98,14 @@ int main(int argc, char *argv[]) {
   MPI_Comm_rank(MPI_COMM_WORLD, &commRank);
   MPI_Comm_size(MPI_COMM_WORLD, &commRanks);
 
+  bool UseAOS = false;
   int a = 1;
+  if (argc > 1 && string(argv[a]) == "-a") {
+    UseAOS = true;
+    --argc;
+    ++a;
+  }
+
   if (argc > 1 && string(argv[a]) == "-c") {
     GenericIO::setDefaultShouldCompress(true);
     --argc;
@@ -84,7 +113,7 @@ int main(int argc, char *argv[]) {
   }
 
   if(argc != 4) {
-    fprintf(stderr,"USAGE: %s [-c] <mpiioName> <NP> <seed>\n", argv[0]);
+    fprintf(stderr,"USAGE: %s [-a] [-c] <mpiioName> <NP> <seed>\n", argv[0]);
     exit(-1);
   }
 
@@ -103,6 +132,8 @@ int main(int argc, char *argv[]) {
   vector<ID_T> id;
   vector<MASK_T> mask;
 
+  vector<pos_t> pos, vel;
+
   assert(sizeof(ID_T) == 8);
 
   unsigned Method = GenericIO::FileIOPOSIX;
@@ -122,46 +153,67 @@ int main(int argc, char *argv[]) {
   GIO.setPhysOrigin(0.0);
   GIO.setPhysScale(256.0);
 
-  xx.resize(Np + GIO.requestedExtraSpace()/sizeof(POSVEL_T));
-  yy.resize(Np + GIO.requestedExtraSpace()/sizeof(POSVEL_T));
-  zz.resize(Np + GIO.requestedExtraSpace()/sizeof(POSVEL_T));
-  vx.resize(Np + GIO.requestedExtraSpace()/sizeof(POSVEL_T));
-  vy.resize(Np + GIO.requestedExtraSpace()/sizeof(POSVEL_T));
-  vz.resize(Np + GIO.requestedExtraSpace()/sizeof(POSVEL_T));
-  phi.resize(Np + GIO.requestedExtraSpace()/sizeof(POSVEL_T));
+  if (UseAOS) {
+    pos.resize(Np + (GIO.requestedExtraSpace() + sizeof(pos_t) - 1)/sizeof(pos_t));
+    vel.resize(Np + (GIO.requestedExtraSpace() + sizeof(pos_t) - 1)/sizeof(pos_t));
+  } else {
+    xx.resize(Np + GIO.requestedExtraSpace()/sizeof(POSVEL_T));
+    yy.resize(Np + GIO.requestedExtraSpace()/sizeof(POSVEL_T));
+    zz.resize(Np + GIO.requestedExtraSpace()/sizeof(POSVEL_T));
+    vx.resize(Np + GIO.requestedExtraSpace()/sizeof(POSVEL_T));
+    vy.resize(Np + GIO.requestedExtraSpace()/sizeof(POSVEL_T));
+    vz.resize(Np + GIO.requestedExtraSpace()/sizeof(POSVEL_T));
+    phi.resize(Np + GIO.requestedExtraSpace()/sizeof(POSVEL_T));
+  }
   id.resize(Np + GIO.requestedExtraSpace()/sizeof(ID_T));
   mask.resize(Np + GIO.requestedExtraSpace()/sizeof(MASK_T));
 
-  std::generate(xx.begin(), xx.end(), Generator<POSVEL_T>(25, 3));
-  std::generate(yy.begin(), yy.end(), Generator<POSVEL_T>(25, 3));
-  std::generate(zz.begin(), zz.end(), Generator<POSVEL_T>(25, 3));
-  std::generate(vx.begin(), vx.end(), Generator<POSVEL_T>(25, 3));
-  std::generate(vy.begin(), vy.end(), Generator<POSVEL_T>(25, 3));
-  std::generate(vz.begin(), vz.end(), Generator<POSVEL_T>(25, 3));
-  std::generate(phi.begin(), phi.end(), Generator<POSVEL_T>(25, 3));
+  if (UseAOS) {
+    std::generate(pos.begin(), pos.end(), Generator<pos_t>(25, 3));
+    std::generate(vel.begin(), vel.end(), Generator<pos_t>(25, 3));
+  } else {
+    std::generate(xx.begin(), xx.end(), Generator<POSVEL_T>(25, 3));
+    std::generate(yy.begin(), yy.end(), Generator<POSVEL_T>(25, 3));
+    std::generate(zz.begin(), zz.end(), Generator<POSVEL_T>(25, 3));
+    std::generate(vx.begin(), vx.end(), Generator<POSVEL_T>(25, 3));
+    std::generate(vy.begin(), vy.end(), Generator<POSVEL_T>(25, 3));
+    std::generate(vz.begin(), vz.end(), Generator<POSVEL_T>(25, 3));
+    std::generate(phi.begin(), phi.end(), Generator<POSVEL_T>(25, 3));
+  }
   std::generate(id.begin(), id.end(), Generator<ID_T>(25, 3));
   std::fill(mask.begin(), mask.end(), 25);
 
-  GIO.addVariable("x", xx, CoordFlagsX | GenericIO::VarHasExtraSpace);
-  GIO.addVariable("y", yy, CoordFlagsY | GenericIO::VarHasExtraSpace);
-  GIO.addVariable("z", zz, CoordFlagsZ | GenericIO::VarHasExtraSpace);
-  GIO.addVariable("vx", vx, GenericIO::VarHasExtraSpace);
-  GIO.addVariable("vy", vy, GenericIO::VarHasExtraSpace);
-  GIO.addVariable("vz", vz, GenericIO::VarHasExtraSpace);
-  GIO.addVariable("phi", phi, GenericIO::VarHasExtraSpace);
+  if (UseAOS) {
+    GIO.addVariable("pos", pos, CoordFlagsX | CoordFlagsY | CoordFlagsZ |
+                                GenericIO::VarHasExtraSpace);
+    GIO.addVariable("vel", vel, GenericIO::VarHasExtraSpace);
+  } else {
+    GIO.addVariable("x", xx, CoordFlagsX | GenericIO::VarHasExtraSpace);
+    GIO.addVariable("y", yy, CoordFlagsY | GenericIO::VarHasExtraSpace);
+    GIO.addVariable("z", zz, CoordFlagsZ | GenericIO::VarHasExtraSpace);
+    GIO.addVariable("vx", vx, GenericIO::VarHasExtraSpace);
+    GIO.addVariable("vy", vy, GenericIO::VarHasExtraSpace);
+    GIO.addVariable("vz", vz, GenericIO::VarHasExtraSpace);
+    GIO.addVariable("phi", phi, GenericIO::VarHasExtraSpace);
+  }
   GIO.addVariable("id", id, GenericIO::VarHasExtraSpace);
   GIO.addVariable("mask", mask, GenericIO::VarHasExtraSpace);
 
   GIO.write();
   } // destroy GIO prior to calling MPI_Finalize
 
-  xx.resize(Np);
-  yy.resize(Np);
-  zz.resize(Np);
-  vx.resize(Np);
-  vy.resize(Np);
-  vz.resize(Np);
-  phi.resize(Np);
+  if (UseAOS) {
+    pos.resize(Np);
+    vel.resize(Np);
+  } else {
+    xx.resize(Np);
+    yy.resize(Np);
+    zz.resize(Np);
+    vx.resize(Np);
+    vy.resize(Np);
+    vz.resize(Np);
+    phi.resize(Np);
+  }
   id.resize(Np);
   mask.resize(Np);
 
diff --git a/GenericIOPrint.cxx b/GenericIOPrint.cxx
index 2fd9313..41f33c3 100644
--- a/GenericIOPrint.cxx
+++ b/GenericIOPrint.cxx
@@ -62,23 +62,30 @@ public:
 template <class T>
 class Printer : public PrinterBase {
 public:
-  Printer(GenericIO &G, size_t MNE, const string &N)
-    : Data(MNE + G.requestedExtraSpace()/sizeof(T)) {
-    G.addVariable(N, Data, true);
+  Printer(GenericIO &G, size_t MNE, size_t NE, const string &N)
+    : NumElements(NE), Data(MNE*NE + G.requestedExtraSpace()/sizeof(T)) {
+    G.addScalarizedVariable(N, Data, NE, GenericIO::VarHasExtraSpace);
   }
 
   virtual void print(ostream &os, size_t i) {
-    os << scientific << setprecision(numeric_limits<T>::digits10) << Data[i];
+    for (size_t j = 0; j < NumElements; ++j) {
+      os << scientific << setprecision(numeric_limits<T>::digits10) <<
+            Data[i*NumElements + j];
+
+      if (j != NumElements - 1)
+        os << "\t";
+    }
   }
 
 protected:
+  size_t NumElements;
   vector<T> Data;
 };
 
 template <typename T>
 PrinterBase *addPrinter(GenericIO::VariableInfo &V,
                 GenericIO &GIO, size_t MNE) {
-  if (sizeof(T) != V.Size)
+  if (sizeof(T) != V.ElementSize)
     return 0;
 
   if (V.IsFloat == numeric_limits<T>::is_integer)
@@ -86,7 +93,7 @@ PrinterBase *addPrinter(GenericIO::VariableInfo &V,
   if (V.IsSigned != numeric_limits<T>::is_signed)
     return 0;
 
-  return new Printer<T>(GIO, MNE, V.Name);
+  return new Printer<T>(GIO, MNE, V.Size/V.ElementSize, V.Name);
 }
 
 int main(int argc, char *argv[]) {
@@ -240,7 +247,29 @@ int main(int argc, char *argv[]) {
 
     cout << "# ";
     for (size_t i = 0; i < VI.size(); ++i) {
-      cout << VI[i].Name;
+      if (VI[i].Size == VI[i].ElementSize) {
+        cout << VI[i].Name;
+      } else {
+        size_t NumElements = VI[i].Size/VI[i].ElementSize;
+        for (size_t j = 0; j < NumElements; ++j) {
+          cout << VI[i].Name;
+          if (j == 0) {
+            cout << ".x";
+          } else if (j == 1) {
+            cout << ".y";
+          } else if (j == 2) {
+            cout << ".z";
+          } else if (j == 3) {
+            cout << ".w";
+          } else {
+            cout << ".w" << (j - 3);
+          }
+
+          if (j != NumElements - 1)
+            cout << "\t";
+        }
+      }
+
       if (i != VI.size() - 1)
         cout << "\t";
     }
-- 
GitLab