From 02348385b66e6efd2ebaa3a7a780a0068a1aaffc Mon Sep 17 00:00:00 2001
From: Michael Buehlmann <buehlmann.michi@gmail.com>
Date: Tue, 11 Jan 2022 12:49:55 -0600
Subject: [PATCH] update documentation

---
 README.md                       | 152 ++++++++++++++++---------------
 docs/conf.py                    |   1 +
 docs/cpp/genericio.rst          |   2 +
 docs/cpp/library.rst            |   2 +
 docs/environment.rst            |  43 +++++++++
 docs/executables/benchmarks.rst |  23 +++++
 docs/executables/tools.rst      |   3 +
 docs/index.rst                  |  14 ++-
 docs/python/class_interface.rst |  14 ++-
 docs/python/legacy_python.rst   |  46 ++++++++++
 docs/python/mpi.rst             |  94 ++++++++++++++++++-
 docs/python/readwrite.rst       |  64 ++++++++++++-
 python/README.md                | 155 --------------------------------
 python/genericio.cpp            |   3 +-
 setup.py                        |  85 ++++++++++--------
 15 files changed, 428 insertions(+), 273 deletions(-)
 create mode 100644 docs/cpp/genericio.rst
 create mode 100644 docs/cpp/library.rst
 create mode 100644 docs/environment.rst
 create mode 100644 docs/executables/benchmarks.rst
 create mode 100644 docs/executables/tools.rst
 create mode 100644 docs/python/legacy_python.rst
 delete mode 100644 python/README.md

diff --git a/README.md b/README.md
index 009fe49..eb7a488 100644
--- a/README.md
+++ b/README.md
@@ -1,108 +1,114 @@
-GenericIO
-=========
+# GenericIO
 
-GenericIO is a write-optimized library for writing self-describing scientific data files on large-scale parallel file systems.
+GenericIO is a write-optimized library for writing self-describing scientific
+data files on large-scale parallel file systems.
 
-Reference
----------
+## Reference
 
-Habib, et al., HACC: Simulating Future Sky Surveys on State-of-the-Art Supercomputing Architectures, New Astronomy, 2015
+Habib, et al., HACC: Simulating Future Sky Surveys on State-of-the-Art
+Supercomputing Architectures, New Astronomy, 2015
 (http://arxiv.org/abs/1410.2805).
 
-Source Code
------------
+## Obtaining the Source Code
 
 The most recent version of source is available by cloning this repo:
 ```bash
 git clone https://xgitlab.cels.anl.gov/hacc/genericio.git
 ```
 
-There is also a history of code [releases](https://xgitlab.cels.anl.gov/hacc/genericio/-/releases):
-- [2019-04-17](https://xgitlab.cels.anl.gov/hacc/genericio/-/releases/20190417)
-- [2017-09-25](https://xgitlab.cels.anl.gov/hacc/genericio/-/releases/20170925)
-- [2016-08-29](https://xgitlab.cels.anl.gov/hacc/genericio/-/releases/20160829)
-- [2016-04-12](https://xgitlab.cels.anl.gov/hacc/genericio/-/releases/20160412)
-- [2015-06-08](https://xgitlab.cels.anl.gov/hacc/genericio/-/releases/20150608)
+There is also a history of code
+[releases](https://xgitlab.cels.anl.gov/hacc/genericio/-/releases):
+[2019-04-17](https://xgitlab.cels.anl.gov/hacc/genericio/-/releases/20190417) /
+[2017-09-25](https://xgitlab.cels.anl.gov/hacc/genericio/-/releases/20170925) /
+[2016-08-29](https://xgitlab.cels.anl.gov/hacc/genericio/-/releases/20160829) /
+[2016-04-12](https://xgitlab.cels.anl.gov/hacc/genericio/-/releases/20160412) /
+[2015-06-08](https://xgitlab.cels.anl.gov/hacc/genericio/-/releases/20150608)
 
-Output file partitions (subfiles)
----------------------------------
+-----
 
-If you're running on an IBM BG/Q supercomputer, then the number of subfiles (partitions) chosen is based on the I/O nodes in an automatic way. Otherwise, by default, the GenericIO library picks the number of subfiles based on a fairly-naive hostname-based hashing scheme. This works reasonably-well on small clusters, but not on larger systems. On a larger system, you might want to set these environmental variables:
+## Building Executables / C++ Library
 
-```bash
-GENERICIO_PARTITIONS_USE_NAME=0
-GENERICIO_RANK_PARTITIONS=256
-```
+The executables and ``libgenericio`` can be built either with
+[CMake](https://cmake.org/) (minimum version 3.10) or with
+[GNUMake](https://www.gnu.org/software/make/). The following executables will
+be built:
 
-Where the number of partitions (256 above) determines the number of subfiles used. If you're using a Lustre file system, for example, an optimal number of files is:
+- ``frontend/GenericIOPrint`` print data to stdout (non-MPI version)
+- ``frontend/GenericIOVerify`` verify and try reading data (non-MPI version)
+- ``mpi/GenericIOBenchmarkRead`` reading benchmark, works on data written with ``GenericIOBenchmarkWrite``
+- ``mpi/GenericIOBenchmarkWrite`` writing benchmark
+- ``mpi/GenericIOPrint`` print data to stdout
+- ``mpi/GenericIORewrite`` rewrite data with a different number of ranks
+- ``mpi/GenericIOVerify`` verify and try reading data
 
-```
-# of files * stripe count  ~ # OSTs
-```
-
-On Titan, for example, there are 1008 OSTs, and a default stripe count of 4, so we use approximately 256 files.
-
-Benchmarks
-----------
+**Using CMake**
 
-Once you build the library and associated programs (using make), you can run, for example:
+Note that the executables / libraries will be located in
+``build/<frontend/mpi>``. CMake will use the compiler pointed to in the ``CC``
+and ``CXX`` environmental variables.
 
 ```bash
-$ mpirun -np 8 ./mpi/GenericIOBenchmarkWrite /tmp/out.gio 123456 2
-Wrote 9 variables to /tmp/out (4691036 bytes) in 0.2361s: 18.9484 MB/s
+mkdir build && cd build
+cmake ..
+make -j4
 ```
 
+**Using Make**
+
+Make will create the executables / libraries under the main directory. Edit the
+``CC``, ``CXX``, ``MPICC``, and ``MPICXX`` variables in the GNUmakefile to
+change the compiler.
+
 ```bash
-$ mpirun -np 8 ./mpi/GenericIOBenchmarkRead /tmp/out.gio
-Read 9 variables from /tmp/out (4688028 bytes) in 0.223067s: 20.0426 MB/s [excluding header read]
+make
 ```
 
-The read benchmark always reads all of the input data. The output benchmark takes two numerical parameters, one if the number of data rows to write, and the second is a random seed (which slightly perturbs the per-rank output sizes, but not by much). Each row is 36 bytes for these benchmarks.
+## Installing the Python Library
 
-The write benchmark can be passed the -c parameter to enable output compression. Both benchmarks take an optional -a parameter to request that homogeneous aggregates (i.e. "float4") be used instead of using separate arrays for each position/velocity component.
+The `pygio` library is pip-installable and works with `mpi4py`.
 
+**Requirements**
 
-Python module
--------------
+Currently, a **CMake version >= 3.11.0** is required to fetch dependencies
+during configuration. The ``pygio`` module also requires MPI libraries to be
+findable by CMake's FindMPI. The compiler needs to support **C++17** (make sure
+that ``CC`` and ``CXX`` point to the correct compiler).
 
-[Click here to go to the README for the python interface](new_python/README.md)
+**Install**
 
+The python library can be installed by running pip in the **main folder**:
+```bash
+pip install .
+```
 
-Legacy python module
---------------------
+It will use the compiler referred to by the ``CC`` and ``CXX`` environment
+variables. If the compiler supports OpenMP, the library will be threaded. Make
+sure to set ``OMP_NUM_THREADS`` to an appropriate value, in particular when
+using multiple MPI ranks per node.
 
-*This documentation is for the old python module. It is recommened to use the newer, pybind11 based version, see above*
+-----
 
+## Output file partitions (subfiles)
 
-The repository includes a genericio Python module that can read genericio-formatted files and return numpy arrays. This is included in the standard build. To use it, once you've built genericio, you can read genericio data as follows:
+If you're running on an IBM BG/Q supercomputer, then the number of subfiles
+(partitions) chosen is based on the I/O nodes in an automatic way. Otherwise, by
+default, the GenericIO library picks the number of subfiles based on a
+fairly-naive hostname-based hashing scheme. This works reasonably-well on small
+clusters, but not on larger systems. On a larger system, you might want to set
+these environmental variables:
 
 ```bash
-$ export PYTHONPATH=${GENERICIO_DIR}/legacy_python
-$ python
->>> import genericio
->>> genericio.gio_inspect('m000-99.fofproperties')
-Number of Elements: 1691
-[data type] Variable name
----------------------------------------------
-[i 32] fof_halo_count
-[i 64] fof_halo_tag
-[f 32] fof_halo_mass
-[f 32] fof_halo_mean_x
-[f 32] fof_halo_mean_y
-[f 32] fof_halo_mean_z
-[f 32] fof_halo_mean_vx
-[f 32] fof_halo_mean_vy
-[f 32] fof_halo_mean_vz
-[f 32] fof_halo_vel_disp
-
-(i=integer,f=floating point, number bits size)
->>> genericio.gio_read('m000-99.fofproperties','fof_halo_mass')
-array([[  4.58575588e+13],
-       [  5.00464689e+13],
-       [  5.07078771e+12],
-       ...,
-       [  1.35221006e+13],
-       [  5.29125710e+12],
-       [  7.12849857e+12]], dtype=float32)
-
-```
\ No newline at end of file
+GENERICIO_PARTITIONS_USE_NAME=0
+GENERICIO_RANK_PARTITIONS=256
+```
+
+Where the number of partitions (256 above) determines the number of subfiles
+used. If you're using a Lustre file system, for example, an optimal number of
+files is:
+
+```
+# of files * stripe count  ~ # OSTs
+```
+
+On Titan, for example, there are 1008 OSTs, and a default stripe count of 4, so
+we use approximately 256 files.
diff --git a/docs/conf.py b/docs/conf.py
index b2991e1..4c27594 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -30,6 +30,7 @@ author = "Hal Finkel, et al."
 # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
 # ones.
 extensions = [
+    "sphinx.ext.doctest",
     "sphinx.ext.autodoc",
     "sphinx.ext.napoleon",
     "sphinx.ext.autodoc.typehints",
diff --git a/docs/cpp/genericio.rst b/docs/cpp/genericio.rst
new file mode 100644
index 0000000..657d452
--- /dev/null
+++ b/docs/cpp/genericio.rst
@@ -0,0 +1,2 @@
+GenericIO C++ Class
+===================
\ No newline at end of file
diff --git a/docs/cpp/library.rst b/docs/cpp/library.rst
new file mode 100644
index 0000000..b3712cd
--- /dev/null
+++ b/docs/cpp/library.rst
@@ -0,0 +1,2 @@
+GenericIO and CMake
+===================
\ No newline at end of file
diff --git a/docs/environment.rst b/docs/environment.rst
new file mode 100644
index 0000000..7dc96ca
--- /dev/null
+++ b/docs/environment.rst
@@ -0,0 +1,43 @@
+Runtime Options
+===============
+
+Library / Executables
+---------------------
+
+``GENERICIO_RANK0_CREATE_ALL``
+
+``GENERICIO_COMPRESS``
+
+``GENERICIO_FORCE_BLOCKS``
+
+``GENERICIO_RETRY_COUNT``
+
+``GENERICIO_RETRY_SLEEP``
+
+``GENERICIO_VERBOSE``
+
+``GENERICIO_PARTITIONS_USE_NAME``
+
+``GENERICIO_RANK_PARTITIONS``
+
+Executables
+-----------
+
+``GENERICIO_USE_MPIIO``
+
+BLOSC
+-----
+
+``BLOSC_CLEVEL``
+
+``BLOSC_SHUFFLE``
+
+``BLOSC_TYPESIZE``
+
+``BLOSC_COMPRESSOR``
+
+``BLOSC_BLOCKSIZE``
+
+``BLOSC_NTHREADS``
+
+``BLOSC_NOLOCK``
\ No newline at end of file
diff --git a/docs/executables/benchmarks.rst b/docs/executables/benchmarks.rst
new file mode 100644
index 0000000..c1f017b
--- /dev/null
+++ b/docs/executables/benchmarks.rst
@@ -0,0 +1,23 @@
+Benchmarks
+==========
+
+Once you build the library and associated programs (using make), you can run, for example:
+
+.. code-block::
+
+    $ mpirun -np 8 ./mpi/GenericIOBenchmarkWrite /tmp/out.gio 123456 2
+    Wrote 9 variables to /tmp/out (4691036 bytes) in 0.2361s: 18.9484 MB/s
+
+.. code-block::
+
+    $ mpirun -np 8 ./mpi/GenericIOBenchmarkRead /tmp/out.gio
+    Read 9 variables from /tmp/out (4688028 bytes) in 0.223067s: 20.0426 MB/s [excluding header read]
+
+The read benchmark always reads all of the input data. The output benchmark takes two numerical parameters, one is the
+number of data rows to write, and the second is a random seed (which slightly perturbs the per-rank output sizes, but
+not by much). Each row is 36 bytes for these benchmarks.
+
+The write benchmark can be passed the -c parameter to enable output compression. Both benchmarks take an optional -a
+parameter to request that homogeneous aggregates (i.e. "float4") be used instead of using separate arrays for each
+position/velocity component.
+
diff --git a/docs/executables/tools.rst b/docs/executables/tools.rst
new file mode 100644
index 0000000..f8aab0f
--- /dev/null
+++ b/docs/executables/tools.rst
@@ -0,0 +1,3 @@
+Executables
+===========
+
diff --git a/docs/index.rst b/docs/index.rst
index d8fadfa..415b4c2 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -5,6 +5,7 @@
 
 .. only:: latex
 
+   =======================
    GenericIO Documentation
    =======================
 
@@ -15,7 +16,7 @@
 .. only:: latex
 
    .. toctree::
-      :maxdepth: 2
+      :maxdepth: 3
 
       self
 
@@ -26,8 +27,19 @@
       python/readwrite
       python/class_interface
       python/mpi
+      python/legacy_python
 
    .. toctree::
       :caption: C++ Interface
       :maxdepth: 2
 
+      cpp/genericio
+      cpp/library
+
+   .. toctree::
+      :caption: Executables
+      :maxdepth: 1
+
+      executables/tools
+      executables/benchmarks
+      environment
diff --git a/docs/python/class_interface.rst b/docs/python/class_interface.rst
index 6372b12..2aa0e32 100644
--- a/docs/python/class_interface.rst
+++ b/docs/python/class_interface.rst
@@ -2,6 +2,12 @@ The Python Class Interface
 ==========================
 .. currentmodule:: pygio
 
+The :class:`PyGenericIO` python class directly interfaces the C++ ``GenericIO``
+class. Writing data is currently not supported through that interface (use
+:func:`write_genericio` instead). Note that reading data works without manually
+adding variables (:class:`PyGenericIO.VariableInfo`) to the class instance.
+Instead, a list of variable names can be passed to :meth:`PyGenericIO.read`.
+
 .. code-block:: python
 
    # instantiate a GenericIO class
@@ -22,11 +28,15 @@ The Python Class Interface
    data_partial = gio_file.read(["x", "y", "z"])
 
 Further methods and members of GenericIO can easly be interfaced by editing
-``python_new/genericio.cpp``.
+``python/genericio.cpp``.
 
 
 References
 ----------
 
 .. autoclass:: PyGenericIO
-   :members:
\ No newline at end of file
+   :members:
+
+.. autoclass:: pygio::PyGenericIO.VariableInfo
+   :members:
+   :undoc-members:
\ No newline at end of file
diff --git a/docs/python/legacy_python.rst b/docs/python/legacy_python.rst
new file mode 100644
index 0000000..4e9cf72
--- /dev/null
+++ b/docs/python/legacy_python.rst
@@ -0,0 +1,46 @@
+Legacy python module
+--------------------
+
+.. warning::
+
+   This documentation is for the old python module. It is recommended to use the
+   newer, pybind11-based version.
+
+
+The repository includes a genericio Python module that can read
+genericio-formatted files and return numpy arrays. This is included in the
+standard build. To use it, once you've built genericio, you can read genericio
+data as follows:
+
+.. code-block:: bash
+
+   $ export PYTHONPATH=${GENERICIO_DIR}/legacy_python
+   $ python
+
+
+>>> import genericio
+>>> genericio.gio_inspect('m000-99.fofproperties')
+Number of Elements: 1691
+[data type] Variable name
+---------------------------------------------
+[i 32] fof_halo_count
+[i 64] fof_halo_tag
+[f 32] fof_halo_mass
+[f 32] fof_halo_mean_x
+[f 32] fof_halo_mean_y
+[f 32] fof_halo_mean_z
+[f 32] fof_halo_mean_vx
+[f 32] fof_halo_mean_vy
+[f 32] fof_halo_mean_vz
+[f 32] fof_halo_vel_disp
+(i=integer,f=floating point, number bits size)
+
+
+>>> genericio.gio_read('m000-99.fofproperties','fof_halo_mass')
+array([[  4.58575588e+13],
+       [  5.00464689e+13],
+       [  5.07078771e+12],
+       ...,
+       [  1.35221006e+13],
+       [  5.29125710e+12],
+       [  7.12849857e+12]], dtype=float32)
\ No newline at end of file
diff --git a/docs/python/mpi.rst b/docs/python/mpi.rst
index 721b7ef..51a0e04 100644
--- a/docs/python/mpi.rst
+++ b/docs/python/mpi.rst
@@ -1,2 +1,94 @@
 Using GenericIO with MPI
-========================
\ No newline at end of file
+========================
+
+The ``pygio`` library works with mpi4py. Here is an example:
+
+.. code-block:: python
+
+    from mpi4py import MPI
+    comm = MPI.COMM_WORLD
+    rank = comm.Get_rank()
+    ranks = comm.Get_size()
+
+    import numpy as np
+    import pygio
+
+    # read locally
+    data = pygio.read_genericio("generic_io_file")
+    # get local number of elements from the first element in dictionary
+    num_elems = len(next(iter(data.values())))
+    # reduce total number of elements
+    num_elems_total = comm.allreduce(num_elems)
+    if rank == 0:
+        print(f"Reading file with {ranks} ranks")
+        print(f"Total number of particles: {num_elems_total}")
+        print("The data contains the following variables:")
+        for k, d in data.items():
+            print(f"\t{k:5s}, dtype={d.dtype}")
+
+    for i in range(ranks):
+        if i == rank:
+            print(f"rank {rank} read {num_elems} elements")
+        comm.Barrier()
+
+
+It can be executed with ``mpirun`` or ``mpiexec``:
+
+.. code-block:: bash
+
+    mpirun -n 8 python testpygio_mpi.py
+
+Here is an output for 1 and 8 ranks:
+
+.. code-block:: none
+
+    # 1 Rank
+    Reading file with 1 ranks
+    Total number of particles: 562500
+    The data contains the following variables:
+            id   , dtype=int64
+            vx   , dtype=float32
+            vy   , dtype=float32
+            vz   , dtype=float32
+            x    , dtype=float32
+            y    , dtype=float32
+            z    , dtype=float32
+    rank 0 read 562500 elements
+
+    # 8 Ranks
+    Reading file with 8 ranks
+    Total number of particles: 562500
+    The data contains the following variables:
+            id   , dtype=int64
+            vx   , dtype=float32
+            vy   , dtype=float32
+            vz   , dtype=float32
+            x    , dtype=float32
+            y    , dtype=float32
+            z    , dtype=float32
+    rank 0 read 70000 elements
+    rank 1 read 70000 elements
+    rank 2 read 70000 elements
+    rank 3 read 70000 elements
+    rank 4 read 70625 elements
+    rank 5 read 70625 elements
+    rank 6 read 70625 elements
+    rank 7 read 70625 elements
+
+
+Importing the non-MPI version
+-----------------------------
+
+Some compute facilities prevent the loading of MPI libraries on login nodes. In order
+to still be able to use ``pygio`` on the login nodes, the non-MPI library
+can be loaded by setting the ``GENERICIO_NO_MPI`` environment variable before
+importing ``pygio``:
+
+.. code-block:: python
+
+   import os
+   os.environ['GENERICIO_NO_MPI'] = 'True'
+   import pygio
+
+A warning will be printed that the writing capabilities of genericio are not
+available in this mode.
\ No newline at end of file
diff --git a/docs/python/readwrite.rst b/docs/python/readwrite.rst
index 409f31f..402e0be 100644
--- a/docs/python/readwrite.rst
+++ b/docs/python/readwrite.rst
@@ -3,14 +3,50 @@ Reading and Writing Data
 
 .. currentmodule:: pygio
 
+The ``pygio`` module contains a high-level functional interface for inspecting,
+reading, and writing particle and halo data. In addition, there is a class-based
+interface which mimics the C++ ``GenericIO`` class. More information about
+:class:`PyGenericIO` can be found :doc:`here <./class_interface>`.
+
+.. note::
+
+   The GenericIO python library supports **MPI** with
+   `mpi4py <https://mpi4py.readthedocs.io/en/stable/>`_. Under MPI, each rank
+   will read a separate "chunk" of the file (if the file was written with
+   multiple ranks). The function :func:`read_num_elems` will return the number
+   of particles / halos that the local rank will read and can be different on
+   each rank. More information on how to use this library with MPI can be found
+   :doc:`here <./mpi>`.
+
+To get an overview of the data contained in a GenericIO file, use the
+:func:`inspect_genericio` function, which lists all variables, data types and
+number of elements (particles or halos) contained in the file.
+
 .. code-block:: python
 
-   import numpy as np
    import pygio
 
    # inspect file
    pygio.inspect_genericio("generic_io_file")
 
+Some additional inspection functions are:
+
+- :func:`read_num_elems` returns the number of elements the local rank will read
+- :func:`read_total_num_elems` returns the total/global number of elements in the file
+- :func:`read_variable_names` returns a list of variable names defined in the file
+- :func:`read_variable_dtypes` returns a dictionary with ``variable_name -> numpy.dtype``
+- :func:`read_phys_scale` returns a list of 3 floats describing the box size
+- :func:`read_phys_origin` returns a list of 3 floats describing the box origin
+
+
+Reading GenericIO files
+-----------------------
+
+Data contained in GenericIO files can be loaded into numpy arrays using the
+:func:`read_genericio` function. By default, all variables stored in the
+file will be loaded. If not all variables are needed, the memory footprint and
+read speed can be improved by passing a list of variable names to the
+function.
 
 .. code-block:: python
 
@@ -23,15 +59,33 @@ Reading and Writing Data
    data_partial = pygio.read_genericio("generic_io_file", ["x", "y", "z"])
    data_x = data_partial["x"]
 
+
+Writing GenericIO files
+-----------------------
+
+Data arrays can be stored into GenericIO files using the :func:`write_genericio`
+function. The data has to be a dictionary, with the variable name as key and the
+corresponding one dimensional numpy array as value. The numpy arrays have to
+meet the following criteria:
+
+- 1 dimensional
+- same length
+- datatype has to be ``np.float32``, ``np.float64``, ``np.int64`` or
+  ``np.uint16``
+
+In addition, one needs to specify the physical origin (default [0, 0, 0]) and
+the physical box size as a list of 3 floats.
+
 .. code-block:: python
 
     data = {
-        "x": np.ones(100),
-        "y": np.ones(100)
+        "x": np.random.uniform(0, 1, 100).astype(np.float32),
+        "y": np.random.uniform(0, 1, 100).astype(np.float32),
+        "z": np.random.uniform(0, 1, 100).astype(np.float32)
     }
 
    # write data to file
-   pygio.write_genericio("new_generic_io_file",
+   pygio.write_genericio("new_file.gio",
         data,
         phys_scale = [1, 1, 1],
         phys_origin = [0, 0, 0]
@@ -47,6 +101,8 @@ References
 
 .. autofunction:: read_num_elems
 
+.. autofunction:: read_total_num_elems
+
 .. autofunction:: read_variable_names
 
 .. autofunction:: read_variable_dtypes
diff --git a/python/README.md b/python/README.md
deleted file mode 100644
index b14a2ce..0000000
--- a/python/README.md
+++ /dev/null
@@ -1,155 +0,0 @@
-# Python module
-
-This version of `pygio` is pip-installable and works with `mpi4py`.
-
-## Requirements
-
-Currently, a CMake version >= 3.11.0 is required to fetch dependencies during configuration. 
-The `pygio` module also requires MPI libraries to be findable by CMake's FindMPI. The compiler
-needs to support C++17 (make sure that `CC` and `CXX` point to the correct compiler)
-
-## Install
-
-The python library can be installed by running pip in the **main folder**:
-```bash
-pip install .
-```
-It will use the compiler referred by the `CC` and `CXX` environment variable. If the compiler
-supports OpenMP, the library will be threaded. Make sure to set `OMP_NUM_THREADS` to an
-appropriate variable, in particluar when using multiple MPI ranks per node.
-
-## Usage
-
-The library can then be imported in python. Here is a small example script:
-```python
-import numpy as np
-import pygio
-
-# inspect file
-pygio.inspect_genericio("generic_io_file")
-
-# read all variables
-data = pygio.read_genericio("generic_io_file")
-
-# read only a subset of variables
-data_partial = pygio.read_genericio("generic_io_file", ["x", "y", "z"])
-data_x = data_partial["x"]
-
-# write data to file
-pygio.write_genericio("new_generic_io_file", 
-     variables = {"x": np.ones(100), "y": np.ones(100)},
-     phys_scale = [1, 1, 1],
-     phys_origin = [0, 0, 0],
-     method = PyGenericIO.FileIO.FileIOPOSIX
-     )
-
-
-### USING THE CLASS BASED INTERFACE ###
-
-# instantiate a GenericIO class
-gio_file = pygio.PyGenericIO("generic_io_file")
-
-# inspect (prints to python stdout, also works in notebook)
-gio_file.inspect()
-
-# get variables
-gio_vars = gio_file.get_variables()
-
-# print variable names
-for var in gio_vars:
-    print(var.name, var.size, var.element_size, var.is_float)
-    
-# read data
-data_all = gio_file.read()
-data_partial = gio_file.read(["x", "y", "z"])
-```
-
-Further methods and members of GenericIO can easly be interfaced by editing `python_new/genericio.cpp`.
-
-
-### Using MPI
-The `pygio` library is working with mpi4py. Here is an example file:
-```python
-from mpi4py import MPI
-comm = MPI.COMM_WORLD
-rank = comm.Get_rank()
-ranks = comm.Get_size()
-
-import numpy as np
-import pygio
-
-# read locally
-data = pygio.read_genericio("generic_io_file")
-# get local number of elements from the first element in dictionary
-num_elems = len(next(iter(data.values())))
-# reduce total number of elements
-num_elems_total = comm.allreduce(num_elems)
-if rank == 0:
-    print(f"Reading file with {ranks} ranks")
-    print(f"Total number of particles: {num_elems_total}")
-    print("The data contains the following variables:")
-    for k, d in data.items():
-        print(f"\t{k:5s}, dtype={d.dtype}")
-
-for i in range(ranks):
-    if i == rank:
-        print(f"rank {rank} read {num_elems} elements")
-    comm.Barrier()
-```
-
-It can be executed with `mpirun`:
-```bash
-mpirun -n 8 python testpygio_mpi.py
-```
-
-Here is an output for 1 and 8 ranks:
-```
-Reading file with 1 ranks
-Total number of particles: 562500
-The data contains the following variables:
-        id   , dtype=int64
-        vx   , dtype=float32
-        vy   , dtype=float32
-        vz   , dtype=float32
-        x    , dtype=float32
-        y    , dtype=float32
-        z    , dtype=float32
-rank 0 read 562500 elements
-```
-
-```
-Reading file with 8 ranks
-Total number of particles: 562500
-The data contains the following variables:
-        id   , dtype=int64
-        vx   , dtype=float32
-        vy   , dtype=float32
-        vz   , dtype=float32
-        x    , dtype=float32
-        y    , dtype=float32
-        z    , dtype=float32
-rank 0 read 70000 elements
-rank 1 read 70000 elements
-rank 2 read 70000 elements
-rank 3 read 70000 elements
-rank 4 read 70625 elements
-rank 5 read 70625 elements
-rank 6 read 70625 elements
-rank 7 read 70625 elements
-```
-
-### Force the non-MPI version to be imported
-
-Some clusters prevent the loading of MPI libraries on the login nodes. In order
-to still be able to use the same `pygio` on the login nodes, the non-MPI library
-can be loaded by setting the `GENERICIO_NO_MPI` environment variable before
-importing `pygio`:
-
-```python
-import os
-os.environ['GENERICIO_NO_MPI'] = 'True'
-import pygio
-```
-
-A warning will be printed that the writing capabilities of genericio are not
-available in this mode.
\ No newline at end of file
diff --git a/python/genericio.cpp b/python/genericio.cpp
index 34aab98..089951b 100644
--- a/python/genericio.cpp
+++ b/python/genericio.cpp
@@ -368,14 +368,13 @@ PYBIND11_MODULE(pygio, m) {
       .def("read_nranks", (int (PyGenericIO::*)(void))(&PyGenericIO::readNRanks))
       .def("read_variable_names", &PyGenericIO::read_variable_names)
       .def("read_variable_dtypes", &PyGenericIO::read_variable_dtypes)
-      .def("read", &PyGenericIO::read, py::arg("variables")=nullptr, py::arg("print_stats")=true, py::arg("collective_stats")=true)
       .def("get_source_ranks", &PyGenericIO::getSourceRanks)
 #ifndef GENERICIO_NO_MPI
       .def("rebalance_source_ranks", &PyGenericIO::rebalanceSourceRanks)
 #endif
       ;
 
-  py::class_<gio::GenericIO::VariableInfo>(pyGenericIO, "VariableInfo")
+  py::class_<PyGenericIO::VariableInfo>(pyGenericIO, "VariableInfo")
       .def_readonly("name", &gio::GenericIO::VariableInfo::Name)
       .def_readonly("size", &gio::GenericIO::VariableInfo::Size)
       .def_readonly("element_size", &gio::GenericIO::VariableInfo::ElementSize)
diff --git a/setup.py b/setup.py
index 17e1361..ee277fa 100644
--- a/setup.py
+++ b/setup.py
@@ -3,7 +3,8 @@ import re
 import sys
 import subprocess
 import platform
-#import versioneer
+
+# import versioneer
 
 from setuptools import setup, Extension
 from setuptools.command.build_ext import build_ext
@@ -11,7 +12,7 @@ from distutils.version import LooseVersion
 
 
 class CMakeExtension(Extension):
-    def __init__(self, name, sourcedir=''):
+    def __init__(self, name, sourcedir=""):
         Extension.__init__(self, name, sources=[])
         self.sourcedir = os.path.abspath(sourcedir)
 
@@ -19,64 +20,78 @@ class CMakeExtension(Extension):
 class CMakeBuild(build_ext):
     def run(self):
         try:
-            out = subprocess.check_output(['cmake', '--version'])
+            out = subprocess.check_output(["cmake", "--version"])
         except OSError:
-            raise RuntimeError("CMake must be installed to build the following extensions: " +
-                               ", ".join(e.name for e in self.extensions))
-        cmake_version = LooseVersion(re.search(r'version\s*([\d.]+)', out.decode()).group(1))
-        if cmake_version < '3.11.0':
+            raise RuntimeError(
+                "CMake must be installed to build the following extensions: "
+                + ", ".join(e.name for e in self.extensions)
+            )
+        cmake_version = LooseVersion(
+            re.search(r"version\s*([\d.]+)", out.decode()).group(1)
+        )
+        if cmake_version < "3.11.0":
             raise RuntimeError("CMake >= 3.11.0 is required")
         for ext in self.extensions:
             self.build_extension(ext)
 
     def build_extension(self, ext):
         extdir = os.path.abspath(os.path.dirname(self.get_ext_fullpath(ext.name)))
-        cmake_args = ['-DCMAKE_LIBRARY_OUTPUT_DIRECTORY=' + extdir,
-                      '-DGENERICIO_NEW_PYTHON_LIBRARY=ON',
-                      '-DGENERICIO_PYTHON_LIBRARY=OFF',
-                      '-DGENERICIO_MPI_EXECUTABLES=OFF',
-                      '-DGENERICIO_FRONTEND_EXECUTABLES=OFF',
-                      '-DPYTHON_EXECUTABLE=' + sys.executable]
-        cfg = 'Debug' if self.debug else 'Release'
-        build_args = ['--config', cfg]
+        cmake_args = [
+            "-DCMAKE_LIBRARY_OUTPUT_DIRECTORY=" + extdir,
+            "-DGENERICIO_PYTHON_LIBRARY=ON",
+            "-DGENERICIO_LEGACY_PYTHON_LIBRARY=OFF",
+            "-DGENERICIO_MPI_EXECUTABLES=OFF",
+            "-DGENERICIO_FRONTEND_EXECUTABLES=OFF",
+            "-DPYTHON_EXECUTABLE=" + sys.executable,
+        ]
+        cfg = "Debug" if self.debug else "Release"
+        build_args = ["--config", cfg]
 
         if platform.system() == "Windows":
-            cmake_args += ['-DCMAKE_LIBRARY_OUTPUT_DIRECTORY_{}={}'.format(cfg.upper(), extdir)]
+            cmake_args += [
+                "-DCMAKE_LIBRARY_OUTPUT_DIRECTORY_{}={}".format(cfg.upper(), extdir)
+            ]
             if sys.maxsize > 2 ** 32:
-                cmake_args += ['-A', 'x64']
-            build_args += ['--', '/m']
+                cmake_args += ["-A", "x64"]
+            build_args += ["--", "/m"]
         else:
-            cmake_args += ['-DCMAKE_BUILD_TYPE=' + cfg]
-            build_args += ['--', '-j']
+            cmake_args += ["-DCMAKE_BUILD_TYPE=" + cfg]
+            build_args += ["--", "-j"]
 
         env = os.environ.copy()
-        env['CXXFLAGS'] = '{} -DVERSION_INFO=\\"{}\\"'.format(env.get('CXXFLAGS', ''), self.distribution.get_version())
+        env["CXXFLAGS"] = '{} -DVERSION_INFO=\\"{}\\"'.format(
+            env.get("CXXFLAGS", ""), self.distribution.get_version()
+        )
         if not os.path.exists(self.build_temp):
             os.makedirs(self.build_temp)
-        subprocess.check_call(['cmake', ext.sourcedir] + cmake_args, cwd=self.build_temp, env=env)
-        subprocess.check_call(['cmake', '--build', '.'] + build_args, cwd=self.build_temp)
+        subprocess.check_call(
+            ["cmake", ext.sourcedir] + cmake_args, cwd=self.build_temp, env=env
+        )
+        subprocess.check_call(
+            ["cmake", "--build", "."] + build_args, cwd=self.build_temp
+        )
 
 
-#def get_cmdclass():
+# def get_cmdclass():
 #    cmdclass = versioneer.get_cmdclass()
 #    cmdclass.update({"build_ext": CMakeBuild})
 #    return cmdclass
 
 
 setup(
-    name='pygio',
-    #version=versioneer.get_version(),
+    name="pygio",
+    # version=versioneer.get_version(),
     version=0.1,
-    author='',
-    author_email='',
-    ext_package='pygio',
-    packages=['pygio'],
-    package_dir={"": "new_python"},
-    ext_modules=[CMakeExtension('pygio')],
-    #cmdclass=get_cmdclass(),
+    author="",
+    author_email="",
+    ext_package="pygio",
+    packages=["pygio"],
+    package_dir={"": "python"},
+    ext_modules=[CMakeExtension("pygio")],
+    # cmdclass=get_cmdclass(),
     cmdclass={
-        'build_ext': CMakeBuild,
+        "build_ext": CMakeBuild,
     },
     zip_safe=False,
-    install_requires=['numpy']
+    install_requires=["numpy"],
 )
-- 
GitLab