From 02348385b66e6efd2ebaa3a7a780a0068a1aaffc Mon Sep 17 00:00:00 2001 From: Michael Buehlmann <buehlmann.michi@gmail.com> Date: Tue, 11 Jan 2022 12:49:55 -0600 Subject: [PATCH] update documentation --- README.md | 152 ++++++++++++++++--------------- docs/conf.py | 1 + docs/cpp/genericio.rst | 2 + docs/cpp/library.rst | 2 + docs/environment.rst | 43 +++++++++ docs/executables/benchmarks.rst | 23 +++++ docs/executables/tools.rst | 3 + docs/index.rst | 14 ++- docs/python/class_interface.rst | 14 ++- docs/python/legacy_python.rst | 46 ++++++++++ docs/python/mpi.rst | 94 ++++++++++++++++++- docs/python/readwrite.rst | 64 ++++++++++++- python/README.md | 155 -------------------------------- python/genericio.cpp | 3 +- setup.py | 85 ++++++++++-------- 15 files changed, 428 insertions(+), 273 deletions(-) create mode 100644 docs/cpp/genericio.rst create mode 100644 docs/cpp/library.rst create mode 100644 docs/environment.rst create mode 100644 docs/executables/benchmarks.rst create mode 100644 docs/executables/tools.rst create mode 100644 docs/python/legacy_python.rst delete mode 100644 python/README.md diff --git a/README.md b/README.md index 009fe49..eb7a488 100644 --- a/README.md +++ b/README.md @@ -1,108 +1,114 @@ -GenericIO -========= +# GenericIO -GenericIO is a write-optimized library for writing self-describing scientific data files on large-scale parallel file systems. +GenericIO is a write-optimized library for writing self-describing scientific +data files on large-scale parallel file systems. -Reference ---------- +## Reference -Habib, et al., HACC: Simulating Future Sky Surveys on State-of-the-Art Supercomputing Architectures, New Astronomy, 2015 +Habib, et al., HACC: Simulating Future Sky Surveys on State-of-the-Art +Supercomputing Architectures, New Astronomy, 2015 (http://arxiv.org/abs/1410.2805). 
-Source Code ------------ +## Obtaining the Source Code The most recent version of source is available by cloning this repo: ```bash git clone https://xgitlab.cels.anl.gov/hacc/genericio.git ``` -There is also a history of code [releases](https://xgitlab.cels.anl.gov/hacc/genericio/-/releases): -- [2019-04-17](https://xgitlab.cels.anl.gov/hacc/genericio/-/releases/20190417) -- [2017-09-25](https://xgitlab.cels.anl.gov/hacc/genericio/-/releases/20170925) -- [2016-08-29](https://xgitlab.cels.anl.gov/hacc/genericio/-/releases/20160829) -- [2016-04-12](https://xgitlab.cels.anl.gov/hacc/genericio/-/releases/20160412) -- [2015-06-08](https://xgitlab.cels.anl.gov/hacc/genericio/-/releases/20150608) +There is also a history of code +[releases](https://xgitlab.cels.anl.gov/hacc/genericio/-/releases): +[2019-04-17](https://xgitlab.cels.anl.gov/hacc/genericio/-/releases/20190417) / +[2017-09-25](https://xgitlab.cels.anl.gov/hacc/genericio/-/releases/20170925) / +[2016-08-29](https://xgitlab.cels.anl.gov/hacc/genericio/-/releases/20160829) / +[2016-04-12](https://xgitlab.cels.anl.gov/hacc/genericio/-/releases/20160412) / +[2015-06-08](https://xgitlab.cels.anl.gov/hacc/genericio/-/releases/20150608) / -Output file partitions (subfiles) ---------------------------------- +----- -If you're running on an IBM BG/Q supercomputer, then the number of subfiles (partitions) chosen is based on the I/O nodes in an automatic way. Otherwise, by default, the GenericIO library picks the number of subfiles based on a fairly-naive hostname-based hashing scheme. This works reasonably-well on small clusters, but not on larger systems. On a larger system, you might want to set these environmental variables: +## Building Executables / C++Library -```bash -GENERICIO_PARTITIONS_USE_NAME=0 -GENERICIO_RANK_PARTITIONS=256 -``` +The executables and ``libgenericio`` can be built either with +[CMake](https://cmake.org/) (minimum version 3.10) or with +[GNUMake](https://www.gnu.org/software/make/). 
The following executables will +be built: -Where the number of partitions (256 above) determines the number of subfiles used. If you're using a Lustre file system, for example, an optimal number of files is: +- ``frontend/GenericIOPrint`` print data to stdout (non-MPI version) +- ``frontend/GenericIOVerify`` verify and try reading data (non-MPI version) +- ``mpi/GenericIOBenchmarkRead`` reading benchmark, works on data written with ``GenericIOBenchmarkWrite`` +- ``mpi/GenericIOBenchmarkWrite`` writing benchmark +- ``mpi/GenericIOPrint`` print data to stdout +- ``mpi/genericIORewrite`` rewrite data with a different number of ranks +- ``mpi/genericIOVerify`` verify and try reading data -``` -# of files * stripe count ~ # OSTs -``` - -On Titan, for example, there are 1008 OSTs, and a default stripe count of 4, so we use approximately 256 files. - -Benchmarks ----------- +**Using CMake** -Once you build the library and associated programs (using make), you can run, for example: +Note that the executables / libraries will be located in +``build/<frontend/mpi>``. CMake will use the compiler pointed to in the ``CC`` +and ``CXX`` environmental variables. ```bash -$ mpirun -np 8 ./mpi/GenericIOBenchmarkWrite /tmp/out.gio 123456 2 -Wrote 9 variables to /tmp/out (4691036 bytes) in 0.2361s: 18.9484 MB/s +mkdir build && cd build +cmake .. +make -j4 ``` +**Using Make** + +Make will create the executables / libraries under the main directory. Edit the +``CC``, ``CXX``, ``MPICC``, and ``MPICXX`` variables in the GNUmakefile to +change the compiler. + ```bash -$ mpirun -np 8 ./mpi/GenericIOBenchmarkRead /tmp/out.gio -Read 9 variables from /tmp/out (4688028 bytes) in 0.223067s: 20.0426 MB/s [excluding header read] +make ``` -The read benchmark always reads all of the input data. The output benchmark takes two numerical parameters, one if the number of data rows to write, and the second is a random seed (which slightly perturbs the per-rank output sizes, but not by much). 
Each row is 36 bytes for these benchmarks. +## Installing the Python Library -The write benchmark can be passed the -c parameter to enable output compression. Both benchmarks take an optional -a parameter to request that homogeneous aggregates (i.e. "float4") be used instead of using separate arrays for each position/velocity component. +The `pygio` library is pip-installable and works with `mpi4py`. +**Requirements** -Python module -------------- +Currently, a **CMake version >= 3.11.0** is required to fetch dependencies +during configuration. The ``pygio`` module also requires MPI libraries to be +findable by CMake's FindMPI. The compiler needs to support **C++17** (make sure +that ``CC`` and ``CXX`` point to the correct compiler) -[Click here to go to the README for the python interface](new_python/README.md) +**Install** +The python library can be installed by running pip in the **main folder**: +```bash +pip install . +``` -Legacy python module --------------------- +It will use the compiler referred to by the ``CC`` and ``CXX`` environment +variables. If the compiler supports OpenMP, the library will be threaded. Make +sure to set ``OMP_NUM_THREADS`` to an appropriate value, in particular when +using multiple MPI ranks per node. -*This documentation is for the old python module. It is recommened to use the newer, pybind11 based version, see above* +----- +## Output file partitions (subfiles) -The repository includes a genericio Python module that can read genericio-formatted files and return numpy arrays. This is included in the standard build. To use it, once you've built genericio, you can read genericio data as follows: +If you're running on an IBM BG/Q supercomputer, then the number of subfiles +(partitions) chosen is based on the I/O nodes in an automatic way. Otherwise, by +default, the GenericIO library picks the number of subfiles based on a +fairly-naive hostname-based hashing scheme. 
This works reasonably-well on small +clusters, but not on larger systems. On a larger system, you might want to set +these environmental variables: ```bash -$ export PYTHONPATH=${GENERICIO_DIR}/legacy_python -$ python ->>> import genericio ->>> genericio.gio_inspect('m000-99.fofproperties') -Number of Elements: 1691 -[data type] Variable name ---------------------------------------------- -[i 32] fof_halo_count -[i 64] fof_halo_tag -[f 32] fof_halo_mass -[f 32] fof_halo_mean_x -[f 32] fof_halo_mean_y -[f 32] fof_halo_mean_z -[f 32] fof_halo_mean_vx -[f 32] fof_halo_mean_vy -[f 32] fof_halo_mean_vz -[f 32] fof_halo_vel_disp - -(i=integer,f=floating point, number bits size) ->>> genericio.gio_read('m000-99.fofproperties','fof_halo_mass') -array([[ 4.58575588e+13], - [ 5.00464689e+13], - [ 5.07078771e+12], - ..., - [ 1.35221006e+13], - [ 5.29125710e+12], - [ 7.12849857e+12]], dtype=float32) - -``` \ No newline at end of file +GENERICIO_PARTITIONS_USE_NAME=0 +GENERICIO_RANK_PARTITIONS=256 +``` + +Where the number of partitions (256 above) determines the number of subfiles +used. If you're using a Lustre file system, for example, an optimal number of +files is: + +``` +# of files * stripe count ~ # OSTs +``` + +On Titan, for example, there are 1008 OSTs, and a default stripe count of 4, so +we use approximately 256 files. diff --git a/docs/conf.py b/docs/conf.py index b2991e1..4c27594 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -30,6 +30,7 @@ author = "Hal Finkel, et al." # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. 
extensions = [ + "sphinx.ext.doctest", "sphinx.ext.autodoc", "sphinx.ext.napoleon", "sphinx.ext.autodoc.typehints", diff --git a/docs/cpp/genericio.rst b/docs/cpp/genericio.rst new file mode 100644 index 0000000..657d452 --- /dev/null +++ b/docs/cpp/genericio.rst @@ -0,0 +1,2 @@ +GenericIO C++ Class +=================== \ No newline at end of file diff --git a/docs/cpp/library.rst b/docs/cpp/library.rst new file mode 100644 index 0000000..b3712cd --- /dev/null +++ b/docs/cpp/library.rst @@ -0,0 +1,2 @@ +GenericIO and CMake +=================== \ No newline at end of file diff --git a/docs/environment.rst b/docs/environment.rst new file mode 100644 index 0000000..7dc96ca --- /dev/null +++ b/docs/environment.rst @@ -0,0 +1,43 @@ +Runtime Options +=============== + +Library / Executables +--------------------- + +``GENERICIO_RANK0_CREATE_ALL`` + +``GENERICIO_COMPRESS`` + +``GENERICIO_FORCE_BLOCKS`` + +``GENERICIO_RETRY_COUNT`` + +``GENERICIO_RETRY_SLEEP`` + +``GENERICIO_VERBOSE`` + +``GENERICIO_PARTITIONS_USE_NAME`` + +``GENERICIO_RANK_PARTITIONS`` + +Executables +----------- + +``GENERICIO_USE_MPIIO`` + +BLOSC +----- + +``BLOSC_CLEVEL`` + +``BLOSC_SHUFFLE`` + +``BLOSC_TYPESIZE`` + +``BLOSC_COMPRESSOR`` + +``BLOSC_BLOCKSIZE`` + +``BLOSC_NTHREADS`` + +``BLOSC_NOLOCK`` \ No newline at end of file diff --git a/docs/executables/benchmarks.rst b/docs/executables/benchmarks.rst new file mode 100644 index 0000000..c1f017b --- /dev/null +++ b/docs/executables/benchmarks.rst @@ -0,0 +1,23 @@ +Benchmarks +========== + +Once you build the library and associated programs (using make), you can run, for example: + +.. code-block:: + + $ mpirun -np 8 ./mpi/GenericIOBenchmarkWrite /tmp/out.gio 123456 2 + Wrote 9 variables to /tmp/out (4691036 bytes) in 0.2361s: 18.9484 MB/s + +.. 
code-block:: + + $ mpirun -np 8 ./mpi/GenericIOBenchmarkRead /tmp/out.gio + Read 9 variables from /tmp/out (4688028 bytes) in 0.223067s: 20.0426 MB/s [excluding header read] + +The read benchmark always reads all of the input data. The output benchmark takes two numerical parameters, one is the +number of data rows to write, and the second is a random seed (which slightly perturbs the per-rank output sizes, but +not by much). Each row is 36 bytes for these benchmarks. + +The write benchmark can be passed the -c parameter to enable output compression. Both benchmarks take an optional -a +parameter to request that homogeneous aggregates (i.e. "float4") be used instead of using separate arrays for each +position/velocity component. + diff --git a/docs/executables/tools.rst b/docs/executables/tools.rst new file mode 100644 index 0000000..f8aab0f --- /dev/null +++ b/docs/executables/tools.rst @@ -0,0 +1,3 @@ +Executables +=========== + diff --git a/docs/index.rst b/docs/index.rst index d8fadfa..415b4c2 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -5,6 +5,7 @@ .. only:: latex + ======================= GenericIO Documentation ======================= @@ -15,7 +16,7 @@ .. only:: latex .. toctree:: - :maxdepth: 2 + :maxdepth: 3 self @@ -26,8 +27,19 @@ python/readwrite python/class_interface python/mpi + python/legacy_python .. toctree:: :caption: C++ Interface :maxdepth: 2 + cpp/genericio + cpp/library + + .. toctree:: + :caption: Executables + :maxdepth: 1 + + executables/tools + executables/benchmarks + environment diff --git a/docs/python/class_interface.rst b/docs/python/class_interface.rst index 6372b12..2aa0e32 100644 --- a/docs/python/class_interface.rst +++ b/docs/python/class_interface.rst @@ -2,6 +2,12 @@ The Python Class Interface ========================== .. currentmodule:: pygio +The :class:`PyGenericIO` python class directly interfaces the C++ ``GenericIO`` +class. 
Writing data is currently not supported through that interface (use +:func:`write_genericio` instead). Note that reading data works without manually +adding variables (:class:`pygio::PyGenericIO::VariableInfo`) to the class instance. +Instead, a list of variable names can be passed to :meth:`PyGenericIO.read`. + .. code-block:: python # instantiate a GenericIO class @@ -22,11 +28,15 @@ The Python Class Interface data_partial = gio_file.read(["x", "y", "z"]) Further methods and members of GenericIO can easly be interfaced by editing -``python_new/genericio.cpp``. +``python/genericio.cpp``. References ---------- .. autoclass:: PyGenericIO - :members: \ No newline at end of file + :members: + +.. autoclass:: pygio::PyGenericIO.VariableInfo + :members: + :undoc-members: \ No newline at end of file diff --git a/docs/python/legacy_python.rst b/docs/python/legacy_python.rst new file mode 100644 index 0000000..4e9cf72 --- /dev/null +++ b/docs/python/legacy_python.rst @@ -0,0 +1,46 @@ +Legacy python module +-------------------- + +.. warning:: + + This documentation is for the old python module. It is recommended to use the + newer, pybind11-based version. + + +The repository includes a genericio Python module that can read +genericio-formatted files and return numpy arrays. This is included in the +standard build. To use it, once you've built genericio, you can read genericio +data as follows: + +.. 
code-block:: bash + + $ export PYTHONPATH=${GENERICIO_DIR}/legacy_python + $ python + + +>>> import genericio +>>> genericio.gio_inspect('m000-99.fofproperties') +Number of Elements: 1691 +[data type] Variable name +--------------------------------------------- +[i 32] fof_halo_count +[i 64] fof_halo_tag +[f 32] fof_halo_mass +[f 32] fof_halo_mean_x +[f 32] fof_halo_mean_y +[f 32] fof_halo_mean_z +[f 32] fof_halo_mean_vx +[f 32] fof_halo_mean_vy +[f 32] fof_halo_mean_vz +[f 32] fof_halo_vel_disp +(i=integer,f=floating point, number bits size) + + +>>> genericio.gio_read('m000-99.fofproperties','fof_halo_mass') +array([[ 4.58575588e+13], + [ 5.00464689e+13], + [ 5.07078771e+12], + ..., + [ 1.35221006e+13], + [ 5.29125710e+12], + [ 7.12849857e+12]], dtype=float32) \ No newline at end of file diff --git a/docs/python/mpi.rst b/docs/python/mpi.rst index 721b7ef..51a0e04 100644 --- a/docs/python/mpi.rst +++ b/docs/python/mpi.rst @@ -1,2 +1,94 @@ Using GenericIO with MPI -======================== \ No newline at end of file +======================== + +The `pygio` library is working with mpi4py. Here is an example: + +.. code-block:: python + + from mpi4py import MPI + comm = MPI.COMM_WORLD + rank = comm.Get_rank() + ranks = comm.Get_size() + + import numpy as np + import pygio + + # read locally + data = pygio.read_genericio("generic_io_file") + # get local number of elements from the first element in dictionary + num_elems = len(next(iter(data.values()))) + # reduce total number of elements + num_elems_total = comm.allreduce(num_elems) + if rank == 0: + print(f"Reading file with {ranks} ranks") + print(f"Total number of particles: {num_elems_total}") + print("The data contains the following variables:") + for k, d in data.items(): + print(f"\t{k:5s}, dtype={d.dtype}") + + for i in range(ranks): + if i == rank: + print(f"rank {rank} read {num_elems} elements") + comm.Barrier() + + +It can be executed with ``mpirun`` or ``mpiexec``: + +.. 
code-block:: bash + + mpirun -n 8 python testpygio_mpi.py + +Here is an output for 1 and 8 ranks: + +.. code-block:: none + + # 1 Rank + Reading file with 1 ranks + Total number of particles: 562500 + The data contains the following variables: + id , dtype=int64 + vx , dtype=float32 + vy , dtype=float32 + vz , dtype=float32 + x , dtype=float32 + y , dtype=float32 + z , dtype=float32 + rank 0 read 562500 elements + + # 8 Ranks + Reading file with 8 ranks + Total number of particles: 562500 + The data contains the following variables: + id , dtype=int64 + vx , dtype=float32 + vy , dtype=float32 + vz , dtype=float32 + x , dtype=float32 + y , dtype=float32 + z , dtype=float32 + rank 0 read 70000 elements + rank 1 read 70000 elements + rank 2 read 70000 elements + rank 3 read 70000 elements + rank 4 read 70625 elements + rank 5 read 70625 elements + rank 6 read 70625 elements + rank 7 read 70625 elements + + +Importing the non-MPI version +----------------------------- + +Some compute facilities prevent the loading of MPI libraries on login nodes. In order +to still be able to use ``pygio`` on the login nodes, the non-MPI library +can be loaded by setting the `GENERICIO_NO_MPI` environment variable before +importing `pygio`: + +.. code-block:: python + + import os + os.environ['GENERICIO_NO_MPI'] = 'True' + import pygio + +A warning will be printed that the writing capabilities of genericio are not +available in this mode. \ No newline at end of file diff --git a/docs/python/readwrite.rst b/docs/python/readwrite.rst index 409f31f..402e0be 100644 --- a/docs/python/readwrite.rst +++ b/docs/python/readwrite.rst @@ -3,14 +3,50 @@ Reading and Writing Data .. currentmodule:: pygio +The ``pygio`` module contains a high-level functional interface for inspecting, +reading, and writing particle and halo data. In addition, there is a class-based +interface which mimics the C++ ``GenericIO`` class. 
More information about +:class:`PyGenericIO` can be found :doc:`here <./class_interface>`. + +.. note:: + + The GenericIO python library supports **MPI** with + `mpi4py <https://mpi4py.readthedocs.io/en/stable/>`_. Under MPI, each rank + will read a separate "chunk" of the file (if the file was written with + multiple ranks). The function :func:`read_num_elems` will return the number + of particles / halos that the local rank will read and can be different on + each rank. More information on how to use this library with MPI can be found + :doc:`here <./mpi>`. + +To get an overview of the data contained in a GenericIO file, use the +:func:`inspect_genericio` function, which lists all variables, data types and +number of elements (particles or halos) contained in the file. + .. code-block:: python - import numpy as np import pygio # inspect file pygio.inspect_genericio("generic_io_file") +Some additional inspection functions are: + +- :func:`read_num_elems` returns the number of elements in the file +- :func:`read_total_num_elems` returns the total/global number of elements in the file +- :func:`read_variable_names` returns a list of variable names defined in the file +- :func:`read_variable_dtypes` returns a dictionary with ``variable_name -> numpy.dtype`` +- :func:`read_phys_scale` returns a list of 3 floats describing the box size +- :func:`read_phys_origin` returns a list of 3 floats describing the box origin + + +Reading GenericIO files +----------------------- + +Data contained in GenericIO files can be loaded into numpy arrays using the +:func:`read_genericio` function. By default, all variables stored in the +file will be loaded. If not all variables are needed, the memory footprint and +read speed can be improved by passing a list of variable names to the +function. .. 
code-block:: python @@ -23,15 +59,33 @@ Reading and Writing Data data_partial = pygio.read_genericio("generic_io_file", ["x", "y", "z"]) data_x = data_partial["x"] + +Writing GenericIO files +----------------------- + +Data arrays can be stored into GenericIO files using the :func:`write_genericio` +function. The data has to be a dictionary, with the variable name as key and the +corresponding one dimensional numpy array as value. The numpy arrays have to +meet the following criteria: + +- 1 dimensional +- same length +- datatype has to be ``np.float32``, ``np.float64``, ``np.int64`` or + ``np.uint16`` + +In addition, one needs to specify the physical origin (default [0, 0, 0]) and +the physical box size as a list of 3 floats. + .. code-block:: python data = { - "x": np.ones(100), - "y": np.ones(100) + "x": np.random.uniform(0, 1, 100).astype(np.float32), + "y": np.random.uniform(0, 1, 100).astype(np.float32), + "z": np.random.uniform(0, 1, 100).astype(np.float32) } # write data to file - pygio.write_genericio("new_generic_io_file", + pygio.write_genericio("new_file.gio", data, phys_scale = [1, 1, 1], phys_origin = [0, 0, 0] @@ -47,6 +101,8 @@ References .. autofunction:: read_num_elems +.. autofunction:: read_total_num_elems + .. autofunction:: read_variable_names .. autofunction:: read_variable_dtypes diff --git a/python/README.md b/python/README.md deleted file mode 100644 index b14a2ce..0000000 --- a/python/README.md +++ /dev/null @@ -1,155 +0,0 @@ -# Python module - -This version of `pygio` is pip-installable and works with `mpi4py`. - -## Requirements - -Currently, a CMake version >= 3.11.0 is required to fetch dependencies during configuration. -The `pygio` module also requires MPI libraries to be findable by CMake's FindMPI. The compiler -needs to support C++17 (make sure that `CC` and `CXX` point to the correct compiler) - -## Install - -The python library can be installed by running pip in the **main folder**: -```bash -pip install . 
-``` -It will use the compiler referred by the `CC` and `CXX` environment variable. If the compiler -supports OpenMP, the library will be threaded. Make sure to set `OMP_NUM_THREADS` to an -appropriate variable, in particluar when using multiple MPI ranks per node. - -## Usage - -The library can then be imported in python. Here is a small example script: -```python -import numpy as np -import pygio - -# inspect file -pygio.inspect_genericio("generic_io_file") - -# read all variables -data = pygio.read_genericio("generic_io_file") - -# read only a subset of variables -data_partial = pygio.read_genericio("generic_io_file", ["x", "y", "z"]) -data_x = data_partial["x"] - -# write data to file -pygio.write_genericio("new_generic_io_file", - variables = {"x": np.ones(100), "y": np.ones(100)}, - phys_scale = [1, 1, 1], - phys_origin = [0, 0, 0], - method = PyGenericIO.FileIO.FileIOPOSIX - ) - - -### USING THE CLASS BASED INTERFACE ### - -# instantiate a GenericIO class -gio_file = pygio.PyGenericIO("generic_io_file") - -# inspect (prints to python stdout, also works in notebook) -gio_file.inspect() - -# get variables -gio_vars = gio_file.get_variables() - -# print variable names -for var in gio_vars: - print(var.name, var.size, var.element_size, var.is_float) - -# read data -data_all = gio_file.read() -data_partial = gio_file.read(["x", "y", "z"]) -``` - -Further methods and members of GenericIO can easly be interfaced by editing `python_new/genericio.cpp`. - - -### Using MPI -The `pygio` library is working with mpi4py. 
Here is an example file: -```python -from mpi4py import MPI -comm = MPI.COMM_WORLD -rank = comm.Get_rank() -ranks = comm.Get_size() - -import numpy as np -import pygio - -# read locally -data = pygio.read_genericio("generic_io_file") -# get local number of elements from the first element in dictionary -num_elems = len(next(iter(data.values()))) -# reduce total number of elements -num_elems_total = comm.allreduce(num_elems) -if rank == 0: - print(f"Reading file with {ranks} ranks") - print(f"Total number of particles: {num_elems_total}") - print("The data contains the following variables:") - for k, d in data.items(): - print(f"\t{k:5s}, dtype={d.dtype}") - -for i in range(ranks): - if i == rank: - print(f"rank {rank} read {num_elems} elements") - comm.Barrier() -``` - -It can be executed with `mpirun`: -```bash -mpirun -n 8 python testpygio_mpi.py -``` - -Here is an output for 1 and 8 ranks: -``` -Reading file with 1 ranks -Total number of particles: 562500 -The data contains the following variables: - id , dtype=int64 - vx , dtype=float32 - vy , dtype=float32 - vz , dtype=float32 - x , dtype=float32 - y , dtype=float32 - z , dtype=float32 -rank 0 read 562500 elements -``` - -``` -Reading file with 8 ranks -Total number of particles: 562500 -The data contains the following variables: - id , dtype=int64 - vx , dtype=float32 - vy , dtype=float32 - vz , dtype=float32 - x , dtype=float32 - y , dtype=float32 - z , dtype=float32 -rank 0 read 70000 elements -rank 1 read 70000 elements -rank 2 read 70000 elements -rank 3 read 70000 elements -rank 4 read 70625 elements -rank 5 read 70625 elements -rank 6 read 70625 elements -rank 7 read 70625 elements -``` - -### Force the non-MPI version to be imported - -Some clusters prevent the loading of MPI libraries on the login nodes. 
In order -to still be able to use the same `pygio` on the login nodes, the non-MPI library -can be loaded by setting the `GENERICIO_NO_MPI` environment variable before -importing `pygio`: - -```python -import os -os.environ['GENERICIO_NO_MPI'] = 'True' -import pygio -``` - -A warning will be printed that the writing capabilities of genericio are not -available in this mode. \ No newline at end of file diff --git a/python/genericio.cpp b/python/genericio.cpp index 34aab98..089951b 100644 --- a/python/genericio.cpp +++ b/python/genericio.cpp @@ -368,14 +368,13 @@ PYBIND11_MODULE(pygio, m) { .def("read_nranks", (int (PyGenericIO::*)(void))(&PyGenericIO::readNRanks)) .def("read_variable_names", &PyGenericIO::read_variable_names) .def("read_variable_dtypes", &PyGenericIO::read_variable_dtypes) - .def("read", &PyGenericIO::read, py::arg("variables")=nullptr, py::arg("print_stats")=true, py::arg("collective_stats")=true) .def("get_source_ranks", &PyGenericIO::getSourceRanks) #ifndef GENERICIO_NO_MPI .def("rebalance_source_ranks", &PyGenericIO::rebalanceSourceRanks) #endif ; - py::class_<gio::GenericIO::VariableInfo>(pyGenericIO, "VariableInfo") + py::class_<PyGenericIO::VariableInfo>(pyGenericIO, "VariableInfo") .def_readonly("name", &gio::GenericIO::VariableInfo::Name) .def_readonly("size", &gio::GenericIO::VariableInfo::Size) .def_readonly("element_size", &gio::GenericIO::VariableInfo::ElementSize) diff --git a/setup.py b/setup.py index 17e1361..ee277fa 100644 --- a/setup.py +++ b/setup.py @@ -3,7 +3,8 @@ import re import sys import subprocess import platform -#import versioneer + +# import versioneer from setuptools import setup, Extension from setuptools.command.build_ext import build_ext @@ -11,7 +12,7 @@ from distutils.version import LooseVersion class CMakeExtension(Extension): - def __init__(self, name, sourcedir=''): + def __init__(self, name, sourcedir=""): Extension.__init__(self, name, sources=[]) self.sourcedir = os.path.abspath(sourcedir) @@ -19,64 +20,78 @@ 
class CMakeExtension(Extension): class CMakeBuild(build_ext): def run(self): try: - out = subprocess.check_output(['cmake', '--version']) + out = subprocess.check_output(["cmake", "--version"]) except OSError: - raise RuntimeError("CMake must be installed to build the following extensions: " + - ", ".join(e.name for e in self.extensions)) - cmake_version = LooseVersion(re.search(r'version\s*([\d.]+)', out.decode()).group(1)) - if cmake_version < '3.11.0': + raise RuntimeError( + "CMake must be installed to build the following extensions: " + + ", ".join(e.name for e in self.extensions) + ) + cmake_version = LooseVersion( + re.search(r"version\s*([\d.]+)", out.decode()).group(1) + ) + if cmake_version < "3.11.0": raise RuntimeError("CMake >= 3.11.0 is required") for ext in self.extensions: self.build_extension(ext) def build_extension(self, ext): extdir = os.path.abspath(os.path.dirname(self.get_ext_fullpath(ext.name))) - cmake_args = ['-DCMAKE_LIBRARY_OUTPUT_DIRECTORY=' + extdir, - '-DGENERICIO_NEW_PYTHON_LIBRARY=ON', - '-DGENERICIO_PYTHON_LIBRARY=OFF', - '-DGENERICIO_MPI_EXECUTABLES=OFF', - '-DGENERICIO_FRONTEND_EXECUTABLES=OFF', - '-DPYTHON_EXECUTABLE=' + sys.executable] - cfg = 'Debug' if self.debug else 'Release' - build_args = ['--config', cfg] + cmake_args = [ + "-DCMAKE_LIBRARY_OUTPUT_DIRECTORY=" + extdir, + "-DGENERICIO_PYTHON_LIBRARY=ON", + "-DGENERICIO_LEGACY_PYTHON_LIBRARY=OFF", + "-DGENERICIO_MPI_EXECUTABLES=OFF", + "-DGENERICIO_FRONTEND_EXECUTABLES=OFF", + "-DPYTHON_EXECUTABLE=" + sys.executable, + ] + cfg = "Debug" if self.debug else "Release" + build_args = ["--config", cfg] if platform.system() == "Windows": - cmake_args += ['-DCMAKE_LIBRARY_OUTPUT_DIRECTORY_{}={}'.format(cfg.upper(), extdir)] + cmake_args += [ + "-DCMAKE_LIBRARY_OUTPUT_DIRECTORY_{}={}".format(cfg.upper(), extdir) + ] if sys.maxsize > 2 ** 32: - cmake_args += ['-A', 'x64'] - build_args += ['--', '/m'] + cmake_args += ["-A", "x64"] + build_args += ["--", "/m"] else: - cmake_args += 
['-DCMAKE_BUILD_TYPE=' + cfg] - build_args += ['--', '-j'] + cmake_args += ["-DCMAKE_BUILD_TYPE=" + cfg] + build_args += ["--", "-j"] env = os.environ.copy() - env['CXXFLAGS'] = '{} -DVERSION_INFO=\\"{}\\"'.format(env.get('CXXFLAGS', ''), self.distribution.get_version()) + env["CXXFLAGS"] = '{} -DVERSION_INFO=\\"{}\\"'.format( + env.get("CXXFLAGS", ""), self.distribution.get_version() + ) if not os.path.exists(self.build_temp): os.makedirs(self.build_temp) - subprocess.check_call(['cmake', ext.sourcedir] + cmake_args, cwd=self.build_temp, env=env) - subprocess.check_call(['cmake', '--build', '.'] + build_args, cwd=self.build_temp) + subprocess.check_call( + ["cmake", ext.sourcedir] + cmake_args, cwd=self.build_temp, env=env + ) + subprocess.check_call( + ["cmake", "--build", "."] + build_args, cwd=self.build_temp + ) -#def get_cmdclass(): +# def get_cmdclass(): # cmdclass = versioneer.get_cmdclass() # cmdclass.update({"build_ext": CMakeBuild}) # return cmdclass setup( - name='pygio', - #version=versioneer.get_version(), + name="pygio", + # version=versioneer.get_version(), version=0.1, - author='', - author_email='', - ext_package='pygio', - packages=['pygio'], - package_dir={"": "new_python"}, - ext_modules=[CMakeExtension('pygio')], - #cmdclass=get_cmdclass(), + author="", + author_email="", + ext_package="pygio", + packages=["pygio"], + package_dir={"": "python"}, + ext_modules=[CMakeExtension("pygio")], + # cmdclass=get_cmdclass(), cmdclass={ - 'build_ext': CMakeBuild, + "build_ext": CMakeBuild, }, zip_safe=False, - install_requires=['numpy'] + install_requires=["numpy"], ) -- GitLab