diff --git a/.gitignore b/.gitignore index 0a541248e8bb3ba7ef52bc330d8c15e0f87fcbda..d5a3234c96bb459e2f5bb7aaa90b92fa2b99cca9 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,7 @@ build/ mpi/ frontend/ *.o -python/genericio.pyc -new_python/pygio.egg-info -new_python/build \ No newline at end of file +*.pyc +*.egg-info +.vscode +docs/_build \ No newline at end of file diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml new file mode 100644 index 0000000000000000000000000000000000000000..1e506d45e7bfdfd2d76e8012ef9d638b71a304a3 --- /dev/null +++ b/.gitlab-ci.yml @@ -0,0 +1,66 @@ +stages: + - build + - docs + - deploy + + +image: debian:latest +before_script: + - apt update && apt -y install git gcc cmake mpich + +build_makefile: + stage: build + script: + - make + +build_cmake: + stage: build + script: + - mkdir build && cd build + - cmake .. + - make + +build_python: + stage: build + before_script: + - apt update && apt -y install git gcc cmake mpich python3 python3-pip + - python3 -m pip install numpy + script: + - python3 setup.py bdist_wheel + - python3 -m pip install dist/* + artifacts: + paths: + - dist/*.whl + variables: + PIP_CACHE_DIR: "$CI_PROJECT_DIR/.cache/pip" + cache: + paths: + - .cache/pip + - venv/ + +build_docs: + stage: docs + dependencies: + - build_python + before_script: + - apt update && apt -y install mpich python3 python3-pip + - python3 -m pip install --upgrade pip + - python3 -m pip install numpy + - python3 -m pip install dist/* + - python3 -m pip install Sphinx sphinx-rtd-theme sphinxcontrib-napoleon numpydoc myst-parser + script: + - cd docs + - make dirhtml + artifacts: + paths: + - docs/_build/dirhtml/ + +deploy_docs: + stage: deploy + dependencies: + - build_docs + script: + - rsync -avp docs/_build/dirhtml /tmp/ + tags: + - docs + diff --git a/CMakeLists.txt b/CMakeLists.txt index d9b1f0cac2f2f8bc0acac79813af4512dad4d374..fc55603e5761cb8dd2d19f1d6cc57c395f1fa195 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -28,13 +28,13 @@ else() 
set(GENERICIO_MPI_EXECUTABLES OFF) endif() option(GENERICIO_FRONTEND_EXECUTABLES "build frontend executables?" ${GENERICIO_MASTER_PROJECT}) -option(GENERICIO_PYTHON_LIBRARY "build python library?" ${GENERICIO_MASTER_PROJECT}) +option(GENERICIO_LEGACY_PYTHON_LIBRARY "build legacy python library?" ${GENERICIO_MASTER_PROJECT}) # new python target: only if cmake at least version 3.11 if(NOT (CMAKE_VERSION VERSION_LESS 3.11.0)) - option(GENERICIO_NEW_PYTHON_LIBRARY "build (new) python library with pybind11?" OFF) + option(GENERICIO_PYTHON_LIBRARY "build the python library with pybind11?" OFF) else() - message(WANRING " cmake version < 3.11.0, cannot build new python library") - set(GENERICIO_NEW_PYTHON_LIBRARY OFF) + message(WARNING " cmake version < 3.11.0, cannot build the python library") + set(GENERICIO_PYTHON_LIBRARY OFF) endif() ############################################################################### @@ -43,7 +43,7 @@ add_subdirectory(thirdparty) ############################################################################### # GenericIO sources, libraries, and executables -set(GenericIO_Sources +set(GenericIO_Sources GenericIO.h GenericIO.cxx ) @@ -77,16 +77,16 @@ endif() # MPI Executables if(GENERICIO_MPI_EXECUTABLES) set(MPI_Executables - GenericIOPrint - GenericIOVerify - GenericIOBenchmarkRead - GenericIOBenchmarkWrite + GenericIOPrint + GenericIOVerify + GenericIOBenchmarkRead + GenericIOBenchmarkWrite GenericIORewrite ) foreach(executable ${MPI_Executables}) add_executable("${executable}_MPI" "${executable}.cxx") target_link_libraries("${executable}_MPI" PRIVATE genericio_mpi) - set_target_properties("${executable}_MPI" PROPERTIES + set_target_properties("${executable}_MPI" PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/mpi" OUTPUT_NAME ${executable}) endforeach() @@ -105,17 +105,18 @@ if(GENERICIO_FRONTEND_EXECUTABLES) endforeach() endif() -# Old python library -if(GENERICIO_PYTHON_LIBRARY) - add_library(pygio SHARED python/lib/gio.cxx 
python/lib/gio.h) - target_link_libraries(pygio PRIVATE genericio) +# Legacy python library +if(GENERICIO_LEGACY_PYTHON_LIBRARY) + add_library(pygio_legacy SHARED legacy_python/lib/gio.cxx legacy_python/lib/gio.h) + target_link_libraries(pygio_legacy PRIVATE genericio) # GNUmakefile compatibility: also move to frontend (could be improved) - set_target_properties(pygio PROPERTIES LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/frontend") + set_target_properties(pygio_legacy PROPERTIES LIBRARY_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/frontend") + set_target_properties(pygio_legacy PROPERTIES OUTPUT_NAME pygio) # GNUmakefile compatibility: copy python files to build directory so that relative paths are correct - file(COPY python/genericio.py python/example.py DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/python) + file(COPY legacy_python/genericio.py legacy_python/example.py DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/legacy_python) endif() -# New python library -if(GENERICIO_NEW_PYTHON_LIBRARY) - add_subdirectory(new_python) +# Python library +if(GENERICIO_PYTHON_LIBRARY) + add_subdirectory(python) endif() \ No newline at end of file diff --git a/GNUmakefile b/GNUmakefile index 4338cbdefcd302b6cae8085357d5aab6b2f43f5e..4881c6e9b857a131ad4f5e3dec7d1ed313134f8a 100644 --- a/GNUmakefile +++ b/GNUmakefile @@ -1,31 +1,31 @@ # Copyright (C) 2015, UChicago Argonne, LLC # All Rights Reserved -# +# # Generic IO (ANL-15-066) # Hal Finkel, Argonne National Laboratory -# +# # OPEN SOURCE LICENSE -# +# # Under the terms of Contract No. DE-AC02-06CH11357 with UChicago Argonne, # LLC, the U.S. Government retains certain rights in this software. -# +# # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: -# +# # 1. Redistributions of source code must retain the above copyright notice, # this list of conditions and the following disclaimer. -# +# # 2. 
Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. -# +# # 3. Neither the names of UChicago Argonne, LLC or the Department of Energy # nor the names of its contributors may be used to endorse or promote # products derived from this software without specific prior written # permission. -# +# # ***************************************************************************** -# +# # DISCLAIMER # THE SOFTWARE IS SUPPLIED “AS IS†WITHOUT WARRANTY OF ANY KIND. NEITHER THE # UNTED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT OF ENERGY, NOR @@ -34,7 +34,7 @@ # ACCURACY, COMPLETENESS, OR USEFULNESS OF ANY INFORMATION, DATA, APPARATUS, # PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS THAT ITS USE WOULD NOT INFRINGE # PRIVATELY OWNED RIGHTS. -# +# # ***************************************************************************** CC = gcc @@ -204,7 +204,7 @@ else FE_SHARED := -shared endif -$(FEDIR)/libpygio.so: $(FEDIR)/GenericIO.o $(FEDIR)/python/lib/gio.o $(FE_BLOSC_O) +$(FEDIR)/libpygio.so: $(FEDIR)/GenericIO.o $(FEDIR)/legacy_python/lib/gio.o $(FE_BLOSC_O) $(CXX) $(FE_CXXFLAGS) $(FE_SHARED) -o $@ $^ $(FEDIR)/GenericIOSQLite.so: $(FEDIR)/GenericIOSQLite.o $(FEDIR)/GenericIO.o $(FE_BLOSC_O) @@ -258,5 +258,5 @@ frontend-sqlite: $(FEDIR)/GenericIOSQLite.so $(FEDIR)/sqlite3 fe-sqlite: frontend-sqlite clean: - rm -rf frontend mpi python/genericio.pyc + rm -rf frontend mpi legacy_python/genericio.pyc diff --git a/README.md b/README.md index cbd764d2386a1f5ad2fe4e4eda7224c82f788ab9..9db28ee694fda879ad80b1638d64954968e687ea 100644 --- a/README.md +++ b/README.md @@ -1,96 +1,117 @@ # GenericIO -GenericIO is a write-optimized library for writing self-describing scientific data files on large-scale parallel file systems. 
+GenericIO is a write-optimized library for writing self-describing scientific +data files on large-scale parallel file systems. + +* Repository: [git.cels.anl.gov](https://git.cels.anl.gov/hacc/genericio) +* Documentation: [CPACdocs](https://www.hep.anl.gov/CPACdocs/genericio/) ## Reference -Habib, et al., HACC: Simulating Future Sky Surveys on State-of-the-Art Supercomputing Architectures, New Astronomy, 2015 +Habib, et al., HACC: Simulating Future Sky Surveys on State-of-the-Art +Supercomputing Architectures, New Astronomy, 2015 (http://arxiv.org/abs/1410.2805). -## Source Code +## Obtaining the Source Code The most recent version of source is available by cloning this repo: ```bash - git clone https://xgitlab.cels.anl.gov/hacc/genericio.git +git clone https://git.cels.anl.gov/hacc/genericio.git ``` -There is also a history of code [releases](https://xgitlab.cels.anl.gov/hacc/genericio/-/releases): - - [2019-04-17](https://xgitlab.cels.anl.gov/hacc/genericio/-/releases/20190417) - - [2017-09-25](https://xgitlab.cels.anl.gov/hacc/genericio/-/releases/20170925) - - [2016-08-29](https://xgitlab.cels.anl.gov/hacc/genericio/-/releases/20160829) - - [2016-04-12](https://xgitlab.cels.anl.gov/hacc/genericio/-/releases/20160412) - - [2015-06-08](https://xgitlab.cels.anl.gov/hacc/genericio/-/releases/20150608) +There is also a history of code +[releases](https://xgitlab.cels.anl.gov/hacc/genericio/-/releases): +[2019-04-17](https://xgitlab.cels.anl.gov/hacc/genericio/-/releases/20190417) / +[2017-09-25](https://xgitlab.cels.anl.gov/hacc/genericio/-/releases/20170925) / +[2016-08-29](https://xgitlab.cels.anl.gov/hacc/genericio/-/releases/20160829) / +[2016-04-12](https://xgitlab.cels.anl.gov/hacc/genericio/-/releases/20160412) / +[2015-06-08](https://xgitlab.cels.anl.gov/hacc/genericio/-/releases/20150608) / -## Output file partitions (subfiles) +----- + +## Building Executables / C++Library + +The executables and ``libgenericio`` can be built either with 
+[CMake](https://cmake.org/) (minimum version 3.10) or with +[GNUMake](https://www.gnu.org/software/make/). The following executables will +be built: + +- ``frontend/GenericIOPrint`` print data to stdout (non-MPI version) +- ``frontend/GenericIOVerify`` verify and try reading data (non-MPI version) +- ``mpi/GenericIOBenchmarkRead`` reading benchmark, works on data written with ``GenericIOBenchmarkWrite`` +- ``mpi/GenericIOBenchmarkWrite`` writing benchmark +- ``mpi/GenericIOPrint`` print data to stdout +- ``mpi/GenericIORewrite`` rewrite data with a different number of ranks +- ``mpi/GenericIOVerify`` verify and try reading data -If you're running on an IBM BG/Q supercomputer, then the number of subfiles (partitions) chosen is based on the I/O nodes in an automatic way. Otherwise, by default, the GenericIO library picks the number of subfiles based on a fairly-naive hostname-based hashing scheme. This works reasonably-well on small clusters, but not on larger systems. On a larger system, you might want to set these environmental variables: +**Using CMake** + +Note that the executables / libraries will be located in +``build/<frontend/mpi>``. CMake will use the compiler pointed to in the ``CC`` +and ``CXX`` environmental variables. ```bash - GENERICIO_PARTITIONS_USE_NAME=0 - GENERICIO_RANK_PARTITIONS=256 +mkdir build && cd build +cmake .. +make -j4 ``` -Where the number of partitions (256 above) determines the number of subfiles used. If you're using a Lustre file system, for example, an optimal number of files is: +**Using Make** -``` - # of files * stripe count ~ # OSTs +Make will create the executables / libraries under the main directory. Edit the +``CC``, ``CXX``, ``MPICC``, and ``MPICXX`` variables in the GNUmakefile to +change the compiler. + +```bash +make ``` -On Titan, for example, there are 1008 OSTs, and a default stripe count of 4, so we use approximately 256 files. 
+## Installing the Python Library -## Benchmarks +The `pygio` library is pip-installable and works with `mpi4py`. -Once you build the library and associated programs (using make), you can run, for example: +**Requirements** -```bash - $ mpirun -np 8 ./mpi/GenericIOBenchmarkWrite /tmp/out.gio 123456 2 - Wrote 9 variables to /tmp/out (4691036 bytes) in 0.2361s: 18.9484 MB/s -``` +Currently, a **CMake version >= 3.11.0** is required to fetch dependencies +during configuration. The ``pygio`` module also requires MPI libraries to be +findable by CMake's FindMPI. The compiler needs to support **C++17** (make sure +that ``CC`` and ``CXX`` point to the correct compiler). +**Install** + +The python library can be installed by running pip in the **main folder**: ```bash - $ mpirun -np 8 ./mpi/GenericIOBenchmarkRead /tmp/out.gio - Read 9 variables from /tmp/out (4688028 bytes) in 0.223067s: 20.0426 MB/s [excluding header read] +pip install . ``` -The read benchmark always reads all of the input data. The output benchmark takes two numerical parameters, one if the number of data rows to write, and the second is a random seed (which slightly perturbs the per-rank output sizes, but not by much). Each row is 36 bytes for these benchmarks. +It will use the compiler referred to by the ``CC`` and ``CXX`` environment +variables. If the compiler supports OpenMP, the library will be threaded. Make +sure to set ``OMP_NUM_THREADS`` to an appropriate value, in particular when +using multiple MPI ranks per node. -The write benchmark can be passed the -c parameter to enable output compression. Both benchmarks take an optional -a parameter to request that homogeneous aggregates (i.e. "float4") be used instead of using separate arrays for each position/velocity component. +----- -## Python module +## Output file partitions (subfiles) -The repository includes a genericio Python module that can read genericio-formatted files and return numpy arrays. This is included in the standard build. 
To use it, once you've built genericio, you can read genericio data as follows: +If you're running on an IBM BG/Q supercomputer, then the number of subfiles +(partitions) chosen is based on the I/O nodes in an automatic way. Otherwise, by +default, the GenericIO library picks the number of subfiles based on a +fairly-naive hostname-based hashing scheme. This works reasonably-well on small +clusters, but not on larger systems. On a larger system, you might want to set +these environmental variables: ```bash -$ export PYTHONPATH=${GENERICIO_DIR}/python -$ python ->>> import genericio ->>> genericio.gio_inspect('m000-99.fofproperties') -Number of Elements: 1691 -[data type] Variable name ---------------------------------------------- -[i 32] fof_halo_count -[i 64] fof_halo_tag -[f 32] fof_halo_mass -[f 32] fof_halo_mean_x -[f 32] fof_halo_mean_y -[f 32] fof_halo_mean_z -[f 32] fof_halo_mean_vx -[f 32] fof_halo_mean_vy -[f 32] fof_halo_mean_vz -[f 32] fof_halo_vel_disp - -(i=integer,f=floating point, number bits size) ->>> genericio.gio_read('m000-99.fofproperties','fof_halo_mass') -array([[ 4.58575588e+13], - [ 5.00464689e+13], - [ 5.07078771e+12], - ..., - [ 1.35221006e+13], - [ 5.29125710e+12], - [ 7.12849857e+12]], dtype=float32) - +GENERICIO_PARTITIONS_USE_NAME=0 +GENERICIO_RANK_PARTITIONS=256 ``` -## Alternative python module +Where the number of partitions (256 above) determines the number of subfiles +used. If you're using a Lustre file system, for example, an optimal number of +files is: + +``` +# of files * stripe count ~ # OSTs +``` -[Click here to go to the README for the alternative python interface](new_python/README.md) \ No newline at end of file +On Titan, for example, there are 1008 OSTs, and a default stripe count of 4, so +we use approximately 256 files. 
diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..d4bb2cbb9eddb1bb1b4f366623044af8e4830919 --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,20 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line, and also +# from the environment for the first two. +SPHINXOPTS ?= +SPHINXBUILD ?= sphinx-build +SOURCEDIR = . +BUILDDIR = _build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/_static/css/custom.css b/docs/_static/css/custom.css new file mode 100644 index 0000000000000000000000000000000000000000..00f75f79c4555994df593345c9662fb783b0941d --- /dev/null +++ b/docs/_static/css/custom.css @@ -0,0 +1,7 @@ +table.full-width { + width: 100%; +} + +table.full-width td { + white-space: normal !important; +} \ No newline at end of file diff --git a/docs/conf.py b/docs/conf.py new file mode 100644 index 0000000000000000000000000000000000000000..4c275948e1c2236455f250be80def14dda635486 --- /dev/null +++ b/docs/conf.py @@ -0,0 +1,100 @@ +# Configuration file for the Sphinx documentation builder. +# +# This file only contains a selection of the most common options. For a full +# list see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +# -- Path setup -------------------------------------------------------------- + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. 
+# +import os, sys, shutil, subprocess +import re +from pathlib import Path + +DIR = Path(__file__).parent.resolve() + + +# -- Project information ----------------------------------------------------- + +project = "GenericIO" +copyright = "2021, Hal Finkel, et al." +author = "Hal Finkel, et al." + + +# -- General configuration --------------------------------------------------- + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. +extensions = [ + "sphinx.ext.doctest", + "sphinx.ext.autodoc", + "sphinx.ext.napoleon", + "sphinx.ext.autodoc.typehints", + "sphinx.ext.autosummary", + "sphinx.ext.viewcode", + "sphinx.ext.autosectionlabel", + "myst_parser", +] + +autosectionlabel_prefix_document = True + +autodoc_typehints = "description" +add_module_names = False + +autosummary_generate = False +napoleon_numpy_docstring = True +napoleon_use_admonition_for_examples = True +napoleon_use_admonition_for_notes = True + +source_suffix = {".rst": "restructuredtext", ".md": "markdown"} + +# Add any paths that contain templates here, relative to this directory. +templates_path = ["_templates"] + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +# This pattern also affects html_static_path and html_extra_path. +exclude_patterns = ["_build", "Thumbs.db", ".DS_Store", "README.md"] + + +# -- Options for HTML output ------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +# +html_theme = "sphinx_rtd_theme" +html_theme_options = {"prev_next_buttons_location": None} + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". 
+html_static_path = ["_static"] + + +def prepare(app): + with open(DIR.parent / "README.md") as f: + contents = f.read() + + # Filter out section titles for index.rst for LaTeX + if app.builder.name == "latex": + contents = re.sub(r"^(.*)\n[-~]{3,}$", r"**\1**", contents, flags=re.MULTILINE) + + with open(DIR / "README.md", "w") as f: + f.write(contents) + + +def clean_up(app, exception): + (DIR / "README.md").unlink() + + +def setup(app): + app.add_css_file("css/custom.css") + # Copy the readme in + app.connect("builder-inited", prepare) + + # Clean up the generated readme + app.connect("build-finished", clean_up) diff --git a/docs/cpp/genericio.rst b/docs/cpp/genericio.rst new file mode 100644 index 0000000000000000000000000000000000000000..657d45253f345d3a99d72b532046df383a038707 --- /dev/null +++ b/docs/cpp/genericio.rst @@ -0,0 +1,2 @@ +GenericIO C++ Class +=================== \ No newline at end of file diff --git a/docs/cpp/library.rst b/docs/cpp/library.rst new file mode 100644 index 0000000000000000000000000000000000000000..a3f3d20c7a5f0cad8ece31cbb8cb1537584df7c4 --- /dev/null +++ b/docs/cpp/library.rst @@ -0,0 +1,80 @@ +GenericIO and CMake +=================== + + +Adding GenericIO as a dependency to a CMake C/C++ project is straightforward. +As an example, check out the `monofonIC <https://bitbucket.org/ohahn/monofonic>`_ +library which has GenericIO as an optional dependency. + +CMake >=3.11, <3.14 +------------------- + +If you're using ``CMake >= 3.11``, you can use the ``FetchContent`` routine to +download GenericIO at configure-time. Alternatively, you can include GenericIO as +a ``git`` submodule or directly copy the source code into the repository (not +recommended). + +Here is an example for a very basic ``CMakeLists.txt``: + +.. 
code-block:: cmake + + cmake_minimum_required(VERSION 3.11) + project(TestGenericIO CXX) + + # Load GenericIO + include(FetchContent) + FetchContent_Declare( + genericio + GIT_REPOSITORY https://git.cels.anl.gov/hacc/genericio.git + GIT_TAG master + GIT_SHALLOW YES + GIT_PROGRESS TRUE + USES_TERMINAL_DOWNLOAD TRUE # <---- this is needed only for Ninja + ) + + FetchContent_GetProperties(genericio) + if(NOT genericio_POPULATED) + set(FETCHCONTENT_QUIET OFF) + FetchContent_Populate(genericio) + add_subdirectory(${genericio_SOURCE_DIR} ${genericio_BINARY_DIR}) + endif() + + # Add an executable + add_executable(TestGenericIO test_genericio.cpp) + + # Link to GenericIO + target_link_libraries(TestGenericIO PRIVATE genericio::genericio_mpi) + +The last line will add the GenericIO headers to the include directories and +automatically link the library during compile time. If you want to compile your +program without the MPI library, you can link to the non-MPI version of +GenericIO: ``genericio::genericio``. If MPI is not available on your system, +then only the non-MPI version will be available as an option. + +CMake >= 3.14 +------------- + +With more recent CMake versions, the ``CMakeLists.txt`` file can be simplified + +.. 
code-block:: cmake + + cmake_minimum_required(VERSION 3.14) + project(TestGenericIO CXX) + + # Load GenericIO + include(FetchContent) + FetchContent_Declare( + genericio + GIT_REPOSITORY https://git.cels.anl.gov/hacc/genericio.git + GIT_TAG master + GIT_SHALLOW YES + GIT_PROGRESS TRUE + USES_TERMINAL_DOWNLOAD TRUE # <---- this is needed only for Ninja + ) + FetchContent_MakeAvailable(genericio) + + # Add an executable + add_executable(TestGenericIO test_genericio.cpp) + + # Link to GenericIO + target_link_libraries(TestGenericIO PRIVATE genericio::genericio_mpi) diff --git a/docs/environment.rst b/docs/environment.rst new file mode 100644 index 0000000000000000000000000000000000000000..7dc96caf4bbf3047d542271511c97d91eecff868 --- /dev/null +++ b/docs/environment.rst @@ -0,0 +1,43 @@ +Runtime Options +=============== + +Library / Executables +--------------------- + +``GENERICIO_RANK0_CREATE_ALL`` + +``GENERICIO_COMPRESS`` + +``GENERICIO_FORCE_BLOCKS`` + +``GENERICIO_RETRY_COUNT`` + +``GENERICIO_RETRY_SLEEP`` + +``GENERICIO_VERBOSE`` + +``GENERICIO_PARTITIONS_USE_NAME`` + +``GENERICIO_RANK_PARTITIONS`` + +Executables +----------- + +``GENERICIO_USE_MPIIO`` + +BLOSC +----- + +``BLOSC_CLEVEL`` + +``BLOSC_SHUFFLE`` + +``BLOSC_TYPESIZE`` + +``BLOSC_COMPRESSOR`` + +``BLOSC_BLOCKSIZE`` + +``BLOSC_NTHREADS`` + +``BLOSC_NOLOCK`` \ No newline at end of file diff --git a/docs/executables/benchmarks.rst b/docs/executables/benchmarks.rst new file mode 100644 index 0000000000000000000000000000000000000000..c1f017b34ce46c74a6b09d8c066d66ac3e345e43 --- /dev/null +++ b/docs/executables/benchmarks.rst @@ -0,0 +1,23 @@ +Benchmarks +========== + +Once you build the library and associated programs (using make), you can run, for example: + +.. code-block:: + + $ mpirun -np 8 ./mpi/GenericIOBenchmarkWrite /tmp/out.gio 123456 2 + Wrote 9 variables to /tmp/out (4691036 bytes) in 0.2361s: 18.9484 MB/s + +.. 
code-block:: + + $ mpirun -np 8 ./mpi/GenericIOBenchmarkRead /tmp/out.gio + Read 9 variables from /tmp/out (4688028 bytes) in 0.223067s: 20.0426 MB/s [excluding header read] + +The read benchmark always reads all of the input data. The output benchmark takes two numerical parameters, one is the +number of data rows to write, and the second is a random seed (which slightly perturbs the per-rank output sizes, but +not by much). Each row is 36 bytes for these benchmarks. + +The write benchmark can be passed the -c parameter to enable output compression. Both benchmarks take an optional -a +parameter to request that homogeneous aggregates (i.e. "float4") be used instead of using separate arrays for each +position/velocity component. + diff --git a/docs/executables/tools.rst b/docs/executables/tools.rst new file mode 100644 index 0000000000000000000000000000000000000000..c05f74fb9ba12aa5d41e0a6f315a1521f6ac7e90 --- /dev/null +++ b/docs/executables/tools.rst @@ -0,0 +1,93 @@ +Executables +=========== + +- ``frontend/GenericIOPrint`` print data to stdout (non-MPI version) +- ``frontend/GenericIOVerify`` verify and try reading data (non-MPI version) +- ``mpi/GenericIOPrint`` print data to stdout +- ``mpi/GenericIORewrite`` rewrite data with a different number of ranks +- ``mpi/GenericIOVerify`` verify and try reading data +- ``mpi/GenericIOBenchmarkRead`` reading benchmark, works on data written + with ``GenericIOBenchmarkWrite`` +- ``mpi/GenericIOBenchmarkWrite`` writing benchmark + +-------------------------------------------------------------------------------- + +.. _doc-GenericIOPrint: + +GenericIOPrint +-------------- + +inspect GenericIO file and print data to stdout + +.. code-block:: none + + Usage: GenericIOPrint [--no-rank-info|--no-data|--show-map] <filename> + + Options: + --no-rank-info don't print source rank information (coords and #rows) + --no-data don't print data + --show-map ??? 
+ +-------------------------------------------------------------------------------- + +.. _doc-GenericIOVerify: + +GenericIOVerify +--------------- + +verify and try reading data + +.. code-block:: none + + Usage: GenericIOVerify [-v] <filename1> [<filename2> ...] + + Options: + -v verbose + +-------------------------------------------------------------------------------- + +.. _doc-GenericIORewrite: + +GenericIORewrite +---------------- + +rewrite data with a different number of ranks + +.. code-block:: none + + Usage: GenericIORewrite <filenameOld> <filenameNew> + +-------------------------------------------------------------------------------- + +.. _doc-GenericIOBenchmarkRead: + +GenericIOBenchmarkRead +---------------------- + +reading benchmark, works on data written with ``GenericIOBenchmarkWrite`` + +.. code-block:: none + + Usage: GenericIOBenchmarkRead [-a] <filename> + + Options: + -a UseAOS (Array-Of-Structures for pos and vel) + +-------------------------------------------------------------------------------- + +.. _doc-GenericIOBenchmarkWrite: + +GenericIOBenchmarkWrite +----------------------- + +writing benchmark + +.. code-block:: none + + Usage: GenericIOBenchmarkWrite [-a] [-c] [-l] <filename> <NP> <seed> + + Options: + -a UseAOS (Array-Of-Structures for pos and vel) + -c compress data + -l UseLC (Lossy Compression) + diff --git a/docs/index.rst b/docs/index.rst new file mode 100644 index 0000000000000000000000000000000000000000..415b4c22dbbd1d74bb09767439a546caea1d9c59 --- /dev/null +++ b/docs/index.rst @@ -0,0 +1,45 @@ +.. GenericIO documentation master file, created by + sphinx-quickstart on Fri Dec 10 09:52:19 2021. + You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + +.. only:: latex + + ======================= + GenericIO Documentation + ======================= + +.. include:: README.md + :parser: myst_parser.sphinx_ + + +.. only:: latex + + .. 
toctree:: + :maxdepth: 3 + + self + + .. toctree:: + :caption: Python Interface + :maxdepth: 2 + + python/readwrite + python/class_interface + python/mpi + python/legacy_python + + .. toctree:: + :caption: C++ Interface + :maxdepth: 2 + + cpp/genericio + cpp/library + + .. toctree:: + :caption: Executables + :maxdepth: 1 + + executables/tools + executables/benchmarks + environment diff --git a/docs/make.bat b/docs/make.bat new file mode 100644 index 0000000000000000000000000000000000000000..2119f51099bf37e4fdb6071dce9f451ea44c62dd --- /dev/null +++ b/docs/make.bat @@ -0,0 +1,35 @@ +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=. +set BUILDDIR=_build + +if "%1" == "" goto help + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. + echo.If you don't have Sphinx installed, grab it from + echo.http://sphinx-doc.org/ + exit /b 1 +) + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% + +:end +popd diff --git a/docs/python/class_interface.rst b/docs/python/class_interface.rst new file mode 100644 index 0000000000000000000000000000000000000000..2aa0e32201fb05a2ddfccc41780caf263a413574 --- /dev/null +++ b/docs/python/class_interface.rst @@ -0,0 +1,42 @@ +The Python Class Interface +========================== +.. currentmodule:: pygio + +The :class:`PyGenericIO` python class directly interfaces the C++ ``GenericIO`` +class. Writing data is currently not supported through that interface (use +:func:`write_genericio` instead). 
Note that reading data works without manually +adding variables (:class:`pygio::PyGenericIO::VariableInfo`) to the class instance. +Instead, a list of variable names can be passed to :meth:`PyGenericIO.read`. + +.. code-block:: python + + # instantiate a GenericIO class + gio_file = pygio.PyGenericIO("generic_io_file") + + # inspect (prints to python stdout, also works in notebook) + gio_file.inspect() + + # get variables + gio_vars = gio_file.get_variables() + + # print variable names + for var in gio_vars: + print(var.name, var.size, var.element_size, var.is_float) + + # read data + data_all = gio_file.read() + data_partial = gio_file.read(["x", "y", "z"]) + +Further methods and members of GenericIO can easily be interfaced by editing +``python/genericio.cpp``. + + +References +---------- + +.. autoclass:: PyGenericIO + :members: + +.. autoclass:: pygio::PyGenericIO.VariableInfo + :members: + :undoc-members: \ No newline at end of file diff --git a/docs/python/legacy_python.rst b/docs/python/legacy_python.rst new file mode 100644 index 0000000000000000000000000000000000000000..4e9cf726e543de4d967a352c1d804ddda540882e --- /dev/null +++ b/docs/python/legacy_python.rst @@ -0,0 +1,46 @@ +Legacy python module +-------------------- + +.. warning:: + + This documentation is for the old python module. It is recommended to use the + newer, pybind11-based version. + + +The repository includes a genericio Python module that can read +genericio-formatted files and return numpy arrays. This is included in the +standard build. To use it, once you've built genericio, you can read genericio +data as follows: + +.. 
code-block:: bash + + $ export PYTHONPATH=${GENERICIO_DIR}/legacy_python + $ python + + +>>> import genericio +>>> genericio.gio_inspect('m000-99.fofproperties') +Number of Elements: 1691 +[data type] Variable name +--------------------------------------------- +[i 32] fof_halo_count +[i 64] fof_halo_tag +[f 32] fof_halo_mass +[f 32] fof_halo_mean_x +[f 32] fof_halo_mean_y +[f 32] fof_halo_mean_z +[f 32] fof_halo_mean_vx +[f 32] fof_halo_mean_vy +[f 32] fof_halo_mean_vz +[f 32] fof_halo_vel_disp +(i=integer,f=floating point, number bits size) + + +>>> genericio.gio_read('m000-99.fofproperties','fof_halo_mass') +array([[ 4.58575588e+13], + [ 5.00464689e+13], + [ 5.07078771e+12], + ..., + [ 1.35221006e+13], + [ 5.29125710e+12], + [ 7.12849857e+12]], dtype=float32) \ No newline at end of file diff --git a/docs/python/mpi.rst b/docs/python/mpi.rst new file mode 100644 index 0000000000000000000000000000000000000000..51a0e041666bdeebe3d79538c5a0d4bca943fb6c --- /dev/null +++ b/docs/python/mpi.rst @@ -0,0 +1,94 @@ +Using GenericIO with MPI +======================== + +The `pygio` library is working with mpi4py. Here is an example: + +.. code-block:: python + + from mpi4py import MPI + comm = MPI.COMM_WORLD + rank = comm.Get_rank() + ranks = comm.Get_size() + + import numpy as np + import pygio + + # read locally + data = pygio.read_genericio("generic_io_file") + # get local number of elements from the first element in dictionary + num_elems = len(next(iter(data.values()))) + # reduce total number of elements + num_elems_total = comm.allreduce(num_elems) + if rank == 0: + print(f"Reading file with {ranks} ranks") + print(f"Total number of particles: {num_elems_total}") + print("The data contains the following variables:") + for k, d in data.items(): + print(f"\t{k:5s}, dtype={d.dtype}") + + for i in range(ranks): + if i == rank: + print(f"rank {rank} read {num_elems} elements") + comm.Barrier() + + +It can be executed with ``mpirun`` or ``mpiexec``: + +.. 
code-block:: bash + + mpirun -n 8 python testpygio_mpi.py + +Here is an output for 1 and 8 ranks: + +.. code-block:: none + + # 1 Rank + Reading file with 1 ranks + Total number of particles: 562500 + The data contains the following variables: + id , dtype=int64 + vx , dtype=float32 + vy , dtype=float32 + vz , dtype=float32 + x , dtype=float32 + y , dtype=float32 + z , dtype=float32 + rank 0 read 562500 elements + + # 8 Ranks + Reading file with 8 ranks + Total number of particles: 562500 + The data contains the following variables: + id , dtype=int64 + vx , dtype=float32 + vy , dtype=float32 + vz , dtype=float32 + x , dtype=float32 + y , dtype=float32 + z , dtype=float32 + rank 0 read 70000 elements + rank 1 read 70000 elements + rank 2 read 70000 elements + rank 3 read 70000 elements + rank 4 read 70625 elements + rank 5 read 70625 elements + rank 6 read 70625 elements + rank 7 read 70625 elements + + +Importing the non-MPI version +----------------------------- + +Some compute facilities prevent the loading of MPI libraries on login nodes. In order +to still be able to use ``pygio`` on the login nodes, the non-MPI library +can be loaded by setting the `GENERICIO_NO_MPI` environment variable before +importing `pygio`: + +.. code-block:: python + + import os + os.environ['GENERICIO_NO_MPI'] = 'True' + import pygio + +A warning will be printed that the writing capabilities of genericio are not +available in this mode. \ No newline at end of file diff --git a/docs/python/readwrite.rst b/docs/python/readwrite.rst new file mode 100644 index 0000000000000000000000000000000000000000..d4176da57d1988d9d62bab4c1ea1f11062a3bbcc --- /dev/null +++ b/docs/python/readwrite.rst @@ -0,0 +1,126 @@ +Reading and Writing Data +======================== + +.. currentmodule:: pygio + +The ``pygio`` module contains a high-level functional interface for inspecting, +reading, and writing particle and halo data. 
In addition, there is a class-based +interface which mimics the C++ ``GenericIO`` class. More information about +:class:`PyGenericIO` can be found :doc:`here <./class_interface>`. + +.. note:: + + The GenericIO python library supports **MPI** with + `mpi4py <https://mpi4py.readthedocs.io/en/stable/>`_. Under MPI, each rank + will read a separate "chunk" of the file (if the file was written with + multiple ranks). The function :func:`read_num_elems` will return the number + of particles / halos that the local rank will read and can be different on + each rank. More information on how to use this library with MPI can be found + :doc:`here <./mpi>`. + +To get an overview of the data contained in a GenericIO file, use the +:func:`inspect_genericio` function, which lists all variables, data types and +number of elements (particles or halos) contained in the file. + +.. code-block:: python + + import pygio + + # inspect file + pygio.inspect_genericio("generic_io_file") + +Some additional inspection functions are: + +- :func:`read_num_elems` returns the number of elements the local rank will read +- :func:`read_total_num_elems` returns the total/global number of elements in the file +- :func:`read_variable_names` returns a list of variable names defined in the file +- :func:`read_variable_dtypes` returns a dictionary with ``variable_name -> numpy.dtype`` +- :func:`read_phys_scale` returns a list of 3 floats describing the box size +- :func:`read_phys_origin` returns a list of 3 floats describing the box origin + + +Reading GenericIO files +----------------------- + +Data contained in GenericIO files can be loaded into numpy arrays using the +:func:`read_genericio` function. By default, all variables stored in the +file will be loaded. If not all variables are needed, the memory footprint and +read speed can be improved by passing a list of variable names to the +function. + +.. code-block:: python + + # read all variables + data = pygio.read_genericio("generic_io_file") + +.. 
code-block:: python + + # read only a subset of variables + data_partial = pygio.read_genericio("generic_io_file", ["x", "y", "z"]) + data_x = data_partial["x"] + + +Writing GenericIO files +----------------------- + +Data arrays can be stored into GenericIO files using the :func:`write_genericio` +function. The data has to be a dictionary, with the variable name as key and the +corresponding one-dimensional numpy array as value. The numpy arrays have to +meet the following criteria: + +- 1 dimensional +- same length +- datatype has to be ``np.float32``, ``np.float64``, ``np.int32``, ``np.int64`` or + ``np.uint16`` + +In addition, one needs to specify the physical origin (default [0, 0, 0]) and +the physical box size as a list of 3 floats. + +.. code-block:: python + + data = { + "x": np.random.uniform(0, 1, 100).astype(np.float32), + "y": np.random.uniform(0, 1, 100).astype(np.float32), + "z": np.random.uniform(0, 1, 100).astype(np.float32) + } + + # write data to file + pygio.write_genericio("new_file.gio", + data, + phys_scale = [1, 1, 1], + phys_origin = [0, 0, 0] + ) + + +References +---------- + +.. autofunction:: inspect_genericio + +.. autofunction:: read_genericio + +.. autofunction:: read_num_elems + +.. autofunction:: read_total_num_elems + +.. autofunction:: read_variable_names + +.. autofunction:: read_variable_dtypes + +.. autofunction:: read_phys_scale + +.. autofunction:: read_phys_origin + +.. autofunction:: write_genericio + +.. autoclass:: FileIO + :noindex: + +.. autoclass:: MismatchBehavior + :noindex: + +.. autofunction:: setDefaultShouldCompress + +.. autofunction:: setNaturalDefaultPartition + +.. 
autofunction:: setCollectiveMPIIOThreshold diff --git a/python/example.py b/legacy_python/example.py similarity index 100% rename from python/example.py rename to legacy_python/example.py diff --git a/python/genericio.py b/legacy_python/genericio.py similarity index 100% rename from python/genericio.py rename to legacy_python/genericio.py diff --git a/python/lib/gio.cxx b/legacy_python/lib/gio.cxx similarity index 100% rename from python/lib/gio.cxx rename to legacy_python/lib/gio.cxx diff --git a/python/lib/gio.h b/legacy_python/lib/gio.h similarity index 100% rename from python/lib/gio.h rename to legacy_python/lib/gio.h diff --git a/new_python/CMakeLists.txt b/new_python/CMakeLists.txt deleted file mode 100644 index 78e047c3a4a52e35ed7a0240a6d7012f1690ef10..0000000000000000000000000000000000000000 --- a/new_python/CMakeLists.txt +++ /dev/null @@ -1,27 +0,0 @@ -cmake_minimum_required(VERSION 3.11) -set(CMAKE_CXX_STANDARD 17) - -include(FetchContent) -FetchContent_Declare( - pybind11 - GIT_REPOSITORY https://github.com/pybind/pybind11.git - GIT_TAG v2.6.1 -) - -FetchContent_GetProperties(pybind11) -if(NOT pybind11_POPULATED) - FetchContent_Populate(pybind11) - add_subdirectory(${pybind11_SOURCE_DIR} ${pybind11_BINARY_DIR}) -endif() - -# the mpi version -if(MPI_FOUND) -pybind11_add_module(pygio_new genericio.cpp) -set_target_properties(pygio_new PROPERTIES OUTPUT_NAME pygio) -target_link_libraries(pygio_new PRIVATE genericio_mpi) -endif() - -# The no-mpi version -pybind11_add_module(pygio_new_nompi genericio.cpp) -set_target_properties(pygio_new_nompi PROPERTIES OUTPUT_NAME pygio_nompi) -target_link_libraries(pygio_new_nompi PRIVATE genericio) \ No newline at end of file diff --git a/new_python/README.md b/new_python/README.md deleted file mode 100644 index b14a2ced5af8afab9f93e30cb85a18e8ce19bd1d..0000000000000000000000000000000000000000 --- a/new_python/README.md +++ /dev/null @@ -1,155 +0,0 @@ -# Python module - -This version of `pygio` is pip-installable and 
works with `mpi4py`. - -## Requirements - -Currently, a CMake version >= 3.11.0 is required to fetch dependencies during configuration. -The `pygio` module also requires MPI libraries to be findable by CMake's FindMPI. The compiler -needs to support C++17 (make sure that `CC` and `CXX` point to the correct compiler) - -## Install - -The python library can be installed by running pip in the **main folder**: -```bash -pip install . -``` -It will use the compiler referred by the `CC` and `CXX` environment variable. If the compiler -supports OpenMP, the library will be threaded. Make sure to set `OMP_NUM_THREADS` to an -appropriate variable, in particluar when using multiple MPI ranks per node. - -## Usage - -The library can then be imported in python. Here is a small example script: -```python -import numpy as np -import pygio - -# inspect file -pygio.inspect_genericio("generic_io_file") - -# read all variables -data = pygio.read_genericio("generic_io_file") - -# read only a subset of variables -data_partial = pygio.read_genericio("generic_io_file", ["x", "y", "z"]) -data_x = data_partial["x"] - -# write data to file -pygio.write_genericio("new_generic_io_file", - variables = {"x": np.ones(100), "y": np.ones(100)}, - phys_scale = [1, 1, 1], - phys_origin = [0, 0, 0], - method = PyGenericIO.FileIO.FileIOPOSIX - ) - - -### USING THE CLASS BASED INTERFACE ### - -# instantiate a GenericIO class -gio_file = pygio.PyGenericIO("generic_io_file") - -# inspect (prints to python stdout, also works in notebook) -gio_file.inspect() - -# get variables -gio_vars = gio_file.get_variables() - -# print variable names -for var in gio_vars: - print(var.name, var.size, var.element_size, var.is_float) - -# read data -data_all = gio_file.read() -data_partial = gio_file.read(["x", "y", "z"]) -``` - -Further methods and members of GenericIO can easly be interfaced by editing `python_new/genericio.cpp`. - - -### Using MPI -The `pygio` library is working with mpi4py. 
Here is an example file: -```python -from mpi4py import MPI -comm = MPI.COMM_WORLD -rank = comm.Get_rank() -ranks = comm.Get_size() - -import numpy as np -import pygio - -# read locally -data = pygio.read_genericio("generic_io_file") -# get local number of elements from the first element in dictionary -num_elems = len(next(iter(data.values()))) -# reduce total number of elements -num_elems_total = comm.allreduce(num_elems) -if rank == 0: - print(f"Reading file with {ranks} ranks") - print(f"Total number of particles: {num_elems_total}") - print("The data contains the following variables:") - for k, d in data.items(): - print(f"\t{k:5s}, dtype={d.dtype}") - -for i in range(ranks): - if i == rank: - print(f"rank {rank} read {num_elems} elements") - comm.Barrier() -``` - -It can be executed with `mpirun`: -```bash -mpirun -n 8 python testpygio_mpi.py -``` - -Here is an output for 1 and 8 ranks: -``` -Reading file with 1 ranks -Total number of particles: 562500 -The data contains the following variables: - id , dtype=int64 - vx , dtype=float32 - vy , dtype=float32 - vz , dtype=float32 - x , dtype=float32 - y , dtype=float32 - z , dtype=float32 -rank 0 read 562500 elements -``` - -``` -Reading file with 8 ranks -Total number of particles: 562500 -The data contains the following variables: - id , dtype=int64 - vx , dtype=float32 - vy , dtype=float32 - vz , dtype=float32 - x , dtype=float32 - y , dtype=float32 - z , dtype=float32 -rank 0 read 70000 elements -rank 1 read 70000 elements -rank 2 read 70000 elements -rank 3 read 70000 elements -rank 4 read 70625 elements -rank 5 read 70625 elements -rank 6 read 70625 elements -rank 7 read 70625 elements -``` - -### Force the non-MPI version to be imported - -Some clusters prevent the loading of MPI libraries on the login nodes. 
In order -to still be able to use the same `pygio` on the login nodes, the non-MPI library -can be loaded by setting the `GENERICIO_NO_MPI` environment variable before -importing `pygio`: - -```python -import os -os.environ['GENERICIO_NO_MPI'] = 'True' -import pygio -``` - -A warning will be printed that the writing capabilities of genericio are not -available in this mode. \ No newline at end of file diff --git a/new_python/genericio.cpp b/new_python/genericio.cpp deleted file mode 100644 index 8719477a9dbf27d70df43db99cf430fb2eba6951..0000000000000000000000000000000000000000 --- a/new_python/genericio.cpp +++ /dev/null @@ -1,302 +0,0 @@ -#include "GenericIO.h" -#include <pybind11/pybind11.h> -#include <pybind11/numpy.h> -#include <pybind11/stl.h> -#include <sstream> -#include <string> -#include <vector> -#include <map> -#include <cstdint> -#include <optional> - -#ifndef GENERICIO_NO_MPI -#include <mpi.h> -#endif - -namespace py = pybind11; - -class PyGenericIO : public gio::GenericIO { -public: - PyGenericIO(const std::string& filename, gio::GenericIO::FileIO method=gio::GenericIO::FileIOPOSIX, gio::GenericIO::MismatchBehavior redistribute=gio::GenericIO::MismatchRedistribute) -#ifdef GENERICIO_NO_MPI - : gio::GenericIO(filename, method), num_ranks(0) { -#else - : gio::GenericIO(MPI_COMM_WORLD, filename, method), num_ranks(0) { -#endif - // open headers and rank info - openAndReadHeader(redistribute); - num_ranks = readNRanks(); - // read variable info - getVariableInfo(variables); - } - - void inspect() { - int rank; - #ifdef GENERICIO_NO_MPI - rank = 0; - #else - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - #endif - if(rank == 0) { - std::stringstream s; - s << "Number of Elements: " << readNumElems() << "\n"; - s << "Total number of Elements: " << readTotalNumElems() << "\n"; - s << "[data type] Variable name\n"; - s << "---------------------------------------------\n"; - for (int i = 0; i < variables.size(); ++i) { - gio::GenericIO::VariableInfo vinfo = 
variables[i]; - if (vinfo.IsFloat) - s << "[f"; - else - s << "[i"; - int NumElements = vinfo.Size / vinfo.ElementSize; - s << " " << vinfo.ElementSize * 8; - if (NumElements > 1) - s << "x" << NumElements; - s << "] "; - s << vinfo.Name << "\n"; - } - s << "\n(i=integer,f=floating point, number bits size)\n"; - py::print(s.str()); - } - } - - std::map<std::string, py::array> read( - std::optional<std::vector<std::string>> var_names, - bool print_stats=true, - bool collective_stats=true - ) { - // read number of elements - int64_t num_elem = readNumElems(); - - // if no argument, read all - if(!var_names.has_value()) { - var_names.emplace(std::vector<std::string>()); - for(const auto& v: variables) { - var_names->push_back(v.Name); - } - } - - clearVariables(); - std::map<std::string, py::array> result; - - for(const std::string& var_name: *var_names) { - auto varp = std::find_if( - variables.begin(), - variables.end(), - [&var_name](const auto& v){ return v.Name == var_name; } - ); - if (varp != variables.end()) { - // extra space - py::ssize_t readsize = num_elem + requestedExtraSpace()/(*varp).ElementSize; - if((*varp).IsFloat && (*varp).ElementSize == 4) { - result[var_name] = py::array_t<float>(readsize); - addVariable(*varp, result[var_name].mutable_data(), gio::GenericIO::VarHasExtraSpace); - } else if((*varp).IsFloat && (*varp).ElementSize == 8) { - result[var_name] = py::array_t<double>(readsize); - addVariable(*varp, result[var_name].mutable_data(), gio::GenericIO::VarHasExtraSpace); - } else if(!(*varp).IsFloat && (*varp).ElementSize == 4) { - result[var_name] = py::array_t<int32_t>(readsize); - addVariable(*varp, result[var_name].mutable_data(), gio::GenericIO::VarHasExtraSpace); - } else if(!(*varp).IsFloat && (*varp).ElementSize == 8) { - result[var_name] = py::array_t<int64_t>(readsize); - addVariable(*varp, result[var_name].mutable_data(), gio::GenericIO::VarHasExtraSpace); - } else if(!(*varp).IsFloat && (*varp).ElementSize == 2) { - 
result[var_name] = py::array_t<uint16_t>(readsize); - addVariable(*varp, result[var_name].mutable_data(), gio::GenericIO::VarHasExtraSpace); - } - } - } - - readData(-1, print_stats, collective_stats); - #ifndef GENERICIO_NO_MPI - MPI_Barrier(MPI_COMM_WORLD); - #endif - - // get rid of extraspace - std::for_each(result.begin(), result.end(), [&](auto& item){ item.second.resize({num_elem}); }); - - return result; - } - - const std::vector<gio::GenericIO::VariableInfo> &get_variables() { - return variables; - } - - std::array<double, 3> read_phys_origin() { - std::array<double, 3> origin; - readPhysOrigin(origin.data()); - return origin; - } - - std::array<double, 3> read_phys_scale() { - std::array<double, 3> scale; - readPhysScale(scale.data()); - return scale; - } - -private: - int num_ranks; - std::vector<gio::GenericIO::VariableInfo> variables; -}; - -std::map<std::string, py::array> read_genericio( - std::string filename, - std::optional<std::vector<std::string>> var_names, - PyGenericIO::FileIO method=PyGenericIO::FileIO::FileIOPOSIX, - PyGenericIO::MismatchBehavior redistribute=PyGenericIO::MismatchBehavior::MismatchRedistribute, - bool print_stats=true, - bool collective_stats=true, - bool rebalance_source_ranks=false - ) { - PyGenericIO reader(filename, method, redistribute); -#ifndef GENERICIO_NO_MPI - if(rebalance_source_ranks) - reader.rebalanceSourceRanks(); -#endif - return reader.read(var_names, print_stats, collective_stats); -} - -void inspect_genericio( - std::string filename, - PyGenericIO::FileIO method=PyGenericIO::FileIO::FileIOPOSIX, - PyGenericIO::MismatchBehavior redistribute=PyGenericIO::MismatchBehavior::MismatchRedistribute - ) { - PyGenericIO reader(filename, method, redistribute); - reader.inspect(); -} - -#ifndef GENERICIO_NO_MPI -void write_genericio( - std::string filename, - std::map<std::string, py::array> variables, - std::array<double, 3> phys_scale, std::array<double, 3> phys_origin, - PyGenericIO::FileIO 
method=PyGenericIO::FileIO::FileIOPOSIX - ) { - // check data integrity, find particle count - int64_t particle_count = -1; - for(auto const& [name, data]: variables) { - if(data.ndim() != 1) { - throw std::runtime_error("dimension of array must be 1 (" + name + ")"); - } - if(particle_count == -1) { - particle_count = data.size(); - } else if(particle_count != data.size()) { - throw std::runtime_error("arrays do not have same length (" + name + ")"); - } - } - - gio::GenericIO writer(MPI_COMM_WORLD, filename, method); - - writer.setNumElems(particle_count); - - // set size - for (int d = 0; d < 3; ++d) { - writer.setPhysOrigin(phys_origin[d], d); - writer.setPhysScale(phys_scale[d], d); - } - - for(auto& [name, data]: variables) { - if(py::isinstance<py::array_t<float>>(data)) - writer.addVariable(name.c_str(), reinterpret_cast<float*>(data.mutable_data())); - else if(py::isinstance<py::array_t<double>>(data)) - writer.addVariable(name.c_str(), reinterpret_cast<double*>(data.mutable_data())); - else if(py::isinstance<py::array_t<int32_t>>(data)) - writer.addVariable(name.c_str(), reinterpret_cast<int32_t*>(data.mutable_data())); - else if(py::isinstance<py::array_t<int64_t>>(data)) - writer.addVariable(name.c_str(), reinterpret_cast<int64_t*>(data.mutable_data())); - else if(py::isinstance<py::array_t<uint16_t>>(data)) - writer.addVariable(name.c_str(), reinterpret_cast<uint16_t*>(data.mutable_data())); - else - throw std::runtime_error("array dtype not supported for " + name); - } - writer.write(); - MPI_Barrier(MPI_COMM_WORLD); - -} -#endif - - -#ifdef GENERICIO_NO_MPI -PYBIND11_MODULE(pygio_nompi, m) { - m.doc() = "genericio python module (no MPI support)"; -#else // GENERICIO_NO_MPI -PYBIND11_MODULE(pygio, m) { - m.doc() = "genericio python module (with MPI support)"; - m.def("_init_mpi", [](){ - int initialized; - MPI_Initialized(&initialized); - if(!initialized) { - int level_provided; - MPI_Init_thread(nullptr, nullptr, MPI_THREAD_SINGLE, &level_provided); 
- } - }); -#endif - - py::class_<PyGenericIO> pyGenericIO(m, "PyGenericIO"); - - py::enum_<PyGenericIO::FileIO>(pyGenericIO, "FileIO") - .value("FileIOMPI", PyGenericIO::FileIO::FileIOMPI) - .value("FileIOPOSIX", PyGenericIO::FileIO::FileIOPOSIX) - .value("FileIOMPICollective", PyGenericIO::FileIO::FileIOMPICollective); - - py::enum_<PyGenericIO::MismatchBehavior>(pyGenericIO, "MismatchBehavior") - .value("MismatchAllowed", PyGenericIO::MismatchBehavior::MismatchAllowed) - .value("MismatchDisallowed", PyGenericIO::MismatchBehavior::MismatchDisallowed) - .value("MismatchRedistribute", PyGenericIO::MismatchBehavior::MismatchRedistribute); - - pyGenericIO.def( - py::init<std::string, PyGenericIO::FileIO, PyGenericIO::MismatchBehavior>(), - py::arg("filename"), - py::arg("method")=PyGenericIO::FileIO::FileIOPOSIX, - py::arg("redistribute")=PyGenericIO::MismatchBehavior::MismatchRedistribute) - .def("inspect", &PyGenericIO::inspect, "Print variable infos and size of GenericIO file") - .def("get_variables", &PyGenericIO::get_variables, "Get a list of VariableInformations defined in the GenericIO file") - .def("read_num_elems", (size_t (PyGenericIO::*)(int))(&PyGenericIO::readNumElems), py::arg("eff_rank")=-1) - .def("read_total_num_elems", (uint64_t (PyGenericIO::*)(void))(&PyGenericIO::readTotalNumElems)) - .def("read_phys_origin", &PyGenericIO::read_phys_origin) - .def("read_phys_scale", &PyGenericIO::read_phys_scale) - .def("read", &PyGenericIO::read, py::arg("variables")=nullptr, py::arg("print_stats")=true, py::arg("collective_stats")=true) - .def("get_source_ranks", &PyGenericIO::getSourceRanks) -#ifndef GENERICIO_NO_MPI - .def("rebalance_source_ranks", &PyGenericIO::rebalanceSourceRanks) -#endif - ; - - py::class_<gio::GenericIO::VariableInfo>(pyGenericIO, "VariableInfo") - .def_readonly("name", &gio::GenericIO::VariableInfo::Name) - .def_readonly("size", &gio::GenericIO::VariableInfo::Size) - .def_readonly("element_size", 
&gio::GenericIO::VariableInfo::ElementSize) - .def_readonly("is_float", &gio::GenericIO::VariableInfo::IsFloat) - .def("__repr__", [](const gio::GenericIO::VariableInfo &vi) { - return std::string("<PyGenericIO.VariableInfo type=") + - (vi.IsFloat ? "float" : "int") + " name='" + vi.Name + "'>"; - }); - - m.def("read_genericio", &read_genericio, - py::arg("filename"), - py::arg("variables")=nullptr, - py::kw_only(), - py::arg("method")=PyGenericIO::FileIO::FileIOPOSIX, - py::arg("redistribute")=PyGenericIO::MismatchBehavior::MismatchRedistribute, - py::arg("print_stats")=true, - py::arg("collective_stats")=true, - py::arg("rebalance_sourceranks")=false, - py::return_value_policy::move); - - m.def("inspect_genericio", &inspect_genericio, - py::arg("filename"), - py::kw_only(), - py::arg("method")=PyGenericIO::FileIO::FileIOPOSIX, - py::arg("redistribute")=PyGenericIO::MismatchBehavior::MismatchRedistribute); - -#ifndef GENERICIO_NO_MPI - m.def("write_genericio", &write_genericio, - py::arg("filename"), - py::arg("variables"), - py::arg("phys_scale"), - py::arg("phys_origin") = std::array<double, 3>({0., 0., 0.}), - py::kw_only(), - py::arg("method")=PyGenericIO::FileIO::FileIOPOSIX); -#endif -} diff --git a/new_python/pygio/__init__.py b/new_python/pygio/__init__.py deleted file mode 100644 index 17ea2496000907a2e0df98cf6f9f13f8129b320b..0000000000000000000000000000000000000000 --- a/new_python/pygio/__init__.py +++ /dev/null @@ -1,20 +0,0 @@ -from __future__ import print_function -import os - -_GENERICIO_NO_MPI = False -if 'GENERICIO_NO_MPI' in os.environ: - _GENERICIO_NO_MPI = os.environ['GENERICIO_NO_MPI'].lower() in ['true', 'yes', 'y'] - -if _GENERICIO_NO_MPI: - print("WARNING: the pygio module without MPI support has been loaded (due to the GENERICIO_NO_MPI env variable). Writing GenericIO files not supported.") - from .pygio_nompi import * - -else: - # try to load the MPI library (or the no-mpi library, in case of missing MPI during compilation) - from . 
import pygio as _pygio - try: - _pygio._init_mpi() - except: - print("WARNING: The pygio module has been compiled without MPI support. Writing GenericIO files not supported.") - - from .pygio import * \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000000000000000000000000000000000000..346bbadf545bc295c39bb44972d998de354e6a8b --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,7 @@ +[build-system] +requires = [ + "setuptools>=42", + "wheel", + "cmake>=3.11", +] +build-backend = "setuptools.build_meta" \ No newline at end of file diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..99f05bcbc6a0c3ee2a179c826b2df7f4cbfa447d --- /dev/null +++ b/python/CMakeLists.txt @@ -0,0 +1,25 @@ +cmake_minimum_required(VERSION 3.11) +set(CMAKE_CXX_STANDARD 17) + +include(FetchContent) +FetchContent_Declare( + pybind11 + GIT_REPOSITORY https://github.com/pybind/pybind11.git + GIT_TAG v2.9.1 +) + +FetchContent_GetProperties(pybind11) +if(NOT pybind11_POPULATED) + FetchContent_Populate(pybind11) + add_subdirectory(${pybind11_SOURCE_DIR} ${pybind11_BINARY_DIR}) +endif() + +# the mpi version +if(MPI_FOUND) +pybind11_add_module(pygio genericio.cpp) +target_link_libraries(pygio PRIVATE genericio_mpi) +endif() + +# The no-mpi version +pybind11_add_module(pygio_nompi genericio.cpp) +target_link_libraries(pygio_nompi PRIVATE genericio) \ No newline at end of file diff --git a/python/genericio.cpp b/python/genericio.cpp new file mode 100644 index 0000000000000000000000000000000000000000..53df6f5502cec0c448fe6eb2b86e38315c10fc57 --- /dev/null +++ b/python/genericio.cpp @@ -0,0 +1,637 @@ +#include "GenericIO.h" +#include <pybind11/pybind11.h> +#include <pybind11/numpy.h> +#include <pybind11/stl.h> +#include <sstream> +#include <string> +#include <vector> +#include <map> +#include <cstdint> +#include <optional> + +#ifndef GENERICIO_NO_MPI +#include <mpi.h> +#endif + 
+namespace py = pybind11; + +class PyGenericIO : public gio::GenericIO { +public: + PyGenericIO( + const std::string& filename, + gio::GenericIO::FileIO method=gio::GenericIO::FileIOPOSIX, + gio::GenericIO::MismatchBehavior redistribute=gio::GenericIO::MismatchRedistribute, + int eff_rank = -1) +#ifdef GENERICIO_NO_MPI + : gio::GenericIO(filename, method), num_ranks(0) { +#else + : gio::GenericIO(MPI_COMM_WORLD, filename, method), num_ranks(0) { +#endif + // open headers and rank info + openAndReadHeader(redistribute, eff_rank); + num_ranks = readNRanks(); + // read variable info + getVariableInfo(variables); + } + + void inspect() { + int rank; + #ifdef GENERICIO_NO_MPI + rank = 0; + #else + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + #endif + if(rank == 0) { + std::stringstream s; + s << "Number of Elements: " << readNumElems() << "\n"; + s << "Total number of Elements: " << readTotalNumElems() << "\n"; + s << "[data type] Variable name\n"; + s << "---------------------------------------------\n"; + for (int i = 0; i < variables.size(); ++i) { + gio::GenericIO::VariableInfo vinfo = variables[i]; + if (vinfo.IsFloat) + s << "[f"; + else + s << "[i"; + int NumElements = vinfo.Size / vinfo.ElementSize; + s << " " << vinfo.ElementSize * 8; + if (NumElements > 1) + s << "x" << NumElements; + s << "] "; + s << vinfo.Name << "\n"; + } + s << "\n(i=integer,f=floating point, number bits size)\n"; + py::print(s.str()); + } + } + + std::vector<std::string> read_variable_names() { + std::vector<std::string> variable_names; + for(const auto& v: variables) { + variable_names.push_back(v.Name); + } + return variable_names; + } + + std::map<std::string, py::dtype> read_variable_dtypes() { + std::map<std::string, py::dtype> variable_dtypes; + + for(const auto& var: variables) { + auto var_name = var.Name; + if(var.IsFloat && var.ElementSize == 4) + variable_dtypes[var_name] = py::dtype("f4"); + else if(var.IsFloat && var.ElementSize == 8) + variable_dtypes[var_name] = 
py::dtype("f8"); + else if(!var.IsFloat && var.ElementSize == 4) + variable_dtypes[var_name] = py::dtype("i4"); + else if(!var.IsFloat && var.ElementSize == 8) + variable_dtypes[var_name] = py::dtype("i8"); + else if(!var.IsFloat && var.ElementSize == 2) + variable_dtypes[var_name] = py::dtype("u2"); + else + throw std::runtime_error(std::string("Unknown data type in GenericIO for variable ") + var_name); + } + return variable_dtypes; + } + + std::map<std::string, py::array> read( + std::optional<std::vector<std::string>> var_names, + bool print_stats=true, + bool collective_stats=true, + int eff_rank=-1 + ) { + int rank; + #ifdef GENERICIO_NO_MPI + rank = 0; + #else + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + #endif + + // read number of elements + int64_t num_elem = readNumElems(eff_rank); + + // if no argument, read all + if(!var_names.has_value()) { + var_names.emplace(read_variable_names()); + } + + clearVariables(); + std::map<std::string, py::array> result; + + for(const std::string& var_name: *var_names) { + auto varp = std::find_if( + variables.begin(), + variables.end(), + [&var_name](const auto& v){ return v.Name == var_name; } + ); + if (varp != variables.end()) { + // extra space + py::ssize_t readsize = num_elem + requestedExtraSpace()/(*varp).ElementSize; + if((*varp).IsFloat && (*varp).ElementSize == 4) { + result[var_name] = py::array_t<float>(readsize); + addVariable(*varp, result[var_name].mutable_data(), gio::GenericIO::VarHasExtraSpace); + } else if((*varp).IsFloat && (*varp).ElementSize == 8) { + result[var_name] = py::array_t<double>(readsize); + addVariable(*varp, result[var_name].mutable_data(), gio::GenericIO::VarHasExtraSpace); + } else if(!(*varp).IsFloat && (*varp).ElementSize == 4) { + result[var_name] = py::array_t<int32_t>(readsize); + addVariable(*varp, result[var_name].mutable_data(), gio::GenericIO::VarHasExtraSpace); + } else if(!(*varp).IsFloat && (*varp).ElementSize == 8) { + result[var_name] = py::array_t<int64_t>(readsize); + 
addVariable(*varp, result[var_name].mutable_data(), gio::GenericIO::VarHasExtraSpace); + } else if(!(*varp).IsFloat && (*varp).ElementSize == 2) { + result[var_name] = py::array_t<uint16_t>(readsize); + addVariable(*varp, result[var_name].mutable_data(), gio::GenericIO::VarHasExtraSpace); + } else { + throw std::runtime_error(std::string("Unknown data type in GenericIO for variable ") + var_name); + } + } + } + + readData(eff_rank, print_stats, collective_stats); + clearVariables(); + + #ifndef GENERICIO_NO_MPI + MPI_Barrier(MPI_COMM_WORLD); + #endif + + // get rid of extraspace + std::for_each(result.begin(), result.end(), [&](auto& item){ item.second.resize({num_elem}); }); + + return result; + } + + const std::vector<VariableInfo> &get_variables() { + return variables; + } + + std::array<double, 3> read_phys_origin() { + std::array<double, 3> origin; + readPhysOrigin(origin.data()); + return origin; + } + + std::array<double, 3> read_phys_scale() { + std::array<double, 3> scale; + readPhysScale(scale.data()); + return scale; + } + + std::vector<int> get_source_ranks() { + std::vector<int> sr; + getSourceRanks(sr); + return sr; + } + +private: + int num_ranks; + std::vector<VariableInfo> variables; +}; + +std::map<std::string, py::array> read_genericio( + std::string filename, + std::optional<std::vector<std::string>> var_names, + PyGenericIO::FileIO method=PyGenericIO::FileIO::FileIOPOSIX, + PyGenericIO::MismatchBehavior redistribute=PyGenericIO::MismatchBehavior::MismatchRedistribute, + bool print_stats=true, + bool collective_stats=true, + bool rebalance_source_ranks=false, + int eff_rank=-1 + ) { + PyGenericIO reader(filename, method, redistribute, eff_rank); +#ifndef GENERICIO_NO_MPI + if(rebalance_source_ranks) + reader.rebalanceSourceRanks(); +#endif + return reader.read(var_names, print_stats, collective_stats, eff_rank); +} + +void inspect_genericio( + std::string filename, + PyGenericIO::FileIO method=PyGenericIO::FileIO::FileIOPOSIX, + 
PyGenericIO::MismatchBehavior redistribute=PyGenericIO::MismatchBehavior::MismatchRedistribute + ) { + PyGenericIO reader(filename, method, redistribute); + reader.inspect(); +} + +std::array<double, 3> read_phys_scale( + std::string filename, + PyGenericIO::FileIO method=PyGenericIO::FileIO::FileIOPOSIX, + PyGenericIO::MismatchBehavior redistribute=PyGenericIO::MismatchBehavior::MismatchRedistribute +) { + PyGenericIO reader(filename, method, redistribute); + return reader.read_phys_scale(); +} + +std::array<double, 3> read_phys_origin( + std::string filename, + PyGenericIO::FileIO method=PyGenericIO::FileIO::FileIOPOSIX, + PyGenericIO::MismatchBehavior redistribute=PyGenericIO::MismatchBehavior::MismatchRedistribute +) { + PyGenericIO reader(filename, method, redistribute); + return reader.read_phys_origin(); +} + +std::vector<std::string> read_variable_names( + std::string filename, + PyGenericIO::FileIO method=PyGenericIO::FileIO::FileIOPOSIX, + PyGenericIO::MismatchBehavior redistribute=PyGenericIO::MismatchBehavior::MismatchRedistribute +) { + PyGenericIO reader(filename, method, redistribute); + return reader.read_variable_names(); +} + +std::map<std::string, py::dtype> read_variable_dtypes( + std::string filename, + PyGenericIO::FileIO method=PyGenericIO::FileIO::FileIOPOSIX, + PyGenericIO::MismatchBehavior redistribute=PyGenericIO::MismatchBehavior::MismatchRedistribute +) { + PyGenericIO reader(filename, method, redistribute); + return reader.read_variable_dtypes(); +} + +int64_t read_num_elems( + std::string filename, + PyGenericIO::FileIO method=PyGenericIO::FileIO::FileIOPOSIX, + PyGenericIO::MismatchBehavior redistribute=PyGenericIO::MismatchBehavior::MismatchRedistribute +) { + PyGenericIO reader(filename, method, redistribute); + return reader.readNumElems(); +} + +int64_t read_total_num_elems( + std::string filename, + PyGenericIO::FileIO method=PyGenericIO::FileIO::FileIOPOSIX, + PyGenericIO::MismatchBehavior 
redistribute=PyGenericIO::MismatchBehavior::MismatchRedistribute +) { + PyGenericIO reader(filename, method, redistribute); + return reader.readTotalNumElems(); +} + +#ifndef GENERICIO_NO_MPI +void write_genericio( + std::string filename, + std::map<std::string, py::array> variables, + std::array<double, 3> phys_scale, std::array<double, 3> phys_origin, + PyGenericIO::FileIO method=PyGenericIO::FileIO::FileIOPOSIX + ) { + // check data integrity, find particle count + int64_t particle_count = -1; + for(auto const& [name, data]: variables) { + if(data.ndim() != 1) { + throw std::runtime_error("dimension of array must be 1 (" + name + ")"); + } + if(particle_count == -1) { + particle_count = data.size(); + } else if(particle_count != data.size()) { + throw std::runtime_error("arrays do not have same length (" + name + ")"); + } + } + + gio::GenericIO writer(MPI_COMM_WORLD, filename, method); + + writer.setNumElems(particle_count); + + // set size + for (int d = 0; d < 3; ++d) { + writer.setPhysOrigin(phys_origin[d], d); + writer.setPhysScale(phys_scale[d], d); + } + + for(auto& [name, data]: variables) { + if(py::isinstance<py::array_t<float>>(data)) + writer.addVariable(name.c_str(), reinterpret_cast<float*>(data.mutable_data())); + else if(py::isinstance<py::array_t<double>>(data)) + writer.addVariable(name.c_str(), reinterpret_cast<double*>(data.mutable_data())); + else if(py::isinstance<py::array_t<int32_t>>(data)) + writer.addVariable(name.c_str(), reinterpret_cast<int32_t*>(data.mutable_data())); + else if(py::isinstance<py::array_t<int64_t>>(data)) + writer.addVariable(name.c_str(), reinterpret_cast<int64_t*>(data.mutable_data())); + else if(py::isinstance<py::array_t<uint16_t>>(data)) + writer.addVariable(name.c_str(), reinterpret_cast<uint16_t*>(data.mutable_data())); + else + throw std::runtime_error("array dtype not supported for " + name); + } + writer.write(); + MPI_Barrier(MPI_COMM_WORLD); + +} +#endif + + +#ifdef GENERICIO_NO_MPI 
+PYBIND11_MODULE(pygio_nompi, m) { + m.doc() = "genericio python module (no MPI support)"; +#else // GENERICIO_NO_MPI +PYBIND11_MODULE(pygio, m) { + m.doc() = "genericio python module (with MPI support)"; + m.def("_init_mpi", [](){ + int initialized; + MPI_Initialized(&initialized); + if(!initialized) { + int level_provided; + MPI_Init_thread(nullptr, nullptr, MPI_THREAD_SINGLE, &level_provided); + } + }); +#endif + + py::class_<PyGenericIO> pyGenericIO(m, "PyGenericIO"); + + // ENUMS + py::enum_<PyGenericIO::FileIO>(pyGenericIO, "FileIO") + .value("FileIOMPI", PyGenericIO::FileIO::FileIOMPI) + .value("FileIOPOSIX", PyGenericIO::FileIO::FileIOPOSIX) + .value("FileIOMPICollective", PyGenericIO::FileIO::FileIOMPICollective); + + py::enum_<PyGenericIO::MismatchBehavior>(pyGenericIO, "MismatchBehavior") + .value("MismatchAllowed", PyGenericIO::MismatchBehavior::MismatchAllowed) + .value("MismatchDisallowed", PyGenericIO::MismatchBehavior::MismatchDisallowed) + .value("MismatchRedistribute", PyGenericIO::MismatchBehavior::MismatchRedistribute); + + pyGenericIO.def( + py::init<std::string, PyGenericIO::FileIO, PyGenericIO::MismatchBehavior>(), + py::arg("filename"), + py::arg("method")=PyGenericIO::FileIO::FileIOPOSIX, + py::arg("redistribute")=PyGenericIO::MismatchBehavior::MismatchRedistribute) + .def("inspect", &PyGenericIO::inspect, "Print variable infos and size of GenericIO file") + .def("get_variables", &PyGenericIO::get_variables, "Get a list of VariableInformations defined in the GenericIO file") + .def("read_num_elems", (size_t (PyGenericIO::*)(int))(&PyGenericIO::readNumElems), py::arg("eff_rank")=-1) + .def("read_total_num_elems", (uint64_t (PyGenericIO::*)(void))(&PyGenericIO::readTotalNumElems)) + .def("read_phys_origin", &PyGenericIO::read_phys_origin) + .def("read_phys_scale", &PyGenericIO::read_phys_scale) + .def("read", &PyGenericIO::read, + py::arg("variables")=nullptr, + py::kw_only(), + py::arg("print_stats")=true, + py::arg("collective_stats")=true, 
+ py::arg("eff_rank")=-1) + .def("read_nranks", (int (PyGenericIO::*)(void))(&PyGenericIO::readNRanks)) + .def("read_variable_names", &PyGenericIO::read_variable_names) + .def("read_variable_dtypes", &PyGenericIO::read_variable_dtypes) + .def("get_source_ranks", &PyGenericIO::get_source_ranks) + .def_static("setDefaultShouldCompress", &PyGenericIO::setDefaultShouldCompress) + .def_static("setNaturalDefaultPartition", &PyGenericIO::setNaturalDefaultPartition) + .def_static("setDefaultFileIOType", &PyGenericIO::setDefaultFileIOType) +#ifndef GENERICIO_NO_MPI + .def("rebalance_source_ranks", &PyGenericIO::rebalanceSourceRanks) + .def_static("setCollectiveMPIIOThreshold", &PyGenericIO::setCollectiveMPIIOThreshold) +#endif + ; + + py::class_<PyGenericIO::VariableInfo>(pyGenericIO, "VariableInfo") + .def_readonly("name", &gio::GenericIO::VariableInfo::Name) + .def_readonly("size", &gio::GenericIO::VariableInfo::Size) + .def_readonly("element_size", &gio::GenericIO::VariableInfo::ElementSize) + .def_readonly("is_float", &gio::GenericIO::VariableInfo::IsFloat) + .def("__repr__", [](const gio::GenericIO::VariableInfo &vi) { + return std::string("<PyGenericIO.VariableInfo type=") + + (vi.IsFloat ? "float" : "int") + " name='" + vi.Name + "'>"; + }); + + + m.def("read_genericio", &read_genericio, R"Delim( + Read data from a GenericIO file + + Parameters + ---------- + filename: str + path to the GenericIO file + + variables: List[str] + A list of variable names that should be read. 
If ``None``, all + variables contained in the file will be read + + method: PyGenericIO.FileIO + file handling method (POSIX/MPI) + + redistribute: PyGenericIO.MismatchBehavior + whether to allow mismatching ranks + + print_stats: bool + if ``True``, print throughput statistics after reading + + collective_stats: bool + if ``True``, aggregate statistics among reading ranks (if using MPI) + + rebalance_sourceranks: bool + if ``True``, the code will re-assign the file ranks to the reading + MPI ranks to equalize the data size each rank is reading. Only + relevant if using MPI and more ranks were used to write the file + than reading. + + Returns + ------- + data: Mapping[str, np.ndarray] + + )Delim", + py::arg("filename"), + py::arg("variables")=nullptr, + py::kw_only(), + py::arg("method")=PyGenericIO::FileIO::FileIOPOSIX, + py::arg("redistribute")=PyGenericIO::MismatchBehavior::MismatchRedistribute, + py::arg("print_stats")=true, + py::arg("collective_stats")=true, + py::arg("rebalance_sourceranks")=false, + py::arg("eff_rank")=-1, + py::return_value_policy::move); + + + m.def("inspect_genericio", &inspect_genericio, R"Delim( + Print a summary of variables and types defined in the GenericIO file + + Parameters + ---------- + filename: str + path to the GenericIO file + + method: PyGenericIO.FileIO + file handling method (POSIX/MPI) + + redistribute: PyGenericIO.MismatchBehavior + whether to allow mismatching ranks + )Delim", + py::arg("filename"), + py::kw_only(), + py::arg("method")=PyGenericIO::FileIO::FileIOPOSIX, + py::arg("redistribute")=PyGenericIO::MismatchBehavior::MismatchRedistribute); + + m.def("read_num_elems", &read_num_elems, R"Delim( + Read the (local) number of objects (the number of objects that would be + read by this rank when calling :func:`read_genericio`) + + Parameters + ---------- + filename: str + path to the GenericIO file + + method: PyGenericIO.FileIO + file handling method (POSIX/MPI) + + redistribute: PyGenericIO.MismatchBehavior + 
whether to allow mismatching ranks + + Returns + ------- + nlocal: int + the number of objects assigned to this rank + )Delim", + py::arg("filename"), + py::kw_only(), + py::arg("method")=PyGenericIO::FileIO::FileIOPOSIX, + py::arg("redistribute")=PyGenericIO::MismatchBehavior::MismatchRedistribute); + + m.def("read_total_num_elems", &read_total_num_elems, R"Delim( + Read the total number of objects (the number of objects that would be + read by all ranks combined when calling :func:`read_genericio`) + + Parameters + ---------- + filename: str + path to the GenericIO file + + method: PyGenericIO.FileIO + file handling method (POSIX/MPI) + + redistribute: PyGenericIO.MismatchBehavior + whether to allow mismatching ranks + + Returns + ------- + ntotal: int + the total number of objects stored in the GenericIO file + )Delim", + py::arg("filename"), + py::kw_only(), + py::arg("method")=PyGenericIO::FileIO::FileIOPOSIX, + py::arg("redistribute")=PyGenericIO::MismatchBehavior::MismatchRedistribute); + + m.def("read_variable_names", &read_variable_names, R"Delim( + Get a list of variable names stored in the GenericIO file + + Parameters + ---------- + filename: str + path to the GenericIO file + + method: PyGenericIO.FileIO + file handling method (POSIX/MPI) + + redistribute: PyGenericIO.MismatchBehavior + whether to allow mismatching ranks + + Returns + ------- + variable_names: List[str] + the list of variable names defined in the GenericIO file + )Delim", + py::arg("filename"), + py::kw_only(), + py::arg("method")=PyGenericIO::FileIO::FileIOPOSIX, + py::arg("redistribute")=PyGenericIO::MismatchBehavior::MismatchRedistribute); + + m.def("read_variable_dtypes", &read_variable_dtypes, R"Delim( + Get a dictionary of dtypes mapped to the variable names + + Parameters + ---------- + filename: str + path to the GenericIO file + + method: PyGenericIO.FileIO + file handling method (POSIX/MPI) + + redistribute: PyGenericIO.MismatchBehavior + whether to allow mismatching ranks + 
+ Returns + ------- + variable_dtypes: Mapping[str, np.dtype] + a map ``variable_name -> dtype`` for each variable in the GenericIO file + )Delim", + py::arg("filename"), + py::kw_only(), + py::arg("method")=PyGenericIO::FileIO::FileIOPOSIX, + py::arg("redistribute")=PyGenericIO::MismatchBehavior::MismatchRedistribute); + + m.def("read_phys_scale", &read_phys_scale, R"Delim( + Read the box size that is stored in the GenericIO file + + Parameters + ---------- + filename: str + path to the GenericIO file + + method: PyGenericIO.FileIO + file handling method (POSIX/MPI) + + redistribute: PyGenericIO.MismatchBehavior + whether to allow mismatching ranks + + Returns + ------- + phys_scale: List[float] + the box length for each dimension (3 elements long) + )Delim", + py::arg("filename"), + py::kw_only(), + py::arg("method")=PyGenericIO::FileIO::FileIOPOSIX, + py::arg("redistribute")=PyGenericIO::MismatchBehavior::MismatchRedistribute); + + m.def("read_phys_origin", &read_phys_origin, R"Delim( + Read the origin / reference point of the box that is stored in the GenericIO file + + Parameters + ---------- + filename: str + path to the GenericIO file + + method: PyGenericIO.FileIO + file handling method (POSIX/MPI) + + redistribute: PyGenericIO.MismatchBehavior + whether to allow mismatching ranks + + Returns + ------- + phys_origin: List[float] + the box origin coordinates (3 elements long) + )Delim", + py::arg("filename"), + py::kw_only(), + py::arg("method")=PyGenericIO::FileIO::FileIOPOSIX, + py::arg("redistribute")=PyGenericIO::MismatchBehavior::MismatchRedistribute); + + + +#ifndef GENERICIO_NO_MPI + m.def("write_genericio", &write_genericio, R"Delim( + Write data as a GenericIO file + + Parameters + ---------- + filename: str + path to the GenericIO file + + data: Mapping[str, np.ndarray] + a dictionary, with all items being 1-dimensional numpy arrays of + the same length. 
Currently, only float32, float64, int32, int64 and + uint16 data types are supported + + phys_scale: List[float] + the physical size of the box that the data belongs to (3 elements) + + phys_origin: List[float] + the origin coordinates of the box that the data belongs to (3 elements) + + method: PyGenericIO.FileIO + file handling method (POSIX/MPI) + )Delim", + py::arg("filename"), + py::arg("data"), + py::arg("phys_scale"), + py::arg("phys_origin") = std::array<double, 3>({0., 0., 0.}), + py::kw_only(), + py::arg("method")=PyGenericIO::FileIO::FileIOPOSIX); +#endif +} diff --git a/python/pygio/__init__.py b/python/pygio/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..ce4f4fbd7658223b197b333cd2b928aa52e0d4a9 --- /dev/null +++ b/python/pygio/__init__.py @@ -0,0 +1,40 @@ +from __future__ import print_function +import os + +_GENERICIO_NO_MPI = False +if "GENERICIO_NO_MPI" in os.environ: + _GENERICIO_NO_MPI = os.environ["GENERICIO_NO_MPI"].lower() in ["true", "yes", "y"] + +if _GENERICIO_NO_MPI: + print( + "WARNING: the pygio module without MPI support has been loaded (due to the GENERICIO_NO_MPI env variable). Writing GenericIO files not supported." + ) + from .pygio_nompi import * + +else: + # try to load the MPI library (or the no-mpi library, in case of missing MPI during compilation) + from . import pygio as _pygio + + try: + _pygio._init_mpi() + except: + print( + "WARNING: The pygio module has been compiled without MPI support. Writing GenericIO files not supported." 
+ ) + _GENERICIO_NO_MPI = True + from .pygio import * + + +# move some ENUMs and static functions up to the module namespace +FileIO = PyGenericIO.FileIO +MismatchBehavior = PyGenericIO.MismatchBehavior +setDefaultShouldCompress = PyGenericIO.setDefaultShouldCompress +setNaturalDefaultPartition = PyGenericIO.setNaturalDefaultPartition +setDefaultFileIOType = PyGenericIO.setDefaultFileIOType +if not _GENERICIO_NO_MPI: + setCollectiveMPIIOThreshold = PyGenericIO.setCollectiveMPIIOThreshold + +# sensible defaults? +if not _GENERICIO_NO_MPI: + setNaturalDefaultPartition() + setDefaultFileIOType(FileIO.FileIOMPICollective) diff --git a/setup.py b/setup.py index 17e13614bc39990d5a93b5fef4138573e2b846c0..c6f32433378e69e702c004c084655b3756e87518 100644 --- a/setup.py +++ b/setup.py @@ -3,15 +3,20 @@ import re import sys import subprocess import platform -#import versioneer + +# import versioneer from setuptools import setup, Extension from setuptools.command.build_ext import build_ext from distutils.version import LooseVersion +# for more info, check +# https://github.com/pybind/cmake_example +# TODO: update the CMakeBuild class + class CMakeExtension(Extension): - def __init__(self, name, sourcedir=''): + def __init__(self, name, sourcedir=""): Extension.__init__(self, name, sources=[]) self.sourcedir = os.path.abspath(sourcedir) @@ -19,64 +24,78 @@ class CMakeExtension(Extension): class CMakeBuild(build_ext): def run(self): try: - out = subprocess.check_output(['cmake', '--version']) + out = subprocess.check_output(["cmake", "--version"]) except OSError: - raise RuntimeError("CMake must be installed to build the following extensions: " + - ", ".join(e.name for e in self.extensions)) - cmake_version = LooseVersion(re.search(r'version\s*([\d.]+)', out.decode()).group(1)) - if cmake_version < '3.11.0': + raise RuntimeError( + "CMake must be installed to build the following extensions: " + + ", ".join(e.name for e in self.extensions) + ) + cmake_version = LooseVersion( + 
re.search(r"version\s*([\d.]+)", out.decode()).group(1) + ) + if cmake_version < "3.11.0": raise RuntimeError("CMake >= 3.11.0 is required") for ext in self.extensions: self.build_extension(ext) def build_extension(self, ext): extdir = os.path.abspath(os.path.dirname(self.get_ext_fullpath(ext.name))) - cmake_args = ['-DCMAKE_LIBRARY_OUTPUT_DIRECTORY=' + extdir, - '-DGENERICIO_NEW_PYTHON_LIBRARY=ON', - '-DGENERICIO_PYTHON_LIBRARY=OFF', - '-DGENERICIO_MPI_EXECUTABLES=OFF', - '-DGENERICIO_FRONTEND_EXECUTABLES=OFF', - '-DPYTHON_EXECUTABLE=' + sys.executable] - cfg = 'Debug' if self.debug else 'Release' - build_args = ['--config', cfg] + cmake_args = [ + "-DCMAKE_LIBRARY_OUTPUT_DIRECTORY=" + extdir, + "-DGENERICIO_PYTHON_LIBRARY=ON", + "-DGENERICIO_LEGACY_PYTHON_LIBRARY=OFF", + "-DGENERICIO_MPI_EXECUTABLES=OFF", + "-DGENERICIO_FRONTEND_EXECUTABLES=OFF", + "-DPYTHON_EXECUTABLE=" + sys.executable, + ] + cfg = "Debug" if self.debug else "Release" + build_args = ["--config", cfg] if platform.system() == "Windows": - cmake_args += ['-DCMAKE_LIBRARY_OUTPUT_DIRECTORY_{}={}'.format(cfg.upper(), extdir)] - if sys.maxsize > 2 ** 32: - cmake_args += ['-A', 'x64'] - build_args += ['--', '/m'] + cmake_args += [ + "-DCMAKE_LIBRARY_OUTPUT_DIRECTORY_{}={}".format(cfg.upper(), extdir) + ] + if sys.maxsize > 2**32: + cmake_args += ["-A", "x64"] + build_args += ["--", "/m"] else: - cmake_args += ['-DCMAKE_BUILD_TYPE=' + cfg] - build_args += ['--', '-j'] + cmake_args += ["-DCMAKE_BUILD_TYPE=" + cfg] + build_args += ["--", "-j"] env = os.environ.copy() - env['CXXFLAGS'] = '{} -DVERSION_INFO=\\"{}\\"'.format(env.get('CXXFLAGS', ''), self.distribution.get_version()) + env["CXXFLAGS"] = '{} -DVERSION_INFO=\\"{}\\"'.format( + env.get("CXXFLAGS", ""), self.distribution.get_version() + ) if not os.path.exists(self.build_temp): os.makedirs(self.build_temp) - subprocess.check_call(['cmake', ext.sourcedir] + cmake_args, cwd=self.build_temp, env=env) - subprocess.check_call(['cmake', '--build', '.'] 
+ build_args, cwd=self.build_temp) + subprocess.check_call( + ["cmake", ext.sourcedir] + cmake_args, cwd=self.build_temp, env=env + ) + subprocess.check_call( + ["cmake", "--build", "."] + build_args, cwd=self.build_temp + ) -#def get_cmdclass(): +# def get_cmdclass(): # cmdclass = versioneer.get_cmdclass() # cmdclass.update({"build_ext": CMakeBuild}) # return cmdclass setup( - name='pygio', - #version=versioneer.get_version(), + name="pygio", + # version=versioneer.get_version(), version=0.1, - author='', - author_email='', - ext_package='pygio', - packages=['pygio'], - package_dir={"": "new_python"}, - ext_modules=[CMakeExtension('pygio')], - #cmdclass=get_cmdclass(), + author="", + author_email="", + ext_package="pygio", + packages=["pygio"], + package_dir={"": "python"}, + ext_modules=[CMakeExtension("pygio")], + # cmdclass=get_cmdclass(), cmdclass={ - 'build_ext': CMakeBuild, + "build_ext": CMakeBuild, }, zip_safe=False, - install_requires=['numpy'] + install_requires=["numpy"], )