diff --git a/.gitignore b/.gitignore index 0a541248e8bb3ba7ef52bc330d8c15e0f87fcbda..d5a3234c96bb459e2f5bb7aaa90b92fa2b99cca9 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,7 @@ build/ mpi/ frontend/ *.o -python/genericio.pyc -new_python/pygio.egg-info -new_python/build \ No newline at end of file +*.pyc +*.egg-info +.vscode +docs/_build \ No newline at end of file diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index cdb07eb61028cedf4b8de7d2b4eda4c1a52a4bbf..e919cbf73c64ca8013673de983e712932bff0b90 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -33,10 +33,3 @@ build_python: paths: - .cache/pip - venv/ - - -# run tests using the binary built before -# test: -# stage: test -# script: -# - ./runmytests.sh diff --git a/CMakeLists.txt b/CMakeLists.txt index d9b1f0cac2f2f8bc0acac79813af4512dad4d374..fc55603e5761cb8dd2d19f1d6cc57c395f1fa195 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -28,13 +28,13 @@ else() set(GENERICIO_MPI_EXECUTABLES OFF) endif() option(GENERICIO_FRONTEND_EXECUTABLES "build frontend executables?" ${GENERICIO_MASTER_PROJECT}) -option(GENERICIO_PYTHON_LIBRARY "build python library?" ${GENERICIO_MASTER_PROJECT}) +option(GENERICIO_LEGACY_PYTHON_LIBRARY "build legacy python library?" ${GENERICIO_MASTER_PROJECT}) # new python target: only if cmake at least version 3.11 if(NOT (CMAKE_VERSION VERSION_LESS 3.11.0)) - option(GENERICIO_NEW_PYTHON_LIBRARY "build (new) python library with pybind11?" OFF) + option(GENERICIO_PYTHON_LIBRARY "build the python library with pybind11?" 
OFF) else() - message(WANRING " cmake version < 3.11.0, cannot build new python library") - set(GENERICIO_NEW_PYTHON_LIBRARY OFF) + message(WARNING " cmake version < 3.11.0, cannot build the python library") + set(GENERICIO_PYTHON_LIBRARY OFF) endif() ############################################################################### @@ -43,7 +43,7 @@ add_subdirectory(thirdparty) ############################################################################### # GenericIO sources, libraries, and executables -set(GenericIO_Sources +set(GenericIO_Sources GenericIO.h GenericIO.cxx ) @@ -77,16 +77,16 @@ endif() # MPI Executables if(GENERICIO_MPI_EXECUTABLES) set(MPI_Executables - GenericIOPrint - GenericIOVerify - GenericIOBenchmarkRead - GenericIOBenchmarkWrite + GenericIOPrint + GenericIOVerify + GenericIOBenchmarkRead + GenericIOBenchmarkWrite GenericIORewrite ) foreach(executable ${MPI_Executables}) add_executable("${executable}_MPI" "${executable}.cxx") target_link_libraries("${executable}_MPI" PRIVATE genericio_mpi) - set_target_properties("${executable}_MPI" PROPERTIES + set_target_properties("${executable}_MPI" PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/mpi" OUTPUT_NAME ${executable}) endforeach() @@ -105,17 +105,18 @@ if(GENERICIO_FRONTEND_EXECUTABLES) endforeach() endif() -# Old python library -if(GENERICIO_PYTHON_LIBRARY) - add_library(pygio SHARED python/lib/gio.cxx python/lib/gio.h) - target_link_libraries(pygio PRIVATE genericio) +# Legacy python library +if(GENERICIO_LEGACY_PYTHON_LIBRARY) + add_library(pygio_legacy SHARED legacy_python/lib/gio.cxx legacy_python/lib/gio.h) + target_link_libraries(pygio_legacy PRIVATE genericio) # GNUmakefile compatibility: also move to frontend (could be improved) - set_target_properties(pygio PROPERTIES LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/frontend") + set_target_properties(pygio_legacy PROPERTIES LIBRARY_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/frontend") + set_target_properties(pygio_legacy 
PROPERTIES OUTPUT_NAME pygio) # GNUmakefile compatibility: copy python files to build directory so that relative paths are correct - file(COPY python/genericio.py python/example.py DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/python) + file(COPY legacy_python/genericio.py legacy_python/example.py DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/legacy_python) endif() -# New python library -if(GENERICIO_NEW_PYTHON_LIBRARY) - add_subdirectory(new_python) +# Python library +if(GENERICIO_PYTHON_LIBRARY) + add_subdirectory(python) endif() \ No newline at end of file diff --git a/GNUmakefile b/GNUmakefile index 4338cbdefcd302b6cae8085357d5aab6b2f43f5e..4881c6e9b857a131ad4f5e3dec7d1ed313134f8a 100644 --- a/GNUmakefile +++ b/GNUmakefile @@ -1,31 +1,31 @@ # Copyright (C) 2015, UChicago Argonne, LLC # All Rights Reserved -# +# # Generic IO (ANL-15-066) # Hal Finkel, Argonne National Laboratory -# +# # OPEN SOURCE LICENSE -# +# # Under the terms of Contract No. DE-AC02-06CH11357 with UChicago Argonne, # LLC, the U.S. Government retains certain rights in this software. -# +# # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: -# +# # 1. Redistributions of source code must retain the above copyright notice, # this list of conditions and the following disclaimer. -# +# # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. -# +# # 3. Neither the names of UChicago Argonne, LLC or the Department of Energy # nor the names of its contributors may be used to endorse or promote # products derived from this software without specific prior written # permission. -# +# # ***************************************************************************** -# +# # DISCLAIMER # THE SOFTWARE IS SUPPLIED “AS IS” WITHOUT WARRANTY OF ANY KIND. 
NEITHER THE # UNTED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT OF ENERGY, NOR @@ -34,7 +34,7 @@ # ACCURACY, COMPLETENESS, OR USEFULNESS OF ANY INFORMATION, DATA, APPARATUS, # PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS THAT ITS USE WOULD NOT INFRINGE # PRIVATELY OWNED RIGHTS. -# +# # ***************************************************************************** CC = gcc @@ -204,7 +204,7 @@ else FE_SHARED := -shared endif -$(FEDIR)/libpygio.so: $(FEDIR)/GenericIO.o $(FEDIR)/python/lib/gio.o $(FE_BLOSC_O) +$(FEDIR)/libpygio.so: $(FEDIR)/GenericIO.o $(FEDIR)/legacy_python/lib/gio.o $(FE_BLOSC_O) $(CXX) $(FE_CXXFLAGS) $(FE_SHARED) -o $@ $^ $(FEDIR)/GenericIOSQLite.so: $(FEDIR)/GenericIOSQLite.o $(FEDIR)/GenericIO.o $(FE_BLOSC_O) @@ -258,5 +258,5 @@ frontend-sqlite: $(FEDIR)/GenericIOSQLite.so $(FEDIR)/sqlite3 fe-sqlite: frontend-sqlite clean: - rm -rf frontend mpi python/genericio.pyc + rm -rf frontend mpi legacy_python/genericio.pyc diff --git a/README.md b/README.md index cbd764d2386a1f5ad2fe4e4eda7224c82f788ab9..009fe49e390499e55bf9ee68c1bb2a4afe73b6fe 100644 --- a/README.md +++ b/README.md @@ -1,67 +1,83 @@ -# GenericIO +GenericIO +========= GenericIO is a write-optimized library for writing self-describing scientific data files on large-scale parallel file systems. -## Reference +Reference +--------- Habib, et al., HACC: Simulating Future Sky Surveys on State-of-the-Art Supercomputing Architectures, New Astronomy, 2015 (http://arxiv.org/abs/1410.2805). 
-## Source Code +Source Code +----------- The most recent version of source is available by cloning this repo: ```bash - git clone https://xgitlab.cels.anl.gov/hacc/genericio.git +git clone https://xgitlab.cels.anl.gov/hacc/genericio.git ``` There is also a history of code [releases](https://xgitlab.cels.anl.gov/hacc/genericio/-/releases): - - [2019-04-17](https://xgitlab.cels.anl.gov/hacc/genericio/-/releases/20190417) - - [2017-09-25](https://xgitlab.cels.anl.gov/hacc/genericio/-/releases/20170925) - - [2016-08-29](https://xgitlab.cels.anl.gov/hacc/genericio/-/releases/20160829) - - [2016-04-12](https://xgitlab.cels.anl.gov/hacc/genericio/-/releases/20160412) - - [2015-06-08](https://xgitlab.cels.anl.gov/hacc/genericio/-/releases/20150608) +- [2019-04-17](https://xgitlab.cels.anl.gov/hacc/genericio/-/releases/20190417) +- [2017-09-25](https://xgitlab.cels.anl.gov/hacc/genericio/-/releases/20170925) +- [2016-08-29](https://xgitlab.cels.anl.gov/hacc/genericio/-/releases/20160829) +- [2016-04-12](https://xgitlab.cels.anl.gov/hacc/genericio/-/releases/20160412) +- [2015-06-08](https://xgitlab.cels.anl.gov/hacc/genericio/-/releases/20150608) -## Output file partitions (subfiles) +Output file partitions (subfiles) +--------------------------------- If you're running on an IBM BG/Q supercomputer, then the number of subfiles (partitions) chosen is based on the I/O nodes in an automatic way. Otherwise, by default, the GenericIO library picks the number of subfiles based on a fairly-naive hostname-based hashing scheme. This works reasonably-well on small clusters, but not on larger systems. On a larger system, you might want to set these environmental variables: ```bash - GENERICIO_PARTITIONS_USE_NAME=0 - GENERICIO_RANK_PARTITIONS=256 +GENERICIO_PARTITIONS_USE_NAME=0 +GENERICIO_RANK_PARTITIONS=256 ``` Where the number of partitions (256 above) determines the number of subfiles used. 
If you're using a Lustre file system, for example, an optimal number of files is: ``` - # of files * stripe count ~ # OSTs +# of files * stripe count ~ # OSTs ``` On Titan, for example, there are 1008 OSTs, and a default stripe count of 4, so we use approximately 256 files. -## Benchmarks +Benchmarks +---------- Once you build the library and associated programs (using make), you can run, for example: ```bash - $ mpirun -np 8 ./mpi/GenericIOBenchmarkWrite /tmp/out.gio 123456 2 - Wrote 9 variables to /tmp/out (4691036 bytes) in 0.2361s: 18.9484 MB/s +$ mpirun -np 8 ./mpi/GenericIOBenchmarkWrite /tmp/out.gio 123456 2 +Wrote 9 variables to /tmp/out (4691036 bytes) in 0.2361s: 18.9484 MB/s ``` ```bash - $ mpirun -np 8 ./mpi/GenericIOBenchmarkRead /tmp/out.gio - Read 9 variables from /tmp/out (4688028 bytes) in 0.223067s: 20.0426 MB/s [excluding header read] +$ mpirun -np 8 ./mpi/GenericIOBenchmarkRead /tmp/out.gio +Read 9 variables from /tmp/out (4688028 bytes) in 0.223067s: 20.0426 MB/s [excluding header read] ``` The read benchmark always reads all of the input data. The output benchmark takes two numerical parameters, one if the number of data rows to write, and the second is a random seed (which slightly perturbs the per-rank output sizes, but not by much). Each row is 36 bytes for these benchmarks. The write benchmark can be passed the -c parameter to enable output compression. Both benchmarks take an optional -a parameter to request that homogeneous aggregates (i.e. "float4") be used instead of using separate arrays for each position/velocity component. -## Python module + +Python module +------------- + +[Click here to go to the README for the python interface](python/README.md) + + +Legacy python module +-------------------- + +*This documentation is for the old python module. 
It is recommended to use the newer, pybind11-based version, see above* + The repository includes a genericio Python module that can read genericio-formatted files and return numpy arrays. This is included in the standard build. To use it, once you've built genericio, you can read genericio data as follows: ```bash -$ export PYTHONPATH=${GENERICIO_DIR}/python +$ export PYTHONPATH=${GENERICIO_DIR}/legacy_python $ python >>> import genericio >>> genericio.gio_inspect('m000-99.fofproperties') @@ -84,13 +100,9 @@ Number of Elements: 1691 array([[ 4.58575588e+13], [ 5.00464689e+13], [ 5.07078771e+12], - ..., + ..., [ 1.35221006e+13], [ 5.29125710e+12], [ 7.12849857e+12]], dtype=float32) -``` - -## Alternative python module - -[Click here to go to the README for the alternative python interface](new_python/README.md) \ No newline at end of file +``` \ No newline at end of file diff --git a/python/example.py b/legacy_python/example.py similarity index 100% rename from python/example.py rename to legacy_python/example.py diff --git a/python/genericio.py b/legacy_python/genericio.py similarity index 100% rename from python/genericio.py rename to legacy_python/genericio.py diff --git a/python/lib/gio.cxx b/legacy_python/lib/gio.cxx similarity index 100% rename from python/lib/gio.cxx rename to legacy_python/lib/gio.cxx diff --git a/python/lib/gio.h b/legacy_python/lib/gio.h similarity index 100% rename from python/lib/gio.h rename to legacy_python/lib/gio.h diff --git a/new_python/CMakeLists.txt b/new_python/CMakeLists.txt deleted file mode 100644 index 78e047c3a4a52e35ed7a0240a6d7012f1690ef10..0000000000000000000000000000000000000000 --- a/new_python/CMakeLists.txt +++ /dev/null @@ -1,27 +0,0 @@ -cmake_minimum_required(VERSION 3.11) -set(CMAKE_CXX_STANDARD 17) - -include(FetchContent) -FetchContent_Declare( - pybind11 - GIT_REPOSITORY https://github.com/pybind/pybind11.git - GIT_TAG v2.6.1 -) - -FetchContent_GetProperties(pybind11) -if(NOT pybind11_POPULATED) - 
FetchContent_Populate(pybind11) - add_subdirectory(${pybind11_SOURCE_DIR} ${pybind11_BINARY_DIR}) -endif() - -# the mpi version -if(MPI_FOUND) -pybind11_add_module(pygio_new genericio.cpp) -set_target_properties(pygio_new PROPERTIES OUTPUT_NAME pygio) -target_link_libraries(pygio_new PRIVATE genericio_mpi) -endif() - -# The no-mpi version -pybind11_add_module(pygio_new_nompi genericio.cpp) -set_target_properties(pygio_new_nompi PROPERTIES OUTPUT_NAME pygio_nompi) -target_link_libraries(pygio_new_nompi PRIVATE genericio) \ No newline at end of file diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..ab526fc749ff47692e7062c4943b4fca872da1ec --- /dev/null +++ b/python/CMakeLists.txt @@ -0,0 +1,25 @@ +cmake_minimum_required(VERSION 3.11) +set(CMAKE_CXX_STANDARD 17) + +include(FetchContent) +FetchContent_Declare( + pybind11 + GIT_REPOSITORY https://github.com/pybind/pybind11.git + GIT_TAG v2.6.1 +) + +FetchContent_GetProperties(pybind11) +if(NOT pybind11_POPULATED) + FetchContent_Populate(pybind11) + add_subdirectory(${pybind11_SOURCE_DIR} ${pybind11_BINARY_DIR}) +endif() + +# the mpi version +if(MPI_FOUND) +pybind11_add_module(pygio genericio.cpp) +target_link_libraries(pygio PRIVATE genericio_mpi) +endif() + +# The no-mpi version +pybind11_add_module(pygio_nompi genericio.cpp) +target_link_libraries(pygio_nompi PRIVATE genericio) \ No newline at end of file diff --git a/new_python/README.md b/python/README.md similarity index 100% rename from new_python/README.md rename to python/README.md diff --git a/new_python/genericio.cpp b/python/genericio.cpp similarity index 100% rename from new_python/genericio.cpp rename to python/genericio.cpp diff --git a/new_python/pygio/__init__.py b/python/pygio/__init__.py similarity index 100% rename from new_python/pygio/__init__.py rename to python/pygio/__init__.py