From c5e84c78f5341cfe5932fc3e2c895ce00d0fda18 Mon Sep 17 00:00:00 2001 From: Michael Buehlmann <buehlmann.michi@gmail.com> Date: Wed, 15 Dec 2021 16:45:43 -0600 Subject: [PATCH] add basic documentation / sphinx setup --- docs/Makefile | 20 ++ docs/_static/css/custom.css | 7 + docs/conf.py | 99 ++++++++++ docs/index.rst | 33 ++++ docs/make.bat | 35 ++++ docs/python/class_interface.rst | 32 ++++ docs/python/mpi.rst | 2 + docs/python/readwrite.rst | 58 ++++++ python/genericio.cpp | 325 ++++++++++++++++++++++++++++++-- 9 files changed, 600 insertions(+), 11 deletions(-) create mode 100644 docs/Makefile create mode 100644 docs/_static/css/custom.css create mode 100644 docs/conf.py create mode 100644 docs/index.rst create mode 100644 docs/make.bat create mode 100644 docs/python/class_interface.rst create mode 100644 docs/python/mpi.rst create mode 100644 docs/python/readwrite.rst diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 0000000..d4bb2cb --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,20 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line, and also +# from the environment for the first two. +SPHINXOPTS ?= +SPHINXBUILD ?= sphinx-build +SOURCEDIR = . +BUILDDIR = _build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
+%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/_static/css/custom.css b/docs/_static/css/custom.css new file mode 100644 index 0000000..00f75f7 --- /dev/null +++ b/docs/_static/css/custom.css @@ -0,0 +1,7 @@ +table.full-width { + width: 100%; +} + +table.full-width td { + white-space: normal !important; +} \ No newline at end of file diff --git a/docs/conf.py b/docs/conf.py new file mode 100644 index 0000000..b2991e1 --- /dev/null +++ b/docs/conf.py @@ -0,0 +1,99 @@ +# Configuration file for the Sphinx documentation builder. +# +# This file only contains a selection of the most common options. For a full +# list see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +# -- Path setup -------------------------------------------------------------- + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +# +import os, sys, shutil, subprocess +import re +from pathlib import Path + +DIR = Path(__file__).parent.resolve() + + +# -- Project information ----------------------------------------------------- + +project = "GenericIO" +copyright = "2021, Hal Finkel, et al." +author = "Hal Finkel, et al." + + +# -- General configuration --------------------------------------------------- + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. 
+extensions = [ + "sphinx.ext.autodoc", + "sphinx.ext.napoleon", + "sphinx.ext.autodoc.typehints", + "sphinx.ext.autosummary", + "sphinx.ext.viewcode", + "sphinx.ext.autosectionlabel", + "myst_parser", +] + +autosectionlabel_prefix_document = True + +autodoc_typehints = "description" +add_module_names = False + +autosummary_generate = False +napoleon_numpy_docstring = True +napoleon_use_admonition_for_examples = True +napoleon_use_admonition_for_notes = True + +source_suffix = {".rst": "restructuredtext", ".md": "markdown"} + +# Add any paths that contain templates here, relative to this directory. +templates_path = ["_templates"] + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +# This pattern also affects html_static_path and html_extra_path. +exclude_patterns = ["_build", "Thumbs.db", ".DS_Store", "README.md"] + + +# -- Options for HTML output ------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +# +html_theme = "sphinx_rtd_theme" +html_theme_options = {"prev_next_buttons_location": None} + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". 
+html_static_path = ["_static"] + + +def prepare(app): + with open(DIR.parent / "README.md", encoding="utf-8") as f: + contents = f.read() + + # Filter out section titles for index.rst for LaTeX + if app.builder.name == "latex": + contents = re.sub(r"^(.*)\n[-~]{3,}$", r"**\1**", contents, flags=re.MULTILINE) + + with open(DIR / "README.md", "w", encoding="utf-8") as f: + f.write(contents) + + +def clean_up(app, exception): + (DIR / "README.md").unlink() + + +def setup(app): + app.add_css_file("css/custom.css") + # Copy the readme in + app.connect("builder-inited", prepare) + + # Clean up the generated readme + app.connect("build-finished", clean_up) diff --git a/docs/index.rst b/docs/index.rst new file mode 100644 index 0000000..d8fadfa --- /dev/null +++ b/docs/index.rst @@ -0,0 +1,33 @@ +.. GenericIO documentation master file, created by + sphinx-quickstart on Fri Dec 10 09:52:19 2021. + You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + +.. only:: latex + + GenericIO Documentation + ======================= + +.. include:: README.md + :parser: myst_parser.sphinx_ + + +.. only:: latex + + .. toctree:: + :maxdepth: 2 + + self + + .. toctree:: + :caption: Python Interface + :maxdepth: 2 + + python/readwrite + python/class_interface + python/mpi + + .. toctree:: + :caption: C++ Interface + :maxdepth: 2 + diff --git a/docs/make.bat b/docs/make.bat new file mode 100644 index 0000000..2119f51 --- /dev/null +++ b/docs/make.bat @@ -0,0 +1,35 @@ +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=. +set BUILDDIR=_build + +if "%1" == "" goto help + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable.
Alternatively you + echo.may add the Sphinx directory to PATH. + echo. + echo.If you don't have Sphinx installed, grab it from + echo.http://sphinx-doc.org/ + exit /b 1 +) + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% + +:end +popd diff --git a/docs/python/class_interface.rst b/docs/python/class_interface.rst new file mode 100644 index 0000000..6372b12 --- /dev/null +++ b/docs/python/class_interface.rst @@ -0,0 +1,32 @@ +The Python Class Interface +========================== +.. currentmodule:: pygio + +.. code-block:: python + + # instantiate a GenericIO class + gio_file = pygio.PyGenericIO("generic_io_file") + + # inspect (prints to python stdout, also works in notebook) + gio_file.inspect() + + # get variables + gio_vars = gio_file.get_variables() + + # print variable names + for var in gio_vars: + print(var.name, var.size, var.element_size, var.is_float) + + # read data + data_all = gio_file.read() + data_partial = gio_file.read(["x", "y", "z"]) + +Further methods and members of GenericIO can easily be interfaced by editing +``python_new/genericio.cpp``. + + +References +---------- + +.. autoclass:: PyGenericIO + :members: \ No newline at end of file diff --git a/docs/python/mpi.rst b/docs/python/mpi.rst new file mode 100644 index 0000000..721b7ef --- /dev/null +++ b/docs/python/mpi.rst @@ -0,0 +1,2 @@ +Using GenericIO with MPI +======================== \ No newline at end of file diff --git a/docs/python/readwrite.rst b/docs/python/readwrite.rst new file mode 100644 index 0000000..409f31f --- /dev/null +++ b/docs/python/readwrite.rst @@ -0,0 +1,58 @@ +Reading and Writing Data +======================== + +.. currentmodule:: pygio + +.. code-block:: python + + import numpy as np + import pygio + + # inspect file + pygio.inspect_genericio("generic_io_file") + + +.. code-block:: python + + # read all variables + data = pygio.read_genericio("generic_io_file") + +..
code-block:: python + + # read only a subset of variables + data_partial = pygio.read_genericio("generic_io_file", ["x", "y", "z"]) + data_x = data_partial["x"] + +.. code-block:: python + + data = { + "x": np.ones(100), + "y": np.ones(100) + } + + # write data to file + pygio.write_genericio("new_generic_io_file", + data, + phys_scale = [1, 1, 1], + phys_origin = [0, 0, 0] + ) + + +References +---------- + +.. autofunction:: inspect_genericio + +.. autofunction:: read_genericio + +.. autofunction:: read_num_elems + +.. autofunction:: read_variable_names + +.. autofunction:: read_variable_dtypes + +.. autofunction:: read_phys_scale + +.. autofunction:: read_phys_origin + +.. autofunction:: write_genericio \ No newline at end of file diff --git a/python/genericio.cpp b/python/genericio.cpp index a2b1e0c..34aab98 100644 --- a/python/genericio.cpp +++ b/python/genericio.cpp @@ -65,6 +65,35 @@ public: } } + std::vector<std::string> read_variable_names() { + std::vector<std::string> variable_names; + for(const auto& v: variables) { + variable_names.push_back(v.Name); + } + return variable_names; + } + + std::map<std::string, py::dtype> read_variable_dtypes() { + std::map<std::string, py::dtype> variable_dtypes; + + for(const auto& var: variables) { + auto var_name = var.Name; + if(var.IsFloat && var.ElementSize == 4) + variable_dtypes[var_name] = py::dtype("f4"); + else if(var.IsFloat && var.ElementSize == 8) + variable_dtypes[var_name] = py::dtype("f8"); + else if(!var.IsFloat && var.ElementSize == 4) + variable_dtypes[var_name] = py::dtype("i4"); + else if(!var.IsFloat && var.ElementSize == 8) + variable_dtypes[var_name] = py::dtype("i8"); + else if(!var.IsFloat && var.ElementSize == 2) + variable_dtypes[var_name] = py::dtype("u2"); + else + throw std::runtime_error(std::string("Unknown data type in GenericIO for variable ") + var_name); + } + return variable_dtypes; + } + std::map<std::string, py::array> read( std::optional<std::vector<std::string>> var_names, bool 
print_stats=true, @@ -83,10 +112,7 @@ public: // if no argument, read all if(!var_names.has_value()) { - var_names.emplace(std::vector<std::string>()); - for(const auto& v: variables) { - var_names->push_back(v.Name); - } + var_names.emplace(read_variable_names()); } clearVariables(); @@ -116,6 +142,8 @@ public: } else if(!(*varp).IsFloat && (*varp).ElementSize == 2) { result[var_name] = py::array_t<uint16_t>(readsize); addVariable(*varp, result[var_name].mutable_data(), gio::GenericIO::VarHasExtraSpace); + } else { + throw std::runtime_error(std::string("Unknown data type in GenericIO for variable ") + var_name); } } } @@ -133,7 +161,7 @@ public: return result; } - const std::vector<gio::GenericIO::VariableInfo> &get_variables() { + const std::vector<VariableInfo> &get_variables() { return variables; } @@ -157,7 +185,7 @@ public: private: int num_ranks; - std::vector<gio::GenericIO::VariableInfo> variables; + std::vector<VariableInfo> variables; }; std::map<std::string, py::array> read_genericio( @@ -187,6 +215,60 @@ void inspect_genericio( reader.inspect(); } +std::array<double, 3> read_phys_scale( + std::string filename, + PyGenericIO::FileIO method=PyGenericIO::FileIO::FileIOPOSIX, + PyGenericIO::MismatchBehavior redistribute=PyGenericIO::MismatchBehavior::MismatchRedistribute +) { + PyGenericIO reader(filename, method, redistribute); + return reader.read_phys_scale(); +} + +std::array<double, 3> read_phys_origin( + std::string filename, + PyGenericIO::FileIO method=PyGenericIO::FileIO::FileIOPOSIX, + PyGenericIO::MismatchBehavior redistribute=PyGenericIO::MismatchBehavior::MismatchRedistribute +) { + PyGenericIO reader(filename, method, redistribute); + return reader.read_phys_origin(); +} + +std::vector<std::string> read_variable_names( + std::string filename, + PyGenericIO::FileIO method=PyGenericIO::FileIO::FileIOPOSIX, + PyGenericIO::MismatchBehavior redistribute=PyGenericIO::MismatchBehavior::MismatchRedistribute +) { + PyGenericIO reader(filename, 
method, redistribute); + return reader.read_variable_names(); +} + +std::map<std::string, py::dtype> read_variable_dtypes( + std::string filename, + PyGenericIO::FileIO method=PyGenericIO::FileIO::FileIOPOSIX, + PyGenericIO::MismatchBehavior redistribute=PyGenericIO::MismatchBehavior::MismatchRedistribute +) { + PyGenericIO reader(filename, method, redistribute); + return reader.read_variable_dtypes(); +} + +int64_t read_num_elems( + std::string filename, + PyGenericIO::FileIO method=PyGenericIO::FileIO::FileIOPOSIX, + PyGenericIO::MismatchBehavior redistribute=PyGenericIO::MismatchBehavior::MismatchRedistribute +) { + PyGenericIO reader(filename, method, redistribute); + return reader.readNumElems(); +} + +int64_t read_total_num_elems( + std::string filename, + PyGenericIO::FileIO method=PyGenericIO::FileIO::FileIOPOSIX, + PyGenericIO::MismatchBehavior redistribute=PyGenericIO::MismatchBehavior::MismatchRedistribute +) { + PyGenericIO reader(filename, method, redistribute); + return reader.readTotalNumElems(); +} + #ifndef GENERICIO_NO_MPI void write_genericio( std::string filename, @@ -283,8 +365,11 @@ PYBIND11_MODULE(pygio, m) { py::arg("print_stats")=true, py::arg("collective_stats")=true, py::arg("eff_rank")=-1) - .def("get_source_ranks", &PyGenericIO::get_source_ranks) .def("read_nranks", (int (PyGenericIO::*)(void))(&PyGenericIO::readNRanks)) + .def("read_variable_names", &PyGenericIO::read_variable_names) + .def("read_variable_dtypes", &PyGenericIO::read_variable_dtypes) + .def("read", &PyGenericIO::read, py::arg("variables")=nullptr, py::arg("print_stats")=true, py::arg("collective_stats")=true) + .def("get_source_ranks", &PyGenericIO::getSourceRanks) #ifndef GENERICIO_NO_MPI .def("rebalance_source_ranks", &PyGenericIO::rebalanceSourceRanks) #endif @@ -300,7 +385,42 @@ PYBIND11_MODULE(pygio, m) { (vi.IsFloat ? 
"float" : "int") + " name='" + vi.Name + "'>"; }); - m.def("read_genericio", &read_genericio, + + m.def("read_genericio", &read_genericio, R"Delim( + Read data from a GenericIO file + + Parameters + ---------- + filename: str + path to the GenericIO file + + variables: List[str] + A list of variable names that should be read. If ``None``, all + variables contained in the file will be read + + method: PyGenericIO.FileIO + file handling method (POSIX/MPI) + + redistribute: PyGenericIO.MismatchBehavior + whether to allow mismatching ranks + + print_stats: bool + if ``True``, print throughput statistics after reading + + collective_stats: bool + if ``True``, aggregate statistics among reading ranks (if using MPI) + + rebalance_sourceranks: bool + if ``True``, the code will re-assign the file ranks to the reading + MPI ranks to equalize the data size each rank is reading. Only + relevant if using MPI and more ranks were used to write the file + than reading. + + Returns + ------- + data: Mapping[str, np.ndarray] + + )Delim", py::arg("filename"), py::arg("variables")=nullptr, py::kw_only(), @@ -312,16 +432,199 @@ PYBIND11_MODULE(pygio, m) { py::arg("eff_rank")=-1, py::return_value_policy::move); - m.def("inspect_genericio", &inspect_genericio, + + m.def("inspect_genericio", &inspect_genericio, R"Delim( + Print a summary of variables and types defined in the GenericIO file + + Parameters + ---------- + filename: str + path to the GenericIO file + + method: PyGenericIO.FileIO + file handling method (POSIX/MPI) + + redistribute: PyGenericIO.MismatchBehavior + whether to allow mismatching ranks + )Delim", + py::arg("filename"), + py::kw_only(), + py::arg("method")=PyGenericIO::FileIO::FileIOPOSIX, + py::arg("redistribute")=PyGenericIO::MismatchBehavior::MismatchRedistribute); + + m.def("read_num_elems", &read_num_elems, R"Delim( + Read the (local) number of objects (the number of objects that would be + read by this rank when calling :func:`read_genericio`) + + Parameters + 
---------- + filename: str + path to the GenericIO file + + method: PyGenericIO.FileIO + file handling method (POSIX/MPI) + + redistribute: PyGenericIO.MismatchBehavior + whether to allow mismatching ranks + + Returns + ------- + nlocal: int + the number of objects assigned to this rank + )Delim", + py::arg("filename"), + py::kw_only(), + py::arg("method")=PyGenericIO::FileIO::FileIOPOSIX, + py::arg("redistribute")=PyGenericIO::MismatchBehavior::MismatchRedistribute); + + m.def("read_total_num_elems", &read_total_num_elems, R"Delim( + Read the total number of objects (the number of objects that would be + read by all ranks combined when calling :func:`read_genericio`) + + Parameters + ---------- + filename: str + path to the GenericIO file + + method: PyGenericIO.FileIO + file handling method (POSIX/MPI) + + redistribute: PyGenericIO.MismatchBehavior + whether to allow mismatching ranks + + Returns + ------- + ntotal: int + the total number of objects stored in the GenericIO file + )Delim", py::arg("filename"), py::kw_only(), py::arg("method")=PyGenericIO::FileIO::FileIOPOSIX, py::arg("redistribute")=PyGenericIO::MismatchBehavior::MismatchRedistribute); + m.def("read_variable_names", &read_variable_names, R"Delim( + Get a list of variable names stored in the GenericIO file + + Parameters + ---------- + filename: str + path to the GenericIO file + + method: PyGenericIO.FileIO + file handling method (POSIX/MPI) + + redistribute: PyGenericIO.MismatchBehavior + whether to allow mismatching ranks + + Returns + ------- + variable_names: List[str] + the list of variable names defined in the GenericIO file + )Delim", + py::arg("filename"), + py::kw_only(), + py::arg("method")=PyGenericIO::FileIO::FileIOPOSIX, + py::arg("redistribute")=PyGenericIO::MismatchBehavior::MismatchRedistribute); + + m.def("read_variable_dtypes", &read_variable_dtypes, R"Delim( + Get a dictionary of dtypes mapped to the variable names + + Parameters + ---------- + filename: str + path to the 
GenericIO file + + method: PyGenericIO.FileIO + file handling method (POSIX/MPI) + + redistribute: PyGenericIO.MismatchBehavior + whether to allow mismatching ranks + + Returns + ------- + variable_dtypes: Mapping[str, np.dtype] + a map ``variable_name -> dtype`` for each variable in the GenericIO file + )Delim", + py::arg("filename"), + py::kw_only(), + py::arg("method")=PyGenericIO::FileIO::FileIOPOSIX, + py::arg("redistribute")=PyGenericIO::MismatchBehavior::MismatchRedistribute); + + m.def("read_phys_scale", &read_phys_scale, R"Delim( + Read the box size that is stored in the GenericIO file + + Parameters + ---------- + filename: str + path to the GenericIO file + + method: PyGenericIO.FileIO + file handling method (POSIX/MPI) + + redistribute: PyGenericIO.MismatchBehavior + whether to allow mismatching ranks + + Returns + ------- + phys_scale: List[float] + the box length for each dimension (3 elements long) + )Delim", + py::arg("filename"), + py::kw_only(), + py::arg("method")=PyGenericIO::FileIO::FileIOPOSIX, + py::arg("redistribute")=PyGenericIO::MismatchBehavior::MismatchRedistribute); + + m.def("read_phys_origin", &read_phys_origin, R"Delim( + Read the origin / reference point of the box that is stored in the GenericIO file + + Parameters + ---------- + filename: str + path to the GenericIO file + + method: PyGenericIO.FileIO + file handling method (POSIX/MPI) + + redistribute: PyGenericIO.MismatchBehavior + whether to allow mismatching ranks + + Returns + ------- + phys_origin: List[float] + the box origin coordinates (3 elements long) + )Delim", + py::arg("filename"), + py::kw_only(), + py::arg("method")=PyGenericIO::FileIO::FileIOPOSIX, + py::arg("redistribute")=PyGenericIO::MismatchBehavior::MismatchRedistribute); + + + #ifndef GENERICIO_NO_MPI - m.def("write_genericio", &write_genericio, + m.def("write_genericio", &write_genericio, R"Delim( + Write data as a GenericIO file + + Parameters + ---------- + filename: str + path to the GenericIO file + +
data: Mapping[str, np.ndarray] + a dictionary, with all items being 1-dimensional numpy arrays of + the same length. Currently, only float32, float64, int32, int64 and + uint16 data types are supported + + phys_scale: List[float] + the physical size of the box that the data belongs to (3 elements) + + phys_origin: List[float] + the origin coordinates of the box that the data belongs to (3 elements) + + method: PyGenericIO.FileIO + file handling method (POSIX/MPI) + )Delim", py::arg("filename"), - py::arg("variables"), + py::arg("data"), py::arg("phys_scale"), py::arg("phys_origin") = std::array<double, 3>({0., 0., 0.}), py::kw_only(), -- GitLab