Compare revisions

Changes are shown as if the source revision was being merged into the target revision.
Commits on Source (23)
......@@ -5,4 +5,7 @@ frontend/
*.pyc
*.egg-info
.vscode
docs/_build
\ No newline at end of file
docs/_build
.cache
.venv
python/pygio/_version.py
\ No newline at end of file
......@@ -23,11 +23,12 @@ build_cmake:
build_python:
stage: build
before_script:
- apt update && apt -y install git gcc cmake mpich python3 python3-pip
- python3 -m pip install numpy
- apt update && apt -y install git gcc cmake mpich python3 python3-pip python3-venv
- python3 -m venv ./venv
- ./venv/bin/python -m pip install numpy wheel
script:
- python3 setup.py bdist_wheel
- python3 -m pip install dist/*
- ./venv/bin/python setup.py bdist_wheel
- ./venv/bin/python -m pip install dist/*
artifacts:
paths:
- dist/*.whl
......@@ -43,12 +44,13 @@ build_docs:
dependencies:
- build_python
before_script:
- apt update && apt -y install mpich python3 python3-pip
- python3 -m pip install --upgrade pip
- python3 -m pip install numpy
- python3 -m pip install dist/*
- python3 -m pip install Sphinx sphinx-rtd-theme sphinxcontrib-napoleon numpydoc myst-parser
- apt update && apt -y install mpich python3 python3-pip python3-venv
- python3 -m venv ./venv
- ./venv/bin/python -m pip install numpy wheel
- ./venv/bin/python -m pip install dist/*
- ./venv/bin/python -m pip install Sphinx sphinx-rtd-theme sphinxcontrib-napoleon numpydoc myst-parser
script:
- source ./venv/bin/activate
- cd docs
- make dirhtml
artifacts:
......
......@@ -46,11 +46,9 @@ MPICXX = mpicxx
all: fe-progs mpi-progs
sql: fe-sqlite
VELOC_INSTALL_DIR := $(HOME)/deploy
BLOSC_CPPFLAGS := \
-Ithirdparty/blosc \
-DHAVE_LZ4 -DHAVE_SNAPPY -DHAVE_ZLIB -DHAVE_ZSTD \
-DHAVE_LZ4 -DHAVE_SNAPPY -DHAVE_ZLIB -DHAVE_ZSTD -DHAVE_UNISTD_H \
-Ithirdparty/blosc/internal-complibs/zlib-1.2.8 \
-Ithirdparty/blosc/internal-complibs/lz4-1.7.2 \
-Ithirdparty/blosc/internal-complibs/snappy-1.1.1 \
......@@ -62,6 +60,7 @@ BLOSC_CPPFLAGS := \
-Ithirdparty/blosc/internal-complibs/zstd-0.7.4/decompress \
-Ithirdparty/SZ/sz/include
# Set the VELOC_INSTALL_DIR environment variable to enable VELOC support
ifdef VELOC_INSTALL_DIR
BLOSC_CPPFLAGS := $(BLOSC_CPPFLAGS) \
-DGENERICIO_WITH_VELOC \
......@@ -69,6 +68,7 @@ BLOSC_CPPFLAGS := $(BLOSC_CPPFLAGS) \
-I$(VELOC_INSTALL_DIR)/include
BASE_CXXFLAGS := \
-L$(VELOC_INSTALL_DIR)/lib -L$(VELOC_INSTALL_DIR)/lib64 \
-Wl,-rpath,$(VELOC_INSTALL_DIR)/lib64 -Wl,-rpath,$(VELOC_INSTALL_DIR)/lib\
-Wl,--copy-dt-needed-entries -lveloc-client
endif
......
......@@ -1793,39 +1793,6 @@ void GenericIO::readData(int EffRank, size_t RowOffset, int Rank,
}
}
#ifndef GENERICIO_NO_MPI
void GenericIO::rebalanceSourceRanks() {
if(Redistributing) {
int NRanks, Rank;
MPI_Comm_rank(Comm, &Rank);
MPI_Comm_size(Comm, &NRanks);
std::vector<std::pair<int, size_t>> rank_sizes;
std::vector<std::tuple<int, size_t, std::vector<int>>> new_source_ranks;
for(int i=0; i<NRanks; ++i) {
new_source_ranks.emplace_back(std::make_tuple(i, 0ul, std::vector<int>()));
}
for(int i=0; i<readNRanks(); ++i) {
rank_sizes.emplace_back(std::make_pair(i, readNumElems(i)));
}
std::sort(rank_sizes.begin(), rank_sizes.end(), [](const auto& p1, const auto& p2){ return p1.second > p2.second; });
// Distribute ranks
for(size_t i=0; i<rank_sizes.size(); ++i) {
// Assign to first rank
std::get<2>(new_source_ranks[0]).push_back(rank_sizes[i].first);
std::get<1>(new_source_ranks[0]) += rank_sizes[i].second;
// Reorder ranks (could be optimized since array already sorted)
std::stable_sort(new_source_ranks.begin(), new_source_ranks.end(), [](const auto& s1, const auto& s2){ return std::get<1>(s1) < std::get<1>(s2); });
}
// copy own array
SourceRanks.resize(0);
std::copy(std::get<2>(new_source_ranks[Rank]).begin(), std::get<2>(new_source_ranks[Rank]).end(), std::back_inserter(SourceRanks));
} else {
std::cerr << "rebalancing source ranks has no effect when Redistributing==false" << std::endl;
}
}
#endif
void GenericIO::getVariableInfo(vector<VariableInfo> &VI) {
if (FH.isBigEndian())
getVariableInfo<true>(VI);
......@@ -1863,11 +1830,28 @@ void GenericIO::getVariableInfo(vector<VariableInfo> &VI) {
}
}
bool GenericIO::hasVariable(const std::string &Name) {
vector<VariableInfo> VI;
getVariableInfo(VI);
for (size_t i = 0; i < VI.size(); ++i)
if (VI[i].Name == Name)
return true;
return false;
}
void GenericIO::setNaturalDefaultPartition() {
#ifdef __bgq__
DefaultPartition = MPIX_IO_link_id();
#elif !defined(GENERICIO_NO_MPI)
bool UseName = true;
int rank, nranks;
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &nranks);
// assign by 8bit hash of MPI_Get_processor_name
bool UseName = false;
const char *EnvStr = getenv("GENERICIO_PARTITIONS_USE_NAME");
if (EnvStr) {
int Mod = atoi(EnvStr);
......@@ -1888,15 +1872,26 @@ void GenericIO::setNaturalDefaultPartition() {
DefaultPartition = color;
}
// This is for debugging.
// assign round robin to max number given by GENERICIO_RANK_PARTITIONS
int roundRobinAssign = 0;
EnvStr = getenv("GENERICIO_RANK_PARTITIONS");
if (EnvStr) {
int Mod = atoi(EnvStr);
if (Mod > 0) {
int Rank;
MPI_Comm_rank(MPI_COMM_WORLD, &Rank);
DefaultPartition += Rank % Mod;
}
roundRobinAssign = atoi(EnvStr);
}
// default: one file per rank, up to 256 files. Warn if we combine ranks in
// single file
if(!UseName && (roundRobinAssign==0)) {
if(rank == 0 && nranks > 256)
std::cerr << "WARNING: Running with more than 256 MPI ranks, and "
<< "GENERICIO_RANK_PARTITIONS is not set.\n"
<< "GenericIO will limit the number of file partitions to 256"
<< std::endl;
roundRobinAssign = 256;
}
if (roundRobinAssign > 0) {
DefaultPartition = rank % roundRobinAssign;
}
#endif
#ifdef GENERICIO_WITH_VELOC
......
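In summary, the revised setNaturalDefaultPartition keeps the hostname-hash path behind GENERICIO_PARTITIONS_USE_NAME, turns GENERICIO_RANK_PARTITIONS into a round-robin cap on the number of file partitions, and defaults to at most 256 partitions (warning when more than 256 ranks run without the variable set). A minimal sketch of driving this from Python through the pygio binding; the cap of 64 is an arbitrary illustration, not a recommended value:

```python
import os

# Illustrative: cap GenericIO at 64 file partitions, assigned round-robin
# over MPI ranks. Must be set before setNaturalDefaultPartition() runs,
# since the C++ side reads the environment at call time.
os.environ["GENERICIO_RANK_PARTITIONS"] = "64"

import pygio

pygio.PyGenericIO.setNaturalDefaultPartition()
```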
......@@ -434,14 +434,13 @@ public:
void getVariableInfo(std::vector<VariableInfo> &VI);
bool hasVariable(const std::string &Name);
std::size_t readNumElems(int EffRank = -1);
void readCoords(int Coords[3], int EffRank = -1);
int readGlobalRankNumber(int EffRank = -1);
void readData(int EffRank = -1, bool PrintStats = true, bool CollStats = true);
#ifndef GENERICIO_NO_MPI
void rebalanceSourceRanks();
#endif
void getSourceRanks(std::vector<int> &SR);
void close() {
......@@ -462,9 +461,25 @@ public:
static void setNaturalDefaultPartition();
static void flushAll() {
static bool flushAll() {
bool ret = true;
#ifdef GENERICIO_WITH_VELOC
// With VELOC, do a blocking flush of anything outstanding and record
// whether all outstanding tasks were successful.
bool retLocal = GenericFileIO_VELOC::flush();
#ifndef GENERICIO_NO_MPI
// Global reduction: flushAll succeeds only if every rank's flush succeeded.
MPI_Allreduce(&retLocal, &ret, 1, MPI_C_BOOL, MPI_LAND, MPI_COMM_WORLD);
#else
ret = retLocal;
#endif
#endif // GENERICIO_WITH_VELOC
// Without VELOC this is a no-op that reports success.
return ret;
}
static void shutdown() {
#ifdef GENERICIO_WITH_VELOC
GenericFileIO_VELOC::flush();
#ifndef GENERICIO_NO_MPI
// Barrier: wait for all ranks to finish flushing before shutdown.
MPI_Barrier(MPI_COMM_WORLD);
#endif
#endif
}
......
......@@ -128,7 +128,7 @@ int main(int argc, char *argv[]) {
GenericIO::setNaturalDefaultPartition();
char *mpiioName = argv[a++];
size_t Np = atoi(argv[a++])/commRanks;
size_t Np = atol(argv[a++])/commRanks;
int seed = atoi(argv[a++]);
srand48(seed + commRank);
......
......@@ -115,10 +115,11 @@ Instructions can be found here: [https://veloc.readthedocs.io](https://veloc.rea
**Install**
Set the VELOC_INSTALL_DIR variable in GNUMakefile to the root of the VELOC installation directory.
Then proceed to compile and link GIO as usual.
**Run**
Define the GENERICIO_USE_VELOC environment variable as the path to the VELOC configuration file.
An example is available here: veloc.cfg
Define the GENERICIO_USE_VELOC environment variable as the path to the scratch directory.
The scratch directory will be used as a local cache and needs to be an NVMe mount point on the compute node.
Define the VELOC_MAX_CACHE_SIZE environment variable as the maximum size (in bytes) of unflushed data allowed in the scratch folder.
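For example, a job script or Python driver might configure the cache before any GenericIO write; the scratch path and cache size below are illustrative assumptions, not defaults:

```python
import os

# Assumed NVMe-backed scratch mount on the compute node (illustrative path).
os.environ["GENERICIO_USE_VELOC"] = "/local/nvme/scratch"
# Allow up to 8 GiB of unflushed data in the scratch folder (arbitrary choice).
os.environ["VELOC_MAX_CACHE_SIZE"] = str(8 * 1024**3)

import pygio  # the VELOC-enabled library reads these variables at run time
```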
[build-system]
requires = [
"setuptools>=42",
"setuptools>=45",
"setuptools_scm[toml]>=6.2",
"wheel",
"cmake>=3.11",
]
build-backend = "setuptools.build_meta"
\ No newline at end of file
build-backend = "setuptools.build_meta"
......@@ -5,7 +5,7 @@ include(FetchContent)
FetchContent_Declare(
pybind11
GIT_REPOSITORY https://github.com/pybind/pybind11.git
GIT_TAG v2.9.2
GIT_TAG v2.11.1
)
FetchContent_GetProperties(pybind11)
......@@ -16,10 +16,10 @@ endif()
# the mpi version
if(MPI_FOUND)
pybind11_add_module(pygio genericio.cpp)
target_link_libraries(pygio PRIVATE genericio_mpi)
pybind11_add_module(pygio_impl genericio.cpp)
target_link_libraries(pygio_impl PRIVATE genericio_mpi)
endif()
# The no-mpi version
pybind11_add_module(pygio_nompi genericio.cpp)
target_link_libraries(pygio_nompi PRIVATE genericio)
\ No newline at end of file
pybind11_add_module(pygio_nompi_impl genericio.cpp)
target_link_libraries(pygio_nompi_impl PRIVATE genericio)
\ No newline at end of file
......@@ -192,6 +192,18 @@ public:
return sr;
}
std::array<int, 3> read_dims() {
std::array<int, 3> sd;
readDims(sd.data());
return sd;
}
std::array<int, 3> read_coords(int eff_rank=-1) {
std::array<int, 3> sc;
readCoords(sc.data(), eff_rank);
return sc;
}
private:
int num_ranks;
std::vector<VariableInfo> variables;
......@@ -204,14 +216,9 @@ std::map<std::string, py::array> read_genericio(
PyGenericIO::MismatchBehavior redistribute=PyGenericIO::MismatchBehavior::MismatchRedistribute,
bool print_stats=true,
bool collective_stats=true,
bool rebalance_source_ranks=false,
int eff_rank=-1
) {
PyGenericIO reader(filename, method, redistribute, eff_rank);
#ifndef GENERICIO_NO_MPI
if(rebalance_source_ranks)
reader.rebalanceSourceRanks();
#endif
return reader.read(var_names, print_stats, collective_stats, eff_rank);
}
......@@ -242,6 +249,25 @@ std::array<double, 3> read_phys_origin(
return reader.read_phys_origin();
}
std::array<int, 3> read_dims(
std::string filename,
PyGenericIO::FileIO method=PyGenericIO::FileIO::FileIOPOSIX,
PyGenericIO::MismatchBehavior redistribute=PyGenericIO::MismatchBehavior::MismatchRedistribute
) {
PyGenericIO reader(filename, method, redistribute);
return reader.read_dims();
}
std::array<int, 3> read_coords(
std::string filename,
PyGenericIO::FileIO method=PyGenericIO::FileIO::FileIOPOSIX,
PyGenericIO::MismatchBehavior redistribute=PyGenericIO::MismatchBehavior::MismatchRedistribute,
int eff_rank=-1
) {
PyGenericIO reader(filename, method, redistribute);
return reader.read_coords(eff_rank);
}
std::vector<std::string> read_variable_names(
std::string filename,
PyGenericIO::FileIO method=PyGenericIO::FileIO::FileIOPOSIX,
......@@ -263,10 +289,11 @@ std::map<std::string, py::dtype> read_variable_dtypes(
int64_t read_num_elems(
std::string filename,
PyGenericIO::FileIO method=PyGenericIO::FileIO::FileIOPOSIX,
PyGenericIO::MismatchBehavior redistribute=PyGenericIO::MismatchBehavior::MismatchRedistribute
PyGenericIO::MismatchBehavior redistribute=PyGenericIO::MismatchBehavior::MismatchRedistribute,
int eff_rank=-1
) {
PyGenericIO reader(filename, method, redistribute);
return reader.readNumElems();
return reader.readNumElems(eff_rank);
}
int64_t read_total_num_elems(
......@@ -330,10 +357,10 @@ void write_genericio(
#ifdef GENERICIO_NO_MPI
PYBIND11_MODULE(pygio_nompi, m) {
PYBIND11_MODULE(pygio_nompi_impl, m) {
m.doc() = "genericio python module (no MPI support)";
#else // GENERICIO_NO_MPI
PYBIND11_MODULE(pygio, m) {
PYBIND11_MODULE(pygio_impl, m) {
m.doc() = "genericio python module (with MPI support)";
m.def("_init_mpi", [](){
int initialized;
......@@ -369,6 +396,8 @@ PYBIND11_MODULE(pygio, m) {
.def("read_total_num_elems", (uint64_t (PyGenericIO::*)(void))(&PyGenericIO::readTotalNumElems))
.def("read_phys_origin", &PyGenericIO::read_phys_origin)
.def("read_phys_scale", &PyGenericIO::read_phys_scale)
.def("read_dims", &PyGenericIO::read_dims)
.def("read_coords", &PyGenericIO::read_coords)
.def("read", &PyGenericIO::read,
py::arg("variables")=nullptr,
py::kw_only(),
......@@ -377,13 +406,13 @@ PYBIND11_MODULE(pygio, m) {
py::arg("eff_rank")=-1)
.def("read_nranks", (int (PyGenericIO::*)(void))(&PyGenericIO::readNRanks))
.def("read_variable_names", &PyGenericIO::read_variable_names)
.def("has_variable", &PyGenericIO::hasVariable)
.def("read_variable_dtypes", &PyGenericIO::read_variable_dtypes)
.def("get_source_ranks", &PyGenericIO::getSourceRanks)
.def_static("setDefaultShouldCompress", &PyGenericIO::setDefaultShouldCompress)
.def_static("setNaturalDefaultPartition", &PyGenericIO::setNaturalDefaultPartition)
.def_static("setDefaultFileIOType", &PyGenericIO::setDefaultFileIOType)
#ifndef GENERICIO_NO_MPI
.def("rebalance_source_ranks", &PyGenericIO::rebalanceSourceRanks)
.def_static("setCollectiveMPIIOThreshold", &PyGenericIO::setCollectiveMPIIOThreshold)
#endif
;
......@@ -423,11 +452,10 @@ PYBIND11_MODULE(pygio, m) {
collective_stats: bool
if ``True``, aggregate statistics among reading ranks (if using MPI)
rebalance_sourceranks: bool
if ``True``, the code will re-assign the file ranks to the reading
MPI ranks to equalize the data size each rank is reading. Only
relevant if using MPI and more ranks were used to write the file
than reading.
eff_rank: int
if -1, use the MPI rank of the calling process (or redistribute if
redistribute is set to MismatchRedistribute). Otherwise, use the
specified rank (if using MPI).
Returns
-------
......@@ -441,7 +469,6 @@ PYBIND11_MODULE(pygio, m) {
py::arg("redistribute")=PyGenericIO::MismatchBehavior::MismatchRedistribute,
py::arg("print_stats")=true,
py::arg("collective_stats")=true,
py::arg("rebalance_sourceranks")=false,
py::arg("eff_rank")=-1,
py::return_value_policy::move);
......@@ -480,6 +507,11 @@ PYBIND11_MODULE(pygio, m) {
redistribute: PyGenericIO.MismatchBehavior
whether to allow mismatching ranks
eff_rank: int
if -1, use the MPI rank of the calling process (or redistribute if
redistribute is set to MismatchRedistribute). Otherwise, use the
specified rank (if using MPI).
Returns
-------
nlocal: int
......@@ -488,7 +520,8 @@ PYBIND11_MODULE(pygio, m) {
py::arg("filename"),
py::kw_only(),
py::arg("method")=PyGenericIO::FileIO::FileIOPOSIX,
py::arg("redistribute")=PyGenericIO::MismatchBehavior::MismatchRedistribute);
py::arg("redistribute")=PyGenericIO::MismatchBehavior::MismatchRedistribute,
py::arg("eff_rank")=-1);
m.def("read_total_num_elems", &read_total_num_elems, R"Delim(
Read the total number of objects (the number of objects that would be
......@@ -611,6 +644,64 @@ PYBIND11_MODULE(pygio, m) {
py::arg("method")=PyGenericIO::FileIO::FileIOPOSIX,
py::arg("redistribute")=PyGenericIO::MismatchBehavior::MismatchRedistribute);
m.def("read_dims", &read_dims, R"Delim(
Read the topological decomposition of the box that is stored in the GenericIO file
Parameters
----------
filename: str
path to the GenericIO file
method: PyGenericIO.FileIO
file handling method (POSIX)
redistribute: PyGenericIO.MismatchBehavior
whether to allow mismatching ranks
Returns
-------
dims: List[int]
the box decomposition (3 elements long)
)Delim",
py::arg("filename"),
py::kw_only(),
py::arg("method")=PyGenericIO::FileIO::FileIOPOSIX,
py::arg("redistribute")=PyGenericIO::MismatchBehavior::MismatchRedistribute);
m.def("read_coords", &read_coords, R"Delim(
Read the rank coordinates of the source rank that this MPI rank is reading
Parameters
----------
filename: str
path to the GenericIO file
method: PyGenericIO.FileIO
file handling method (POSIX)
redistribute: PyGenericIO.MismatchBehavior
whether to allow mismatching ranks
eff_rank: int
if -1, use the MPI rank of the calling process (or redistribute if
redistribute is set to MismatchRedistribute). Otherwise, use the
specified rank (if using MPI).
Returns
-------
coords: List[int]
the rank coordinates (3 elements long)
Note
----
If eff_rank == -1 and redistribute is enabled, the rank coordinates
will be [0, 0, 0]
)Delim",
py::arg("filename"),
py::kw_only(),
py::arg("method")=PyGenericIO::FileIO::FileIOPOSIX,
py::arg("redistribute")=PyGenericIO::MismatchBehavior::MismatchRedistribute,
py::arg("eff_rank")=-1);
#ifndef GENERICIO_NO_MPI
......
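Taken together, the binding changes drop rebalance_source_ranks, thread an eff_rank argument through read_genericio and read_num_elems, and add read_dims and read_coords. A usage sketch; the file name is hypothetical:

```python
import pygio

fname = "m000.mpicosmo.499"  # hypothetical GenericIO file

# Read source rank 0's block explicitly instead of the calling rank's block.
data = pygio.read_genericio(fname, ["x", "y", "z"], eff_rank=0)

n0 = pygio.read_num_elems(fname, eff_rank=0)   # elements in that block
dims = pygio.read_dims(fname)                  # 3-element box decomposition
coords = pygio.read_coords(fname, eff_rank=0)  # [i, j, k] of source rank 0
```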
from __future__ import print_function
import os
from ._version import __version__
_GENERICIO_NO_MPI = False
if "GENERICIO_NO_MPI" in os.environ:
......@@ -7,22 +8,38 @@ if "GENERICIO_NO_MPI" in os.environ:
if _GENERICIO_NO_MPI:
print(
"WARNING: the pygio module without MPI support has been loaded (due to the GENERICIO_NO_MPI env variable). Writing GenericIO files not supported."
"WARNING: the pygio module without MPI support has been loaded (due to the "
"GENERICIO_NO_MPI env variable). Writing GenericIO files not supported."
)
from .pygio_nompi import *
from . import pygio_nompi_impl as _pygio_impl
else:
# try to load the MPI library (or the no-mpi library, in case of missing MPI during compilation)
from . import pygio as _pygio
# try to load the MPI library
try:
_pygio._init_mpi()
except:
from . import pygio_impl as _pygio_impl
except ImportError:
print(
"WARNING: The pygio module has been compiled without MPI support. Writing GenericIO files not supported."
"WARNING: The pygio module has been compiled without MPI support. Writing "
"GenericIO files not supported."
)
_GENERICIO_NO_MPI = True
from .pygio import *
from . import pygio_nompi_impl as _pygio_impl
else:
# MPI version successfully loaded -> init MPI
_pygio_impl._init_mpi()
PyGenericIO = _pygio_impl.PyGenericIO
read_genericio = _pygio_impl.read_genericio
inspect_genericio = _pygio_impl.inspect_genericio
read_num_elems = _pygio_impl.read_num_elems
read_total_num_elems = _pygio_impl.read_total_num_elems
read_variable_names = _pygio_impl.read_variable_names
read_variable_dtypes = _pygio_impl.read_variable_dtypes
read_phys_scale = _pygio_impl.read_phys_scale
read_phys_origin = _pygio_impl.read_phys_origin
if not _GENERICIO_NO_MPI:
write_genericio = _pygio_impl.write_genericio
# move some ENUMs and static functions up to the module namespace
......
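With this loader, pygio imports pygio_impl when MPI is available (calling _init_mpi on success), falls back to pygio_nompi_impl otherwise, and re-exports a fixed set of names instead of a star import. To force the no-MPI extension, e.g. on a login node, set the environment variable before the first import:

```python
import os

# Must be set before `import pygio`; the package checks os.environ at
# import time and then loads pygio_nompi_impl instead of pygio_impl.
os.environ["GENERICIO_NO_MPI"] = "1"

import pygio  # warns that writing GenericIO files is not supported
```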
......@@ -84,8 +84,10 @@ class CMakeBuild(build_ext):
setup(
name="pygio",
# version=versioneer.get_version(),
version=0.1,
use_scm_version={
"write_to": "python/pygio/_version.py",
"write_to_template": '__version__ = "{version}"\n',
},
author="",
author_email="",
ext_package="pygio",
......
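Since the version is now derived by setuptools_scm from the git tag at build time and written to python/pygio/_version.py (re-exported by the package's __init__.py), the installed module reports it directly; the value shown is illustrative:

```python
import pygio

print(pygio.__version__)  # e.g. "0.2.dev5+g1a2b3c4", depending on the checkout
```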
......@@ -113,7 +113,7 @@ target_include_directories(blosc INTERFACE
blosc
SZ/sz/include
)
target_compile_definitions(blosc PRIVATE HAVE_LZ4 HAVE_SNAPPY HAVE_ZLIB HAVE_ZSTD)
target_compile_definitions(blosc PRIVATE HAVE_LZ4 HAVE_SNAPPY HAVE_ZLIB HAVE_ZSTD HAVE_UNISTD_H)
if(OpenMP_FOUND)
target_link_libraries(blosc PRIVATE OpenMP::OpenMP_C OpenMP::OpenMP_CXX)
endif()
\ No newline at end of file
endif()
......@@ -10,8 +10,8 @@
static std::string get_veloc_cfg() {
char *ptr = getenv("GENERICIO_USE_VELOC");
if (ptr == NULL)
FATAL("GENERICIO_USE_VELOC environment variable missing, must point to a valid VELOC configuration file");
return std::string(ptr);
return "";
return ptr;
}
namespace gio {
......@@ -40,8 +40,11 @@ void GenericFileIO_VELOC::write(const void *buf, size_t count, off_t offset, con
throw std::runtime_error("Unable to write " + D + " to file: " + FileName + ": " + strerror(errno));
}
void GenericFileIO_VELOC::flush() {
veloc::cached_file_t::flush();
bool GenericFileIO_VELOC::flush() {
return veloc::cached_file_t::flush();
}
void GenericFileIO_VELOC::shutdown() {
veloc::cached_file_t::shutdown();
}
......
......@@ -15,7 +15,8 @@ public:
void setSize(size_t sz);
void read(void *buf, size_t count, off_t offset, const std::string &D);
void write(const void *buf, size_t count, off_t offset, const std::string &D);
static void flush();
static bool flush();
static void shutdown();
protected:
int FH;
......
scratch = /tmp/scratch
persistent = /tmp/persistent
mode = async
\ No newline at end of file