diff --git a/docs/Makefile b/docs/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..d4bb2cbb9eddb1bb1b4f366623044af8e4830919
--- /dev/null
+++ b/docs/Makefile
@@ -0,0 +1,20 @@
+# Minimal makefile for Sphinx documentation: every goal is handed to
+# sphinx-build through its -M ("make mode") option.
+
+# You can set these variables from the command line, and also
+# from the environment for the first two.
+SPHINXOPTS    ?=
+SPHINXBUILD   ?= sphinx-build
+SOURCEDIR     = .
+BUILDDIR      = _build
+
+# Put it first so that "make" without argument is like "make help".
+help:
+	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
+
+.PHONY: help Makefile
+
+# Catch-all target: route all unknown targets to Sphinx using the new
+# "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
+%: Makefile
+	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
diff --git a/docs/_static/css/custom.css b/docs/_static/css/custom.css
new file mode 100644
index 0000000000000000000000000000000000000000..00f75f79c4555994df593345c9662fb783b0941d
--- /dev/null
+++ b/docs/_static/css/custom.css
@@ -0,0 +1,7 @@
+table.full-width {
+    width: 100%;
+}
+
+table.full-width td {
+    white-space: normal !important;
+}
\ No newline at end of file
diff --git a/docs/conf.py b/docs/conf.py
new file mode 100644
index 0000000000000000000000000000000000000000..b2991e171b723915a45985ae736b5cfffccb874b
--- /dev/null
+++ b/docs/conf.py
@@ -0,0 +1,99 @@
+# Configuration file for the Sphinx documentation builder.
+#
+# This file only contains a selection of the most common options. For a full
+# list see the documentation:
+# https://www.sphinx-doc.org/en/master/usage/configuration.html
+
+# -- Path setup --------------------------------------------------------------
+
+# If extensions (or modules to document with autodoc) are in another directory,
+# add these directories to sys.path here. If the directory is relative to the
+# documentation root, use os.path.abspath to make it absolute, like shown here.
+#
+import os, sys, shutil, subprocess
+import re
+from pathlib import Path
+
+DIR = Path(__file__).parent.resolve()
+
+
+# -- Project information -----------------------------------------------------
+
+project = "GenericIO"
+copyright = "2021, Hal Finkel, et al."
+author = "Hal Finkel, et al."
+
+
+# -- General configuration ---------------------------------------------------
+
+# Add any Sphinx extension module names here, as strings. They can be
+# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
+# ones.
+extensions = [
+    "sphinx.ext.autodoc",
+    "sphinx.ext.napoleon",
+    "sphinx.ext.autodoc.typehints",
+    "sphinx.ext.autosummary",
+    "sphinx.ext.viewcode",
+    "sphinx.ext.autosectionlabel",
+    "myst_parser",
+]
+
+autosectionlabel_prefix_document = True
+
+autodoc_typehints = "description"
+add_module_names = False
+
+autosummary_generate = False
+napoleon_numpy_docstring = True
+napoleon_use_admonition_for_examples = True
+napoleon_use_admonition_for_notes = True
+
+source_suffix = {".rst": "restructuredtext", ".md": "markdown"}
+
+# Add any paths that contain templates here, relative to this directory.
+templates_path = ["_templates"]
+
+# List of patterns, relative to source directory, that match files and
+# directories to ignore when looking for source files.
+# This pattern also affects html_static_path and html_extra_path.
+exclude_patterns = ["_build", "Thumbs.db", ".DS_Store", "README.md"]
+
+
+# -- Options for HTML output -------------------------------------------------
+
+# The theme to use for HTML and HTML Help pages.  See the documentation for
+# a list of builtin themes.
+#
+html_theme = "sphinx_rtd_theme"
+html_theme_options = {"prev_next_buttons_location": None}
+
+# Add any paths that contain custom static files (such as style sheets) here,
+# relative to this directory. They are copied after the builtin static files,
+# so a file named "default.css" will overwrite the builtin "default.css".
+html_static_path = ["_static"]
+
+
+def prepare(app):
+    with open(DIR.parent / "README.md") as f:
+        contents = f.read()
+
+    # Filter out section titles for index.rst for LaTeX
+    if app.builder.name == "latex":
+        contents = re.sub(r"^(.*)\n[-~]{3,}$", r"**\1**", contents, flags=re.MULTILINE)
+
+    with open(DIR / "README.md", "w") as f:
+        f.write(contents)
+
+
+def clean_up(app, exception):
+    (DIR / "README.md").unlink()
+
+
+def setup(app):
+    app.add_css_file("css/custom.css")
+    # Copy the readme in
+    app.connect("builder-inited", prepare)
+
+    # Clean up the generated readme
+    app.connect("build-finished", clean_up)
diff --git a/docs/index.rst b/docs/index.rst
new file mode 100644
index 0000000000000000000000000000000000000000..d8fadfa89764e907ab30ba43fa23c84ed2d5b4f6
--- /dev/null
+++ b/docs/index.rst
@@ -0,0 +1,33 @@
+.. GenericIO documentation master file, created by
+   sphinx-quickstart on Fri Dec 10 09:52:19 2021.
+   You can adapt this file completely to your liking, but it should at least
+   contain the root `toctree` directive.
+
+.. only:: latex
+
+   GenericIO Documentation
+   =======================
+
+.. include:: README.md
+   :parser: myst_parser.sphinx_
+
+
+.. only:: latex
+
+   .. toctree::
+      :maxdepth: 2
+
+      self
+
+   .. toctree::
+      :caption: Python Interface
+      :maxdepth: 2
+
+      python/readwrite
+      python/class_interface
+      python/mpi
+
+   .. toctree::
+      :caption: C++ Interface
+      :maxdepth: 2
+
diff --git a/docs/make.bat b/docs/make.bat
new file mode 100644
index 0000000000000000000000000000000000000000..2119f51099bf37e4fdb6071dce9f451ea44c62dd
--- /dev/null
+++ b/docs/make.bat
@@ -0,0 +1,35 @@
+@ECHO OFF
+
+pushd %~dp0
+
+REM Command file for Sphinx documentation
+
+if "%SPHINXBUILD%" == "" (
+	set SPHINXBUILD=sphinx-build
+)
+set SOURCEDIR=.
+set BUILDDIR=_build
+
+if "%1" == "" goto help
+REM Probe run with output discarded; errorlevel 9009 is reported below as "command not found".
+%SPHINXBUILD% >NUL 2>NUL
+if errorlevel 9009 (
+	echo.
+	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
+	echo.installed, then set the SPHINXBUILD environment variable to point
+	echo.to the full path of the 'sphinx-build' executable. Alternatively you
+	echo.may add the Sphinx directory to PATH.
+	echo.
+	echo.If you don't have Sphinx installed, grab it from
+	echo.http://sphinx-doc.org/
+	exit /b 1
+)
+REM Forward the requested target (first argument) to sphinx-build via -M.
+%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
+goto end
+
+:help
+%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
+
+:end
+popd
diff --git a/docs/python/class_interface.rst b/docs/python/class_interface.rst
new file mode 100644
index 0000000000000000000000000000000000000000..6372b1233001818b6362c114d3b97837e4635e99
--- /dev/null
+++ b/docs/python/class_interface.rst
@@ -0,0 +1,32 @@
+The Python Class Interface
+==========================
+.. currentmodule:: pygio
+
+.. code-block:: python
+
+   # instantiate a GenericIO class
+   gio_file = pygio.PyGenericIO("generic_io_file")
+
+   # inspect (prints to python stdout, also works in notebook)
+   gio_file.inspect()
+
+   # get variables
+   gio_vars = gio_file.get_variables()
+
+   # print variable names
+   for var in gio_vars:
+       print(var.name, var.size, var.element_size, var.is_float)
+
+   # read data
+   data_all = gio_file.read()
+   data_partial = gio_file.read(["x", "y", "z"])
+
+Further methods and members of GenericIO can easily be interfaced by editing
+``python/genericio.cpp``.
+
+
+References
+----------
+
+.. autoclass:: PyGenericIO
+   :members:
\ No newline at end of file
diff --git a/docs/python/mpi.rst b/docs/python/mpi.rst
new file mode 100644
index 0000000000000000000000000000000000000000..721b7efe95d4ab5e473f687c06290e776e1115cc
--- /dev/null
+++ b/docs/python/mpi.rst
@@ -0,0 +1,2 @@
+Using GenericIO with MPI
+========================
\ No newline at end of file
diff --git a/docs/python/readwrite.rst b/docs/python/readwrite.rst
new file mode 100644
index 0000000000000000000000000000000000000000..409f31f5066e75f9fa089e93f227458e7d7ab27f
--- /dev/null
+++ b/docs/python/readwrite.rst
@@ -0,0 +1,58 @@
+Reading and Writing Data
+========================
+
+.. currentmodule:: pygio
+
+.. code-block:: python
+
+   import numpy as np
+   import pygio
+
+   # inspect file
+   pygio.inspect_genericio("generic_io_file")
+
+
+.. code-block:: python
+
+   # read all variables
+   data = pygio.read_genericio("generic_io_file")
+
+.. code-block:: python
+
+   # read only a subset of variables
+   data_partial = pygio.read_genericio("generic_io_file", ["x", "y", "z"])
+   data_x = data_partial["x"]
+
+.. code-block:: python
+
+   data = {
+       "x": np.ones(100),
+       "y": np.ones(100)
+   }
+
+   # write data to file
+   pygio.write_genericio("new_generic_io_file",
+       data,
+       phys_scale = [1, 1, 1],
+       phys_origin = [0, 0, 0]
+   )
+
+
+References
+----------
+
+.. autofunction:: inspect_genericio
+
+.. autofunction:: read_genericio
+
+.. autofunction:: read_num_elems
+
+.. autofunction:: read_variable_names
+
+.. autofunction:: read_variable_dtypes
+
+.. autofunction:: read_phys_scale
+
+.. autofunction:: read_phys_origin
+
+.. autofunction:: write_genericio
\ No newline at end of file
diff --git a/python/genericio.cpp b/python/genericio.cpp
index a2b1e0c6ef4e46ab37354c1c27e076d6d57bf3fd..34aab983d52f8fad612f9bd90d91ba1ad86c2b8d 100644
--- a/python/genericio.cpp
+++ b/python/genericio.cpp
@@ -65,6 +65,35 @@ public:
     }
   }
 
+  std::vector<std::string> read_variable_names() {
+    // Collect the Name of every entry in `variables`, in container order.
+    std::vector<std::string> names;
+    names.reserve(variables.size());
+    for(const auto& info: variables) names.push_back(info.Name);
+    return names;
+  }
+
+  std::map<std::string, py::dtype> read_variable_dtypes() {
+    // Maps each entry of `variables` to a numpy dtype chosen from its
+    // IsFloat flag and ElementSize; any other combination is rejected.
+    std::map<std::string, py::dtype> dtypes;
+    for(const auto& info: variables) {
+      const char* code = nullptr;
+      if(info.IsFloat) {
+        if(info.ElementSize == 4) code = "f4";
+        else if(info.ElementSize == 8) code = "f8";
+      } else {
+        if(info.ElementSize == 4) code = "i4";
+        else if(info.ElementSize == 8) code = "i8";
+        else if(info.ElementSize == 2) code = "u2";
+      }
+      if(code == nullptr)
+        throw std::runtime_error(std::string("Unknown data type in GenericIO for variable ") + info.Name);
+      dtypes[info.Name] = py::dtype(code);
+    }
+    return dtypes;
+  }
+
   std::map<std::string, py::array> read(
       std::optional<std::vector<std::string>> var_names,
       bool print_stats=true,
@@ -83,10 +112,7 @@ public:
 
     // if no argument, read all
     if(!var_names.has_value()) {
-      var_names.emplace(std::vector<std::string>());
-      for(const auto& v: variables) {
-        var_names->push_back(v.Name);
-      }
+      var_names.emplace(read_variable_names());
     }
 
     clearVariables();
@@ -116,6 +142,8 @@ public:
         } else if(!(*varp).IsFloat && (*varp).ElementSize == 2) {
           result[var_name] = py::array_t<uint16_t>(readsize);
           addVariable(*varp, result[var_name].mutable_data(), gio::GenericIO::VarHasExtraSpace);
+        } else {
+          throw std::runtime_error(std::string("Unknown data type in GenericIO for variable ") + var_name);
         }
       }
     }
@@ -133,7 +161,7 @@ public:
     return result;
   }
 
-  const std::vector<gio::GenericIO::VariableInfo> &get_variables() {
+  const std::vector<VariableInfo> &get_variables() {
     return variables;
   }
 
@@ -157,7 +185,7 @@ public:
 
 private:
   int num_ranks;
-  std::vector<gio::GenericIO::VariableInfo> variables;
+  std::vector<VariableInfo> variables;
 };
 
 std::map<std::string, py::array> read_genericio(
@@ -187,6 +215,60 @@ void inspect_genericio(
   reader.inspect();
 }
 
+std::array<double, 3> read_phys_scale(
+    std::string filename,
+    PyGenericIO::FileIO method=PyGenericIO::FileIO::FileIOPOSIX,
+    PyGenericIO::MismatchBehavior redistribute=PyGenericIO::MismatchBehavior::MismatchRedistribute
+) {
+  // Open a short-lived reader on `filename` and forward its read_phys_scale() result.
+  return PyGenericIO(filename, method, redistribute).read_phys_scale();
+}
+
+std::array<double, 3> read_phys_origin(
+    std::string filename,
+    PyGenericIO::FileIO method=PyGenericIO::FileIO::FileIOPOSIX,
+    PyGenericIO::MismatchBehavior redistribute=PyGenericIO::MismatchBehavior::MismatchRedistribute
+) {
+  // Open a short-lived reader on `filename` and forward its read_phys_origin() result.
+  return PyGenericIO(filename, method, redistribute).read_phys_origin();
+}
+
+std::vector<std::string> read_variable_names(
+    std::string filename,
+    PyGenericIO::FileIO method=PyGenericIO::FileIO::FileIOPOSIX,
+    PyGenericIO::MismatchBehavior redistribute=PyGenericIO::MismatchBehavior::MismatchRedistribute
+) {
+  // Open a short-lived reader on `filename` and forward its read_variable_names() result.
+  return PyGenericIO(filename, method, redistribute).read_variable_names();
+}
+
+std::map<std::string, py::dtype> read_variable_dtypes(
+    std::string filename,
+    PyGenericIO::FileIO method=PyGenericIO::FileIO::FileIOPOSIX,
+    PyGenericIO::MismatchBehavior redistribute=PyGenericIO::MismatchBehavior::MismatchRedistribute
+) {
+  // Open a short-lived reader on `filename` and forward its read_variable_dtypes() result.
+  return PyGenericIO(filename, method, redistribute).read_variable_dtypes();
+}
+
+int64_t read_num_elems(
+    std::string filename,
+    PyGenericIO::FileIO method=PyGenericIO::FileIO::FileIOPOSIX,
+    PyGenericIO::MismatchBehavior redistribute=PyGenericIO::MismatchBehavior::MismatchRedistribute
+) {
+  // Open a short-lived reader on `filename` and forward its readNumElems() result.
+  return PyGenericIO(filename, method, redistribute).readNumElems();
+}
+
+int64_t read_total_num_elems(
+    std::string filename,
+    PyGenericIO::FileIO method=PyGenericIO::FileIO::FileIOPOSIX,
+    PyGenericIO::MismatchBehavior redistribute=PyGenericIO::MismatchBehavior::MismatchRedistribute
+) {
+  // Open a short-lived reader on `filename` and forward its readTotalNumElems() result.
+  return PyGenericIO(filename, method, redistribute).readTotalNumElems();
+}
+
 #ifndef GENERICIO_NO_MPI
 void write_genericio(
     std::string filename,
@@ -283,8 +365,11 @@ PYBIND11_MODULE(pygio, m) {
         py::arg("print_stats")=true,
         py::arg("collective_stats")=true,
         py::arg("eff_rank")=-1)
-      .def("get_source_ranks", &PyGenericIO::get_source_ranks)
       .def("read_nranks", (int (PyGenericIO::*)(void))(&PyGenericIO::readNRanks))
+      .def("read_variable_names", &PyGenericIO::read_variable_names)
+      .def("read_variable_dtypes", &PyGenericIO::read_variable_dtypes)
+      .def("read", &PyGenericIO::read, py::arg("variables")=nullptr, py::arg("print_stats")=true, py::arg("collective_stats")=true)
+      .def("get_source_ranks", &PyGenericIO::getSourceRanks)
 #ifndef GENERICIO_NO_MPI
       .def("rebalance_source_ranks", &PyGenericIO::rebalanceSourceRanks)
 #endif
@@ -300,7 +385,42 @@ PYBIND11_MODULE(pygio, m) {
                (vi.IsFloat ? "float" : "int") + " name='" + vi.Name + "'>";
       });
 
-  m.def("read_genericio", &read_genericio,
+
+  m.def("read_genericio", &read_genericio, R"Delim(
+        Read data from a GenericIO file
+
+        Parameters
+        ----------
+        filename: str
+            path to the GenericIO file
+
+        variables: List[str]
+            A list of variable names that should be read. If ``None``, all
+            variables contained in the file will be read
+
+        method: PyGenericIO.FileIO
+            file handling method (POSIX/MPI)
+
+        redistribute: PyGenericIO.MismatchBehavior
+            whether to allow mismatching ranks
+
+        print_stats: bool
+            if ``True``, print throughput statistics after reading
+
+        collective_stats: bool
+            if ``True``, aggregate statistics among reading ranks (if using MPI)
+
+        rebalance_sourceranks: bool
+            if ``True``, the code will re-assign the file ranks to the reading
+            MPI ranks to equalize the data size each rank is reading. Only
+            relevant if using MPI and more ranks were used to write the file
+            than reading.
+
+        Returns
+        -------
+        data: Mapping[str, np.ndarray]
+
+  )Delim",
         py::arg("filename"),
         py::arg("variables")=nullptr,
         py::kw_only(),
@@ -312,16 +432,199 @@ PYBIND11_MODULE(pygio, m) {
         py::arg("eff_rank")=-1,
         py::return_value_policy::move);
 
-  m.def("inspect_genericio", &inspect_genericio,
+
+  m.def("inspect_genericio", &inspect_genericio, R"Delim(
+        Print a summary of variables and types defined in the GenericIO file
+
+        Parameters
+        ----------
+        filename: str
+            path to the GenericIO file
+
+        method: PyGenericIO.FileIO
+            file handling method (POSIX/MPI)
+
+        redistribute: PyGenericIO.MismatchBehavior
+            whether to allow mismatching ranks
+  )Delim",
+        py::arg("filename"),
+        py::kw_only(),
+        py::arg("method")=PyGenericIO::FileIO::FileIOPOSIX,
+        py::arg("redistribute")=PyGenericIO::MismatchBehavior::MismatchRedistribute);
+
+  m.def("read_num_elems", &read_num_elems, R"Delim(
+        Read the (local) number of objects (the number of objects that would be
+        read by this rank when calling :func:`read_genericio`)
+
+        Parameters
+        ----------
+        filename: str
+            path to the GenericIO file
+
+        method: PyGenericIO.FileIO
+            file handling method (POSIX/MPI)
+
+        redistribute: PyGenericIO.MismatchBehavior
+            whether to allow mismatching ranks
+
+        Returns
+        -------
+        nlocal: int
+            the number of objects assigned to this rank
+  )Delim",
+        py::arg("filename"),
+        py::kw_only(),
+        py::arg("method")=PyGenericIO::FileIO::FileIOPOSIX,
+        py::arg("redistribute")=PyGenericIO::MismatchBehavior::MismatchRedistribute);
+
+  m.def("read_total_num_elems", &read_total_num_elems, R"Delim(
+        Read the total number of objects (the number of objects that would be
+        read by all ranks combined when calling :func:`read_genericio`)
+
+        Parameters
+        ----------
+        filename: str
+            path to the GenericIO file
+
+        method: PyGenericIO.FileIO
+            file handling method (POSIX/MPI)
+
+        redistribute: PyGenericIO.MismatchBehavior
+            whether to allow mismatching ranks
+
+        Returns
+        -------
+        ntotal: int
+            the total number of objects stored in the GenericIO file
+  )Delim",
         py::arg("filename"),
         py::kw_only(),
         py::arg("method")=PyGenericIO::FileIO::FileIOPOSIX,
         py::arg("redistribute")=PyGenericIO::MismatchBehavior::MismatchRedistribute);
 
+  m.def("read_variable_names", &read_variable_names, R"Delim(
+        Get a list of variable names stored in the GenericIO file
+
+        Parameters
+        ----------
+        filename: str
+            path to the GenericIO file
+
+        method: PyGenericIO.FileIO
+            file handling method (POSIX/MPI)
+
+        redistribute: PyGenericIO.MismatchBehavior
+            whether to allow mismatching ranks
+
+        Returns
+        -------
+        variable_names: List[str]
+            the list of variable names defined in the GenericIO file
+  )Delim",
+        py::arg("filename"),
+        py::kw_only(),
+        py::arg("method")=PyGenericIO::FileIO::FileIOPOSIX,
+        py::arg("redistribute")=PyGenericIO::MismatchBehavior::MismatchRedistribute);
+
+  m.def("read_variable_dtypes", &read_variable_dtypes, R"Delim(
+        Get a dictionary of dtypes mapped to the variable names
+
+        Parameters
+        ----------
+        filename: str
+            path to the GenericIO file
+
+        method: PyGenericIO.FileIO
+            file handling method (POSIX/MPI)
+
+        redistribute: PyGenericIO.MismatchBehavior
+            whether to allow mismatching ranks
+
+        Returns
+        -------
+        variable_dtypes: Mapping[str, np.dtype]
+            a map ``variable_name -> dtype`` for each variable in the GenericIO file
+  )Delim",
+        py::arg("filename"),
+        py::kw_only(),
+        py::arg("method")=PyGenericIO::FileIO::FileIOPOSIX,
+        py::arg("redistribute")=PyGenericIO::MismatchBehavior::MismatchRedistribute);
+
+  m.def("read_phys_scale", &read_phys_scale, R"Delim(
+        Read the box size that is stored in the GenericIO file
+
+        Parameters
+        ----------
+        filename: str
+            path to the GenericIO file
+
+        method: PyGenericIO.FileIO
+            file handling method (POSIX/MPI)
+
+        redistribute: PyGenericIO.MismatchBehavior
+            whether to allow mismatching ranks
+
+        Returns
+        -------
+        phys_scale: List[float]
+            the box length for each dimension (3 elements long)
+  )Delim",
+        py::arg("filename"),
+        py::kw_only(),
+        py::arg("method")=PyGenericIO::FileIO::FileIOPOSIX,
+        py::arg("redistribute")=PyGenericIO::MismatchBehavior::MismatchRedistribute);
+
+  m.def("read_phys_origin", &read_phys_origin, R"Delim(
+        Read the origin / reference point of the box that is stored in the GenericIO file
+
+        Parameters
+        ----------
+        filename: str
+            path to the GenericIO file
+
+        method: PyGenericIO.FileIO
+            file handling method (POSIX/MPI)
+
+        redistribute: PyGenericIO.MismatchBehavior
+            whether to allow mismatching ranks
+
+        Returns
+        -------
+        phys_origin: List[float]
+            the box origin coordinates (3 elements long)
+  )Delim",
+        py::arg("filename"),
+        py::kw_only(),
+        py::arg("method")=PyGenericIO::FileIO::FileIOPOSIX,
+        py::arg("redistribute")=PyGenericIO::MismatchBehavior::MismatchRedistribute);
+
+
+
 #ifndef GENERICIO_NO_MPI
-  m.def("write_genericio", &write_genericio,
+  m.def("write_genericio", &write_genericio, R"Delim(
+        Write data as a GenericIO file
+
+        Parameters
+        ----------
+        filename: str
+            path to the GenericIO file
+
+        data: Mapping[str, np.ndarray]
+            a dictionary, with all items being 1-dimensional numpy arrays of
+            the same length. Currently, only float32, float64, int32, int64 and
+            uint16 data types are supported
+
+        phys_scale: List[float]
+            the physical size of the box that the data belongs to (3 elements)
+
+        phys_origin: List[float]
+            the origin coordinates of the box that the data belongs to (3 elements)
+
+        method: PyGenericIO.FileIO
+            file handling method (POSIX/MPI)
+  )Delim",
         py::arg("filename"),
-        py::arg("variables"),
+        py::arg("data"),
         py::arg("phys_scale"),
         py::arg("phys_origin") = std::array<double, 3>({0., 0., 0.}),
         py::kw_only(),