diff --git a/CMakeLists.txt b/CMakeLists.txt
index 7736e298cf617464c2bc6c7b29b376de24fc6cca..d9b1f0cac2f2f8bc0acac79813af4512dad4d374 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -11,8 +11,6 @@ endif()
 set(CMAKE_CXX_STANDARD 14)
 set(CMAKE_CXX_STANDARD_REQUIRED TRUE)
 
-set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -g")
-
 # PIC for everything (can be made target specific)
 set(CMAKE_POSITION_INDEPENDENT_CODE ON)
 
diff --git a/GenericIO.cxx b/GenericIO.cxx
index 4082cfebc3eae5ded215b96032f6199c0005140e..de66c46af8d7c0e3f20387a2ffd6bcb6bb6f05aa 100644
--- a/GenericIO.cxx
+++ b/GenericIO.cxx
@@ -57,6 +57,7 @@ extern "C" {
 #include <cassert>
 #include <cstddef>
 #include <cstring>
+#include <tuple>
 
 #ifndef GENERICIO_NO_MPI
 #include <ctime>
@@ -1787,6 +1788,39 @@ void GenericIO::readData(int EffRank, size_t RowOffset, int Rank,
   }
 }
 
+#ifndef GENERICIO_NO_MPI
+void GenericIO::rebalanceSourceRanks() {
+  if(Redistributing) {
+    int NRanks, Rank;
+    MPI_Comm_rank(Comm, &Rank);
+    MPI_Comm_size(Comm, &NRanks);
+
+    std::vector<std::pair<int, size_t>> rank_sizes;
+    std::vector<std::tuple<int, size_t, std::vector<int>>> new_source_ranks;
+    for(int i=0; i<NRanks; ++i) {
+      new_source_ranks.emplace_back(std::make_tuple(i, 0ul, std::vector<int>()));
+    }
+    for(int i=0; i<readNRanks(); ++i) {
+      rank_sizes.emplace_back(std::make_pair(i, readNumElems(i)));
+    }
+    std::sort(rank_sizes.begin(), rank_sizes.end(), [](const auto& p1, const auto& p2){ return p1.second > p2.second; });
+    // Distribute ranks
+    for(size_t i=0; i<rank_sizes.size(); ++i) {
+      // Assign to first rank
+      std::get<2>(new_source_ranks[0]).push_back(rank_sizes[i].first);
+      std::get<1>(new_source_ranks[0]) += rank_sizes[i].second;
+      // Reorder ranks (could be optimized since array already sorted)
+      std::stable_sort(new_source_ranks.begin(), new_source_ranks.end(), [](const auto& s1, const auto& s2){ return std::get<1>(s1) < std::get<1>(s2); });
+    }
+    // copy own array
+    SourceRanks.resize(0);
+    std::copy(std::get<2>(new_source_ranks[Rank]).begin(), std::get<2>(new_source_ranks[Rank]).end(), std::back_inserter(SourceRanks));
+  } else {
+    std::cerr << "rebalancing source ranks has no effect when Redistributing==false" << std::endl;
+  }
+}
+#endif
+
 void GenericIO::getVariableInfo(vector<VariableInfo> &VI) {
   if (FH.isBigEndian())
     getVariableInfo<true>(VI);
diff --git a/GenericIO.h b/GenericIO.h
index 05a71f69926112b2d80fd54ae9acceed626fc1a2..37cbe77d4e31b027966a1e40753446def1323178 100644
--- a/GenericIO.h
+++ b/GenericIO.h
@@ -448,7 +448,9 @@ public:
   int readGlobalRankNumber(int EffRank = -1);
 
   void readData(int EffRank = -1, bool PrintStats = true, bool CollStats = true);
-
+#ifndef GENERICIO_NO_MPI
+  void rebalanceSourceRanks();
+#endif
   void getSourceRanks(std::vector<int> &SR);
 
   void close() {
diff --git a/new_python/genericio.cpp b/new_python/genericio.cpp
index dbb419ce5a68a38a56e5df09995fb76281502bd5..e8ad1d960a7a85a9b13e36623f35c8d8315dc23a 100644
--- a/new_python/genericio.cpp
+++ b/new_python/genericio.cpp
@@ -136,8 +136,10 @@ private:
   std::vector<gio::GenericIO::VariableInfo> variables;
 };
 
-std::map<std::string, py::array> read_genericio(std::string filename, std::optional<std::vector<std::string>> var_names, PyGenericIO::FileIO method=PyGenericIO::FileIO::FileIOPOSIX, PyGenericIO::MismatchBehavior redistribute=PyGenericIO::MismatchBehavior::MismatchRedistribute) {
+std::map<std::string, py::array> read_genericio(std::string filename, std::optional<std::vector<std::string>> var_names, PyGenericIO::FileIO method=PyGenericIO::FileIO::FileIOPOSIX, PyGenericIO::MismatchBehavior redistribute=PyGenericIO::MismatchBehavior::MismatchRedistribute, bool rebalance_source_ranks=false) {
   PyGenericIO reader(filename, method, redistribute);
+  if(rebalance_source_ranks)
+    reader.rebalanceSourceRanks();
   return reader.read(var_names);
 }
 
@@ -223,7 +225,9 @@ PYBIND11_MODULE(pygio, m) {
       .def("read_total_num_elems", (uint64_t (PyGenericIO::*)(void))(&PyGenericIO::readTotalNumElems))
       .def("read_phys_origin", &PyGenericIO::read_phys_origin)
       .def("read_phys_scale", &PyGenericIO::read_phys_scale)
-      .def("read", &PyGenericIO::read, py::arg("variables")=nullptr);
+      .def("read", &PyGenericIO::read, py::arg("variables")=nullptr)
+      .def("get_source_ranks", &PyGenericIO::getSourceRanks)
+      .def("rebalance_source_ranks", &PyGenericIO::rebalanceSourceRanks);
 
   py::class_<gio::GenericIO::VariableInfo>(pyGenericIO, "VariableInfo")
       .def_readonly("name", &gio::GenericIO::VariableInfo::Name)
@@ -234,7 +238,7 @@ PYBIND11_MODULE(pygio, m) {
         return std::string("<PyGenericIO.VariableInfo type=") +
                (vi.IsFloat ? "float" : "int") + " name='" + vi.Name + "'>";
       });
-  m.def("read_genericio", &read_genericio, py::arg("filename"), py::arg("variables")=nullptr, py::arg("method")=PyGenericIO::FileIO::FileIOPOSIX, py::arg("redistribute")=PyGenericIO::MismatchBehavior::MismatchRedistribute, py::return_value_policy::move);
+  m.def("read_genericio", &read_genericio, py::arg("filename"), py::arg("variables")=nullptr, py::arg("method")=PyGenericIO::FileIO::FileIOPOSIX, py::arg("redistribute")=PyGenericIO::MismatchBehavior::MismatchRedistribute, py::arg("rebalance_sourceranks")=false, py::return_value_policy::move);
   m.def("inspect_genericio", &inspect_genericio, py::arg("filename"), py::arg("method")=PyGenericIO::FileIO::FileIOPOSIX, py::arg("redistribute")=PyGenericIO::MismatchBehavior::MismatchRedistribute);
 #ifndef GENERICIO_NO_MPI
   m.def("write_genericio", &write_genericio, py::arg("filename"), py::arg("variables"), py::arg("phys_scale"), py::arg("phys_origin") = std::array<double, 3>({0., 0., 0.}), py::arg("method")=PyGenericIO::FileIO::FileIOPOSIX);