diff --git a/src/include/fields3d.inl b/src/include/fields3d.inl index 3b86747607..42e32514b8 100644 --- a/src/include/fields3d.inl +++ b/src/include/fields3d.inl @@ -3,6 +3,7 @@ #include "io_common.h" #include "kg/io.h" +#include "fields3d.hxx" // ====================================================================== // Variable diff --git a/src/include/output_fields.hxx b/src/include/output_fields.hxx index 01fa891eec..1a39679d47 100644 --- a/src/include/output_fields.hxx +++ b/src/include/output_fields.hxx @@ -184,9 +184,8 @@ public: prof_stop(pr_eval); if (do_pfield) { - if (!opened_io_pfd_) { + if (!io_pfd_) { io_pfd_.open("pfd" + GetItem::suffix(), pfield.data_dir); - opened_io_pfd_ = true; } prof_start(pr_pfd); @@ -209,9 +208,8 @@ public: } if (do_tfield) { - if (!opened_io_tfd_) { + if (!io_tfd_) { io_tfd_.open("tfd" + GetItem::suffix(), tfield.data_dir); - opened_io_tfd_ = true; } prof_start(pr_tfd); @@ -236,8 +234,6 @@ public: private: Writer io_pfd_; Writer io_tfd_; - bool opened_io_pfd_ = false; - bool opened_io_tfd_ = false; std::unique_ptr tfd_; int naccum_ = 0; }; diff --git a/src/include/output_particles.hxx b/src/include/output_particles.hxx index 0774726a29..6a54a3946c 100644 --- a/src/include/output_particles.hxx +++ b/src/include/output_particles.hxx @@ -5,14 +5,14 @@ struct OutputParticlesParams { - const char* data_dir; - const char* basename; + const char* data_dir = "."; + const char* basename = "prt"; int every_step; Int3 lo; Int3 hi; bool use_independent_io; - const char* romio_cb_write; - const char* romio_ds_write; + const char* romio_cb_write = nullptr; + const char* romio_ds_write = nullptr; }; // ====================================================================== diff --git a/src/libpsc/psc_output_particles/output_particles_adios2_impl.hxx b/src/libpsc/psc_output_particles/output_particles_adios2_impl.hxx new file mode 100644 index 0000000000..2b8b382e19 --- /dev/null +++ b/src/libpsc/psc_output_particles/output_particles_adios2_impl.hxx @@ -0,0 +1,375 @@ +#include + +#include "grid.hxx" +#include "output_particles.hxx" +#include "diagnostic_base.hxx" + +struct OutputParticlesAdios2Params : OutputParticlesParams +{ + bool write_x = true; + bool write_y = true; + bool write_z = true; + bool write_px = true; + bool write_py = true; + bool write_pz = true; + bool write_q = false; + bool write_m = false; + bool write_w = true; + bool write_id = false; + bool write_tag = false; + + /** + * An `adios2::IO` parameter. Sets the number of procs that aggregate data + * before writing to disk. Default of 0 means do something smart for larger + * runs. For smaller runs not limited by file io, can improve write + * performance by a decent bit by maxing this out (so every proc is an + * aggregator). + */ + int NumAggregators = 0; + + OutputParticlesAdios2Params() {} + + OutputParticlesAdios2Params(OutputParticlesParams params) + : OutputParticlesParams{params} + {} +}; + +std::string get_file_name(std::string basename, std::string kind_name, int step) +{ + int min_step_digits = 9; + std::string padded_step = std::to_string(step); + if (padded_step.length() < min_step_digits) { + padded_step.insert(0, min_step_digits - padded_step.length(), '0'); + } + return basename + "." + kind_name + "." + padded_step + ".bp"; +} + +template +class OutputParticlesAdios2 + : OutputParticlesBase + , public ParticleDiagnosticBase +{ + using real_t = RealOverride; + + static OutputParticlesAdios2Params adjust_params( + const OutputParticlesAdios2Params& params_in, const Grid_t& grid) + { + OutputParticlesAdios2Params params = params_in; + for (int d = 0; d < 3; d++) { + if (params.hi[d] == 0) { + params.hi[d] = grid.domain.gdims[d]; + } + assert(params.lo[d] >= 0); + assert(params.hi[d] <= grid.domain.gdims[d]); + } + return params; + } + +public: + OutputParticlesAdios2(const Grid_t& grid, + const OutputParticlesAdios2Params& params) + : params_{adjust_params(params, grid)} + {} + + void init(const Grid_t& grid) + { + io_ = adios_.DeclareIO("PrtWriter"); + io_.SetParameter("NumAggregators", std::to_string(params_.NumAggregators)); + // TODO set more IO parameters, configured by params_ + // (the default options are generally better at large scales, but doesn't + // take advantage of relatively higher file bandwidth at smaller scales) + + unsigned long local_n = 0; // gets set later + + if (params_.write_x) + io_.DefineVariable("x", {adios2::JoinedDim}, {}, {local_n}); + if (params_.write_y) + io_.DefineVariable("y", {adios2::JoinedDim}, {}, {local_n}); + if (params_.write_z) + io_.DefineVariable("z", {adios2::JoinedDim}, {}, {local_n}); + if (params_.write_px) + io_.DefineVariable("ux", {adios2::JoinedDim}, {}, {local_n}); + if (params_.write_py) + io_.DefineVariable("uy", {adios2::JoinedDim}, {}, {local_n}); + if (params_.write_pz) + io_.DefineVariable("uz", {adios2::JoinedDim}, {}, {local_n}); + if (params_.write_q) + io_.DefineVariable("q", {adios2::JoinedDim}, {}, {local_n}); + if (params_.write_m) + io_.DefineVariable("m", {adios2::JoinedDim}, {}, {local_n}); + if (params_.write_w) + io_.DefineVariable("w", {adios2::JoinedDim}, {}, {local_n}); + if (params_.write_id) + io_.DefineVariable("id", {adios2::JoinedDim}, {}, + {local_n}); + if (params_.write_tag) + io_.DefineVariable("tag", {adios2::JoinedDim}, {}, + {local_n}); + + // run-level attributes + io_.DefineAttribute("gdims", grid.domain.gdims.data(), + grid.domain.gdims.size()); + io_.DefineAttribute("corner", grid.domain.corner.data(), + grid.domain.corner.size()); + io_.DefineAttribute("length", grid.domain.length.data(), + grid.domain.length.size()); + io_.DefineAttribute("prts_per_unit_density", + grid.norm.prts_per_unit_density); + + // step-level attributes + bool allow_modification{true}; + io_.DefineAttribute("step", grid.timestep(), "", "/", allow_modification); + io_.DefineAttribute("time", grid.timestep() * grid.dt, "", "/", + allow_modification); + + // species-level attributes + io_.DefineAttribute("name", "", "", "/", allow_modification); + if (!params_.write_q) + io_.DefineAttribute("q", 0.0, "", "/", allow_modification); + if (!params_.write_m) + io_.DefineAttribute("m", 0.0, "", "/", allow_modification); + + init_ = true; + } + + void perform_diagnostic(Mparticles& mprts) override + { + static int pr_all, pr_count, pr_fill, pr_schedule, pr_write; + if (!pr_all) { + pr_all = prof_register("outp_adios2", 1., 0, 0); + pr_count = prof_register("outp_adios2: count", 1., 0, 0); + pr_fill = prof_register("outp_adios2: fill", 1., 0, 0); + pr_schedule = prof_register("outp_adios2: schedule", 1., 0, 0); + pr_write = prof_register("outp_adios2: write", 1., 0, 0); + } + + const Grid_t& grid = mprts.grid(); + + if (params_.every_step <= 0 || grid.timestep() % params_.every_step != 0) { + return; + } + + prof_start(pr_all); + + if (!init_) { + init(grid); + } + + io_.DefineAttribute("step", grid.timestep()); + io_.DefineAttribute("time", grid.timestep() * grid.dt); + + //\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\. + // count number of particles of each kind + prof_start(pr_count); + + int n_kinds = grid.kinds.size(); + std::vector local_n_prts_per_kind(n_kinds); + + for (int p = 0; p < grid.n_patches(); p++) { + for (auto prt = mprts.begin(p); prt != mprts.end(p); prt++) { + local_n_prts_per_kind[prt->kind]++; + } + } + + prof_stop(pr_count); + /////////////////////////////////////////////// + + for (int kind_idx = 0; kind_idx < n_kinds; kind_idx++) { + Grid_t::Kind kind = grid.kinds[kind_idx]; + unsigned long local_n_of_kind = local_n_prts_per_kind[kind_idx]; + + mpi_printf(grid.comm(), "***** Writing PRT output for '%s'\n", + kind.name.c_str()); + + std::string file_name = + get_file_name(params_.basename, kind.name, grid.timestep()); + auto engine = io_.Open(file_name, adios2::Mode::Write); + engine.BeginStep(adios2::StepMode::Append); + + //\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\. + // data must live until EndStep(), so must be scoped accordingly + + std::vector xs; + std::vector ys; + std::vector zs; + std::vector pxs; + std::vector pys; + std::vector pzs; + std::vector qs; + std::vector ms; + std::vector ws; + std::vector ids; + std::vector tags; + + //|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| + // reserve as needed + if (kind_idx == 0) + prof_start(pr_fill); + else + prof_restart(pr_fill); + + // TODO OPT maybe just keep these temps around instead of reallocating + // on every write + + if (params_.write_x) + xs.reserve(local_n_of_kind); + if (params_.write_y) + ys.reserve(local_n_of_kind); + if (params_.write_z) + zs.reserve(local_n_of_kind); + if (params_.write_px) + pxs.reserve(local_n_of_kind); + if (params_.write_py) + pys.reserve(local_n_of_kind); + if (params_.write_pz) + pzs.reserve(local_n_of_kind); + if (params_.write_q) + qs.reserve(local_n_of_kind); + if (params_.write_m) + ms.reserve(local_n_of_kind); + if (params_.write_w) + ws.reserve(local_n_of_kind); + if (params_.write_id) + ids.reserve(local_n_of_kind); + if (params_.write_tag) + tags.reserve(local_n_of_kind); + + //|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| + // populate data + + for (int p = 0; p < grid.n_patches(); p++) { + Grid_t::Patch patch = grid.patches[p]; + for (auto prt = mprts.begin(p); prt != mprts.end(p); prt++) { + if (prt->kind != kind_idx) + continue; + + // FIXME OPT: this is 11 ifs in a hot loop. Kinda just hoping that + // branch prediction and/or cache hits make this ok. + + if (params_.write_x) + xs.push_back(prt->x[0] + patch.xb[0]); + if (params_.write_y) + ys.push_back(prt->x[1] + patch.xb[1]); + if (params_.write_z) + zs.push_back(prt->x[2] + patch.xb[2]); + if (params_.write_px) + pxs.push_back(prt->u[0]); + if (params_.write_py) + pys.push_back(prt->u[1]); + if (params_.write_pz) + pzs.push_back(prt->u[2]); + if (params_.write_q) + qs.push_back(kind.q); + if (params_.write_m) + ms.push_back(kind.m); + if (params_.write_w) + ws.push_back(prt->qni_wni / kind.q); + if (params_.write_id) + ids.push_back(prt->id()); + if (params_.write_tag) + tags.push_back(prt->tag()); + } + } + + prof_stop(pr_fill); + //|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| + // schedule puts + if (kind_idx == 0) + prof_start(pr_schedule); + else + prof_restart(pr_schedule); + + if (params_.write_x) { + adios2::Variable var = io_.InquireVariable("x"); + var.SetSelection({{}, {local_n_of_kind}}); + engine.Put(var, xs.data(), adios2::Mode::Deferred); + } + if (params_.write_y) { + adios2::Variable var = io_.InquireVariable("y"); + var.SetSelection({{}, {local_n_of_kind}}); + engine.Put(var, ys.data(), adios2::Mode::Deferred); + } + if (params_.write_z) { + adios2::Variable var = io_.InquireVariable("z"); + var.SetSelection({{}, {local_n_of_kind}}); + engine.Put(var, zs.data(), adios2::Mode::Deferred); + } + if (params_.write_px) { + adios2::Variable var = io_.InquireVariable("ux"); + var.SetSelection({{}, {local_n_of_kind}}); + engine.Put(var, pxs.data(), adios2::Mode::Deferred); + } + if (params_.write_py) { + adios2::Variable var = io_.InquireVariable("uy"); + var.SetSelection({{}, {local_n_of_kind}}); + engine.Put(var, pys.data(), adios2::Mode::Deferred); + } + if (params_.write_pz) { + adios2::Variable var = io_.InquireVariable("uz"); + var.SetSelection({{}, {local_n_of_kind}}); + engine.Put(var, pzs.data(), adios2::Mode::Deferred); + } + if (params_.write_q) { + adios2::Variable var = io_.InquireVariable("q"); + var.SetSelection({{}, {local_n_of_kind}}); + engine.Put(var, qs.data(), adios2::Mode::Deferred); + } + if (params_.write_m) { + adios2::Variable var = io_.InquireVariable("m"); + var.SetSelection({{}, {local_n_of_kind}}); + engine.Put(var, ms.data(), adios2::Mode::Deferred); + } + if (params_.write_w) { + adios2::Variable var = io_.InquireVariable("w"); + var.SetSelection({{}, {local_n_of_kind}}); + engine.Put(var, ws.data(), adios2::Mode::Deferred); + } + if (params_.write_id) { + adios2::Variable var = + io_.InquireVariable("id"); + var.SetSelection({{}, {local_n_of_kind}}); + engine.Put(var, ids.data(), adios2::Mode::Deferred); + } + if (params_.write_tag) { + adios2::Variable var = + io_.InquireVariable("tag"); + var.SetSelection({{}, {local_n_of_kind}}); + engine.Put(var, tags.data(), adios2::Mode::Deferred); + } + + prof_stop(pr_schedule); + //|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| + // update io-level attributes + + io_.DefineAttribute("name", kind.name); + if (!params_.write_q) + io_.DefineAttribute("q", kind.q); + if (!params_.write_m) + io_.DefineAttribute("m", kind.m); + + //|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| + // performs puts - data must live until this point + if (kind_idx == 0) + prof_start(pr_write); + else + prof_restart(pr_write); + + engine.EndStep(); + engine.Close(); + + prof_stop(pr_write); + ////////////////////////////////////////////////////////////////////// + } + + prof_stop(pr_all); + } + + void operator()(Mparticles& mprts) { this->perform_diagnostic(mprts); } + +private: + const OutputParticlesAdios2Params params_; + adios2::ADIOS adios_{MPI_COMM_WORLD}; + adios2::IO io_; + bool init_ = false; +}; diff --git a/src/libpsc/psc_output_particles/output_particles_hdf5_impl.hxx b/src/libpsc/psc_output_particles/output_particles_hdf5_impl.hxx index a1b41cda8e..02f9d547b9 100644 --- a/src/libpsc/psc_output_particles/output_particles_hdf5_impl.hxx +++ b/src/libpsc/psc_output_particles/output_particles_hdf5_impl.hxx @@ -199,7 +199,7 @@ public: #endif prof_stop(pr_C); - write_time(grid.time(), file, dxpl); + write_scalars(grid, file, dxpl); write_domain(grid.domain, file, dxpl); prof_start(pr_D); @@ -221,7 +221,7 @@ public: } private: - void write_time(double time, hid_t group, hid_t dxpl) + void write_scalars(const Grid_t& grid, hid_t group, hid_t dxpl) { herr_t ierr; @@ -243,11 +243,22 @@ private: hid_t dset = H5Dcreate(group, "time", H5T_NATIVE_DOUBLE, filespace, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); H5_CHK(dset); + double time = grid.time(); ierr = H5Dwrite(dset, H5T_NATIVE_DOUBLE, memspace, filespace, dxpl, &time); CE; ierr = H5Dclose(dset); CE; + dset = H5Dcreate(group, "prts_per_unit_density", H5T_NATIVE_DOUBLE, + filespace, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + H5_CHK(dset); + double prts_per_unit_density = grid.norm.prts_per_unit_density; + ierr = H5Dwrite(dset, H5T_NATIVE_DOUBLE, memspace, filespace, dxpl, + &prts_per_unit_density); + CE; + ierr = H5Dclose(dset); + CE; + ierr = H5Sclose(filespace); CE; ierr = H5Sclose(memspace); @@ -788,15 +799,25 @@ public: template void operator()(_Mparticles& mprts) { + static int pr_all; + if (!pr_all) { + pr_all = prof_register("outp", 1.0, 0, 0); + } + const auto& grid = mprts.grid(); if (params_.every_step <= 0 || grid.timestep() % params_.every_step != 0) { return; } + prof_start(pr_all); + mpi_printf(grid.comm(), "***** Writing PRT output\n"); + detail::OutputParticlesHdf5<_Mparticles, ParticleSelector> impl{grid, params_}; impl(mprts, writer_); + + prof_stop(pr_all); } // FIXME, handles MparticlesVpic by conversion for now diff --git a/src/libpsc/psc_output_particles/output_particles_none_impl.hxx b/src/libpsc/psc_output_particles/output_particles_none_impl.hxx deleted file mode 100644 index af63604564..0000000000 --- a/src/libpsc/psc_output_particles/output_particles_none_impl.hxx +++ /dev/null @@ -1,14 +0,0 @@ - -#include "output_particles.hxx" - -struct OutputParticlesNone - : OutputParticlesParams - , OutputParticlesBase -{ - OutputParticlesNone(const Grid_t& grid, const OutputParticlesParams& params) - {} - - template - void operator()(Mparticles& mprts_base) - {} -}; diff --git a/src/libpsc/tests/test_output_particles.cxx b/src/libpsc/tests/test_output_particles.cxx index 19e5523c39..b7ec537b0a 100644 --- a/src/libpsc/tests/test_output_particles.cxx +++ b/src/libpsc/tests/test_output_particles.cxx @@ -6,6 +6,7 @@ #include #include "../libpsc/psc_output_particles/output_particles_ascii_impl.hxx" #include "../libpsc/psc_output_particles/output_particles_hdf5_impl.hxx" +#include "../libpsc/psc_output_particles/output_particles_adios2_impl.hxx" template struct Config @@ -73,7 +74,8 @@ using OutputParticlesTestTypes = ::testing::Types< Config> #endif - >; + , + Config>>; TYPED_TEST_SUITE(OutputParticlesTest, OutputParticlesTestTypes); @@ -108,8 +110,6 @@ TYPED_TEST(OutputParticlesTest, Test1) auto params = OutputParticlesParams{}; params.every_step = 1; - params.data_dir = "."; - params.basename = "prt"; auto outp = OutputParticles{grid, params}; outp(mprts); diff --git a/src/psc_2d_shock.cxx b/src/psc_2d_shock.cxx index 7a701a4dea..e8f62683aa 100644 --- a/src/psc_2d_shock.cxx +++ b/src/psc_2d_shock.cxx @@ -472,8 +472,6 @@ void run() // -- output particles OutputParticlesParams outp_params{}; outp_params.every_step = 20000; - outp_params.data_dir = "."; - outp_params.basename = "prt"; OutputParticles outp{grid, outp_params}; int oute_interval = -100; diff --git a/src/psc_bgk.cxx b/src/psc_bgk.cxx index 9ea97959d9..6c41e66a71 100644 --- a/src/psc_bgk.cxx +++ b/src/psc_bgk.cxx @@ -529,8 +529,6 @@ static void run(int argc, char** argv) // -- output particles OutputParticlesParams outp_params{}; outp_params.every_step = g.particles_every; - outp_params.data_dir = "."; - outp_params.basename = "prt"; OutputParticles outp{grid, outp_params}; int oute_interval = -100; diff --git a/src/psc_bubble_yz.cxx b/src/psc_bubble_yz.cxx index 4396e4acac..7177b21021 100644 --- a/src/psc_bubble_yz.cxx +++ b/src/psc_bubble_yz.cxx @@ -328,8 +328,6 @@ static void run() // -- output particles OutputParticlesParams outp_params{}; outp_params.every_step = 0; - outp_params.data_dir = "."; - outp_params.basename = "prt"; OutputParticles outp{grid, outp_params}; int oute_interval = 100; diff --git a/src/psc_config.hxx b/src/psc_config.hxx index d2a08e9c9a..977ed9e54d 100644 --- a/src/psc_config.hxx +++ b/src/psc_config.hxx @@ -12,7 +12,6 @@ #include "../libpsc/psc_output_fields/fields_item_fields.hxx" #include "../libpsc/psc_output_fields/fields_item_moments_1st.hxx" #include "../libpsc/psc_output_particles/output_particles_hdf5_impl.hxx" -#include "../libpsc/psc_output_particles/output_particles_none_impl.hxx" #include "../libpsc/psc_push_fields/marder_impl.hxx" #include "../libpsc/psc_push_particles/1vb/psc_push_particles_1vb.h" #include "../libpsc/psc_sort/psc_sort_impl.hxx" diff --git a/src/psc_flatfoil_yz.cxx b/src/psc_flatfoil_yz.cxx index 399dffe76b..f562cf5e15 100644 --- a/src/psc_flatfoil_yz.cxx +++ b/src/psc_flatfoil_yz.cxx @@ -531,8 +531,6 @@ void run() #else outp_params.every_step = -400; #endif - outp_params.data_dir = "."; - outp_params.basename = "prt"; outp_params.lo = {0, 0, grid.domain.gdims[2] / 2}; OutputParticles outp{grid, outp_params}; diff --git a/src/psc_harris_yz.cxx b/src/psc_harris_yz.cxx index 023f812032..bf84fa6123 100644 --- a/src/psc_harris_yz.cxx +++ b/src/psc_harris_yz.cxx @@ -410,8 +410,6 @@ void run() // -- output particles OutputParticlesParams outp_params{}; outp_params.every_step = -4; - outp_params.data_dir = "."; - outp_params.basename = "prt"; OutputParticles outp{grid, outp_params}; int oute_interval = -100; diff --git a/src/psc_shock.cxx b/src/psc_shock.cxx index 72d1259c6a..2ff164d5f9 100644 --- a/src/psc_shock.cxx +++ b/src/psc_shock.cxx @@ -7,6 +7,7 @@ #include "include/boundary_injector.hxx" #include "input_params.hxx" #include "kg/include/kg/VecRange.hxx" +#include "libpsc/psc_output_particles/output_particles_adios2_impl.hxx" // ====================================================================== // PSC configuration @@ -798,11 +799,9 @@ static void run(int argc, char** argv) out_moments.pfield.out_interval = out_interval; // -- output particles - OutputParticlesParams outp_params{}; + OutputParticlesAdios2Params outp_params{}; outp_params.every_step = out_interval; - outp_params.data_dir = "."; - outp_params.basename = "prt"; - OutputParticles outp{grid, outp_params}; + OutputParticlesAdios2 outp{grid, outp_params}; int oute_interval = -100; DiagEnergies oute{grid.comm(), oute_interval}; diff --git a/src/psc_whistler.cxx b/src/psc_whistler.cxx index 4ddeca2101..322178f9cc 100644 --- a/src/psc_whistler.cxx +++ b/src/psc_whistler.cxx @@ -307,8 +307,6 @@ void run() // -- output particles OutputParticlesParams outp_params{}; outp_params.every_step = 0; - outp_params.data_dir = "."; - outp_params.basename = "prt"; OutputParticles outp{grid, outp_params}; int oute_interval = 100;