diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml
new file mode 100644
index 00000000..687e39d9
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/bug_report.yml
@@ -0,0 +1,90 @@
+name: "\U0001FAB2 Bug report"
+description: "Bug or test failure in ExaGO code"
+#title: "Bug: "
+labels: [bug]
+body:
+ # CHECKBOX:
+ - type: checkboxes
+ id: issue_relates_to
+ attributes:
+ label: "Issue applies to:"
+ description: You may select more than one.
+ options:
+ - label: pflow
+ - label: OPFLOW
+ - label: TCOPFLOW
+ - label: SCOPFLOW
+ - label: SOPFLOW
+ - label: CMake and build system
+ - label: Spack
+ - label: Visualization
+ - label: Documentation
+ - label: Other
+ validations:
+ required: true
+
+ - type: textarea
+ id: summary
+ attributes:
+ label: "Summary"
+ description: "Please provide detailed description of the issue."
+ validations:
+ required: true
+
+ - type: textarea
+ id: reproduce
+ attributes:
+ label: "Description how to reproduce the issue"
+ description: "Please provide launch command and the output when you encounter the issue (if applicable)."
+ value: |
+
+ ```console
+ $ ./opflow
+ (...)
+ ```
+
+
+
+ ```
+ $ ./opflow
+ (...)
+ ```
+
+ validations:
+ required: false
+
+ - type: input
+ id: exago_version
+ attributes:
+ label: "ExaGO version"
+ description: |
+ Please include the version of ExaGO™ which demonstrates the problem.
+ If building from source on a non-tagged version, please include the commit hash.
+ validations:
+ required: true
+
+ - type: textarea
+ id: system_info
+ attributes:
+ label: "System and environment details"
+ description: |
+ Please include details about your system, such as:
+ - Operating system
+ - Compilers
+ - Hardware backends (CUDA, HIP)
+ - Dependencies and package versions
+ validations:
+ required: true
+
+ - type: textarea
+ id: additional_information
+ attributes:
+ label: "Additional information"
+ description: "Upload additional information such as debugging backtraces, logs and screenshots, when applicable."
+
+ - type: textarea
+ id: possible_fix
+ attributes:
+ label: "Possible fix or workaround"
+ description: "Describe your proposed fix, when applicable."
diff --git a/.github/ISSUE_TEMPLATE/discussion.yml b/.github/ISSUE_TEMPLATE/discussion.yml
new file mode 100644
index 00000000..0bc3f3a5
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/discussion.yml
@@ -0,0 +1,55 @@
+name: "❓ Discussion"
+description: "A question or discussion about ExaGO code, design, or development process."
+#title: "Bug: "
+labels: [question]
+body:
+ # CHECKBOX:
+ - type: checkboxes
+ id: issue_relates_to
+ attributes:
+ label: "Issue applies to:"
+ description: You may select more than one.
+ options:
+ - label: PFLOW
+ - label: OPFLOW
+ - label: TCOPFLOW
+ - label: SCOPFLOW
+ - label: SOPFLOW
+ - label: CMake and build system
+ - label: Spack
+ - label: Visualization
+ - label: Documentation
+ - label: Other
+ validations:
+ required: true
+
+ - type: textarea
+ id: summary
+ attributes:
+ label: "Summary"
+ description: "Please pose question or the description of the discussion topic."
+ validations:
+ required: true
+
+ - type: textarea
+ id: objective
+ attributes:
+ label: "Objective"
+ description: "Please provide the objective of the discussion. What question do you want to answer? What do you want to achieve?"
+ validations:
+ required: true
+
+ - type: input
+ id: exago_version
+ attributes:
+ label: "ExaGO version"
+ description: |
+ Please include the relevant version of ExaGO™, if applicable.
+ validations:
+ required: false
+
+ - type: textarea
+ id: additional_information
+ attributes:
+ label: "Additional information"
+ description: "Add references or examples relevant to the discussion."
diff --git a/.github/ISSUE_TEMPLATE/exago_issue.yml b/.github/ISSUE_TEMPLATE/exago_issue.yml
index 58a124c2..f33e5b1b 100644
--- a/.github/ISSUE_TEMPLATE/exago_issue.yml
+++ b/.github/ISSUE_TEMPLATE/exago_issue.yml
@@ -1,23 +1,8 @@
-name: "Issue Report"
-description: "Issue in ExaGO code"
+name: "⚒️ Other"
+description: "An issue in ExaGO code that does not fit into other categories"
#title: "Bug: "
#labels: [bug]
body:
- # DROPDOWN:
- - type: dropdown
- id: issue_type
- attributes:
- label: Issue type
- description: Please select from pulldown menu.
- options:
- - ""
- - Bug
- - Feature Request
- - Discussion
- - Other
- validations:
- required: true
-
# CHECKBOX:
- type: checkboxes
id: issue_relates_to
diff --git a/.github/ISSUE_TEMPLATE/feature_request.yml b/.github/ISSUE_TEMPLATE/feature_request.yml
new file mode 100644
index 00000000..013614a0
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/feature_request.yml
@@ -0,0 +1,75 @@
+name: "\U0001F38A Feature request"
+description: "Suggest adding a feature that is not yet in ExaGO."
+#title: "Bug: "
+labels: [enhancement]
+body:
+ # CHECKBOX:
+ - type: checkboxes
+ id: issue_relates_to
+ attributes:
+ label: "Issue applies to:"
+ description: You may select more than one.
+ options:
+ - label: PFLOW
+ - label: OPFLOW
+ - label: TCOPFLOW
+ - label: SCOPFLOW
+ - label: SOPFLOW
+ - label: CMake and build system
+ - label: Spack
+ - label: Visualization
+ - label: Documentation
+ - label: Other
+ validations:
+ required: true
+
+ - type: textarea
+ id: summary
+ attributes:
+ label: "Summary"
+ description: "Please add a concise summary of your suggestion here."
+ validations:
+ required: true
+
+ - type: textarea
+ id: reproduce
+ attributes:
+ label: "Rationale"
+ description: "Is your feature request related to a problem? Please describe it!."
+ validations:
+ required: true
+
+ - type: input
+ id: exago_version
+ attributes:
+ label: "ExaGO version"
+ description: |
+ Please include the relevantversion of ExaGO™.
+ If building from source on a non-tagged version, please include the commit hash.
+ validations:
+ required: true
+
+ - type: textarea
+ id: system_info
+ attributes:
+ label: "System and environment details"
+ description: |
+ Is your feature system specific? If so, please include relevant details, such as:
+ - Operating system
+ - Compilers
+ - Hardware backends (CUDA, HIP)
+ - Dependencies and package versions
+ validations:
+ required: false
+
+ - type: textarea
+ id: proposed_solution
+ attributes:
+ label: "Proposed solution"
+ description: "Describe your proposed feature and highlight implementation details, when applicable."
+
+ - type: textarea
+ id: additional_information
+ attributes:
+ label: "Additional information"
+ description: "Add any relevant references or examples."
diff --git a/.gitlab/pnnl/base.gitlab-ci.yml b/.gitlab/pnnl/base.gitlab-ci.yml
index d2ebab92..cbb275d3 100644
--- a/.gitlab/pnnl/base.gitlab-ci.yml
+++ b/.gitlab/pnnl/base.gitlab-ci.yml
@@ -267,7 +267,7 @@ stages:
git commit -m "Update ${MY_CLUSTER} spack built tcl modules - [${MY_CLUSTER}-test]"
# Re-target GitHub as our remote
- git remote set-url origin https://gitlab-ci-token:${SPACK_GIT_TOKEN}@github.com/pnnl/ExaGO.git
+ git remote set-url origin https://gitlab-ci-token:${SPACK_GIT_TOKEN}@github.com/ornl/ExaGO.git
# Do a rebase incase another pipeline has pushed since build started
git pull --rebase origin ${CI_COMMIT_REF_NAME}
diff --git a/INSTALL.md b/INSTALL.md
index e40897c8..bfea50e3 100644
--- a/INSTALL.md
+++ b/INSTALL.md
@@ -24,10 +24,10 @@ The rest of this document assumes you want to install with CMake and you already
### Acquiring the Source Code
```console
-git clone https://github.com/pnnl/ExaGO.git
+git clone https://github.com/ornl/ExaGO.git
# Or if you have SSH keys set up:
-git clone git@github.com:pnnl/ExaGO.git
+git clone git@github.com:ornl/ExaGO.git
```
### Dependencies
@@ -84,7 +84,7 @@ If you are using the HiOp solver, HiOp must be built with the same RAJA and Umpi
### CMake Workflow
```console
-git clone git@github.com:pnnl/ExaGO.git
+git clone git@github.com:ornl/ExaGO.git
cd ExaGO
git submodule update --init --recursive
mkdir build
diff --git a/SUPPORT.md b/SUPPORT.md
index ca68dacc..c94b8121 100644
--- a/SUPPORT.md
+++ b/SUPPORT.md
@@ -1 +1 @@
-Please submit issues at [ExaGO's GitLab issues page](https://gitlab.pnnl.gov/exasgd/frameworks/exago/-/issues) for support.
+Please submit issues at [ExaGO's GitHub issues page](https://github.com/ORNL/ExaGO/issues) for support.
diff --git a/docs/developer_guidelines.md b/docs/developer_guidelines.md
index 4fbd373e..328c5020 100644
--- a/docs/developer_guidelines.md
+++ b/docs/developer_guidelines.md
@@ -16,7 +16,7 @@
**If you don't have direct write access to the ExaGO repository:**
1. Fork the repository on GitHub:
- - Navigate to https://github.com/pnnl/ExaGO
+ - Navigate to https://github.com/ornl/ExaGO
- Click the "Fork" button in the top right
- This creates your fork at `https://github.com/yourusername/ExaGO`
@@ -29,7 +29,7 @@
**If you have direct write access:**
```bash
-$ git clone https://github.com/pnnl/ExaGO.git
+$ git clone https://github.com/ornl/ExaGO.git
$ cd ExaGO
```
@@ -38,7 +38,7 @@ $ cd ExaGO
After cloning, configure the `upstream` remote to point to the original repository:
```bash
-$ git remote add upstream https://github.com/pnnl/ExaGO.git
+$ git remote add upstream https://github.com/ornl/ExaGO.git
$ git fetch upstream
```
@@ -49,7 +49,7 @@ $ git remote -v
```
You should see:
-- `upstream` pointing to `https://github.com/pnnl/ExaGO.git` (the original repository)
+- `upstream` pointing to `https://github.com/ornl/ExaGO.git` (the original repository)
- `origin` pointing to your fork (if you forked) or the original repository (if you have direct access)
#### P022: Branch Off Of `develop` Unless You Have a Good Reason Not To
@@ -85,18 +85,18 @@ $ git push --force-with-lease origin develop
```
**Note on remotes:**
-- `upstream` refers to the original ExaGO repository (`https://github.com/pnnl/ExaGO.git`) - you **pull** from here to stay up to date
+- `upstream` refers to the original ExaGO repository (`https://github.com/ornl/ExaGO.git`) - you **pull** from here to stay up to date
- `origin` refers to your fork (if you forked) or the original repository (if you have direct access) - you **push** to here
**Submitting a Pull Request:**
If working with a fork, after pushing your feature branch to `origin`, create a Pull Request on GitHub:
-1. Navigate to the original ExaGO repository at https://github.com/pnnl/ExaGO
+1. Navigate to the original ExaGO repository at https://github.com/ornl/ExaGO
2. GitHub will typically show a banner to create a PR from your recently pushed branch
3. **Ensure a corresponding feature branch exists on upstream**: Coordinate with maintainers to confirm that a feature branch (e.g., `my-cool-feature`) has been created on the upstream repository. This allows CI tests to run on the upstream feature branch before merging to `develop`
-4. Create a PR from `yourusername:my-cool-feature` → `pnnl:my-cool-feature`
+4. Create a PR from `yourusername:my-cool-feature` → `ornl:my-cool-feature`
-This will ensure you are rebased on the most recent development branch. If you see [open pull requests on ExaGO's repository](https://github.com/pnnl/ExaGO/pulls) that touch the same lines of code that you are working on, please coordinate with the developers of that merge request so your work doesn't conflict.
+This will ensure you are rebased on the most recent development branch. If you see [open pull requests on ExaGO's repository](https://github.com/ornl/ExaGO/pulls) that touch the same lines of code that you are working on, please coordinate with the developers of that merge request so your work doesn't conflict.
#### P001: Don't Run Continuous Integration Pipelines If You Only Changed Documentation
@@ -272,7 +272,7 @@ You may also run `buildsystem/tools/cmake_format.pl -i`.
1. [LLVM Flang's style guide](https://github.com/llvm/llvm-project/blob/main/flang/docs/C%2B%2Bstyle.md)
1. [Google's style guide](https://google.github.io/styleguide/cppguide.html)
-1. [ExaGO's public repository](https://github.com/pnnl/ExaGO)
+1. [ExaGO's public repository](https://github.com/ornl/ExaGO)
1. [Chris Beams's blog post on writing commit messages](https://chris.beams.io/posts/git-commit)
1. [Pablo Ariasblog post on cmake best practices](https://pabloariasal.github.io/2018/02/19/its-time-to-do-cmake-right/)
1. [Semantic Versioning](https://semver.org/spec/v2.0.0.html)
diff --git a/docs/exago_policy_compatibility.md b/docs/exago_policy_compatibility.md
index 21d681ff..bc736bbd 100644
--- a/docs/exago_policy_compatibility.md
+++ b/docs/exago_policy_compatibility.md
@@ -6,7 +6,7 @@ and should be considered when filling out this form.
Please, provide information on your compability status for each mandatory policy, and if possible also for recommended policies.
If you are not compatible, state what is lacking and what are your plans on how to achieve compliance.
-**Website:** https://github.com/pnnl/ExaGO
+**Website:** https://github.com/ornl/ExaGO
### Mandatory Policies
@@ -16,12 +16,12 @@ If you are not compatible, state what is lacking and what are your plans on how
|**M2.** Provide a comprehensive test suite for correctness of installation verification. |Full|Comprehensive test suite with 21 unit tests and 94 integration/funcitonality tests (depending on the branch). See the `tests` directory for the full test suite. `tests/unit` contains unit tests, `tests/functionality` contains full end-to-end tests of the ExaGO applications libraries (eg OPFLOW and SOPFLOW). `tests/interfaces` contains tests for the Python bindings, which are a work in progress.|
|**M3.** Employ user-provided MPI communicator (no `MPI_COMM_WORLD`). Don't assume a full MPI 3 implementation without checking. Provide an option to prevent any changes to MPI error-handling if it is changed by default. |Full| ExaGO is built on PETSc, and uses PETSc's APIs for interacting with MPI. All application structures (eg OPFLOW) are constructed with an MPI communicator, and `MPI_COMM_WORLD`. `grep`ing for `MPI_COMM_WORLD` in ExaGO's repository will identify the test drivers, example application drivers, and an initialization utility as using `MPI_COMM_WORLD`. The utility (`src/utils/utils.cpp`) will only use the global communicator if no communicator is given, and the drivers use the global communicator as examples.|
|**M4.** Give best effort at portability to key architectures (standard Linux distributions, GNU, Clang, vendor compilers, and target machines at ALCF, NERSC, OLCF). |Full| Continuous integration tests each branch on multiple platforms, including IBM Power9 at ORNL and PNNL, and x86 at PNNL. CI on an AMD platform is in progress. |
-|**M5.** Provide a documented, reliable way to contact the development team. |Full|[Submit issues on Github page linked here.](https://github.com/pnnl/ExaGO/issues) The SUPPORT file in the top-level directory points users to this location as well.|
+|**M5.** Provide a documented, reliable way to contact the development team. |Full|[Submit issues on Github page linked here.](https://github.com/ornl/ExaGO/issues) The SUPPORT file in the top-level directory points users to this location as well.|
|**M6.** Respect system resources and settings made by other previously called packages (e.g. signal handling). |Full| No signal hanlders are overridden. |
|**M7.** Come with an open source (BSD style) license. |Full|See [LICENSE](/LICENSE) file in root of source directory. ExaGO uses PNNL/Battelle's BSD-style license.|
|**M8.** Provide a runtime API to return the current version number of the software. |Full|Header `exago_config.h` defines version and configuration information, and we expose various runtime APIs for software information, such as `ExaGOVersionGetVersion` and `ExaGOVersionGetReleaseDate`.|
|**M9.** Use a limited and well-defined symbol, macro, library, and include file name space. |Full| All macros are prefixed with `EXAGO_` and headers installed under `exago/` directory. |
-|**M10.** Provide an xSDK team accessible repository (not necessarily publicly available). |Full|[Public Github repository linked here](https://github.com/pnnl/ExaGO). |
+|**M10.** Provide an xSDK team accessible repository (not necessarily publicly available). |Full|[Public Github repository linked here](https://github.com/ornl/ExaGO). |
|**M11.** Have no hardwired print or IO statements that cannot be turned off.
|Full|Logging may be disabled with `ExaGOLogSetMinLogLevel(EXAGO_LOG_DISABLE)`
or by enabling the CMake option `EXAGO_ENABLE_LOGGING=OFF` to ensure the logger is fully disabled.|
@@ -39,7 +39,7 @@ M2 details : optional: provide more details about approac
| Policy |Support| Notes |
|------------------------|-------|-------------------------|
-|**R1.** Have a public repository. |Full| [Public GitHub repository linked here](https://github.com/pnnl/ExaGO). |
+|**R1.** Have a public repository. |Full| [Public GitHub repository linked here](https://github.com/ornl/ExaGO). |
|**R2.** Possible to run test suite under valgrind in order to test for memory corruption issues. |Full| It is possible to run any of the application drivers and test drivers under Valgrind. This has only been test with the leakcheck tool, and not any of the other tools from Valgrind. |
|**R3.** Adopt and document consistent system for error conditions/exceptions. |Full| ExaGO makes thorough use of return codes and error checking, particularly the PETSc macros such as `ERRCHKQ`. |
|**R4.** Free all system resources acquired as soon as they are no longer needed. |Full| Memory for the model is allocated at the beginning of the program and freed at the end. ExaGO also allows for using an external solver, in which case the memory is used by a thrid-party library. These libraries (PETSc, Ipopt, and HiOp) also adequately free memory they allocate. |
diff --git a/docs/python_bindings.md b/docs/python_bindings.md
index a9957f7f..011d1934 100644
--- a/docs/python_bindings.md
+++ b/docs/python_bindings.md
@@ -75,7 +75,7 @@ API. Failure to do so will cause segmentation faults or other memory
errors.
***If you identify components of the C++ API that you need to call from Python,
-please [open an issue on our issues page](https://github.com/pnnl/ExaGO/issues).***
+please [open an issue on our issues page](https://github.com/ornl/ExaGO/issues).***
### Building with MPI
diff --git a/docs/web/README.ci_clusters.md b/docs/web/README.ci_clusters.md
index ec76025e..47f56276 100644
--- a/docs/web/README.ci_clusters.md
+++ b/docs/web/README.ci_clusters.md
@@ -23,7 +23,7 @@ computation.
$ # Set this variable to one of newell, marianas, or ascent
$ export MY_CLUSTER=newell
-$ git clone https://github.com/pnnl/ExaGO.git
+$ git clone https://github.com/ornl/ExaGO.git
$ cd exago
$ mkdir build install
@@ -214,7 +214,7 @@ the tests, you'll have to pass some additional options.
```console
$ export MY_CLUSTER=ascent
-$ git clone https://github.com/pnnl/ExaGO.git
+$ git clone https://github.com/ornl/ExaGO.git
$ mkdir build install
$ cd exago
diff --git a/src/opflow/CMakeLists.txt b/src/opflow/CMakeLists.txt
index d0553657..fd767aea 100644
--- a/src/opflow/CMakeLists.txt
+++ b/src/opflow/CMakeLists.txt
@@ -20,6 +20,7 @@ if(EXAGO_ENABLE_RAJA)
set(OPFLOW_FORM_SRC
${OPFLOW_FORM_SRC} model/power_bal_hiop/pbpolrajahiopsparse.cpp
model/power_bal_hiop/pbpolrajahiopsparsekernels.cpp
+ model/power_bal_hiop/pbpolrajahiopsparse_gpu.cpp
)
endif()
endif()
@@ -39,8 +40,9 @@ set_source_files_properties(${OPFLOW_SRC} PROPERTIES LANGUAGE CXX)
if(EXAGO_ENABLE_RAJA AND EXAGO_ENABLE_CUDA)
set_source_files_properties(
model/power_bal_hiop/pbpolrajahiopkernels.cpp
- model/power_bal_hiop/pbpolrajahiopsparsekernels.cpp PROPERTIES LANGUAGE
- CUDA
+ model/power_bal_hiop/pbpolrajahiopsparsekernels.cpp
+ model/power_bal_hiop/pbpolrajahiopsparse_gpu.cpp PROPERTIES LANGUAGE
+ CUDA
)
endif()
diff --git a/src/opflow/model/power_bal_hiop/paramsrajahiop.cpp b/src/opflow/model/power_bal_hiop/paramsrajahiop.cpp
index 27e2cfa0..583f273b 100644
--- a/src/opflow/model/power_bal_hiop/paramsrajahiop.cpp
+++ b/src/opflow/model/power_bal_hiop/paramsrajahiop.cpp
@@ -27,6 +27,11 @@ int BUSParamsRajaHiop::destroy(OPFLOW opflow) {
h_allocator_.deallocate(jacsp_idx);
h_allocator_.deallocate(jacsq_idx);
}
+ h_allocator_.deallocate(ispv);
+ h_allocator_.deallocate(gineqidx);
+ h_allocator_.deallocate(ineqjacsp_idx);
+ h_allocator_.deallocate(genoffset);
+ h_allocator_.deallocate(ngenONbus);
#ifdef EXAGO_ENABLE_GPU
d_allocator_.deallocate(isref_dev_);
@@ -46,6 +51,11 @@ int BUSParamsRajaHiop::destroy(OPFLOW opflow) {
d_allocator_.deallocate(jacsp_idx_dev_);
d_allocator_.deallocate(jacsq_idx_dev_);
}
+ d_allocator_.deallocate(ispv_dev_);
+ d_allocator_.deallocate(gineqidx_dev_);
+ d_allocator_.deallocate(ineqjacsp_idx_dev_);
+ d_allocator_.deallocate(genoffset_dev_);
+ d_allocator_.deallocate(ngenONbus_dev_);
#endif
return 0;
@@ -80,6 +90,11 @@ int BUSParamsRajaHiop::copy(OPFLOW opflow) {
resmgr.copy(jacsq_idx_dev_, jacsq_idx);
resmgr.copy(powerimbalance_penalty_dev_, powerimbalance_penalty);
}
+ resmgr.copy(ispv_dev_, ispv);
+ resmgr.copy(gineqidx_dev_, gineqidx);
+ resmgr.copy(ineqjacsp_idx_dev_, ineqjacsp_idx);
+ resmgr.copy(genoffset_dev_, genoffset);
+ resmgr.copy(ngenONbus_dev_, ngenONbus);
#else
isref_dev_ = isref;
isisolated_dev_ = isisolated;
@@ -96,6 +111,11 @@ int BUSParamsRajaHiop::copy(OPFLOW opflow) {
jacsp_idx_dev_ = jacsp_idx;
jacsq_idx_dev_ = jacsq_idx;
powerimbalance_penalty_dev_ = powerimbalance_penalty;
+ ispv_dev_ = ispv;
+ gineqidx_dev_ = gineqidx;
+ ineqjacsp_idx_dev_ = ineqjacsp_idx;
+ genoffset_dev_ = genoffset;
+ ngenONbus_dev_ = ngenONbus;
#endif
return 0;
}
@@ -132,18 +152,31 @@ int BUSParamsRajaHiop::allocate(OPFLOW opflow) {
jacsp_idx = paramAlloc(h_allocator_, nbus);
jacsq_idx = paramAlloc(h_allocator_, nbus);
}
+ ispv = paramAlloc(h_allocator_, nbus);
+ gineqidx = paramAlloc(h_allocator_, nbus);
+ ineqjacsp_idx = paramAlloc(h_allocator_, nbus);
+ genoffset = paramAlloc(h_allocator_, nbus);
+ ngenONbus = paramAlloc(h_allocator_, nbus);
/* Memzero arrays */
resmgr.memset(isref, 0, nbus * sizeof(int));
resmgr.memset(ispvpq, 0, nbus * sizeof(int));
resmgr.memset(isisolated, 0, nbus * sizeof(int));
+ resmgr.memset(ispv, 0, nbus * sizeof(int));
+ resmgr.memset(gineqidx, 0, nbus * sizeof(int));
+ resmgr.memset(ineqjacsp_idx, 0, nbus * sizeof(int));
+ resmgr.memset(genoffset, 0, nbus * sizeof(int));
+ resmgr.memset(ngenONbus, 0, nbus * sizeof(int));
+ int genoff = 0;
for (int i = 0; i < nbus; i++) {
bus = &ps->bus[i];
loc = bus->startxVloc;
xidx[i] = opflow->idxn2sd_map[loc];
gidx[i] = bus->starteqloc;
+ genoffset[i] = genoff;
+ genoff += bus->ngenON;
if (bus->ide == REF_BUS)
isref[i] = 1;
@@ -152,6 +185,12 @@ int BUSParamsRajaHiop::allocate(OPFLOW opflow) {
else
ispvpq[i] = 1;
+ if (bus->ide == PV_BUS)
+ ispv[i] = 1;
+
+ ngenONbus[i] = bus->ngenON;
+ gineqidx[i] = bus->startineqloc;
+
if (opflow->genbusvoltagetype == FIXED_AT_SETPOINT) {
if (bus->ide == REF_BUS || bus->ide == PV_BUS) {
/* Hold voltage at reference and PV buses */
@@ -200,6 +239,11 @@ int BUSParamsRajaHiop::allocate(OPFLOW opflow) {
jacsp_idx_dev_ = paramAlloc(d_allocator_, nbus);
jacsq_idx_dev_ = paramAlloc(d_allocator_, nbus);
}
+ ispv_dev_ = paramAlloc(d_allocator_, nbus);
+ gineqidx_dev_ = paramAlloc(d_allocator_, nbus);
+ ineqjacsp_idx_dev_ = paramAlloc(d_allocator_, nbus);
+ genoffset_dev_ = paramAlloc(d_allocator_, nbus);
+ ngenONbus_dev_ = paramAlloc(d_allocator_, nbus);
#endif
return 0;
}
@@ -231,6 +275,8 @@ int LINEParamsRajaHiop::copy(OPFLOW opflow) {
resmgr.copy(gineqidx_dev_, gineqidx);
resmgr.copy(gbineqidx_dev_, gbineqidx);
resmgr.copy(linelimidx_dev_, linelimidx);
+ resmgr.copy(ineqjacsp_idx_dev_, ineqjacsp_idx);
+ resmgr.copy(xslackidx_dev_, xslackidx);
}
#else
Gff_dev_ = Gff;
@@ -250,6 +296,8 @@ int LINEParamsRajaHiop::copy(OPFLOW opflow) {
gineqidx_dev_ = gineqidx;
gbineqidx_dev_ = gbineqidx;
linelimidx_dev_ = linelimidx;
+ ineqjacsp_idx_dev_ = ineqjacsp_idx;
+ xslackidx_dev_ = xslackidx;
}
#endif
return 0;
@@ -277,6 +325,8 @@ int LINEParamsRajaHiop::destroy(OPFLOW opflow) {
h_allocator_.deallocate(gineqidx);
h_allocator_.deallocate(gbineqidx);
h_allocator_.deallocate(linelimidx);
+ h_allocator_.deallocate(ineqjacsp_idx);
+ h_allocator_.deallocate(xslackidx);
}
#ifdef EXAGO_ENABLE_GPU
@@ -301,6 +351,8 @@ int LINEParamsRajaHiop::destroy(OPFLOW opflow) {
d_allocator_.deallocate(gineqidx_dev_);
d_allocator_.deallocate(gbineqidx_dev_);
d_allocator_.deallocate(linelimidx_dev_);
+ d_allocator_.deallocate(ineqjacsp_idx_dev_);
+ d_allocator_.deallocate(xslackidx_dev_);
}
#endif
@@ -348,6 +400,8 @@ int LINEParamsRajaHiop::allocate(OPFLOW opflow) {
linelimidx = paramAlloc(h_allocator_, nlinelim);
gineqidx = paramAlloc(h_allocator_, nlinelim);
gbineqidx = paramAlloc(h_allocator_, nlinelim);
+ ineqjacsp_idx = paramAlloc(h_allocator_, nlinelim);
+ xslackidx = paramAlloc(h_allocator_, nlinelim);
}
PetscInt j = 0;
@@ -391,6 +445,9 @@ int LINEParamsRajaHiop::allocate(OPFLOW opflow) {
gbineqidx[j] = opflow->nconeq + line->startineqloc;
gineqidx[j] = line->startineqloc;
linelimidx[j] = linei;
+ if (opflow->allow_lineflow_violation) {
+ xslackidx[j] = opflow->idxn2sd_map[line->startxslackloc];
+ }
j++;
}
@@ -420,6 +477,8 @@ int LINEParamsRajaHiop::allocate(OPFLOW opflow) {
gineqidx_dev_ = paramAlloc(d_allocator_, nlinelim);
gbineqidx_dev_ = paramAlloc(d_allocator_, nlinelim);
linelimidx_dev_ = paramAlloc(d_allocator_, nlinelim);
+ ineqjacsp_idx_dev_ = paramAlloc(d_allocator_, nlinelim);
+ xslackidx_dev_ = paramAlloc(d_allocator_, nlinelim);
}
#endif
return 0;
@@ -558,7 +617,10 @@ int GENParamsRajaHiop::destroy(OPFLOW opflow) {
h_allocator_.deallocate(qt);
h_allocator_.deallocate(qb);
h_allocator_.deallocate(isrenewable);
+ h_allocator_.deallocate(apf);
+ h_allocator_.deallocate(vs);
h_allocator_.deallocate(xidx);
+ h_allocator_.deallocate(xpdevidx);
h_allocator_.deallocate(gidxbus);
h_allocator_.deallocate(eqjacspbus_idx);
h_allocator_.deallocate(eqjacsqbus_idx);
@@ -581,7 +643,10 @@ int GENParamsRajaHiop::destroy(OPFLOW opflow) {
d_allocator_.deallocate(qt_dev_);
d_allocator_.deallocate(qb_dev_);
d_allocator_.deallocate(isrenewable_dev_);
+ d_allocator_.deallocate(apf_dev_);
+ d_allocator_.deallocate(vs_dev_);
d_allocator_.deallocate(xidx_dev_);
+ d_allocator_.deallocate(xpdevidx_dev_);
d_allocator_.deallocate(gidxbus_dev_);
d_allocator_.deallocate(eqjacspbus_idx_dev_);
d_allocator_.deallocate(eqjacsqbus_idx_dev_);
@@ -615,8 +680,11 @@ int GENParamsRajaHiop::copy(OPFLOW opflow) {
resmgr.copy(qt_dev_, qt);
resmgr.copy(qb_dev_, qb);
resmgr.copy(isrenewable_dev_, isrenewable);
+ resmgr.copy(apf_dev_, apf);
+ resmgr.copy(vs_dev_, vs);
resmgr.copy(xidx_dev_, xidx);
+ resmgr.copy(xpdevidx_dev_, xpdevidx);
resmgr.copy(gidxbus_dev_, gidxbus);
resmgr.copy(eqjacspbus_idx_dev_, eqjacspbus_idx);
@@ -639,7 +707,10 @@ int GENParamsRajaHiop::copy(OPFLOW opflow) {
qt_dev_ = qt;
qb_dev_ = qb;
isrenewable_dev_ = isrenewable;
+ apf_dev_ = apf;
+ vs_dev_ = vs;
xidx_dev_ = xidx;
+ xpdevidx_dev_ = xpdevidx;
gidxbus_dev_ = gidxbus;
eqjacspbus_idx_dev_ = eqjacspbus_idx;
eqjacsqbus_idx_dev_ = eqjacsqbus_idx;
@@ -682,8 +753,11 @@ int GENParamsRajaHiop::allocate(OPFLOW opflow) {
qt = paramAlloc(h_allocator_, ngenON);
qb = paramAlloc(h_allocator_, ngenON);
isrenewable = paramAlloc(h_allocator_, ngenON);
+ apf = paramAlloc(h_allocator_, ngenON);
+ vs = paramAlloc(h_allocator_, ngenON);
xidx = paramAlloc(h_allocator_, ngenON);
+ xpdevidx = paramAlloc(h_allocator_, ngenON);
gidxbus = paramAlloc(h_allocator_, ngenON);
eqjacspbus_idx = paramAlloc(h_allocator_, ngenON);
@@ -720,11 +794,16 @@ int GENParamsRajaHiop::allocate(OPFLOW opflow) {
qt[geni] = gen->qt;
qb[geni] = gen->qb;
isrenewable[geni] = (int)gen->isrenewable;
+ apf[geni] = gen->apf;
+ vs[geni] = gen->vs;
if (opflow->has_gensetpoint) {
pgs[geni] = gen->pgs;
}
xidx[geni] = opflow->idxn2sd_map[loc];
+ xpdevidx[geni] = (opflow->has_gensetpoint && !gen->isrenewable)
+ ? opflow->idxn2sd_map[gen->startxpdevloc]
+ : -1;
gidxbus[geni] = gloc;
if (opflow->has_gensetpoint) {
geqidxgen[geni] = gen->starteqloc;
@@ -748,8 +827,11 @@ int GENParamsRajaHiop::allocate(OPFLOW opflow) {
qt_dev_ = paramAlloc(d_allocator_, ngenON);
qb_dev_ = paramAlloc(d_allocator_, ngenON);
isrenewable_dev_ = paramAlloc(d_allocator_, ngenON);
+ apf_dev_ = paramAlloc(d_allocator_, ngenON);
+ vs_dev_ = paramAlloc(d_allocator_, ngenON);
xidx_dev_ = paramAlloc(d_allocator_, ngenON);
+ xpdevidx_dev_ = paramAlloc(d_allocator_, ngenON);
gidxbus_dev_ = paramAlloc(d_allocator_, ngenON);
eqjacspbus_idx_dev_ = paramAlloc(d_allocator_, ngenON);
diff --git a/src/opflow/model/power_bal_hiop/paramsrajahiop.h b/src/opflow/model/power_bal_hiop/paramsrajahiop.h
index 91f1fb9f..6121a4c6 100644
--- a/src/opflow/model/power_bal_hiop/paramsrajahiop.h
+++ b/src/opflow/model/power_bal_hiop/paramsrajahiop.h
@@ -28,7 +28,12 @@ struct BUSParamsRajaHiop {
vector */
int *jacsp_idx; /* Location number in the sparse Jacobian for Pimb */
int *jacsq_idx; /* Location number in the sparse Jacobian for Qimb */
- int *hesssp_idx; /* Location number in the Hessian */
+ int *hesssp_idx; /* KS: Hessian indices */
+ int *ispv; /* KS: ispv[i] = 1 if bus is PV bus */
+ int *gineqidx; /* KS: starting position of bus ineq constraints */
+ int *ineqjacsp_idx; /* KS: index in flat sparse ineq Jacobian array */
+ int *genoffset; /* KS: Offset into flattened gen array for this bus */
+ int *ngenONbus; /* KS: Number of ON generators on this bus */
// Device data
int *isref_dev_; /* isref[i] = 1 if bus is reference bus */
@@ -49,6 +54,11 @@ struct BUSParamsRajaHiop {
int *jacsp_idx_dev_; /* Location number in the sparse Jacobian for Pimb */
int *jacsq_idx_dev_; /* Location number in the sparse Jacobian for Qimb */
int *hesssp_idx_dev_; /* Location number in the Hessian */
+ int *ispv_dev_; /* KS: dev counterpart of ispv */
+ int *gineqidx_dev_; /* KS: dev counterpart of gineqidx */
+ int *ineqjacsp_idx_dev_; /* KS: device counterpart of ineqjacsp_idx_ */
+ int *genoffset_dev_; /* KS: dev counterpart of genoffset */
+ int *ngenONbus_dev_; /* KS: dev counterpart of ngenONbus */
int allocate(OPFLOW);
int destroy(OPFLOW);
@@ -72,9 +82,12 @@ struct GENParamsRajaHiop {
double *qt; /* min. reactive power gen. limits */
double *qb; /* max. reactive power gen. limits */
double *pgs; /* real power output setpoint */
+ double *apf; /* generator AGC participation factor */
+ double *vs; /* voltage setpoint */
int *isrenewable; /* Is renewable generator? */
- int *xidx; /* starting locations in X vector */
+ int *xidx; /* starting locations in X vector */
+ int *xpdevidx; /* KS: tarting locations of deviation variables in X vector */
int *
gidxbus; /* starting locations in constraint vector for bus constraints */
int *geqidxgen; /* starting locations in equality constraint vector for gen
@@ -104,9 +117,12 @@ struct GENParamsRajaHiop {
double *qt_dev_; /* min. reactive power gen. limits */
double *qb_dev_; /* max. reactive power gen. limits */
double *pgs_dev_; /* real power output setpoint */
+ double *apf_dev_; /* KS: device counterpart of apf */
+ double *vs_dev_; /* KS: device counterpart of vs */
int *isrenewable_dev_; /* Is renewable generator? */
int *xidx_dev_; /* starting locations in X vector */
+ int *xpdevidx_dev_; /* KS: device coutnerpart of xpdevidx*/
int *gidxbus_dev_; /* starting locations in constraint vector for bus
constraints */
int *geqidxgen_dev_; /* starting locations in equality constraint vector for
@@ -143,7 +159,7 @@ struct LOADParamsRajaHiop {
double *pl; /* active power demand */
double *ql; /* reactive power demand */
double *loadloss_penalty; /* Penalty for load loss */
- int *xidx; /* starting location in X vector */
+ int *xidx; /* KS: starting location in X vector */
int *gidx; /* starting location in constraint vector */
/* The following members are only used with HIOP */
@@ -194,7 +210,9 @@ struct LINEParamsRajaHiop {
constraint */
int *gbineqidx; /* Starting location to insert contribution to inequality
constraint bound */
- int *linelimidx; /* Indices for subset of lines that have finite limits */
+ int *linelimidx; /* Indices for subset of lines that have finite limits */
+ int *ineqjacsp_idx; /* KS: Position in flat sparse ineq Jacobian array */
+ int *xslackidx; /* Starting location of slack variables in X vector */
// Device data
double *Gff_dev_; /* From side self conductance */
@@ -217,7 +235,9 @@ struct LINEParamsRajaHiop {
int *gbineqidx_dev_; /* Starting location to insert contribution to inequality
constraint bound */
int *
- linelimidx_dev_; /* Indices for subset of lines that have finite limits */
+ linelimidx_dev_; /* Indices for subset of lines that have finite limits */
+ int *ineqjacsp_idx_dev_; /* KS: Position in flat sparse ineq Jacobian array */
+ int *xslackidx_dev_; /* Starting location of slack variables in X vector */
int allocate(OPFLOW);
int destroy(OPFLOW);
@@ -248,6 +268,8 @@ struct PbpolModelRajaHiop : public _p_FormPBPOLRAJAHIOP {
LINEParamsRajaHiop lineparams;
BUSParamsRajaHiop busparams;
+ int agc_xidx; /* KS: X-vector index for the AGC delta-P variable (ps->startxloc) */
+
// Arrays to store Jacobian and Hessian indices and entries on CPU (used with
// GPU sparse model)
int *i_jaceq,
diff --git a/src/opflow/model/power_bal_hiop/pbpolrajahiopsparse.cpp b/src/opflow/model/power_bal_hiop/pbpolrajahiopsparse.cpp
index fa609378..2cc4c16a 100644
--- a/src/opflow/model/power_bal_hiop/pbpolrajahiopsparse.cpp
+++ b/src/opflow/model/power_bal_hiop/pbpolrajahiopsparse.cpp
@@ -221,9 +221,85 @@ PetscErrorCode OPFLOWModelSetUp_PBPOLRAJAHIOPSPARSE(OPFLOW opflow) {
LOADParamsRajaHiop *loadparams = &pbpolrajahiopsparse->loadparams;
LINEParamsRajaHiop *lineparams = &pbpolrajahiopsparse->lineparams;
- /* Need to compute the number of nonzeros in equality, inequality constraint
- * Jacobians and Hessian */
+ PS ps = opflow->ps;
+ PSBUS bus;
+ PSGEN gen;
+ PSLINE line;
+ PetscInt i, k;
+
+ /* KS: Store the AGC variable index (scalar) */
+ if (opflow->use_agc) {
+ pbpolrajahiopsparse->agc_xidx = opflow->idxn2sd_map[ps->startxloc];
+ } else {
+ pbpolrajahiopsparse->agc_xidx = -1;
+ }
+
+ /* KS: Compute the number of nonzeros in equality, inequality constraint
+ * Jacobians and Hessian. Equality and Hessian counts are still obtained
+ * from PETSc via get_sparse_blocks_info; inequality count is computed
+ * axplicitly so we can skip PETSc */
int nnz_eqjacsp = 0, nnz_ineqjacsp = 0, nnz_hesssp = 0;
+
+ /*
+ * KS: Count inequality Jacobian non-zeros. The traversal order must match
+ * the startineqloc assignment in OPFLOWModelSetUp_PBPOL: for each bus
+ * (bus ineq, then gen ineq), then for each line.
+ */
+ int geni = 0, gi;
+ for (i = 0; i < ps->nbus; i++) {
+ bus = &ps->bus[i];
+
+ /* Bus voltage-Q-bounds constraints (FIXED_WITHIN_QBOUNDS) */
+ if (opflow->genbusvoltagetype == FIXED_WITHIN_QBOUNDS) {
+ if (bus->ide == PV_BUS || bus->ide == REF_BUS) {
+ busparams->ineqjacsp_idx[i] = nnz_ineqjacsp;
+ /* 2 rows, each with ngenON + 1 entries (one per gen Qg + one for V) */
+ nnz_ineqjacsp += 2 * (bus->ngenON + 1);
+ }/
+ }
+
+ /* KS: Generator set-point constraints */
+ gi = 0;
+ if (opflow->has_gensetpoint) {
+ for (k = 0; k < bus->ngen; k++) {
+ ierr = PSBUSGetGen(bus, k, &gen);
+ CHKERRQ(ierr);
+ if (!gen->status)
+ continue;
+ if (!gen->isrenewable) {
+ genparams->ineqjacspgen_idx[geni + gi] = nnz_ineqjacsp;
+ if (opflow->use_agc) {
+ nnz_ineqjacsp += 6; /* 2 rows x 3 entries (Pg, delPg, delP) */
+ }
+ }
+ gi++;
+ }
+ }
+
+ geni += bus->ngenON;
+ }
+
+ /* Line flow constraints */
+ int linej = 0;
+ for (i = 0; i < ps->nline; i++) {
+ line = &ps->line[i];
+ if (!line->status)
+ continue;
+ if (line->isdcline)
+ continue;
+
+ if (linej < opflow->nlinesmon && opflow->linesmon[linej] == i) {
+ lineparams->ineqjacsp_idx[linej] = nnz_ineqjacsp;
+ /* 2 rows x 4 entries (thetaf, Vmf, thetat, Vmt) */
+ int entries_per_line = 8;
+ if (opflow->allow_lineflow_violation) {
+ entries_per_line += 2; /* 1 slack entry per row */
+ }
+ nnz_ineqjacsp += entries_per_line;
+ linej++;
+ }
+ }
+
opflow->nnz_eqjacsp = nnz_eqjacsp;
opflow->nnz_ineqjacsp = nnz_ineqjacsp;
opflow->nnz_hesssp = nnz_hesssp;
diff --git a/src/opflow/model/power_bal_hiop/pbpolrajahiopsparse_gpu.cpp b/src/opflow/model/power_bal_hiop/pbpolrajahiopsparse_gpu.cpp
new file mode 100644
index 00000000..5fc79ca1
--- /dev/null
+++ b/src/opflow/model/power_bal_hiop/pbpolrajahiopsparse_gpu.cpp
@@ -0,0 +1,250 @@
+
+#include
+
+#if defined(EXAGO_ENABLE_RAJA)
+#if defined(EXAGO_ENABLE_HIOP_SPARSE)
+
+#include
+#include
+
+#include "pbpolrajahiopsparse_gpu.hpp"
+
+void ComputeIneqJacValuesGPU_PBPOLRAJAHIOPSPARSE(OPFLOW opflow,
+ const double *x_dev,
+ double *jacd_dev) {
+ PbpolModelRajaHiop *pbpolrajahiopsparse =
+ reinterpret_cast(opflow->model);
+ GENParamsRajaHiop *genparams = &pbpolrajahiopsparse->genparams;
+ BUSParamsRajaHiop *busparams = &pbpolrajahiopsparse->busparams;
+
+ /*
+ * Generator set-point inequality Jacobian (has_gensetpoint && use_agc).
+ *
+ * PETSc reference (pbpol.cpp:1191-1232):
+ * Row gloc: [apf*delP - delPg, -(Pg - pt), apf*(Pg - pt)]
+ * Row gloc+1: [apf*delP - delPg, (pb - Pg), -apf*(pb - Pg)]
+ *
+ * Flat array layout per generator (6 entries):
+ * [0..2] = row 0 values (Pg, delPg, delP columns)
+ * [3..5] = row 1 values (Pg, delPg, delP columns)
+ */
+ if (opflow->has_gensetpoint && opflow->use_agc) {
+ int *g_ineqjacsp_idx = genparams->ineqjacspgen_idx_dev_;
+ int *g_xidx = genparams->xidx_dev_;
+ int *g_xpdevidx = genparams->xpdevidx_dev_;
+ int *g_isrenewable = genparams->isrenewable_dev_;
+ double *g_apf = genparams->apf_dev_;
+ double *g_pt = genparams->pt_dev_;
+ double *g_pb = genparams->pb_dev_;
+ int agc_xidx = pbpolrajahiopsparse->agc_xidx;
+
+ RAJA::forall(
+ RAJA::RangeSegment(0, genparams->ngenON),
+ RAJA_LAMBDA(RAJA::Index_type i) {
+ if (g_isrenewable[i])
+ return;
+
+ int base = g_ineqjacsp_idx[i];
+ double Pg = x_dev[g_xidx[i]];
+ double delPg = x_dev[g_xpdevidx[i]];
+ double delP = x_dev[agc_xidx];
+ double apf = g_apf[i];
+ double pt = g_pt[i];
+ double pb = g_pb[i];
+
+ double apf_delP_minus_delPg = apf * delP - delPg;
+
+ /* Row 0: d/d{Pg, delPg, delP} of (apf*delP - delPg)*(Pg - pt) */
+ jacd_dev[base + 0] = apf_delP_minus_delPg;
+ jacd_dev[base + 1] = -(Pg - pt);
+ jacd_dev[base + 2] = apf * (Pg - pt);
+
+ /* Row 1: d/d{Pg, delPg, delP} of (delPg - apf*delP)*(pb - Pg) */
+ jacd_dev[base + 3] = apf_delP_minus_delPg;
+ jacd_dev[base + 4] = pb - Pg;
+ jacd_dev[base + 5] = -apf * (pb - Pg);
+ });
+ }
+
+ /*
+ * FIXED_WITHIN_QBOUNDS voltage constraint Jacobian.
+ *
+ * PETSc reference (pbpol.cpp:1235-1278):
+ * For each PV/REF bus with ngenON generators:
+ * Row 0 (gloc): [Vset_0-V, Vset_1-V, ..., Qmax-Q]
+ * Row 1 (gloc+1): [Vset_0-V, Vset_1-V, ..., Qmin-Q]
+ *
+ * Flat array layout per bus (2 * (ngenON + 1) entries):
+ * Row 0: ngenON Qg derivative entries + 1 V derivative entry
+ * Row 1: ngenON Qg derivative entries + 1 V derivative entry
+ *
+ * This kernel parallelizes over buses. The inner loop over generators
+ * is sequential within each thread (variable-length per bus).
+ */
+ if (opflow->genbusvoltagetype == FIXED_WITHIN_QBOUNDS) {
+ int *b_ispv = busparams->ispv_dev_;
+ int *b_isref = busparams->isref_dev_;
+ int *b_xidx = busparams->xidx_dev_;
+ int *b_ineqjacsp_idx = busparams->ineqjacsp_idx_dev_;
+ int *b_genoffset = busparams->genoffset_dev_;
+ int *b_ngenONbus = busparams->ngenONbus_dev_;
+ int *g_xidx = genparams->xidx_dev_;
+ double *g_qt = genparams->qt_dev_;
+ double *g_qb = genparams->qb_dev_;
+ double *g_vs = genparams->vs_dev_;
+
+ RAJA::forall(
+ RAJA::RangeSegment(0, busparams->nbus),
+ RAJA_LAMBDA(RAJA::Index_type i) {
+ if (!b_ispv[i] && !b_isref[i])
+ return;
+
+ int base = b_ineqjacsp_idx[i];
+ int ngen = b_ngenONbus[i];
+ int goff = b_genoffset[i];
+ double V = x_dev[b_xidx[i] + 1];
+
+ double Q = 0.0, Qmax = 0.0, Qmin = 0.0;
+ double Vset = 0.0;
+
+ /* Row 0 and Row 1: Qg derivative entries */
+ for (int k = 0; k < ngen; k++) {
+ int gidx = goff + k;
+ double Qg = x_dev[g_xidx[gidx] + 1];
+ Q += Qg;
+ Qmax += g_qt[gidx];
+ Qmin += g_qb[gidx];
+ Vset = g_vs[gidx];
+
+ double dQg = Vset - V;
+ /* Row 0 entry for this gen's Qg */
+ jacd_dev[base + k] = dQg;
+ /* Row 1 entry for this gen's Qg */
+ jacd_dev[base + ngen + 1 + k] = dQg;
+ }
+
+ /* Row 0: V derivative entry (last in this row) */
+ jacd_dev[base + ngen] = Qmax - Q;
+ /* Row 1: V derivative entry (last in this row) */
+ jacd_dev[base + ngen + 1 + ngen] = Qmin - Q;
+ });
+ }
+
+ /*
+ * Line flow constraint Jacobian.
+ *
+ * Computes dSf2/d{thetaf,Vmf,thetat,Vmt} and dSt2/d{thetaf,Vmf,thetat,Vmt}
+ * for each monitored line, plus optional slack variable entries (-1.0).
+ *
+ * Flat array layout per line (8 entries without slack, 10 with):
+ * Row 0 (Sf2): [dSf2_dthetaf, dSf2_dVmf, dSf2_dthetat, dSf2_dVmt,
+ * (-1.0 if slack)]
+ * Row 1 (St2): [dSt2_dthetaf, dSt2_dVmf, dSt2_dthetat, dSt2_dVmt,
+ * (-1.0 if slack)]
+ */
+ LINEParamsRajaHiop *lineparams = &pbpolrajahiopsparse->lineparams;
+
+ if (lineparams->nlinelim) {
+ double *Gff_arr = lineparams->Gff_dev_;
+ double *Bff_arr = lineparams->Bff_dev_;
+ double *Gft_arr = lineparams->Gft_dev_;
+ double *Bft_arr = lineparams->Bft_dev_;
+ double *Gtf_arr = lineparams->Gtf_dev_;
+ double *Btf_arr = lineparams->Btf_dev_;
+ double *Gtt_arr = lineparams->Gtt_dev_;
+ double *Btt_arr = lineparams->Btt_dev_;
+ int *linelimidx = lineparams->linelimidx_dev_;
+ int *xidxf = lineparams->xidxf_dev_;
+ int *xidxt = lineparams->xidxt_dev_;
+ int *ineqjacsp_idx = lineparams->ineqjacsp_idx_dev_;
+ int has_slack = (int)opflow->allow_lineflow_violation;
+ int row_stride = 4 + has_slack;
+
+ RAJA::forall(
+ RAJA::RangeSegment(0, lineparams->nlinelim),
+ RAJA_LAMBDA(RAJA::Index_type i) {
+ int j = linelimidx[i];
+ int base = ineqjacsp_idx[i];
+
+ double thetaf = x_dev[xidxf[j]];
+ double Vmf = x_dev[xidxf[j] + 1];
+ double thetat = x_dev[xidxt[j]];
+ double Vmt = x_dev[xidxt[j] + 1];
+ double thetaft = thetaf - thetat;
+ double thetatf = thetat - thetaf;
+
+ double Gff = Gff_arr[j], Bff = Bff_arr[j];
+ double Gft = Gft_arr[j], Bft = Bft_arr[j];
+ double Gtf = Gtf_arr[j], Btf = Btf_arr[j];
+ double Gtt = Gtt_arr[j], Btt = Btt_arr[j];
+
+ double sin_ft = sin(thetaft), cos_ft = cos(thetaft);
+ double sin_tf = sin(thetatf), cos_tf = cos(thetatf);
+
+ double Pf = Gff * Vmf * Vmf +
+ Vmf * Vmt * (Gft * cos_ft + Bft * sin_ft);
+ double Qf = -Bff * Vmf * Vmf +
+ Vmf * Vmt * (-Bft * cos_ft + Gft * sin_ft);
+ double Pt = Gtt * Vmt * Vmt +
+ Vmt * Vmf * (Gtf * cos_tf + Btf * sin_tf);
+ double Qt = -Btt * Vmt * Vmt +
+ Vmt * Vmf * (-Btf * cos_tf + Gtf * sin_tf);
+
+ double dSf2_dPf = 2 * Pf, dSf2_dQf = 2 * Qf;
+ double dSt2_dPt = 2 * Pt, dSt2_dQt = 2 * Qt;
+
+ double dPf_dthetaf = Vmf * Vmt * (-Gft * sin_ft + Bft * cos_ft);
+ double dPf_dVmf = 2 * Gff * Vmf +
+ Vmt * (Gft * cos_ft + Bft * sin_ft);
+ double dPf_dthetat = Vmf * Vmt * (Gft * sin_ft - Bft * cos_ft);
+ double dPf_dVmt = Vmf * (Gft * cos_ft + Bft * sin_ft);
+
+ double dQf_dthetaf = Vmf * Vmt * (Bft * sin_ft + Gft * cos_ft);
+ double dQf_dVmf = -2 * Bff * Vmf +
+ Vmt * (-Bft * cos_ft + Gft * sin_ft);
+ double dQf_dthetat = Vmf * Vmt * (-Bft * sin_ft - Gft * cos_ft);
+ double dQf_dVmt = Vmf * (-Bft * cos_ft + Gft * sin_ft);
+
+ double dPt_dthetat = Vmt * Vmf * (-Gtf * sin_tf + Btf * cos_tf);
+ double dPt_dVmt = 2 * Gtt * Vmt +
+ Vmf * (Gtf * cos_tf + Btf * sin_tf);
+ double dPt_dthetaf = Vmt * Vmf * (Gtf * sin_tf - Btf * cos_tf);
+ double dPt_dVmf = Vmt * (Gtf * cos_tf + Btf * sin_tf);
+
+ double dQt_dthetat = Vmt * Vmf * (Btf * sin_tf + Gtf * cos_tf);
+ double dQt_dVmt = -2 * Btt * Vmt +
+ Vmf * (-Btf * cos_tf + Gtf * sin_tf);
+ double dQt_dthetaf = Vmt * Vmf * (-Btf * sin_tf - Gtf * cos_tf);
+ double dQt_dVmf = Vmt * (-Btf * cos_tf + Gtf * sin_tf);
+
+ /* Row 0 (Sf2): derivatives w.r.t. thetaf, Vmf, thetat, Vmt */
+ jacd_dev[base + 0] =
+ dSf2_dPf * dPf_dthetaf + dSf2_dQf * dQf_dthetaf;
+ jacd_dev[base + 1] =
+ dSf2_dPf * dPf_dVmf + dSf2_dQf * dQf_dVmf;
+ jacd_dev[base + 2] =
+ dSf2_dPf * dPf_dthetat + dSf2_dQf * dQf_dthetat;
+ jacd_dev[base + 3] =
+ dSf2_dPf * dPf_dVmt + dSf2_dQf * dQf_dVmt;
+
+ /* Row 1 (St2): derivatives w.r.t. thetaf, Vmf, thetat, Vmt */
+ jacd_dev[base + row_stride + 0] =
+ dSt2_dPt * dPt_dthetaf + dSt2_dQt * dQt_dthetaf;
+ jacd_dev[base + row_stride + 1] =
+ dSt2_dPt * dPt_dVmf + dSt2_dQt * dQt_dVmf;
+ jacd_dev[base + row_stride + 2] =
+ dSt2_dPt * dPt_dthetat + dSt2_dQt * dQt_dthetat;
+ jacd_dev[base + row_stride + 3] =
+ dSt2_dPt * dPt_dVmt + dSt2_dQt * dQt_dVmt;
+
+ /* Slack variable entries (Step 5) */
+ if (has_slack) {
+ jacd_dev[base + 4] = -1.0;
+ jacd_dev[base + row_stride + 4] = -1.0;
+ }
+ });
+ }
+}
+
+#endif // EXAGO_ENABLE_HIOP_SPARSE
+#endif // EXAGO_ENABLE_RAJA
diff --git a/src/opflow/model/power_bal_hiop/pbpolrajahiopsparse_gpu.hpp b/src/opflow/model/power_bal_hiop/pbpolrajahiopsparse_gpu.hpp
new file mode 100644
index 00000000..29d82e87
--- /dev/null
+++ b/src/opflow/model/power_bal_hiop/pbpolrajahiopsparse_gpu.hpp
@@ -0,0 +1,30 @@
+#include
+
+#if defined(EXAGO_ENABLE_RAJA)
+#if defined(EXAGO_ENABLE_HIOP_SPARSE)
+
+#pragma once
+
+#include
+#include "pbpolrajahiopsparse.hpp"
+
+/**
+ * GPU-only (PETSc-free) computation of inequality constraint Jacobian values.
+ *
+ * Replaces the PETSc-based path that calls
+ * opflow->modelops.computeinequalityconstraintjacobian followed by
+ * MatGetRow extraction. Writes directly into device memory using RAJA
+ * kernels, without PETSc Mat/Vec operations; no H2D, D2H copies back and forth.
+ *
+ * @param opflow The OPFLOW problem context
+ * @param x_dev Device array of variable values
+ * @param jacd_dev Device output array for inequality Jacobian values
+ * (points to the ineq portion of the sparse Jacobian,
+ * i.e. MJacS_dev + nnz_eqjacsp)
+ */
+void ComputeIneqJacValuesGPU_PBPOLRAJAHIOPSPARSE(OPFLOW opflow,
+ const double *x_dev,
+ double *jacd_dev);
+
+#endif // EXAGO_ENABLE_HIOP_SPARSE
+#endif // EXAGO_ENABLE_RAJA
diff --git a/src/opflow/model/power_bal_hiop/pbpolrajahiopsparsekernels.cpp b/src/opflow/model/power_bal_hiop/pbpolrajahiopsparsekernels.cpp
index adc10c8e..11b39171 100644
--- a/src/opflow/model/power_bal_hiop/pbpolrajahiopsparsekernels.cpp
+++ b/src/opflow/model/power_bal_hiop/pbpolrajahiopsparsekernels.cpp
@@ -14,6 +14,7 @@
#include
#include "pbpolrajahiopsparsekernels.hpp"
#include "pbpolrajahiopsparse.hpp"
+#include "pbpolrajahiopsparse_gpu.hpp"
PetscErrorCode OPFLOWSetInitialGuessArray_PBPOLRAJAHIOPSPARSE(OPFLOW opflow,
double *x0_dev) {
@@ -545,40 +546,11 @@ OPFLOWComputeSparseInequalityConstraintJacobian_PBPOLRAJAHIOPSPARSE(
ierr = PetscLogEventBegin(opflow->ineqconsjaclogger, 0, 0, 0, 0);
CHKERRQ(ierr);
- ierr = VecGetArray(opflow->X, &x);
- CHKERRQ(ierr);
-
- // Copy from device to host
- umpire::Allocator h_allocator_ = resmgr.getAllocator("HOST");
- registerWith(x, opflow->nx, resmgr, h_allocator_);
- resmgr.copy((double *)x, (double *)x_dev);
-
- ierr = VecRestoreArray(opflow->X, &x);
- CHKERRQ(ierr);
-
- /* Compute inequality constraint jacobian */
- ierr = (*opflow->modelops.computeinequalityconstraintjacobian)(
- opflow, opflow->X, opflow->Jac_Gi);
- CHKERRQ(ierr);
-
- ierr = MatGetSize(opflow->Jac_Gi, &nrow, &ncol);
- CHKERRQ(ierr);
-
- values = pbpolrajahiopsparse->val_jacineq;
- /* Copy over values */
- for (i = 0; i < nrow; i++) {
- ierr = MatGetRow(opflow->Jac_Gi, i, &nvals, &cols, &vals);
- CHKERRQ(ierr);
- for (j = 0; j < nvals; j++) {
- values[j] = vals[j];
- }
- values += nvals;
- ierr = MatRestoreRow(opflow->Jac_Gi, i, &nvals, &cols, &vals);
- CHKERRQ(ierr);
- }
- // Copy over val_jacineq to device
- resmgr.copy(MJacS_dev + opflow->nnz_eqjacsp,
- pbpolrajahiopsparse->val_jacineq);
+ /* KS: Compute inequality constraint Jacobian directly on device.
+ No H2D, D2H copies: x_dev is already on device, output goes
+ straight into the ineq portion of MJacS_dev. */
+ ComputeIneqJacValuesGPU_PBPOLRAJAHIOPSPARSE(
+ opflow, x_dev, MJacS_dev + opflow->nnz_eqjacsp);
ierr = PetscLogEventEnd(opflow->ineqconsjaclogger, 0, 0, 0, 0);
CHKERRQ(ierr);
diff --git a/src/opflow/solver/hiop/opflow_hiopsparsegpu.cpp b/src/opflow/solver/hiop/opflow_hiopsparsegpu.cpp
index d7a9539d..ffde17c9 100644
--- a/src/opflow/solver/hiop/opflow_hiopsparsegpu.cpp
+++ b/src/opflow/solver/hiop/opflow_hiopsparsegpu.cpp
@@ -69,20 +69,9 @@ bool OPFLOWHIOPSPARSEGPUInterface::get_sparse_blocks_info(
nnz_sparse_Jaceq = opflow->nnz_eqjacsp = info_eq.nz_used;
- nnz_sparse_Jacineq = 0;
- if (opflow->Nconineq) {
- ierr = (*opflow->modelops.computeinequalityconstraintjacobian)(
- opflow, opflow->X, opflow->Jac_Gi);
- CHKERRQ(ierr);
- ierr =
- MatSetOption(opflow->Jac_Gi, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE);
- CHKERRQ(ierr);
-
- ierr = MatGetInfo(opflow->Jac_Gi, MAT_LOCAL, &info_ineq);
- CHKERRQ(ierr);
-
- nnz_sparse_Jacineq = opflow->nnz_ineqjacsp = info_ineq.nz_used;
- }
+ /* KS: Use pre-computed nnz_ineqjacsp from model setup (avoids PETSc Mat
+ assembly just for counting non-zeros -- not sure if faster or if it scales but hey no PETSc!). */
+ nnz_sparse_Jacineq = opflow->nnz_ineqjacsp;
/* Compute non-zeros for Hessian */
ierr = (*opflow->modelops.computehessian)(opflow, opflow->X, opflow->Lambdae,
diff --git a/tests/unit/CMakeLists.txt b/tests/unit/CMakeLists.txt
index ace550c4..13c6b989 100644
--- a/tests/unit/CMakeLists.txt
+++ b/tests/unit/CMakeLists.txt
@@ -13,6 +13,17 @@ add_executable(test_pflow test_pflow.cpp utils/test_acopf_utils.cpp)
target_link_libraries(test_pflow ExaGO::PFLOW)
target_include_directories(test_pflow PRIVATE ./utils)
+if(EXAGO_ENABLE_RAJA AND EXAGO_ENABLE_HIOP_SPARSE)
+ if(EXAGO_ENABLE_CUDA)
+ set_source_files_properties(test_ineqjac_gpu.cpp PROPERTIES LANGUAGE CUDA)
+ endif()
+ add_executable(test_ineqjac_gpu test_ineqjac_gpu.cpp)
+ target_link_libraries(test_ineqjac_gpu ExaGO::OPFLOW)
+ target_include_directories(
+ test_ineqjac_gpu PRIVATE ${CMAKE_SOURCE_DIR}/src/opflow
+ )
+endif()
+
add_executable(test_error_handler test_error_handler.cpp)
target_link_libraries(test_error_handler ExaGO::UTILS)
@@ -49,6 +60,22 @@ if(EXAGO_RUN_TESTS)
${network_files}
)
+ if(EXAGO_ENABLE_RAJA AND EXAGO_ENABLE_HIOP_SPARSE)
+ exago_add_test(
+ NAME
+ "UNIT_TEST_8_INEQJAC_GPU"
+ DEPENDS
+ HIOP
+ COMMAND
+ ${RUNCMD}
+ $
+ -opflow_genbusvoltage
+ VARIABLE_WITHIN_BOUNDS
+ NETFILES
+ ${network_files}
+ )
+ endif()
+
exago_add_test(
NAME UNIT_TESTS_UTILS COMMAND $
)
diff --git a/tests/unit/test_ineqjac_gpu.cpp b/tests/unit/test_ineqjac_gpu.cpp
new file mode 100644
index 00000000..e5dc2f75
--- /dev/null
+++ b/tests/unit/test_ineqjac_gpu.cpp
@@ -0,0 +1,394 @@
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+
+#if defined(EXAGO_ENABLE_RAJA)
+#include
+#include
+#include
+#include
+#endif
+
+#ifdef EXAGO_ENABLE_GPU
+#include
+#endif
+
+#include "model/power_bal_hiop/paramsrajahiop.h"
+#include "model/power_bal_hiop/pbpolrajahiopsparse_gpu.hpp"
+
+static const double TOL = 1e-8;
+
+static int compare_arrays(const double *ref, const double *gpu, int n,
+ const char *label) {
+ int fail = 0;
+ for (int i = 0; i < n; i++) {
+ if (std::abs(ref[i] - gpu[i]) / (1.0 + std::abs(ref[i])) > TOL) {
+ std::cout << " MISMATCH " << label << "[" << i << "]: PETSc=" << ref[i]
+ << " GPU=" << gpu[i]
+ << " diff=" << std::abs(ref[i] - gpu[i]) << std::endl;
+ fail++;
+ }
+ }
+ return fail;
+}
+
+/**
+ * Test 8: Validate GPU inequality constraint Jacobian values against PETSc.
+ *
+ * First solves with IPOPT/PBPOL to obtain a realistic solution, then
+ * sets up the PBPOLRAJAHIOPSPARSE model and evaluates the inequality
+ * Jacobian at the converged solution using both the PETSc reference
+ * path and the GPU RAJA kernel. Compares element-by-element.
+ */
+int main(int argc, char **argv) {
+ PetscErrorCode ierr;
+ PetscBool flg;
+ char file_c_str[PETSC_MAX_PATH_LEN];
+ std::string file;
+ char appname[] = "opflow";
+ MPI_Comm comm = MPI_COMM_WORLD;
+ char help[] = "Test 8: GPU ineq Jacobian validation\n";
+ int fail = 0;
+
+ ierr = ExaGOInitialize(comm, &argc, &argv, appname, help);
+ if (ierr) {
+ fprintf(stderr, "Could not initialize ExaGO.\n");
+ return ierr;
+ }
+
+ ierr = PetscOptionsGetString(NULL, NULL, "-netfile", file_c_str,
+ PETSC_MAX_PATH_LEN, &flg);
+ CHKERRQ(ierr);
+ if (!flg)
+ file = "../datafiles/case9/case9mod.m";
+ else
+ file.assign(file_c_str);
+
+ std::cout << "=== Test 8: GPU Inequality Jacobian Validation ===" << std::endl;
+ std::cout << "Network file: " << file << std::endl;
+
+ /* ------------------------------------------------------------------
+ * Step 1: Solve with IPOPT/PBPOL to get a realistic solution
+ * ------------------------------------------------------------------ */
+ OPFLOW opflow_ref;
+ Vec Xsol;
+
+ ierr = OPFLOWCreate(PETSC_COMM_WORLD, &opflow_ref);
+ CHKERRQ(ierr);
+ ierr = OPFLOWReadMatPowerData(opflow_ref, file.c_str());
+ CHKERRQ(ierr);
+ ierr = OPFLOWSetModel(opflow_ref, OPFLOWMODEL_PBPOL);
+ CHKERRQ(ierr);
+ ierr = OPFLOWSetSolver(opflow_ref, OPFLOWSOLVER_IPOPT);
+ CHKERRQ(ierr);
+ ierr = OPFLOWSolve(opflow_ref);
+ CHKERRQ(ierr);
+ ierr = OPFLOWGetSolution(opflow_ref, &Xsol);
+ CHKERRQ(ierr);
+
+ std::cout << "IPOPT solve complete." << std::endl;
+
+ /* Get the solution array (natural ordering) */
+ PetscInt nx_ref;
+ ierr = VecGetSize(Xsol, &nx_ref);
+ CHKERRQ(ierr);
+ double *xsol_nat;
+ ierr = VecGetArray(Xsol, &xsol_nat);
+ CHKERRQ(ierr);
+
+ /* ------------------------------------------------------------------
+ * Step 2: Create the PBPOLRAJAHIOPSPARSE model and set up
+ * ------------------------------------------------------------------ */
+ OPFLOW opflow;
+ ierr = OPFLOWCreate(PETSC_COMM_WORLD, &opflow);
+ CHKERRQ(ierr);
+ ierr = OPFLOWReadMatPowerData(opflow, file.c_str());
+ CHKERRQ(ierr);
+ ierr = OPFLOWSetModel(opflow, OPFLOWMODEL_PBPOLRAJAHIOPSPARSE);
+ CHKERRQ(ierr);
+ ierr = OPFLOWSetSolver(opflow, OPFLOWSOLVER_HIOPSPARSEGPU);
+ CHKERRQ(ierr);
+ ierr = OPFLOWSetUp(opflow);
+ CHKERRQ(ierr);
+
+ int nx, nconeq, nconineq;
+ ierr = OPFLOWGetSizes(opflow, &nx, &nconeq, &nconineq);
+ CHKERRQ(ierr);
+
+ std::cout << "nx=" << nx << " nconeq=" << nconeq
+ << " nconineq=" << nconineq
+ << " nnz_ineqjacsp=" << opflow->nnz_ineqjacsp << std::endl;
+
+ if (!nconineq) {
+ std::cout << "No inequality constraints -- nothing to test. PASS."
+ << std::endl;
+ ierr = VecRestoreArray(Xsol, &xsol_nat);
+ CHKERRQ(ierr);
+ ierr = OPFLOWDestroy(&opflow_ref);
+ CHKERRQ(ierr);
+ ierr = OPFLOWDestroy(&opflow);
+ CHKERRQ(ierr);
+ ExaGOFinalize();
+ return 0;
+ }
+
+ /* ------------------------------------------------------------------
+ * Step 3: Copy IPOPT solution into opflow->X (natural ordering)
+ * ------------------------------------------------------------------ */
+ double *x_nat;
+ ierr = VecGetArray(opflow->X, &x_nat);
+ CHKERRQ(ierr);
+ for (int i = 0; i < nx; i++)
+ x_nat[i] = xsol_nat[i];
+ ierr = VecRestoreArray(opflow->X, &x_nat);
+ CHKERRQ(ierr);
+
+ ierr = VecRestoreArray(Xsol, &xsol_nat);
+ CHKERRQ(ierr);
+
+ std::cout << "Evaluating Jacobian at IPOPT-converged solution." << std::endl;
+
+ /* ------------------------------------------------------------------
+ * Step 4: Compute reference inequality Jacobian via PETSc.
+ * The first call establishes the sparsity pattern, the second
+ * computes the actual values at the converged solution.
+ * ------------------------------------------------------------------ */
+ ierr = (*opflow->modelops.computeinequalityconstraintjacobian)(
+ opflow, opflow->X, opflow->Jac_Gi);
+ CHKERRQ(ierr);
+ ierr = MatSetOption(opflow->Jac_Gi, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE);
+ CHKERRQ(ierr);
+ ierr = (*opflow->modelops.computeinequalityconstraintjacobian)(
+ opflow, opflow->X, opflow->Jac_Gi);
+ CHKERRQ(ierr);
+
+ int nnz = opflow->nnz_ineqjacsp;
+
+ auto &resmgr = umpire::ResourceManager::getInstance();
+ umpire::Allocator h_allocator = resmgr.getAllocator("HOST");
+
+ double *ref_vals =
+ static_cast(h_allocator.allocate(nnz * sizeof(double)));
+
+ PetscInt nrow, ncol;
+ ierr = MatGetSize(opflow->Jac_Gi, &nrow, &ncol);
+ CHKERRQ(ierr);
+
+ double *vptr = ref_vals;
+ for (int i = 0; i < nrow; i++) {
+ PetscInt nvals;
+ const PetscInt *cols;
+ const PetscScalar *vals;
+ ierr = MatGetRow(opflow->Jac_Gi, i, &nvals, &cols, &vals);
+ CHKERRQ(ierr);
+ for (int j = 0; j < nvals; j++)
+ vptr[j] = vals[j];
+ vptr += nvals;
+ ierr = MatRestoreRow(opflow->Jac_Gi, i, &nvals, &cols, &vals);
+ CHKERRQ(ierr);
+ }
+
+ int ref_count = (int)(vptr - ref_vals);
+ std::cout << "PETSc extracted " << ref_count << " ineq Jacobian values"
+ << " (expected " << nnz << ")" << std::endl;
+
+ if (ref_count != nnz) {
+ std::cout << "FAIL: NNZ mismatch! PETSc=" << ref_count
+ << " analytical=" << nnz << std::endl;
+ fail++;
+ }
+
+ /* ------------------------------------------------------------------
+ * Step 5: Compute GPU inequality Jacobian at the same solution
+ * ------------------------------------------------------------------ */
+ double *x_host;
+ ierr = VecGetArray(opflow->X, &x_host);
+ CHKERRQ(ierr);
+
+ double *x_sd =
+ static_cast(h_allocator.allocate(nx * sizeof(double)));
+ for (int i = 0; i < nx; i++)
+ x_sd[opflow->idxn2sd_map[i]] = x_host[i];
+
+ ierr = VecRestoreArray(opflow->X, &x_host);
+ CHKERRQ(ierr);
+
+ double *gpu_vals;
+ double *x_dev, *gpu_vals_dev;
+
+#ifdef EXAGO_ENABLE_GPU
+ umpire::Allocator d_allocator = resmgr.getAllocator("DEVICE");
+ x_dev = static_cast(d_allocator.allocate(nx * sizeof(double)));
+ gpu_vals_dev =
+ static_cast(d_allocator.allocate(nnz * sizeof(double)));
+ resmgr.memset(gpu_vals_dev, 0, nnz * sizeof(double));
+#else
+ x_dev = x_sd;
+ gpu_vals_dev =
+ static_cast(h_allocator.allocate(nnz * sizeof(double)));
+ memset(gpu_vals_dev, 0, nnz * sizeof(double));
+#endif
+
+ umpire::util::AllocationRecord rec_x{x_sd, sizeof(double) * nx,
+ h_allocator.getAllocationStrategy()};
+ resmgr.registerAllocation(x_sd, rec_x);
+#ifdef EXAGO_ENABLE_GPU
+ resmgr.copy(x_dev, x_sd);
+#endif
+
+ std::cout << "Running RAJA GPU inequality Jacobian kernel..." << std::endl;
+ ComputeIneqJacValuesGPU_PBPOLRAJAHIOPSPARSE(opflow, x_dev, gpu_vals_dev);
+
+ gpu_vals = static_cast(h_allocator.allocate(nnz * sizeof(double)));
+#ifdef EXAGO_ENABLE_GPU
+ resmgr.copy(gpu_vals, gpu_vals_dev);
+#else
+ memcpy(gpu_vals, gpu_vals_dev, nnz * sizeof(double));
+#endif
+
+ /* ------------------------------------------------------------------
+ * Step 6: Compare
+ * ------------------------------------------------------------------ */
+ std::cout << "Comparing " << nnz << " inequality Jacobian values..."
+ << std::endl;
+ int cmp_fail = compare_arrays(ref_vals, gpu_vals, nnz, "ineqjac");
+ fail += cmp_fail;
+
+ if (cmp_fail == 0)
+ std::cout << "PASS: All " << nnz
+ << " inequality Jacobian values match within tol=" << TOL
+ << std::endl;
+ else
+ std::cout << "FAIL: " << cmp_fail << " of " << nnz << " values differ"
+ << std::endl;
+
+ /* ------------------------------------------------------------------
+ * Step 7: Performance comparison (enabled with -benchmark flag)
+ * ------------------------------------------------------------------ */
+ PetscBool run_benchmark = PETSC_FALSE;
+ ierr = PetscOptionsGetBool(NULL, NULL, "-benchmark", NULL, &run_benchmark);
+ CHKERRQ(ierr);
+
+ if (run_benchmark) {
+ int niters = 1000;
+ PetscInt bench_nrow, bench_ncol;
+ ierr = MatGetSize(opflow->Jac_Gi, &bench_nrow, &bench_ncol);
+ CHKERRQ(ierr);
+
+ double *bench_vals =
+ static_cast(h_allocator.allocate(nnz * sizeof(double)));
+
+ std::cout << "\n=== Performance Benchmark (" << niters
+ << " iterations) ===" << std::endl;
+
+ /* --- PETSc path: compute + MatGetRow extraction + copy to device --- */
+ {
+ double *bench_dev;
+ size_t nnz_bytes = nnz * sizeof(double);
+#ifdef EXAGO_ENABLE_GPU
+ bench_dev = static_cast(d_allocator.allocate(nnz_bytes));
+#else
+ bench_dev = static_cast(h_allocator.allocate(nnz_bytes));
+#endif
+
+ auto t0 = std::chrono::high_resolution_clock::now();
+ for (int iter = 0; iter < niters; iter++) {
+ ierr = (*opflow->modelops.computeinequalityconstraintjacobian)(
+ opflow, opflow->X, opflow->Jac_Gi);
+
+ double *vp = bench_vals;
+ for (int i = 0; i < bench_nrow; i++) {
+ PetscInt nv;
+ const PetscInt *c;
+ const PetscScalar *v;
+ MatGetRow(opflow->Jac_Gi, i, &nv, &c, &v);
+ for (int j = 0; j < nv; j++)
+ vp[j] = v[j];
+ vp += nv;
+ MatRestoreRow(opflow->Jac_Gi, i, &nv, &c, &v);
+ }
+#ifdef EXAGO_ENABLE_GPU
+ (void)hipMemcpy(bench_dev, bench_vals, nnz_bytes,
+ hipMemcpyHostToDevice);
+#else
+ memcpy(bench_dev, bench_vals, nnz_bytes);
+#endif
+ }
+ auto t1 = std::chrono::high_resolution_clock::now();
+ double petsc_us =
+ std::chrono::duration(t1 - t0).count() / niters;
+ std::cout << " PETSc path (compute + MatGetRow + copy): " << petsc_us
+ << " us/iter" << std::endl;
+
+#ifdef EXAGO_ENABLE_GPU
+ d_allocator.deallocate(bench_dev);
+#else
+ h_allocator.deallocate(bench_dev);
+#endif
+ }
+
+ /* --- GPU path: RAJA kernels, no copies --- */
+ {
+ double *bench_dev;
+#ifdef EXAGO_ENABLE_GPU
+ bench_dev =
+ static_cast(d_allocator.allocate(nnz * sizeof(double)));
+#else
+ bench_dev =
+ static_cast(h_allocator.allocate(nnz * sizeof(double)));
+#endif
+
+#ifdef EXAGO_ENABLE_GPU
+ (void)hipDeviceSynchronize();
+#endif
+ auto t0 = std::chrono::high_resolution_clock::now();
+ for (int iter = 0; iter < niters; iter++) {
+ ComputeIneqJacValuesGPU_PBPOLRAJAHIOPSPARSE(opflow, x_dev, bench_dev);
+ }
+#ifdef EXAGO_ENABLE_GPU
+ (void)hipDeviceSynchronize();
+#endif
+ auto t1 = std::chrono::high_resolution_clock::now();
+ double gpu_us =
+ std::chrono::duration(t1 - t0).count() / niters;
+ std::cout << " GPU path (RAJA kernels, no copies): " << gpu_us
+ << " us/iter" << std::endl;
+
+#ifdef EXAGO_ENABLE_GPU
+ d_allocator.deallocate(bench_dev);
+#else
+ h_allocator.deallocate(bench_dev);
+#endif
+ }
+
+ h_allocator.deallocate(bench_vals);
+ std::cout << "=== End Benchmark ===" << std::endl;
+ }
+
+ /* ------------------------------------------------------------------
+ * Cleanup
+ * ------------------------------------------------------------------ */
+ h_allocator.deallocate(ref_vals);
+ h_allocator.deallocate(gpu_vals);
+ h_allocator.deallocate(x_sd);
+#ifdef EXAGO_ENABLE_GPU
+ d_allocator.deallocate(x_dev);
+ d_allocator.deallocate(gpu_vals_dev);
+#else
+ h_allocator.deallocate(gpu_vals_dev);
+#endif
+
+ ierr = OPFLOWDestroy(&opflow);
+ CHKERRQ(ierr);
+ ierr = OPFLOWDestroy(&opflow_ref);
+ CHKERRQ(ierr);
+ ExaGOFinalize();
+
+ return fail;
+}
diff --git a/viz/data/dataprocess.ipynb b/viz/data/dataprocess.ipynb
index 25bd8732..7d5ade4b 100644
--- a/viz/data/dataprocess.ipynb
+++ b/viz/data/dataprocess.ipynb
@@ -69,7 +69,7 @@
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": null,
"metadata": {
"id": "0PG5fSd1aIMP"
},
@@ -77,8 +77,8 @@
"source": [
"# update point id in us file\n",
"\n",
- "# df = pd.read_csv('/content/drive/MyDrive/23 summer pnnl/new_all_bus_id_7_25.csv')\n",
- "# f = open('/content/drive/MyDrive/23 summer pnnl/case_ACTIVSg all point.json')\n",
+ "# df = pd.read_csv('/content/drive/MyDrive/23 summer ornl/new_all_bus_id_7_25.csv')\n",
+ "# f = open('/content/drive/MyDrive/23 summer ornl/case_ACTIVSg all point.json')\n",
"# pointsdata = json.load(f)\n",
"\n",
"# for point in pointsdata:\n",
@@ -87,7 +87,7 @@
"# if(len(result.index)>0):\n",
"# point['properties']['NAME'] = result.iloc[0]\n",
"# # df[df.wkt == src].iloc[0].id\n",
- "# with open('/content/drive/MyDrive/23 summer pnnl/case_ACTIVSg all point new name.json', 'w') as f:\n",
+ "# with open('/content/drive/MyDrive/23 summer ornl/case_ACTIVSg all point new name.json', 'w') as f:\n",
"# json.dump(pointsdata, f)"
]
},
@@ -268,15 +268,15 @@
},
{
"cell_type": "code",
- "execution_count": 10,
+ "execution_count": null,
"metadata": {
"id": "jvYcP_hKaIMN"
},
"outputs": [],
"source": [
"# change line name to uniqe point id\n",
- "df = pd.read_csv('/content/drive/MyDrive/23 summer pnnl/new_all_bus_id_7_25.csv')\n",
- "f = open('/content/drive/MyDrive/23 summer pnnl/case_ACTIVSg all line.json')\n",
+ "df = pd.read_csv('/content/drive/MyDrive/23 summer ornl/new_all_bus_id_7_25.csv')\n",
+ "f = open('/content/drive/MyDrive/23 summer ornl/case_ACTIVSg all line.json')\n",
"linedata = json.load(f)\n",
"# lines = []\n",
"# df['temp_wkt'] = df['wkt'].map(str).replace(\"POINT (\", '')\n",
@@ -300,7 +300,7 @@
" line['properties']['source'] = tresult.iloc[0]\n",
" line['properties']['NAME'] = line['properties']['target'] + ' -- ' + line['properties']['source']\n",
"\n",
- "with open('/content/drive/MyDrive/23 summer pnnl/case_ACTIVSg all line new name.json', 'w') as f:\n",
+ "with open('/content/drive/MyDrive/23 summer ornl/case_ACTIVSg all line new name.json', 'w') as f:\n",
" json.dump(linedata, f)\n",
"\n",
"\n"
@@ -308,7 +308,7 @@
},
{
"cell_type": "code",
- "execution_count": 11,
+ "execution_count": null,
"metadata": {
"id": "qbHpQ2A0nKks"
},
@@ -318,25 +318,25 @@
" \"type\": \"FeatureCollection\",\n",
" \"features\":linedata\n",
" }\n",
- "with open('/content/drive/MyDrive/23 summer pnnl/qgis all line new name.json', 'w') as f:\n",
+ "with open('/content/drive/MyDrive/23 summer ornl/qgis all line new name.json', 'w') as f:\n",
" json.dump(linejson, f)"
]
},
{
"cell_type": "code",
- "execution_count": 12,
+ "execution_count": null,
"metadata": {
"id": "gP8B1KFJkDYM"
},
"outputs": [],
"source": [
"\n",
- "f = open('/content/drive/MyDrive/23 summer pnnl/case_ACTIVSg all temp.json')\n",
+ "f = open('/content/drive/MyDrive/23 summer ornl/case_ACTIVSg all temp.json')\n",
"alldata = json.load(f)\n",
"alldata['geojsondata']['features'] = linedata + pointsdata\n",
"alldata['nbus'] = len(pointsdata)\n",
"alldata['nbranch'] = len(linedata)\n",
- "with open('/content/drive/MyDrive/23 summer pnnl/case_ACTIVSg all.json', 'w') as f:\n",
+ "with open('/content/drive/MyDrive/23 summer ornl/case_ACTIVSg all.json', 'w') as f:\n",
" json.dump(alldata, f, indent=4)"
]
},
@@ -350,7 +350,7 @@
"source": [
"# keys = lines[0].keys()\n",
"\n",
- "# with open('/content/drive/MyDrive/23 summer pnnl/all_line_id_7_24.csv', 'w', newline='') as output_file:\n",
+ "# with open('/content/drive/MyDrive/23 summer ornl/all_line_id_7_24.csv', 'w', newline='') as output_file:\n",
"# dict_writer = csv.DictWriter(output_file, keys)\n",
"# dict_writer.writeheader()\n",
"# dict_writer.writerows(lines)"
@@ -358,19 +358,19 @@
},
{
"cell_type": "code",
- "execution_count": 25,
+ "execution_count": null,
"metadata": {
"id": "p_2TKr7b-itk"
},
"outputs": [],
"source": [
"# combine 2000 dataset with other dataset\n",
- "f70k = open('/content/drive/MyDrive/23 summer pnnl/case_ACTIVSg70k list.json')\n",
+ "f70k = open('/content/drive/MyDrive/23 summer ornl/case_ACTIVSg70k list.json')\n",
"d70k = json.load(f70k)\n",
"\n",
- "f10k = open('/content/drive/MyDrive/23 summer pnnl/case_ACTIVSg10k list.json')\n",
+ "f10k = open('/content/drive/MyDrive/23 summer ornl/case_ACTIVSg10k list.json')\n",
"d10k = json.load(f10k)\n",
- "f2000 = open('/content/drive/MyDrive/23 summer pnnl/case_ACTIVSg2000 list.json')\n",
+ "f2000 = open('/content/drive/MyDrive/23 summer ornl/case_ACTIVSg2000 list.json')\n",
"d2000 = json.load(f2000)\n",
"\n",
"\n",
@@ -379,7 +379,7 @@
"p2000 = [d for d in d2000 if d['geometry']['type'] == 'Point']\n",
"points = p70 + p10\n",
"points = points + p2000\n",
- "with open('/content/drive/MyDrive/23 summer pnnl/case_ACTIVSg all point.json', 'w') as f:\n",
+ "with open('/content/drive/MyDrive/23 summer ornl/case_ACTIVSg all point.json', 'w') as f:\n",
" json.dump(points, f)\n",
"\n",
"l70 = [d for d in d70k if d['geometry']['type'] == 'LineString']\n",
@@ -387,19 +387,19 @@
"l2000 = [d for d in d2000 if d['geometry']['type'] == 'LineString']\n",
"lines = l70 + l10\n",
"lines = lines + l2000\n",
- "with open('/content/drive/MyDrive/23 summer pnnl/case_ACTIVSg all line.json', 'w') as f:\n",
+ "with open('/content/drive/MyDrive/23 summer ornl/case_ACTIVSg all line.json', 'w') as f:\n",
" json.dump(lines, f)"
]
},
{
"cell_type": "code",
- "execution_count": 26,
+ "execution_count": null,
"metadata": {
"id": "n2a_FbyeE0mS"
},
"outputs": [],
"source": [
- "with open('/content/drive/MyDrive/23 summer pnnl/case_ACTIVSg all point.json', 'w') as f:\n",
+ "with open('/content/drive/MyDrive/23 summer ornl/case_ACTIVSg all point.json', 'w') as f:\n",
" json.dump(points, f)"
]
},
@@ -416,7 +416,7 @@
},
{
"cell_type": "code",
- "execution_count": 15,
+ "execution_count": null,
"metadata": {
"id": "muQyfL9kaIMM"
},
@@ -426,8 +426,8 @@
"# generate source and target info of transmission lines, make pf value absolute\n",
"#remove linestring where the end and start point is the same, because it's not valid to perform bigquery\n",
"import csv\n",
- "input = open('/content/drive/MyDrive/23 summer pnnl/all_line_part_attribute_db_7_25.csv', 'rt')\n",
- "output = open('/content/drive/MyDrive/23 summer pnnl/all_line_db_7_26.csv', 'w', newline='')\n",
+ "input = open('/content/drive/MyDrive/23 summer ornl/all_line_part_attribute_db_7_25.csv', 'rt')\n",
+ "output = open('/content/drive/MyDrive/23 summer ornl/all_line_db_7_26.csv', 'w', newline='')\n",
"writer = csv.writer(output)\n",
"reader = csv.reader(input)\n",
"headers = next(reader, None) # returns the headers or `None` if the input is empty\n",