diff --git a/high_level.h b/high_level.h new file mode 100644 index 0000000..87893fd --- /dev/null +++ b/high_level.h @@ -0,0 +1,47 @@ +#include + +// +// High-level interface. +// + +// Static control part. +// Names of fields are self-explanatory. +// Access relations are tagged, that is live in the spaces +// [S1[...] -> __ref_ID[]] -> arrayID[...] +// where __ref_ID are unique identifiers. +// +// External users have statement-specific information stored either +// (a) separately in a map or +// (b) as user-pointers of statement ids using C interface (discouraged). +struct Scop { + isl::set context; + isl::union_set domain; + isl::union_map mayReads; + isl::union_map mayWrites; + isl::union_map mustWrites; + isl::schedule schedule; +}; + +// Map "scop" to a grid of GPU thread blocks described by "grid" and "block" +// starting from schedule node "node" and using the end-to-end mapping +// strategy. When tiling is performed, use sizes provided in "tiles". +// The return value indicates whether the mapping was performed. +// Mapping strategy is allowed to decrease the block and grid size to avoid +// launching empty blocks or threads, in which case the values in "block" and +// "grid" are updated with the new sizes. +// +// The underlying strategy may change at any time as long as the same types of +// trees can be mapped. +// +// Node is not necessarily a band node. +// +// Trailing ones in "block" and "grid" may be interpreted as not mapping to the +// corresponding thread or block dimensions. All values are strictly positive. +bool mapToGPU( + Scop& scop, + isl::schedule_node node, + const std::vector& tiles, + std::array& block, + std::array& grid); + + diff --git a/low_level.h b/low_level.h new file mode 100644 index 0000000..6501e70 --- /dev/null +++ b/low_level.h @@ -0,0 +1,137 @@ +#include + +// +// Low-level interface. +// + +class Scop; + +// Syntactic identifiers for threads. 
+enum class Thread {
+  x = 0,
+  y,
+  z
+};
+
+// Syntactic identifiers for blocks.
+enum class Block {
+  x = 0,
+  y,
+  z
+};
+
+// Mark the schedule node "node" as the first node in a GPU kernel by inserting
+// a mark node with a unique identifier, followed by an eventual guard node and
+// a context node (referred to as kernel context below). The kernel is
+// expected to be launched on a grid of thread blocks described by "grid" and
+// "block" sizes. The grid configuration is stored in the kernel context. It
+// will be used when mapping band members to blocks/threads.
+isl::schedule_node_mark initKernel(Scop& scop, isl::schedule_node node,
+    const std::array& block, const std::array& grid);
+
+// Check whether the schedule node "node" is inside a kernel.
+bool isInKernel(isl::schedule_node node);
+
+// Return the id of the kernel to which "node" belongs.
+isl::id kernelId(isl::schedule_node node);
+
+// Return the grid of the kernel to which "node" belongs.
+std::array kernelGrid(isl::schedule_node node);
+
+// Return the block size of the kernel to which "node" belongs.
+std::array kernelBlock(isl::schedule_node node);
+
+// Check whether it is valid to map the "dim"-th dimension of band node "node"
+// to thread "t" (block "b") in the context of the schedule "node" belongs to.
+// This considers, e.g., whether children or ancestors of "node" already map
+// to thread "t" (block "b"), whether the dimension is parallel, etc.
+bool canMapBandDim(isl::schedule_node_band node, int dim, Thread t);
+bool canMapBandDim(isl::schedule_node_band node, int dim, Block b);
+
+// Map "dim"-th dimension of the band node "node" to thread (block) dimension
+// "t" ("b"). This mapping is assumed to be valid, in particular that the band
+// is situated inside a kernel. The number of threads (blocks) to map to is
+// taken from the kernel context. Return the updated band node; its parents may
+// have changed due to mapping.
+//
+// Note: while it may be possible to map subtrees to a different number of
+// blocks (threads), it is often dangerous in practice. For example, inserting
+// a __syncthreads call under a condition involving thread identifiers leads to
+// undefined behavior. Therefore, we prefer to have a fixed number of threads
+// (blocks) per kernel. If it is strictly necessary to use fewer threads, the
+// caller can first strip-mine the band so as to have the required number of
+// threads and then map the inner band.
+isl::schedule_node_band mapBandDim(isl::schedule_node_band node, int dim,
+    Thread t);
+isl::schedule_node_band mapBandDim(isl::schedule_node_band node, int dim,
+    Block b);
+
+// Map the subtree rooted at "node" to a single thread (block) along the thread
+// (block) dimension "t" ("b"). Return the updated node; its parents may have
+// changed due to mapping.
+isl::schedule_node mapFixed(isl::schedule_node node, Thread t);
+isl::schedule_node mapFixed(isl::schedule_node node, Block b);
+
+// Get the mapping active at the subtree rooted at "node". In particular,
+// intersect all filters on the path from "node" to the root of schedule tree.
+// Some of these filters perform the mapping and the corresponding constraints
+// will appear in the resulting union set.
+// Ignores any mapping below "node".
+isl::union_set mapping(isl::schedule_node node);
+
+// Check if the subtree rooted at "node" is mapped to thread (block) dimension
+// "t" ("b"). Only the mapping above "node" is taken into account.
+bool isMapped(isl::schedule_node node, Thread t);
+bool isMapped(isl::schedule_node node, Block b);
+
+// Tile the band node "band" with sizes "tiles" and return the updated node.
+isl::schedule_node_band tile(isl::schedule_node_band band, const std::vector& tiles);
+isl::schedule_node_band unroll(isl::schedule_node_band band, long limit);  // NOTE(review): undocumented; meaning of "limit" to be confirmed.
+
+// Finalize the mapping of a subtree rooted at "node" that represents a kernel.
+// In particular, ensure that all branches are mapped to the same number of
+// thread and block dimensions and that the appropriate synchronizations are
+// inserted. Optionally, change the kernel-level context to only include the
+// actually used threads (blocks).
+isl::schedule_node finalizeKernel(isl::schedule_node_mark node);
+
+// An identifier of a group of references that must be promoted together
+// for validity reasons. All references are to the array identifiable by
+// "arrayId". Individual reference IDs are stored in "refIds".
+// The promotion is scoped under a mark node identified by "markId", that
+// is, the extension subtree for copying to/from the promoted memory space is
+// inserted below the mark node.
+//
+// It is impossible to store an isl::schedule_node because a new schedule may
+// be created on each operation due to CoW.
+// It is impossible to store the schedule depth if we want the promotion to be
+// scoped at a sequence node, that is, keep the promoted elements in memory for
+// a group of sibling subtrees.
+// Note: we may need to artificially separate a sequence (set) node into a
+// nested structure of sequence (set) nodes to restrict the promotion scope.
+struct ScopedReferenceGroup {
+  isl::id markId;
+  isl::id arrayId;
+  isl::id_list refIds;
+
+  // Extra functions can be provided here, e.g. the size of the footprint in
+  // promoted memory space.
+};
+
+// Insert a mark node with a unique id that identifies a potential promotion
+// scope. The insertion takes place immediately above "node". Return the
+// inserted mark node.
+isl::schedule_node_mark definePromotionScope(isl::schedule_node node);
+
+// For all promotion scopes defined in "scop", return the set of
+// ScopedReferenceGroup for which promotion to shared (private) memory is legal
+// given the current mapping and that the copies are inserted below the scoping
+// mark node.
+std::unordered_set promotableToShared(const Scop& scop);
+std::unordered_set promotableToPrivate(const Scop& scop);
+
+// Promote the scoped reference group "group" to shared (private) memory in its
+// scope by modifying the schedule of "scop". May additionally keep track of
+// required declarations in each kernel inside "scop".
+void promoteToShared(Scop& scop, ScopedReferenceGroup group);
+void promoteToPrivate(Scop& scop, ScopedReferenceGroup group);