Skip to content
Merged
3 changes: 2 additions & 1 deletion src/runtime/hexagon/hexagon_device_api.cc
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,8 @@ TVM_REGISTER_GLOBAL("device_api.hexagon.mem_copy").set_body([](TVMArgs args, TVM
void* src = args[1];
int size = args[2];

hexagon_user_dma_1d_sync(dst, src, size);
int error_code = hexagon_user_dma_1d_sync(dst, src, size);
CHECK_EQ(error_code, 0);

*rv = static_cast<int32_t>(0);
});
Expand Down
112 changes: 66 additions & 46 deletions src/runtime/hexagon/hexagon_user_dma.cc
Original file line number Diff line number Diff line change
Expand Up @@ -17,66 +17,47 @@
* under the License.
*/

#include <algorithm>
#include "hexagon_user_dma.h"

#include "hexagon_common.h"
#include "hexagon_user_dma_descriptors.h"
#include "hexagon_user_dma_instructions.h"
#include "hexagon_user_dma_registers.h"
#include <algorithm>

namespace tvm {
namespace runtime {
namespace hexagon {

// NOTE(review): this span comes from a rendered diff, so the lines of the old
// free function `init_hexagon_user_dma()` and of the new member function
// `HexagonUserDMA::Init()` appear interleaved below — confirm against the
// actual file which lines are present.
//
// Both variants issue `dmpause` and keep only the bits selected by
// DM0_STATUS_MASK. The `init_hexagon_user_dma` lines translate a non-idle
// status into DMA_FAILURE (and otherwise return DMA_SUCCESS), whereas the
// `HexagonUserDMA::Init` lines return the masked status value itself and leave
// the comparison against DM0_STATUS_IDLE to the caller.
int init_hexagon_user_dma() {
#if __HEXAGON_ARCH__ >= 68
// reset DMA engine
unsigned int HexagonUserDMA::Init() {
// pause the engine and mask the value it reports down to the status bits
unsigned int status = dmpause() & DM0_STATUS_MASK;
if (status != DM0_STATUS_IDLE) {
return DMA_FAILURE;
}
#endif
return DMA_SUCCESS;
return status;
}

int hexagon_user_dma_1d_sync_helper(void* dst, void* src, uint32_t length) {
#if __HEXAGON_ARCH__ >= 68
static int config_dma = init_hexagon_user_dma();
if (config_dma != DMA_SUCCESS) {
int HexagonUserDMA::Copy(void* dst, void* src, uint32_t length) {
// length limited to 24 bits
if (length > DESC_LENGTH_MASK) {
return DMA_FAILURE;
}

uint64_t src64 = reinterpret_cast<uint64_t>(src);
// source address limited to 32 bits
if (src64 > DESC_SRC_MASK) {
uint64_t src64 = reinterpret_cast<uint64_t>(src);
if (!src64 || src64 > DESC_SRC_MASK) {
return DMA_FAILURE;
}

uint64_t dst64 = reinterpret_cast<uint64_t>(dst);
// destination address limited to 32 bits
if (dst64 > DESC_DST_MASK) {
return DMA_FAILURE;
}

// length limited to 24 bits
if (length > DESC_LENGTH_MASK) {
uint64_t dst64 = reinterpret_cast<uint64_t>(dst);
if (!dst64 || dst64 > DESC_DST_MASK) {
return DMA_FAILURE;
}

uint32_t src32 = src64 & DESC_SRC_MASK;
uint32_t dst32 = dst64 & DESC_DST_MASK;

void* dma_desc = nullptr;

int ret = posix_memalign(&dma_desc, DMA_DESC_2D_SIZE, DMA_DESC_2D_SIZE);
if (ret) {
return DMA_FAILURE;
}
uint32_t src32 = static_cast<uint32_t>(src64);
uint32_t dst32 = static_cast<uint32_t>(dst64);

// get pointer to next descriptor
dma_desc_2d_t* dma_desc = descriptors_->Next();
if (!dma_desc) {
return DMA_FAILURE;
return DMA_RETRY;
}

// populate descriptor fields
dma_desc_set_state(dma_desc, DESC_STATE_READY);
dma_desc_set_next(dma_desc, DMA_NULL_PTR);
dma_desc_set_length(dma_desc, length);
Expand All @@ -90,32 +71,71 @@ int hexagon_user_dma_1d_sync_helper(void* dst, void* src, uint32_t length) {
dma_desc_set_src(dma_desc, src32);
dma_desc_set_dst(dma_desc, dst32);

dmstart(dma_desc);
unsigned int status = dmwait() & DM0_STATUS_MASK;
unsigned int done = dma_desc_get_done(dma_desc);
if (first_dma_) {
// `dmstart` first descriptor
dmstart(dma_desc);
first_dma_ = false;
} else {
// `dmlink` descriptor to tail descriptor
dmlink(tail_dma_desc_, dma_desc);
}

free(dma_desc);
// update tail
tail_dma_desc_ = dma_desc;
return DMA_SUCCESS;
}

if (status == DM0_STATUS_IDLE && done == DESC_DONE_COMPLETE) {
return DMA_SUCCESS;
// Blocks the caller until the number of in-flight DMAs reported by
// DMAsInFlight() is no greater than `max_dmas_in_flight`.
// NOTE(review): the `#endif` and `return DMA_FAILURE;` lines near the end look
// like removed-side residue of the rendered diff rather than part of this void
// function — confirm against the actual file.
void HexagonUserDMA::Wait(uint32_t max_dmas_in_flight) {
// busy-wait (forever, there is no timeout) until actual DMAs in flight <= max DMAs in flight
while (DMAsInFlight() > max_dmas_in_flight) {
}
#endif
return DMA_FAILURE;
}

// Public, non-blocking query: reports how many DMAs are in flight right now by
// delegating to the private DMAsInFlight() helper.
uint32_t HexagonUserDMA::Poll() {
  const uint32_t in_flight = DMAsInFlight();
  return in_flight;
}

// Returns the in-flight count reported by the descriptor ring buffer.
uint32_t HexagonUserDMA::DMAsInFlight() {
  // Issue `dmpoll` first so the DMA engine status is refreshed; the value it
  // returns is deliberately not used here.
  dmpoll();
  const uint32_t pending = descriptors_->InFlight();
  return pending;
}

// Resets the DMA engine and allocates the descriptor ring buffer.
HexagonUserDMA::HexagonUserDMA() {
  // Init() pauses the engine and hands back its status; the engine is required
  // to report idle before any descriptor storage is set up.
  unsigned int engine_status = Init();
  CHECK_EQ(engine_status, DM0_STATUS_IDLE);

  // Predicate given to the ring buffer: a descriptor is treated as in flight
  // for as long as its done field differs from DESC_DONE_COMPLETE.
  auto is_in_flight = [](dma_desc_2d_t* desc) {
    unsigned int done_field = dma_desc_get_done(desc);
    return done_field != DESC_DONE_COMPLETE;
  };
  descriptors_ = new RingBuffer<dma_desc_2d_t>(MAX_DMA_DESCRIPTORS, is_in_flight);
}

// Stops the DMA engine, then releases the descriptor ring buffer.
HexagonUserDMA::~HexagonUserDMA() {
  // Init() is called only to stop the engine; the status it returns is
  // ignored. This happens before the descriptor storage is freed.
  Init();
  delete descriptors_;
}

int hexagon_user_dma_1d_sync(void* dst, void* src, uint32_t length) {
// One DMA transfer can copy at most DESC_LENGTH_MASK bytes.
// Make the common case quick.
if (length <= DESC_LENGTH_MASK) return hexagon_user_dma_1d_sync_helper(dst, src, length);
if (length <= DESC_LENGTH_MASK) {
// sync DMA -> `Copy` and then `Wait(0)`
int ret_val = HexagonUserDMA::Get().Copy(dst, src, length);
if (ret_val != DMA_SUCCESS) return ret_val;
HexagonUserDMA::Get().Wait(0);
return DMA_SUCCESS;
}

// Split big transfers into smaller transfers.
char* cast_src = static_cast<char*>(src);
char* cast_dst = static_cast<char*>(dst);
for (uint32_t i = 0; i < length;) {
// Ensure there is no overflow while updating i
uint32_t cur_len = std::min<uint32_t>(length - i, DESC_LENGTH_MASK);
int ret_val = hexagon_user_dma_1d_sync_helper(&cast_dst[i], &cast_src[i], cur_len);
// sync DMA -> `Copy` and then `Wait(0)`
int ret_val = HexagonUserDMA::Get().Copy(&cast_dst[i], &cast_src[i], cur_len);
if (ret_val != DMA_SUCCESS) return ret_val;
HexagonUserDMA::Get().Wait(0);
// 2 cases for new val for i:
// 1. length - i <= DESC_LENGTH_MASK (<= MAX_UINT)
// new_i = i + (length - i) = length, no more iter
Expand Down
97 changes: 97 additions & 0 deletions src/runtime/hexagon/hexagon_user_dma.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

#ifndef TVM_RUNTIME_HEXAGON_HEXAGON_USER_DMA_H_
#define TVM_RUNTIME_HEXAGON_HEXAGON_USER_DMA_H_

#include "hexagon_common.h"
#include "hexagon_user_dma_descriptors.h"
#include "hexagon_user_dma_instructions.h"
#include "hexagon_user_dma_registers.h"
#include "ring_buffer.h"

namespace tvm {
namespace runtime {
namespace hexagon {

//! \brief Status code: the DMA request was accepted
#define DMA_SUCCESS 0
//! \brief Status code: the DMA request was rejected (e.g. an argument failed validation)
#define DMA_FAILURE -1
//! \brief Status code: no DMA descriptor was available for the request.
//! NOTE(review): presumably the caller should retry after some in-flight DMAs
//! have completed — confirm the intended caller behavior.
#define DMA_RETRY 1
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: A docstring on the meaning of DMA_RETRY would be helpful for quicker understanding.

//! \brief Size argument handed to the RingBuffer that stores DMA descriptors.
//! NOTE(review): presumably the maximum number of descriptors that can be
//! outstanding at once — confirm against RingBuffer.
#define MAX_DMA_DESCRIPTORS 100

/*!
 * \brief Front end for issuing memory copies through the Hexagon User DMA engine
 *
 * A single process-wide instance is obtained through `Get()`; callers cannot
 * construct, copy, or move the class themselves.
 */
class HexagonUserDMA {
 public:
  /*!
   * \brief Obtain the process-wide HexagonUserDMA instance
   * \returns Reference to the singleton, which is heap-allocated on the first
   * call and is not deleted by this accessor
   */
  static HexagonUserDMA& Get() {
    static HexagonUserDMA* const instance = new HexagonUserDMA();
    return *instance;
  }

  /*!
   * \brief Queue a DMA that copies memory from the source to the destination address
   * \param dst Destination address
   * \param src Source address
   * \param length Number of bytes to copy
   * \returns DMA_SUCCESS once the transfer has been queued, DMA_FAILURE if an
   * argument is rejected, or DMA_RETRY if no descriptor is currently available
   */
  int Copy(void* dst, void* src, uint32_t length);

  /*!
   * \brief Block until at most `max_dmas_in_flight` DMAs remain in flight
   * \param max_dmas_in_flight Largest number of in-flight DMAs that satisfies
   * the wait; pass 0 to wait for every outstanding DMA to complete
   */
  void Wait(uint32_t max_dmas_in_flight);

  /*!
   * \brief Query the number of DMAs in flight without blocking
   * \returns Number of DMAs in flight
   */
  uint32_t Poll();

 private:
  // Construction and destruction are private, and copy/move are deleted, so
  // the instance handed out by `Get()` is the only one callers can reach.
  HexagonUserDMA();
  ~HexagonUserDMA();
  HexagonUserDMA(const HexagonUserDMA&) = delete;
  HexagonUserDMA& operator=(const HexagonUserDMA&) = delete;
  HexagonUserDMA(HexagonUserDMA&&) = delete;
  HexagonUserDMA& operator=(HexagonUserDMA&&) = delete;

  //! \brief Resets the Hexagon User DMA engine and returns its status
  unsigned int Init();

  //! \brief Computes the number of DMAs currently in flight
  uint32_t DMAsInFlight();

  //! \brief True until the very first DMA has been started
  bool first_dma_ = true;

  //! \brief Descriptor at the tail of the DMA chain
  void* tail_dma_desc_ = nullptr;

  //! \brief Ring buffer that stores every DMA descriptor
  RingBuffer<dma_desc_2d_t>* descriptors_ = nullptr;
};

} // namespace hexagon
} // namespace runtime
} // namespace tvm

#endif // TVM_RUNTIME_HEXAGON_HEXAGON_USER_DMA_H_
2 changes: 0 additions & 2 deletions src/runtime/hexagon/hexagon_user_dma_descriptors.h
Original file line number Diff line number Diff line change
Expand Up @@ -126,8 +126,6 @@ namespace hexagon {
#define DESC_DSTWIDTHOFFSET_MASK 0xFFFF0000
#define DESC_DSTWIDTHOFFSET_SHIFT 16

#define DMA_SUCCESS 0
#define DMA_FAILURE -1
#define DMA_NULL_PTR 0

/**************************/
Expand Down
8 changes: 4 additions & 4 deletions src/runtime/hexagon/hexagon_user_dma_instructions.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,6 @@ namespace tvm {
namespace runtime {
namespace hexagon {

#if __HEXAGON_ARCH__ >= 68

inline unsigned int dmpause() {
unsigned int dm0 = 0;
asm volatile(" %0 = dmpause" : "=r"(dm0));
Expand All @@ -34,6 +32,10 @@ inline unsigned int dmpause() {

//! \brief Emits the Hexagon `dmstart` instruction with `next` as its register operand
//! \param next Pointer placed in the register operand of `dmstart`
inline void dmstart(void* next) { asm volatile(" dmstart(%0)" : : "r"(next)); }

//! \brief Emits the Hexagon `dmlink` instruction with `tail` and `next` as its register operands
//! \param tail Pointer placed in the first register operand of `dmlink`
//! \param next Pointer placed in the second register operand of `dmlink`
inline void dmlink(void* tail, void* next) {
asm volatile(" dmlink(%0, %1)" : : "r"(tail), "r"(next));
}

inline unsigned int dmpoll() {
unsigned int dm0 = 0;
asm volatile(" %0 = dmpoll" : "=r"(dm0));
Expand Down Expand Up @@ -70,8 +72,6 @@ inline void dmcfgwr(unsigned int dmindex, unsigned int data) {
asm volatile(" dmcfgwr(%0, %1)" : : "r"(dmindex), "r"(data));
}

#endif

} // namespace hexagon
} // namespace runtime
} // namespace tvm
Expand Down
Loading