diff --git a/scripts/cmake/version.cmake b/scripts/cmake/version.cmake index dc1d9eb88e29..6f314ea91348 100644 --- a/scripts/cmake/version.cmake +++ b/scripts/cmake/version.cmake @@ -114,13 +114,13 @@ if(EXISTS ${SOF_ROOT_SOURCE_DIRECTORY}/.git/) OUTPUT_FILE "${SOURCE_HASH_DIR}/tracked_file_list" ) # calculate hash of each listed files (from file version saved in file system) - execute_process(COMMAND git hash-object --no-filters --stdin-paths + execute_process(COMMAND git hash-object --stdin-paths WORKING_DIRECTORY ${SOF_ROOT_SOURCE_DIRECTORY} INPUT_FILE "${SOURCE_HASH_DIR}/tracked_file_list" OUTPUT_FILE "${SOURCE_HASH_DIR}/tracked_file_hash_list" ) # then calculate single hash of previously calculated hash list - execute_process(COMMAND git hash-object --no-filters --stdin + execute_process(COMMAND git hash-object --stdin WORKING_DIRECTORY ${SOF_ROOT_SOURCE_DIRECTORY} OUTPUT_STRIP_TRAILING_WHITESPACE INPUT_FILE "${SOURCE_HASH_DIR}/tracked_file_hash_list" @@ -128,7 +128,10 @@ if(EXISTS ${SOF_ROOT_SOURCE_DIRECTORY}/.git/) ) string(SUBSTRING ${SOF_SRC_HASH_LONG} 0 8 SOF_SRC_HASH) message(STATUS "Source content hash: ${SOF_SRC_HASH}. \ -Note: by design, source hash is broken by config changes. See #3890.") +Notes: + - by design, source hash is broken by Kconfig changes. See #3890. + - Source hash is also broken by _asymmetric_ autocrlf=input, see + #5917 and reverted #5920.") else() # Zephyr, tarball,... if(NOT "${GIT_LOG_HASH}" STREQUAL "") string(SUBSTRING "${GIT_LOG_HASH}" 0 8 SOF_SRC_HASH) diff --git a/src/arch/xtensa/hal/set_region_translate.c b/src/arch/xtensa/hal/set_region_translate.c index b1b53ed4ab21..27ed6b80a50b 100644 --- a/src/arch/xtensa/hal/set_region_translate.c +++ b/src/arch/xtensa/hal/set_region_translate.c @@ -1,534 +1,534 @@ -/* - * Copyright (c) 2004-2014 Tensilica Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ -#include - -#if XCHAL_HAVE_XEA2 && (!XCHAL_HAVE_MPU) -/* - * C-stubs to issue the tlb related instructions (with dsync and isync's if needed). 
- * - */ -static inline void write_dtlb_entry(unsigned vpn_way, unsigned ppn_ca) { - __asm__ __volatile__("wdtlb %1, %0; dsync\n\t" - : : "r" (vpn_way), "r" (ppn_ca) ); -} - -static inline void write_itlb_entry(unsigned vpn_way, unsigned ppn_ca) { - __asm__ __volatile__("witlb %1, %0; isync\n\t" - : : "r" (vpn_way), "r" (ppn_ca) ); -} - -static inline unsigned read_dtlb1_entry(unsigned addr) { - unsigned long tmp; - __asm__ __volatile__("rdtlb1 %0, %1\n\t" - : "=a" (tmp) - : "a" (addr)); - return tmp; -} - -static inline unsigned read_itlb1_entry(unsigned addr) { - unsigned long tmp; - __asm__ __volatile__("ritlb1 %0, %1\n\t" - : "=a" (tmp) - : "a" (addr)); - return tmp; -} - -static inline unsigned probe_dtlb(unsigned addr) { - unsigned long tmp; - __asm__ __volatile__("pdtlb %0, %1\n\t" - : "=a" (tmp) - : "a" (addr)); - return tmp; -} - -static inline unsigned probe_itlb(unsigned addr) { - unsigned long tmp; - __asm__ __volatile__("pitlb %0, %1\n\t" - : "=a" (tmp) - : "a" (addr)); - return tmp; -} - -static inline void invalidate_dtlb_entry(unsigned addr) { - __asm__ __volatile__("idtlb %0; dsync \n\t" - : : "a" (addr)); -} - -static inline void invalidate_itlb_entry(unsigned addr) { - __asm__ __volatile__("iitlb %0 ; isync\n\t" - : : "a" (addr)); -} - -static inline unsigned read_dtlbcfg() { - unsigned long tmp; - __asm__ __volatile__("rsr.dtlbcfg %0\n\t" - : "=a" (tmp)); - return tmp; -} - -static inline unsigned read_itlbcfg() { - unsigned long tmp; - __asm__ __volatile__("rsr.itlbcfg %0\n\t" - : "=a" (tmp)); - return tmp; -} - -#endif - -/* - * xthal_set_region_translation_raw is a quick and simple function - * to set both physical address and cache attribute for - * a 512MB region at . - * - * Parameters: - * void* vaddr 512MB aligned pointer representing the start of virtual address region - * void* paddr 512MB aligned pointer representing the start of physical address region - * unsigned cattr 4 bit value encoding the caching properties and rights (MMU only). - * - * returns 0 (XCHAL_SUCCESS) if successful - * returns non zero (XCHAL_UNSUPPORTED) on failure - * - * This function has the following limitations: - * - * 1) Requires either the Region Translation Option or a v3 MMU running in the default mode (with spanning way) - * 2) It does no error checking. - * 3) Deals with one 512MB region (vaddr and paddr are required to be 512MB aligned although that is not explicitly checked) - * 4) It requires the caller to do any cache flushing that is needed - * 5) Doesn't support mnemonically setting the 'rights' (rwx, rw, ... 
) bit on the MMU - * 6) It is illegal to change the mapping of the region containing the current PC (not checked) - * - */ -int xthal_set_region_translation_raw(void *vaddr, void *paddr, unsigned cattr) { -#if XCHAL_HAVE_MPU - return XTHAL_UNSUPPORTED; -#else -#if XCHAL_HAVE_XEA2 -#if XCHAL_HAVE_XLT_CACHEATTR || (XCHAL_HAVE_PTP_MMU && XCHAL_HAVE_SPANNING_WAY) -# if XCHAL_HAVE_XLT_CACHEATTR - unsigned vpn_way = (unsigned)vaddr; -# else - unsigned vpn_way = ((unsigned) vaddr & 0xFFFFFFF0) + XCHAL_SPANNING_WAY; -# endif - unsigned ppn_ca = ((unsigned) paddr & 0xFFFFFFF0) + (cattr & 0xF); - write_dtlb_entry(vpn_way, ppn_ca); - write_itlb_entry(vpn_way, ppn_ca); - return XTHAL_SUCCESS; -#else - return XTHAL_UNSUPPORTED; -#endif -#else - return XTHAL_UNSUPPORTED; -#endif -#endif -} - -/* - * xthal_v2p() takes a virtual address as input, and if that virtual address is mapped to a physical address - * by the MMU, it returns the: - * a) corresponding physical address - * b) the tlb way that is used to translate the address - * c) cache attribute for translation - * - * Parameters: - * void* vaddr A pointer representing the virtual address (there are no alignment requirements for this address) - * void** paddr This value can be 0, or can point to a pointer variable which will be updated to contain the physical address - * unsigned* way This value can be 0, or can point to an unsigned variable which will be updated to contain the TLB way. - * unsigned* cattr This value can be 0, or can point to an unsigned variable which will be updated to contain the cache attr - * For MPU configurations bits 0..3 hold the access rights and bits 4..8 hold the encoded memory type - * - * Returns 0 (XCHAL_SUCCESS) if successful - * XTHAL_NO_MAPPING if there is no current mapping for the virtual address - * XCHAL_UNSUPPORTED if unsupported - * - * Limitations: - * Assumes that architecture variable DVARWAY56 is "Variable" - * Uses the D-TLBS for the translation ... 
assumption is that ITLB's have same mappings - */ -int xthal_v2p(void* vaddr, void** paddr, unsigned *way, unsigned* cattr) { -#if XCHAL_HAVE_XEA2 -#if XCHAL_HAVE_MPU - if (paddr) - *paddr = vaddr; - if (way) - *way = 0; - if (cattr) - { - struct xthal_MPU_entry x = xthal_get_entry_for_address(vaddr, 0); - *cattr = XTHAL_MPU_ENTRY_GET_ACCESS(x) | XTHAL_MPU_ENTRY_GET_MEMORY_TYPE(x) << XTHAL_AR_WIDTH; - } - return XTHAL_SUCCESS; -#else - unsigned long probe = probe_dtlb((unsigned) vaddr); -#if !XCHAL_HAVE_PTP_MMU - if (!(0x1 & probe)) - return XTHAL_NO_MAPPING; - if (way) - *way = 1; - if (paddr || cattr) { - unsigned long temp; - temp = read_dtlb1_entry(probe); - unsigned ppn = 0xe0000000 & temp; - unsigned att = 0xf & temp; - if (paddr) - *paddr = ((void*) (ppn + (((unsigned) vaddr) & 0x1fffffff))); - if (cattr) - *cattr = att; - } -#else - { - unsigned iway; - if (!(0x10 & probe)) - return XTHAL_NO_MAPPING; - iway = 0xf & probe; - if (way) - *way = iway; - if (paddr || cattr) { - unsigned temp; - unsigned ppn; - unsigned ppn1; - unsigned dtlbcfg = read_dtlbcfg(); - temp = read_dtlb1_entry(probe); - unsigned att = 0xf & temp; - if (cattr) - *cattr = att; - if (paddr) - switch (iway) // followin code derived from fig 4-40 from ISA MMU Option Data (at) Format for RxTLB1 - { /* 4k pages */ - case 0: - case 1: - case 2: - case 3: - case 7: - case 8: - case 9: - ppn = 0xfffff000; // 4k pages - break; - case 4: { - switch ((dtlbcfg & (0x3 << 16)) >> 16) // bits 16 & 17 - { - case 0: // 1MB pages - ppn = 0xfff00000; - break; - case 1: // 4MB pages - ppn = 0xffc00000; - break; - case 2: // 16MB pages - ppn = 0xff000000; - break; - case 3: // 64MB pages - ppn = 0xfc000000; - break; - default: - return XTHAL_UNSUPPORTED; - } - } - break; - case 5: - if ((dtlbcfg & (1 << 20))) - ppn = 0xf8000000; // 128MB pages - else - ppn = 0xf0000000; // 256MB pages - break; - case 6: - if ((dtlbcfg & (1 << 24))) - ppn = 0xe0000000; // 512MB pages - else - ppn = 0xf0000000; // 256MB pages - break; - default: - return XTHAL_UNSUPPORTED; - break; - } - ppn1 = ppn & temp; - *paddr = ((void*) (ppn1 + (((unsigned) vaddr) & (~ppn)))); - } - } -#endif - return XTHAL_SUCCESS; -#endif -#else - return XTHAL_UNSUPPORTED; -#endif -} - -/* these constants borrowed from xthal_set_region_attribute */ -# if XCHAL_HAVE_PTP_MMU -# define CA_BYPASS XCHAL_CA_BYPASS -# define CA_WRITETHRU XCHAL_CA_WRITETHRU -# define CA_WRITEBACK XCHAL_CA_WRITEBACK -# define CA_WRITEBACK_NOALLOC XCHAL_CA_WRITEBACK_NOALLOC -# define CA_ILLEGAL XCHAL_CA_ILLEGAL -# else -/* Hardcode these, because they get remapped when caches or writeback not configured: */ -# define CA_BYPASS 2 -# define CA_WRITETHRU 1 -# define CA_WRITEBACK 4 -# define CA_WRITEBACK_NOALLOC 5 -# define CA_ILLEGAL 15 -# endif - -/* internal function that returns 1 if the supplied attr indicates the - * cache is in writeback mode. - */ -static inline int is_writeback(unsigned attr) { -#if XCHAL_HAVE_XLT_CACHEATTR - return attr == CA_WRITEBACK || attr == CA_WRITEBACK_NOALLOC; -#endif -#if XCHAL_HAVE_PTP_MMU && XCHAL_HAVE_SPANNING_WAY - return (attr | 0x3) == CA_WRITEBACK; -#endif - return -1; /* unsupported */ -} - -/* - * xthal_set_region_translation() - * - * Establishes a new mapping (with the supplied cache attributes) - * between a virtual address region, and a physical address region. 
- * - * This function is only supported with following processor configurations: - * a) Region Translation - * b) v3 MMU with a spanning way running in the default mode - * - * If the specified memory range exactly covers a series - * of consecutive 512 MB regions, the address mapping and cache - * attributes of these regions are updated. - * - * If this is not the case, e.g. if either or both the - * start and end of the range only partially cover a 512 MB - * region, one of three results are possible: - * - * 1. By default, the cache attribute of all regions - * covered, even just partially, is changed to - * the requested attribute. - * - * 2. If the XTHAL_CAFLAG_EXACT flag is specified, - * a non-zero error code is returned. - * - * 3. If the XTHAL_CAFLAG_NO_PARTIAL flag is specified - * (but not the EXACT flag), only regions fully - * covered by the specified range are updated with - * the requested attribute. - * - * CACHE HANDLING - * - * This function automatically writes back dirty data before remapping a - * virtual address region. - * - * This writeback is done safely, ie. by first switching to writethrough - * mode, and then invoking xthal_dcache_all_writeback(). Such a sequence is - * necessary to ensure there is no longer any dirty data in the memory region by the time - * this function returns, even in the presence of interrupts, speculation, etc. - * This automatic write-back can be disabled using the XTHAL_CAFLAG_NO_AUTO_WB flag. - * - * This function also invalidates the caches after remapping a region because the - * cache could contain (now invalid) data from the previous mapping. - * This automatic invalidate can be disabled using the XTHAL_CAFLAG_NO_AUTO_INV flag. - * - * Parameters: - * vaddr starting virtual address of region of memory - * - * paddr starting physical address for the mapping (this should be 512MB aligned to vaddr such that ((vaddr ^ paddr) & 0x10000000 == 0) - * - * size number of bytes in region of memory - * (see above, SPECIFYING THE MEMORY REGION) - * - * cattr cache attribute (encoded); - * typically taken from compile-time HAL constants - * XCHAL_CA_{BYPASS, WRITETHRU, WRITEBACK[_NOALLOC], ILLEGAL} - * (defined in ); - * in XEA1, this corresponds to the value of a nibble - * in the CACHEATTR register; - * in XEA2, this corresponds to the value of the - * cache attribute (CA) field of each TLB entry - * - * flags bitwise combination of flags XTHAL_CAFLAG_* - * - * XTHAL_CAFLAG_EXACT - If this flag is present, - * the mapping will only be done if the specified - * region exactly matches on or more 512MB pages otherwise - * XCHAL_INEXACT is returned (and no mapping is done). - * - * XTHAL_CAFLAG_NO_PARTIAL - If this flag is specified, then - * only pages that are completely covered by the specified region - * are affected. If this flag is specified, and no pages are completely - * covered by the region, then no pages are affected and XCHAL_NO_REGIONS_COVERED - * is returned. 
- * - * - * - * Returns: - * XCHAL_SUCCESS - successful, or size is zero - * - * XCHAL_NO_REGIONS_COVERED - XTHAL_CAFLAG_NO_PARTIAL flag specified and address range - * is valid with a non-zero size, however no 512 MB region (or page) - * is completely covered by the range - * - * XCHAL_INEXACT XTHAL_CAFLAG_EXACT flag specified, and address range does - * not exactly specify a 512 MB region (or page) - * - * XCHAL_INVALID_ADDRESS invalid address range specified (wraps around the end of memory) - * - * XCHAL_ADDRESS_MISALIGNED virtual and physical addresses are not aligned (512MB) - * - * - * XCHAL_UNSUPPORTED_ON_THIS_ARCH function not supported in this processor configuration - */ -int xthal_set_region_translation(void* vaddr, void* paddr, unsigned size, - unsigned cattr, unsigned flags) { -#if XCHAL_HAVE_XEA2 & !XCHAL_HAVE_MPU -#if XCHAL_HAVE_XLT_CACHEATTR || (XCHAL_HAVE_PTP_MMU && XCHAL_HAVE_SPANNING_WAY) - const unsigned CA_MASK = 0xF; - const unsigned addr_mask = 0x1fffffff; - const unsigned addr_shift = 29; - unsigned vaddr_a = (unsigned) vaddr; - unsigned paddr_a = (unsigned) paddr; - unsigned end_vaddr; - unsigned end_paddr; - unsigned start_va_reg; - unsigned end_va_reg; - unsigned start_pa_reg; - unsigned icache_attr = 0; - int rv; - int i; - if (size == 0) - return XTHAL_SUCCESS; - if ((vaddr_a & addr_mask) ^ (paddr_a & addr_mask)) - return XTHAL_ADDRESS_MISALIGNED; - icache_attr = cattr & CA_MASK; -#if (XCHAL_HAVE_PTP_MMU && XCHAL_HAVE_SPANNING_WAY) - // if using the mmu in spanning way mode then 'and in' the R, RX, RW, RWX bits - if ((cattr & 0x40000000) && (icache_attr < 12)) - icache_attr = icache_attr & ((cattr & 0xF0) >> 4); -#endif - end_vaddr = vaddr_a + size - 1; - end_paddr = paddr_a + size - 1; - - if ((end_vaddr < vaddr_a) || (end_paddr < paddr_a)) - return XTHAL_INVALID_ADDRESS; - start_va_reg = vaddr_a >> addr_shift; - end_va_reg = end_vaddr >> addr_shift; - start_pa_reg = paddr_a >> addr_shift; - if ((flags & XTHAL_CAFLAG_EXACT) - && ((size & addr_mask) || (vaddr_a & addr_mask) - || (paddr_a & addr_mask))) - return XTHAL_INEXACT; - if (flags & XTHAL_CAFLAG_NO_PARTIAL) { - if (vaddr_a & addr_mask) { - start_va_reg++; - start_pa_reg++; - } - if ((end_vaddr & addr_mask) != addr_mask) - end_va_reg--; - } - if (end_va_reg < start_va_reg) - return XTHAL_NO_REGIONS_COVERED; - /* - * Now we need to take care of any uncommitted cache writes in the affected regions - * 1) first determine if any regions are in write back mode - * 2) change those pages to write through - * 3) force the writeback of d-cache by calling xthal_dcach_all_writeback() - */ -#if ((XCHAL_DCACHE_SIZE >0) && XCHAL_DCACHE_IS_WRITEBACK) - if (!(flags & XTHAL_CAFLAG_NO_AUTO_WB)) { - unsigned old_cache_attr = xthal_get_cacheattr(); - unsigned cachewrtr = old_cache_attr; - unsigned need_safe_writeback = 0; - for (i = start_va_reg; i <= end_va_reg; i++) { - unsigned sh = i << 2; - unsigned old_attr = (old_cache_attr >> sh) & CA_MASK; - if (is_writeback(old_attr)) { - need_safe_writeback = 1; - cachewrtr = (cachewrtr & ~(CA_MASK << sh)) - | (CA_WRITETHRU << sh); - } - } - - if (need_safe_writeback) { - xthal_set_cacheattr(cachewrtr); /* set to writethru first, to safely writeback any dirty data */ - xthal_dcache_all_writeback(); /* much quicker than scanning entire 512MB region(s) */ - } - } -#endif - /* Now we set the affected region translations */ - for (i = start_va_reg; i <= end_va_reg; i++) { - if ((rv = xthal_set_region_translation_raw( - (void*) ((start_va_reg++) << addr_shift), - (void*) 
((start_pa_reg++) << addr_shift), icache_attr))) - return rv; - } - - /* - * Now we need to invalidate the cache in the affected regions. For now invalidate entire cache, - * but investigate if there are faster alternatives on some architectures. - */ - if (!(flags & XTHAL_CAFLAG_NO_AUTO_INV)) { -# if XCHAL_DCACHE_SIZE > 0 - xthal_dcache_all_writeback_inv(); /* some areas in memory (outside the intended region) may have uncommitted - data so we need the writeback_inv(). */ -#endif -#if XCHAL_ICACHE_SIZE >0 - xthal_icache_all_invalidate(); -#endif - } - return XTHAL_SUCCESS; -#else - return XTHAL_UNSUPPORTED; -#endif -#else - return XTHAL_UNSUPPORTED; -#endif -} - -/* xthal_invalidate_region() - * invalidates the tlb entry for the specified region. - * - * This function is only supported on processor configurations - * with a v3 MMU with a spanning way. - * - * Parameter - * vaddr - virtual address of region to invalidate (512MB aligned) - * - * returns: - * XCHAL_SUCCESS - Success - * XCHAL_UNSUPPORTED_ON_THIS_ARCH - Unsupported - * - */ -int xthal_invalidate_region(void* vaddr) { -#if XCHAL_HAVE_XEA2 & !XCHAL_HAVE_MPU -#if (XCHAL_HAVE_PTP_MMU && XCHAL_HAVE_SPANNING_WAY) - unsigned addr = (unsigned) vaddr; - if (addr & 0x1fffffff) - return XTHAL_INVALID_ADDRESS; - addr += XCHAL_SPANNING_WAY; - invalidate_dtlb_entry(addr); - invalidate_itlb_entry(addr); - return XTHAL_SUCCESS; -#else - return XTHAL_UNSUPPORTED; -#endif -#else - return XTHAL_UNSUPPORTED; -#endif -} - +/* + * Copyright (c) 2004-2014 Tensilica Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#include + +#if XCHAL_HAVE_XEA2 && (!XCHAL_HAVE_MPU) +/* + * C-stubs to issue the tlb related instructions (with dsync and isync's if needed). 
+ * + */ +static inline void write_dtlb_entry(unsigned vpn_way, unsigned ppn_ca) { + __asm__ __volatile__("wdtlb %1, %0; dsync\n\t" + : : "r" (vpn_way), "r" (ppn_ca) ); +} + +static inline void write_itlb_entry(unsigned vpn_way, unsigned ppn_ca) { + __asm__ __volatile__("witlb %1, %0; isync\n\t" + : : "r" (vpn_way), "r" (ppn_ca) ); +} + +static inline unsigned read_dtlb1_entry(unsigned addr) { + unsigned long tmp; + __asm__ __volatile__("rdtlb1 %0, %1\n\t" + : "=a" (tmp) + : "a" (addr)); + return tmp; +} + +static inline unsigned read_itlb1_entry(unsigned addr) { + unsigned long tmp; + __asm__ __volatile__("ritlb1 %0, %1\n\t" + : "=a" (tmp) + : "a" (addr)); + return tmp; +} + +static inline unsigned probe_dtlb(unsigned addr) { + unsigned long tmp; + __asm__ __volatile__("pdtlb %0, %1\n\t" + : "=a" (tmp) + : "a" (addr)); + return tmp; +} + +static inline unsigned probe_itlb(unsigned addr) { + unsigned long tmp; + __asm__ __volatile__("pitlb %0, %1\n\t" + : "=a" (tmp) + : "a" (addr)); + return tmp; +} + +static inline void invalidate_dtlb_entry(unsigned addr) { + __asm__ __volatile__("idtlb %0; dsync \n\t" + : : "a" (addr)); +} + +static inline void invalidate_itlb_entry(unsigned addr) { + __asm__ __volatile__("iitlb %0 ; isync\n\t" + : : "a" (addr)); +} + +static inline unsigned read_dtlbcfg() { + unsigned long tmp; + __asm__ __volatile__("rsr.dtlbcfg %0\n\t" + : "=a" (tmp)); + return tmp; +} + +static inline unsigned read_itlbcfg() { + unsigned long tmp; + __asm__ __volatile__("rsr.itlbcfg %0\n\t" + : "=a" (tmp)); + return tmp; +} + +#endif + +/* + * xthal_set_region_translation_raw is a quick and simple function + * to set both physical address and cache attribute for + * a 512MB region at . + * + * Parameters: + * void* vaddr 512MB aligned pointer representing the start of virtual address region + * void* paddr 512MB aligned pointer representing the start of physical address region + * unsigned cattr 4 bit value encoding the caching properties and rights (MMU only). + * + * returns 0 (XCHAL_SUCCESS) if successful + * returns non zero (XCHAL_UNSUPPORTED) on failure + * + * This function has the following limitations: + * + * 1) Requires either the Region Translation Option or a v3 MMU running in the default mode (with spanning way) + * 2) It does no error checking. + * 3) Deals with one 512MB region (vaddr and paddr are required to be 512MB aligned although that is not explicitly checked) + * 4) It requires the caller to do any cache flushing that is needed + * 5) Doesn't support mnemonically setting the 'rights' (rwx, rw, ... 
) bit on the MMU + * 6) It is illegal to change the mapping of the region containing the current PC (not checked) + * + */ +int xthal_set_region_translation_raw(void *vaddr, void *paddr, unsigned cattr) { +#if XCHAL_HAVE_MPU + return XTHAL_UNSUPPORTED; +#else +#if XCHAL_HAVE_XEA2 +#if XCHAL_HAVE_XLT_CACHEATTR || (XCHAL_HAVE_PTP_MMU && XCHAL_HAVE_SPANNING_WAY) +# if XCHAL_HAVE_XLT_CACHEATTR + unsigned vpn_way = (unsigned)vaddr; +# else + unsigned vpn_way = ((unsigned) vaddr & 0xFFFFFFF0) + XCHAL_SPANNING_WAY; +# endif + unsigned ppn_ca = ((unsigned) paddr & 0xFFFFFFF0) + (cattr & 0xF); + write_dtlb_entry(vpn_way, ppn_ca); + write_itlb_entry(vpn_way, ppn_ca); + return XTHAL_SUCCESS; +#else + return XTHAL_UNSUPPORTED; +#endif +#else + return XTHAL_UNSUPPORTED; +#endif +#endif +} + +/* + * xthal_v2p() takes a virtual address as input, and if that virtual address is mapped to a physical address + * by the MMU, it returns the: + * a) corresponding physical address + * b) the tlb way that is used to translate the address + * c) cache attribute for translation + * + * Parameters: + * void* vaddr A pointer representing the virtual address (there are no alignment requirements for this address) + * void** paddr This value can be 0, or can point to a pointer variable which will be updated to contain the physical address + * unsigned* way This value can be 0, or can point to an unsigned variable which will be updated to contain the TLB way. + * unsigned* cattr This value can be 0, or can point to an unsigned variable which will be updated to contain the cache attr + * For MPU configurations bits 0..3 hold the access rights and bits 4..8 hold the encoded memory type + * + * Returns 0 (XCHAL_SUCCESS) if successful + * XTHAL_NO_MAPPING if there is no current mapping for the virtual address + * XCHAL_UNSUPPORTED if unsupported + * + * Limitations: + * Assumes that architecture variable DVARWAY56 is "Variable" + * Uses the D-TLBS for the translation ... 
assumption is that ITLB's have same mappings + */ +int xthal_v2p(void* vaddr, void** paddr, unsigned *way, unsigned* cattr) { +#if XCHAL_HAVE_XEA2 +#if XCHAL_HAVE_MPU + if (paddr) + *paddr = vaddr; + if (way) + *way = 0; + if (cattr) + { + struct xthal_MPU_entry x = xthal_get_entry_for_address(vaddr, 0); + *cattr = XTHAL_MPU_ENTRY_GET_ACCESS(x) | XTHAL_MPU_ENTRY_GET_MEMORY_TYPE(x) << XTHAL_AR_WIDTH; + } + return XTHAL_SUCCESS; +#else + unsigned long probe = probe_dtlb((unsigned) vaddr); +#if !XCHAL_HAVE_PTP_MMU + if (!(0x1 & probe)) + return XTHAL_NO_MAPPING; + if (way) + *way = 1; + if (paddr || cattr) { + unsigned long temp; + temp = read_dtlb1_entry(probe); + unsigned ppn = 0xe0000000 & temp; + unsigned att = 0xf & temp; + if (paddr) + *paddr = ((void*) (ppn + (((unsigned) vaddr) & 0x1fffffff))); + if (cattr) + *cattr = att; + } +#else + { + unsigned iway; + if (!(0x10 & probe)) + return XTHAL_NO_MAPPING; + iway = 0xf & probe; + if (way) + *way = iway; + if (paddr || cattr) { + unsigned temp; + unsigned ppn; + unsigned ppn1; + unsigned dtlbcfg = read_dtlbcfg(); + temp = read_dtlb1_entry(probe); + unsigned att = 0xf & temp; + if (cattr) + *cattr = att; + if (paddr) + switch (iway) // followin code derived from fig 4-40 from ISA MMU Option Data (at) Format for RxTLB1 + { /* 4k pages */ + case 0: + case 1: + case 2: + case 3: + case 7: + case 8: + case 9: + ppn = 0xfffff000; // 4k pages + break; + case 4: { + switch ((dtlbcfg & (0x3 << 16)) >> 16) // bits 16 & 17 + { + case 0: // 1MB pages + ppn = 0xfff00000; + break; + case 1: // 4MB pages + ppn = 0xffc00000; + break; + case 2: // 16MB pages + ppn = 0xff000000; + break; + case 3: // 64MB pages + ppn = 0xfc000000; + break; + default: + return XTHAL_UNSUPPORTED; + } + } + break; + case 5: + if ((dtlbcfg & (1 << 20))) + ppn = 0xf8000000; // 128MB pages + else + ppn = 0xf0000000; // 256MB pages + break; + case 6: + if ((dtlbcfg & (1 << 24))) + ppn = 0xe0000000; // 512MB pages + else + ppn = 0xf0000000; // 256MB pages + break; + default: + return XTHAL_UNSUPPORTED; + break; + } + ppn1 = ppn & temp; + *paddr = ((void*) (ppn1 + (((unsigned) vaddr) & (~ppn)))); + } + } +#endif + return XTHAL_SUCCESS; +#endif +#else + return XTHAL_UNSUPPORTED; +#endif +} + +/* these constants borrowed from xthal_set_region_attribute */ +# if XCHAL_HAVE_PTP_MMU +# define CA_BYPASS XCHAL_CA_BYPASS +# define CA_WRITETHRU XCHAL_CA_WRITETHRU +# define CA_WRITEBACK XCHAL_CA_WRITEBACK +# define CA_WRITEBACK_NOALLOC XCHAL_CA_WRITEBACK_NOALLOC +# define CA_ILLEGAL XCHAL_CA_ILLEGAL +# else +/* Hardcode these, because they get remapped when caches or writeback not configured: */ +# define CA_BYPASS 2 +# define CA_WRITETHRU 1 +# define CA_WRITEBACK 4 +# define CA_WRITEBACK_NOALLOC 5 +# define CA_ILLEGAL 15 +# endif + +/* internal function that returns 1 if the supplied attr indicates the + * cache is in writeback mode. + */ +static inline int is_writeback(unsigned attr) { +#if XCHAL_HAVE_XLT_CACHEATTR + return attr == CA_WRITEBACK || attr == CA_WRITEBACK_NOALLOC; +#endif +#if XCHAL_HAVE_PTP_MMU && XCHAL_HAVE_SPANNING_WAY + return (attr | 0x3) == CA_WRITEBACK; +#endif + return -1; /* unsupported */ +} + +/* + * xthal_set_region_translation() + * + * Establishes a new mapping (with the supplied cache attributes) + * between a virtual address region, and a physical address region. 
+ * + * This function is only supported with following processor configurations: + * a) Region Translation + * b) v3 MMU with a spanning way running in the default mode + * + * If the specified memory range exactly covers a series + * of consecutive 512 MB regions, the address mapping and cache + * attributes of these regions are updated. + * + * If this is not the case, e.g. if either or both the + * start and end of the range only partially cover a 512 MB + * region, one of three results are possible: + * + * 1. By default, the cache attribute of all regions + * covered, even just partially, is changed to + * the requested attribute. + * + * 2. If the XTHAL_CAFLAG_EXACT flag is specified, + * a non-zero error code is returned. + * + * 3. If the XTHAL_CAFLAG_NO_PARTIAL flag is specified + * (but not the EXACT flag), only regions fully + * covered by the specified range are updated with + * the requested attribute. + * + * CACHE HANDLING + * + * This function automatically writes back dirty data before remapping a + * virtual address region. + * + * This writeback is done safely, ie. by first switching to writethrough + * mode, and then invoking xthal_dcache_all_writeback(). Such a sequence is + * necessary to ensure there is no longer any dirty data in the memory region by the time + * this function returns, even in the presence of interrupts, speculation, etc. + * This automatic write-back can be disabled using the XTHAL_CAFLAG_NO_AUTO_WB flag. + * + * This function also invalidates the caches after remapping a region because the + * cache could contain (now invalid) data from the previous mapping. + * This automatic invalidate can be disabled using the XTHAL_CAFLAG_NO_AUTO_INV flag. + * + * Parameters: + * vaddr starting virtual address of region of memory + * + * paddr starting physical address for the mapping (this should be 512MB aligned to vaddr such that ((vaddr ^ paddr) & 0x10000000 == 0) + * + * size number of bytes in region of memory + * (see above, SPECIFYING THE MEMORY REGION) + * + * cattr cache attribute (encoded); + * typically taken from compile-time HAL constants + * XCHAL_CA_{BYPASS, WRITETHRU, WRITEBACK[_NOALLOC], ILLEGAL} + * (defined in ); + * in XEA1, this corresponds to the value of a nibble + * in the CACHEATTR register; + * in XEA2, this corresponds to the value of the + * cache attribute (CA) field of each TLB entry + * + * flags bitwise combination of flags XTHAL_CAFLAG_* + * + * XTHAL_CAFLAG_EXACT - If this flag is present, + * the mapping will only be done if the specified + * region exactly matches on or more 512MB pages otherwise + * XCHAL_INEXACT is returned (and no mapping is done). + * + * XTHAL_CAFLAG_NO_PARTIAL - If this flag is specified, then + * only pages that are completely covered by the specified region + * are affected. If this flag is specified, and no pages are completely + * covered by the region, then no pages are affected and XCHAL_NO_REGIONS_COVERED + * is returned. 
+ * + * + * + * Returns: + * XCHAL_SUCCESS - successful, or size is zero + * + * XCHAL_NO_REGIONS_COVERED - XTHAL_CAFLAG_NO_PARTIAL flag specified and address range + * is valid with a non-zero size, however no 512 MB region (or page) + * is completely covered by the range + * + * XCHAL_INEXACT XTHAL_CAFLAG_EXACT flag specified, and address range does + * not exactly specify a 512 MB region (or page) + * + * XCHAL_INVALID_ADDRESS invalid address range specified (wraps around the end of memory) + * + * XCHAL_ADDRESS_MISALIGNED virtual and physical addresses are not aligned (512MB) + * + * + * XCHAL_UNSUPPORTED_ON_THIS_ARCH function not supported in this processor configuration + */ +int xthal_set_region_translation(void* vaddr, void* paddr, unsigned size, + unsigned cattr, unsigned flags) { +#if XCHAL_HAVE_XEA2 & !XCHAL_HAVE_MPU +#if XCHAL_HAVE_XLT_CACHEATTR || (XCHAL_HAVE_PTP_MMU && XCHAL_HAVE_SPANNING_WAY) + const unsigned CA_MASK = 0xF; + const unsigned addr_mask = 0x1fffffff; + const unsigned addr_shift = 29; + unsigned vaddr_a = (unsigned) vaddr; + unsigned paddr_a = (unsigned) paddr; + unsigned end_vaddr; + unsigned end_paddr; + unsigned start_va_reg; + unsigned end_va_reg; + unsigned start_pa_reg; + unsigned icache_attr = 0; + int rv; + int i; + if (size == 0) + return XTHAL_SUCCESS; + if ((vaddr_a & addr_mask) ^ (paddr_a & addr_mask)) + return XTHAL_ADDRESS_MISALIGNED; + icache_attr = cattr & CA_MASK; +#if (XCHAL_HAVE_PTP_MMU && XCHAL_HAVE_SPANNING_WAY) + // if using the mmu in spanning way mode then 'and in' the R, RX, RW, RWX bits + if ((cattr & 0x40000000) && (icache_attr < 12)) + icache_attr = icache_attr & ((cattr & 0xF0) >> 4); +#endif + end_vaddr = vaddr_a + size - 1; + end_paddr = paddr_a + size - 1; + + if ((end_vaddr < vaddr_a) || (end_paddr < paddr_a)) + return XTHAL_INVALID_ADDRESS; + start_va_reg = vaddr_a >> addr_shift; + end_va_reg = end_vaddr >> addr_shift; + start_pa_reg = paddr_a >> addr_shift; + if ((flags & XTHAL_CAFLAG_EXACT) + && ((size & addr_mask) || (vaddr_a & addr_mask) + || (paddr_a & addr_mask))) + return XTHAL_INEXACT; + if (flags & XTHAL_CAFLAG_NO_PARTIAL) { + if (vaddr_a & addr_mask) { + start_va_reg++; + start_pa_reg++; + } + if ((end_vaddr & addr_mask) != addr_mask) + end_va_reg--; + } + if (end_va_reg < start_va_reg) + return XTHAL_NO_REGIONS_COVERED; + /* + * Now we need to take care of any uncommitted cache writes in the affected regions + * 1) first determine if any regions are in write back mode + * 2) change those pages to write through + * 3) force the writeback of d-cache by calling xthal_dcach_all_writeback() + */ +#if ((XCHAL_DCACHE_SIZE >0) && XCHAL_DCACHE_IS_WRITEBACK) + if (!(flags & XTHAL_CAFLAG_NO_AUTO_WB)) { + unsigned old_cache_attr = xthal_get_cacheattr(); + unsigned cachewrtr = old_cache_attr; + unsigned need_safe_writeback = 0; + for (i = start_va_reg; i <= end_va_reg; i++) { + unsigned sh = i << 2; + unsigned old_attr = (old_cache_attr >> sh) & CA_MASK; + if (is_writeback(old_attr)) { + need_safe_writeback = 1; + cachewrtr = (cachewrtr & ~(CA_MASK << sh)) + | (CA_WRITETHRU << sh); + } + } + + if (need_safe_writeback) { + xthal_set_cacheattr(cachewrtr); /* set to writethru first, to safely writeback any dirty data */ + xthal_dcache_all_writeback(); /* much quicker than scanning entire 512MB region(s) */ + } + } +#endif + /* Now we set the affected region translations */ + for (i = start_va_reg; i <= end_va_reg; i++) { + if ((rv = xthal_set_region_translation_raw( + (void*) ((start_va_reg++) << addr_shift), + (void*) 
((start_pa_reg++) << addr_shift), icache_attr))) + return rv; + } + + /* + * Now we need to invalidate the cache in the affected regions. For now invalidate entire cache, + * but investigate if there are faster alternatives on some architectures. + */ + if (!(flags & XTHAL_CAFLAG_NO_AUTO_INV)) { +# if XCHAL_DCACHE_SIZE > 0 + xthal_dcache_all_writeback_inv(); /* some areas in memory (outside the intended region) may have uncommitted + data so we need the writeback_inv(). */ +#endif +#if XCHAL_ICACHE_SIZE >0 + xthal_icache_all_invalidate(); +#endif + } + return XTHAL_SUCCESS; +#else + return XTHAL_UNSUPPORTED; +#endif +#else + return XTHAL_UNSUPPORTED; +#endif +} + +/* xthal_invalidate_region() + * invalidates the tlb entry for the specified region. + * + * This function is only supported on processor configurations + * with a v3 MMU with a spanning way. + * + * Parameter + * vaddr - virtual address of region to invalidate (512MB aligned) + * + * returns: + * XCHAL_SUCCESS - Success + * XCHAL_UNSUPPORTED_ON_THIS_ARCH - Unsupported + * + */ +int xthal_invalidate_region(void* vaddr) { +#if XCHAL_HAVE_XEA2 & !XCHAL_HAVE_MPU +#if (XCHAL_HAVE_PTP_MMU && XCHAL_HAVE_SPANNING_WAY) + unsigned addr = (unsigned) vaddr; + if (addr & 0x1fffffff) + return XTHAL_INVALID_ADDRESS; + addr += XCHAL_SPANNING_WAY; + invalidate_dtlb_entry(addr); + invalidate_itlb_entry(addr); + return XTHAL_SUCCESS; +#else + return XTHAL_UNSUPPORTED; +#endif +#else + return XTHAL_UNSUPPORTED; +#endif +} + diff --git a/src/arch/xtensa/include/xtensa/c6x-compat.h b/src/arch/xtensa/include/xtensa/c6x-compat.h index 4b17987ea95c..ca91bd718397 100755 --- a/src/arch/xtensa/include/xtensa/c6x-compat.h +++ b/src/arch/xtensa/include/xtensa/c6x-compat.h @@ -1,1758 +1,1758 @@ -/* - * Copyright (c) 2006-2010 Tensilica Inc. ALL RIGHTS RESERVED. - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. 
- */ - -#ifndef __C6X_COMPAT__H -#define __C6X_COMPAT__H - -/* Unimplemented functions _gmpy, _gmpy4, _xormpy, _lssub, _cmpy, _cmpyr, - _cmpyr1, _ddotpl2r, _ddotph2r */ - - -typedef long long C6X_COMPAT_LONG40; - - -#define _memd8(a) (*((double*)(a))) -#define _memd8_const(a) (*((const double*)(a))) - -#define _amemd8(a) (*((double*)(a))) -#define _amemd8_const(a) (*((const double*)(a))) - -#define _mem8(a) (*((unsigned long long*)(a))) -#define _mem8_const(a) (*((const unsigned long long*)(a))) - -#define _mem4(a) (*((unsigned*)(a))) -#define _mem4_const(a) (*((const unsigned*)(a))) -#define _amem4_const(a) (*((const unsigned*)(a))) - -/* NOTE: To emulate a C6X properly you should define global variables - for your Xtensa with these names. Some of the emulation routines - will set these values. */ - -extern int _carry; -extern int _overflow; - -// Utility routines - - -#define TESTBIT(x,n) (((x) >> (n)) & 1) - -#define NSA_BITS 32 - -static inline unsigned int norm_shift_amt_U_and_non_U(int is_signed, int inp) { -int j=0, k=0; -int x=inp; -if (is_signed) { - /* Invert signed val if negative */ - x= TESTBIT(x,(NSA_BITS-1))? ~x: x; - x= (x&1)|(x<<1); /* Shift up to return count-1 */ - if (x ==0) - return NSA_BITS-1; - } - if (x ==0) - return NSA_BITS; - /* Now count leading zeros */ - for (j=0, k=NSA_BITS-1; k>=0; j++, k--) { - if (TESTBIT(x,k)) - return j; - } - return NSA_BITS; -} - - - -static inline long long -orig_L40_set( long long L40_var1) { - long long L40_var_out; - - L40_var_out = L40_var1 & 0x000000ffffffffffLL; - - if( L40_var1 & 0x8000000000LL) - L40_var_out = L40_var_out | 0xffffff0000000000LL; - - return( L40_var_out); -} - - - -static inline signed long long -util_saturate_n_no_state(signed long long t, int n) -{ - signed long long maxv, minv; - maxv = (1LL << (n-1)) - 1; - minv = (-1LL << (n-1)); - if (t > maxv) { - t = maxv; - } else if (t < minv) { - t = minv; - } - return t; -} - - -static inline signed long long -util_saturate_n_sgn(signed long long t, int n) -{ - signed long long result; - signed long long maxv, minv; - maxv = (1LL << (n-1)) - 1; - minv = (-1LL << (n-1)); - if (t > 0) { - result = maxv; - _overflow = 1; - } else if (t < 0) { - result = minv; - _overflow = 1; - } else { - result = 0; - } - return result; -} - - - - -/* well-behaved signed shift right (left on negative) with - saturation */ -static inline signed long long -util_shift_right_saturate_n(signed long long t, int shval, int n) -{ - /* n should be <= 62 */ - long long result; - - signed long long mask; - int actual_shift = shval; - long long shft = actual_shift > 0 ? actual_shift : -actual_shift; - - if (t == 0 || actual_shift == 0) - return t; - - if (actual_shift >= n) { - return (t < 0) ? -1 : 0; - } - if (actual_shift <= -n) { - return util_saturate_n_sgn(t, n); - } - if (actual_shift > 0) { - return t >> actual_shift; - } - /* actual_shift < 0. Check for saturation after shift. 
*/ - mask = (-1LL << (n-shft-1)); - if (t > 0 && ((mask & t) != 0)) { - return util_saturate_n_sgn(t, n); - } - if (t < 0 && ((mask & t) != mask)) { - return util_saturate_n_sgn(t, n); - } - result = t << shft; - - return result; -} - - -/* Implemented c6x standard C compatibility functions (alphabetical - order) */ - - -static inline int _abs(int src1) { - if ((unsigned) src1 == (unsigned) 0x80000000) { - return 0x7fffffff; - } - return abs(src1); -} - - -static inline int _abs2(int src1) { - short s1[2],r[2]; - int result; - *((int*)s1) = src1; - if ((unsigned short) s1[1] == (unsigned short) 0x8000) r[1] = 0x7fff; - else r[1] = abs(s1[1]); - if ((unsigned short) s1[0] == (unsigned short) 0x8000) r[0] = 0x7fff; - else r[0] = abs(s1[0]); - result = *(int*)r; - return result; - } - - - - -static inline int _add2(int src1, int src2) { - short s1[2], s2[2], r[2]; - int result; - *((int*)s1) = src1; - *((int*)s2) = src2; - r[0] = s1[0] + s2[0]; - r[1] = s1[1] + s2[1]; - result = *(int*)r; - return result; -} - -static inline int _add4(int src1, int src2) { - char c1[4], c2[4], r[4]; - int result; - *((int*)c1) = src1; - *((int*)c2) = src2; - r[0] = c1[0] + c2[0]; - r[1] = c1[1] + c2[1]; - r[2] = c1[2] + c2[2]; - r[3] = c1[3] + c2[3]; - result = *(int*)r; - return result; -} - - - -static inline long long _addsub(unsigned int src1, unsigned int src2) -{ - - int res_lo; - int res_hi; - - res_hi = src1+src2; - res_lo = src1-src2; - return (((unsigned long long) res_hi) << 32) | ((unsigned int) res_lo) ; -} - - -static inline long long _addsub2(unsigned int src1, unsigned int src2) -{ - short s1[2], s2[2], ra[2], rs[2]; - int res_lo; - int res_hi; - - *((int*)s1) = src1; - *((int*)s2) = src2; - ra[0] = s1[0] + s2[0]; - ra[1] = s1[1] + s2[1]; - rs[0] = s1[0] - s2[0]; - rs[1] = s1[1] - s2[1]; - - res_hi = *(int*)ra; - res_lo = *(int*)rs; - return (((unsigned long long) res_hi) << 32) | ((unsigned int) res_lo) ; -} - - -static inline int _avg2(int src1, int src2) { - int low = (((int)1 + (short) src1 + (short) src2) >> 1) & 0XFFFF; - int high1 = src1 >> 16; - int high2 = src2 >> 16; - int high = ((high1 + high2 + 1) >> 1)<< 16; - return high | low; -} - - - -static inline unsigned int _avgu4(unsigned int src1, unsigned int src2) { -unsigned int res0 = ((src1 & 0xFF) + (src2 & 0xFF) + 1) >> 1; - unsigned int res1 = (((src1 & 0xFF00) >> 8) + ((src2 & 0xFF00) >> 8) + 1) >> 1; - unsigned int res2 = (((src1 & 0xFF0000) >> 16) + ((src2 & 0xFF0000) >> 16) + 1) >> 1; - unsigned int res3 = (((src1 & 0xFF000000) >> 24) + ((src2 & 0xFF000000) >> 24) + 1) >> 1; - return (res3 << 24) | (res2 << 16) | (res1 << 8) | res0; -} - - -static inline int TEN_popc (unsigned char b) -{ - int i, result = 0; - for (i = 0; i < 8; i++){ - if (b & 0x1) - result++; - b >>= 1; - } - return result; -} - -static inline unsigned int _bitc4(unsigned int src1) -{ - unsigned int res0 = TEN_popc(src1 & 0xFF); - unsigned int res1 = TEN_popc((src1 & 0xFF00) >> 8); - unsigned int res2 = TEN_popc((src1 & 0xFF0000) >> 16); - unsigned int res3 = TEN_popc((src1 & 0xFF000000) >> 24); - return (res3 << 24) | (res2 << 16) | (res1 << 8) | res0; -} - -static inline unsigned int _bitr(unsigned int src) { - int i; - unsigned r = 0; - for (i = 0; i< 32; ++i) { - r = r | (((src >> i) & 1)<<(31-i)); - } - return r; -} - - -static inline unsigned int _clr(unsigned int src2, int csta, int cstb) -{ - csta &= 0x1f; - cstb &= 0x1f; - if (csta > cstb) - return src2; - else { - unsigned int mask = (((1 << (cstb - csta)) << 1) - 1) << csta; - return src2 & 
(~mask); - } -} - -static inline unsigned int _clrr(unsigned int src2, int src1) -{ - unsigned int csta = (src1 >> 5) & 0x1f; - unsigned int cstb = src1 & 0x1f; - if (csta > cstb) - return src2; - else { - unsigned int mask = (((1 << (cstb - csta)) << 1) - 1) << csta; - return src2 & (~mask); - } -} - - - - -static inline int _cmpeq2(int src1, int src2) { - short s1[2], s2[2]; - int r0, r1; - int result; - *((int*)s1) = src1; - *((int*)s2) = src2; - r0 = s1[0] == s2[0] ? 1 : 0; - r1 = s1[1] == s2[1] ? 1 : 0; - result = (r1 << 1) | r0; - return result; -} - -static inline int _cmpeq4(int src1, int src2) { - char s1[4], s2[4]; - int r0, r1, r2, r3; - int result; - *((int*)s1) = src1; - *((int*)s2) = src2; - r0 = s1[0] == s2[0] ? 1 : 0; - r1 = s1[1] == s2[1] ? 1 : 0; - r2 = s1[2] == s2[2] ? 1 : 0; - r3 = s1[3] == s2[3] ? 1 : 0; - result = (r3 << 3) | (r2 << 2) | (r1 << 1) | r0; - return result; -} - - -static inline int _cmpgt2(int src1, int src2) { - short s1[2], s2[2]; - int r1, r0; - int result; - *((int*)s1) = src1; - *((int*)s2) = src2; - r0 = s1[0] > s2[0] ? 1 : 0; - r1 = s1[1] > s2[1] ? 1 : 0; - result = (r1<<1) | r0; - return result; -} - - -static inline unsigned int _cmpgtu4(unsigned int src1, unsigned int src2) { - unsigned int s1_0 = (src1 & 0xFF); - unsigned int s1_1 = (src1 & 0xFF00) >> 8; - unsigned int s1_2 = (src1 & 0xFF0000) >> 16; - unsigned int s1_3 = (src1 & 0xFF000000) >> 24; - - unsigned int s2_0 = (src2 & 0xFF); - unsigned int s2_1 = (src2 & 0xFF00) >> 8; - unsigned int s2_2 = (src2 & 0xFF0000) >> 16; - unsigned int s2_3 = (src2 & 0xFF000000) >> 24; - - unsigned int result = 0; - - if (s1_0 > s2_0) - result |= 0x1; - - if (s1_1 > s2_1) - result |= 0x2; - - if (s1_2 > s2_2) - result |= 0x4; - - if (s1_3 > s2_3) - result |= 0x8; - - return result; -} - - - - -static inline long long _ddotp4(unsigned int src1, unsigned int src2) { - unsigned int res0, res1; - short s1_0 = (src1 & 0xffff); - short s1_1 = (src1 & 0xfff0000) >> 16; - - unsigned short s2_0 = (src2 & 0xff); - unsigned short s2_1 = (src2 & 0xff00) >> 8; - unsigned short s2_2 = (src2 & 0xff0000) >> 16; - unsigned short s2_3 = (src2 & 0xff000000) >> 24; - - res0 = ((int)s1_0) * s2_0 + ((int)s1_1) * s2_1; - res1 = ((int)s1_0) * s2_2 + ((int)s1_1) * s2_3; - - return (res1 << 16) | res0; -} - - -static inline long long _ddotph2(long long src1_o_src1_e, unsigned int src2) -{ - - unsigned int src1_o = src1_o_src1_e >> 32; - unsigned int src1_e = src1_o_src1_e & 0xFFFFFFFF; - short ls1_o = src1_o & 0XFFFF; - short hs1_o = src1_o >> 16; -// short ls1_e = src1_e & 0XFFFF; - short hs1_e = src1_e >> 16; - short ls2 = src2 & 0XFFFF; - short hs2 = src2 >> 16; - - unsigned long long res_hi = ls2 * ls1_o + hs2 * hs1_o; - unsigned int res_lo = ls1_o * hs2 + hs1_e * ls2; - return (res_hi << 32) | res_lo; -} - - -static inline long long _ddotpl2(long long src1_o_src1_e, unsigned int src2) -{ - unsigned int src1_o = src1_o_src1_e >> 32; - unsigned int src1_e = src1_o_src1_e & 0xFFFFFFFF; - short ls1_o = src1_o & 0XFFFF; -// short hs1_o = src1_o >> 16; - short ls1_e = src1_e & 0XFFFF; - short hs1_e = src1_e >> 16; - short ls2 = src2 & 0XFFFF; - short hs2 = src2 >> 16; - - unsigned long long res_hi = ls2 * hs1_e + hs2 * ls1_o; - unsigned res_lo = hs1_e * hs2 + ls1_e * ls2; - return (res_hi << 32) | res_lo; -} - - -static inline unsigned int _deal(unsigned int src) -{ - int i; - unsigned short lo = 0, hi = 0; - for (i = 0; i < 32; i+= 2) { - lo >>= 1; - lo |= (src & 0x1) << 15; - src >>= 1; - hi >>= 1; - hi |= (src & 0x1) << 15; - 
src >>= 1; - } - return (hi << 16) | lo; -} - - -static inline long long _dmv(unsigned int src1, unsigned int src2) -{ - return (((long long) src1) << 32) | src2; -} - - -static inline int _dotpn2(int src1, int src2) { -short int s1_h = src1>>16; - short int s1_l = src1; - short int s2_h = src2>>16; - short int s2_l = src2; - return s1_h * s2_h - s1_l * s2_l; -} - - -static inline int _dotp2(int src1, int src2) { - short int s1_h = src1>>16; - short int s1_l = src1; - short int s2_h = src2>>16; - short int s2_l = src2; - return s1_h * s2_h + s1_l * s2_l; -} - - - -static inline int _dotpnrsu2(int src1, unsigned int src2) -{ - short ls1 = src1 & 0XFFFF; - unsigned short ls2 = src2 & 0XFFFF; - short hs1 = src1 >> 16; - unsigned short hs2 = src2 >> 16; - - int result = (((long long) (int)(hs1 * hs2)) - ((long long) (int)(ls1 * ls2)) + (1 << 15)) >> 16; - return result; -} - - - -static inline int _dotprsu2(int src1, unsigned int src2) { - short ls1 = src1 & 0XFFFF; - unsigned short ls2 = (src2 & 0XFFFF); - short hs1 = src1 >> 16; - unsigned short hs2 = (src2 >> 16); - - int result = (((long long) (int) (ls1 * ls2)) + ((long long) (int) (hs1 * hs2)) + (1LL << 15)) >> 16; - return result; -} - - - - - - - -static inline int _dotpsu4(int src1, unsigned int src2) { - int result; - signed char s1_0 = (src1 & 0xff); - signed char s1_1 = (src1 & 0xff00) >> 8; - signed char s1_2 = (src1 & 0xff0000) >> 16; - signed char s1_3 = (src1 & 0xff000000) >> 24; - - unsigned int s2_0 = (src2 & 0xff); - unsigned int s2_1 = (src2 & 0xff00) >> 8; - unsigned int s2_2 = (src2 & 0xff0000) >> 16; - unsigned int s2_3 = (src2 & 0xff000000) >> 24; - - result = s1_0 * s2_0 + s1_1 * s2_1 + s1_2 * s2_2 + s1_3 * s2_3; - return result; -} - - -static inline unsigned int _dotpu4(unsigned int src1, unsigned int src2) { - unsigned char v1_0 = src1 & 0xff; - unsigned char v1_1 = (src1>>8) & 0xff; - unsigned char v1_2 = (src1>>16) & 0xff; - unsigned char v1_3 = (src1>>24) & 0xff; - - unsigned char v2_0 = src2 & 0xff; - unsigned char v2_1 = (src2>>8) & 0xff; - unsigned char v2_2 = (src2>>16) & 0xff; - unsigned char v2_3 = (src2>>24) & 0xff; - - unsigned v = v1_0 * v2_0 + v1_1 * v2_1 + v1_2 * v2_2 + v1_3 * v2_3; - return v; -} - - -static inline long long _dpack2(unsigned int src1, unsigned int src2){ -unsigned short s1[2], s2[2]; -*((int*)s1) = src1; -*((int*)s2) = src2; -return ((unsigned long long) s1[1] << 48) | ((unsigned long long) s2[1] << 32) | ((unsigned long long) s1[0] << 16) | ((unsigned long long) s2[0]); -} - - -static inline long long _dpackx2(unsigned int src1, unsigned int src2){ -unsigned short s1[2], s2[2]; -*((int*)s1) = src1; -*((int*)s2) = src2; -return ((unsigned long long) s2[0] << 48) | ((unsigned long long) s1[1] << 32) | ((unsigned long long) s1[0] << 16) | ((unsigned long long) s2[1]); -} - -static inline int _ext(int src2, unsigned int csta, unsigned int cstb) -{ - return (src2 << csta) >> cstb; -} - -static inline int _extr(int src2, int src1) -{ - unsigned int csta = (src1 >> 5) & 0x1f; - unsigned int cstb = src1 & 0x1f; - return (src2 << csta) >> cstb; -} - -static inline unsigned int _extu(unsigned int src2, unsigned int csta, unsigned int cstb) -{ - return (src2 << csta) >> cstb; -} - -static inline unsigned int _extur(unsigned int src2, int src1) -{ - unsigned int csta = (src1 >> 5) & 0x1f; - unsigned int cstb = src1 & 0x1f; - return (src2 << csta) >> cstb; -} - - -static inline unsigned long long _hi(double src) { - unsigned long long v; - *(double*)&v = src; - return v>>32; -} - -static inline 
unsigned int _hill (long long src) -{ - return (unsigned int) (src >> 32); -} - - - -static inline double _itod(unsigned hi, unsigned lo) { - double v; - unsigned long long ll = ((((unsigned long long)(hi))<<32) | (unsigned long long)((unsigned)lo)); - *((unsigned long long *)&v) = ll; - return v; -} - - -static inline long long _itoll(unsigned int src2, unsigned int src1) -{ - return (((long long) src2) << 32) | src1; -} - - -static inline C6X_COMPAT_LONG40 _labs(C6X_COMPAT_LONG40 src2) -{ - long long maxv = (1LL << (40 -1)) - 1; - long long minv = (-1LL << (40 - 1)); - C6X_COMPAT_LONG40 lres = orig_L40_set(src2); - - lres = lres < 0 ? -lres : lres; - if (lres > maxv) lres = maxv; - else if (lres < minv) lres = minv; - - return lres; -} - - -static inline C6X_COMPAT_LONG40 _ldotp2(int src1, int src2) { -return (C6X_COMPAT_LONG40) _dotp2(src1, src2); -} - - -static inline unsigned int _lmbd(unsigned int src1, unsigned int src2) -{ - return norm_shift_amt_U_and_non_U(0,(((int) (src1 << 31)) >> 31) ^ (~src2)); -} - - -static inline unsigned int _lnorm(C6X_COMPAT_LONG40 src2) { -if (src2 == 0) - return 39; - else { - int hi = (int)(src2 >> 32); - int lo = (int)src2; - - - long long temp = (unsigned long long)(unsigned)lo | (unsigned long long)hi << 32; - temp = orig_L40_set(temp); - - if (temp == 0) return 0; - int cnt = 0; - while (((temp >> 39) & 1) == ((temp >> 38) & 1)) { - temp <<= 1; - cnt++; - } - return cnt; - } -} - - -static inline unsigned long long _lo(double src) { - unsigned long long v; - *(double*)&v = src; - return v; -} - - -static inline unsigned int _loll (long long src) -{ - return (unsigned int) src; -} - - -static inline C6X_COMPAT_LONG40 _lsadd(int src1, C6X_COMPAT_LONG40 src2) -{ - long long maxv = (1LL << (40 -1)) - 1; - long long minv = (-1LL << (40 - 1)); - int hi = (int)(src2 >> 32); - int lo = (int)src2; - long long src2_int = (unsigned long long)(unsigned)lo | (unsigned long long)hi << 32; - - - long long src2_int2 = orig_L40_set(src2_int); - - long long res = src1 + src2_int2; - - if (res > maxv) { - res = maxv; - _overflow = 1; - } - else if (res < minv) { - res = minv; - _overflow = 1; - } - - long long res2 = orig_L40_set(res); - - res2 = (signed char)(res2 >> 32); - - C6X_COMPAT_LONG40 lres = (((C6X_COMPAT_LONG40) res2) << 32) | ((unsigned int)res); - return lres; -} - - - -static inline int _max2 (int src1, int src2) { - short s1[2], s2[2], r[2]; - int result; - *((int*)s1) = src1; - *((int*)s2) = src2; - r[0] = s1[0] > s2[0] ? s1[0] : s2[0]; - r[1] = s1[1] > s2[1] ? s1[1] : s2[1]; - result = *(int*)r; - return result; -} - - - - - - -static inline unsigned int _maxu4(unsigned int src1, unsigned int src2) { - unsigned int res0, res1, res2, res3; - unsigned int s1_0 = res0 = (src1 & 0xFF); - unsigned int s1_1 = res1 = (src1 & 0xFF00) >> 8; - unsigned int s1_2 = res2 = (src1 & 0xFF0000) >> 16; - unsigned int s1_3 = res3 = (src1 & 0xFF000000) >> 24; - - unsigned int s2_0 = (src2 & 0xFF); - unsigned int s2_1 = (src2 & 0xFF00) >> 8; - unsigned int s2_2 = (src2 & 0xFF0000) >> 16; - unsigned int s2_3 = (src2 & 0xFF000000) >> 24; - -// unsigned int res = 0; - - if (s1_0 < s2_0) - res0 = s2_0; - - if (s1_1 < s2_1) - res1 = s2_1; - - if (s1_2 < s2_2) - res2 = s2_2; - - if (s1_3 < s2_3) - res3 = s2_3; - - return (res3 << 24) | (res2 << 16) | (res1 << 8) | res0; - - -} - -static inline int _min2(int src1, int src2) { - short s1[2], s2[2], r[2]; - int result; - *((int*)s1) = src1; - *((int*)s2) = src2; - r[0] = s1[0] < s2[0] ? s1[0] : s2[0]; - r[1] = s1[1] < s2[1] ? 
s1[1] : s2[1]; - result = *(int*)r; - return result; -} - - -static inline unsigned int _minu4(unsigned int src1, unsigned int src2) { -unsigned int res0, res1, res2, res3; - unsigned int s1_0 = res0 = (src1 & 0xFF); - unsigned int s1_1 = res1 = (src1 & 0xFF00) >> 8; - unsigned int s1_2 = res2 = (src1 & 0xFF0000) >> 16; - unsigned int s1_3 = res3 = (src1 & 0xFF000000) >> 24; - - unsigned int s2_0 = (src2 & 0xFF); - unsigned int s2_1 = (src2 & 0xFF00) >> 8; - unsigned int s2_2 = (src2 & 0xFF0000) >> 16; - unsigned int s2_3 = (src2 & 0xFF000000) >> 24; - -// unsigned int res = 0; - - if (s1_0 > s2_0) - res0 = s2_0; - - if (s1_1 > s2_1) - res1 = s2_1; - - if (s1_2 > s2_2) - res2 = s2_2; - - if (s1_3 > s2_3) - res3 = s2_3; - - return (res3 << 24) | (res2 << 16) | (res1 << 8) | res0; -} - - -static inline int _mpy(int src1, int src2) { -return (short) src1 * (short) src2; -} - - -static inline int _mpyh(int src1, int src2) { -return (short) (src1 >> 16) * (short) (src2 >> 16); -} - - -static inline long long _mpyhill (int src1, int src2) -{ - short s1 = src1 >> 16; - return ((long long) src2) * s1; -} - -static inline int _mpyhir(int src1, int src2) -{ - short s1 = src1 >> 16; - long long result = ((long long) src2) * s1 + (1 << 14); - result >>= 15; - return result; -} - - -static inline int _mpyhl(int src1, int src2) { -return (short) (src1 >> 16) * (short) (src2); -} - -static inline unsigned int _mpyhlu(unsigned int src1, unsigned int src2) { -return (unsigned short) (src1 >> 16) * (unsigned short) (src2); -} - -static inline int _mpyhslu(int src1, unsigned int src2) { -return (short) (src1 >> 16) * (unsigned short) src2; -} - - -static inline int _mpyhsu(int src1, unsigned int src2) { -return (short) (src1 >>16) * (unsigned short) (src2 >>16); -} - - -static inline unsigned int _mpyhu(unsigned int src1, unsigned int src2) { -return (unsigned short) (src1 >>16) * (unsigned short) (src2 >> 16); -} - - -static inline int _mpyhuls(unsigned int src1, int src2) { -return (unsigned short) (src1 >>16) * (signed short) (src2); -} - - -static inline int _mpyhus(unsigned int src1, int src2) { -return (unsigned short) (src1 >> 16) * (short) (src2 >>16); -} - - - -static inline long long _mpyidll (int src1, int src2) -{ - return (long long) src1 * src2; -} - - -static inline int _mpylh(int src1, int src2) { -return (signed short) (src1 & 0xffff) * (signed short) (src2 >> 16); -} - -static inline unsigned int _mpylhu(unsigned int src1, unsigned int src2) { -return (unsigned short) src1 * (unsigned short) (src2 >> 16); -} - - -static inline long long _mpylill (int src1, int src2) -{ - return ((long long) src2) * ((short)src1); -} - - - -static inline int _mpylir(int src1, int src2) -{ - short s1 = src1; - long long result = ((long long) src2) * s1 + (1 << 14); - result >>= 15; - return result; -} - - -static inline int _mpylshu(int src1, unsigned int src2) { -return (short) src1 * (unsigned short) (src2 >> 16); -} - - -static inline int _mpyluhs(unsigned int src1, int src2) { -return (unsigned short) src1 * (short) (src2 >> 16); -} - - - -static inline int _mpysu(int src1, unsigned int src2) { -return (short) src1 * (unsigned short) src2; -} - - - -static inline long long _mpysu4ll (int src1, unsigned int src2) { - unsigned short res0, res1, res2, res3; - signed char s1_0 = (src1 & 0xff); - signed char s1_1 = (src1 & 0xff00) >> 8; - signed char s1_2 = (src1 & 0xff0000) >> 16; - signed char s1_3 = (src1 & 0xff000000) >> 24; - - unsigned short s2_0 = (src2 & 0xff); - unsigned short s2_1 = (src2 & 0xff00) 
>> 8; - unsigned short s2_2 = (src2 & 0xff0000) >> 16; - unsigned short s2_3 = (src2 & 0xff000000) >> 24; - - res0 = s1_0 * s2_0; - res1 = s1_1 * s2_1; - res2 = s1_2 * s2_2; - res3 = s1_3 * s2_3; - - return (((unsigned long long) res3) << 48) - | (((unsigned long long) res2) << 32) - | (((unsigned long long) res1) << 16) - | res0; -} - -static inline unsigned int _mpyu(unsigned int src1, unsigned int src2) { - unsigned v = (unsigned short)src1 * (unsigned short)src2; - return v; -} - -static inline int _mpyus(unsigned int src1, int src2) { -return (unsigned short) src1 * (short) src2; -} - -static inline long long _mpyu4ll (unsigned int src1, unsigned int src2) { - unsigned short res0, res1, res2, res3; - unsigned char s1_0 = (src1 & 0xff); - unsigned char s1_1 = (src1 & 0xff00) >> 8; - unsigned char s1_2 = (src1 & 0xff0000) >> 16; - unsigned char s1_3 = (src1 & 0xff000000) >> 24; - - unsigned short s2_0 = (src2 & 0xff); - unsigned short s2_1 = (src2 & 0xff00) >> 8; - unsigned short s2_2 = (src2 & 0xff0000) >> 16; - unsigned short s2_3 = (src2 & 0xff000000) >> 24; - - res0 = s1_0 * s2_0; - res1 = s1_1 * s2_1; - res2 = s1_2 * s2_2; - res3 = s1_3 * s2_3; - - return (((unsigned long long) res3) << 48) - | (((unsigned long long) res2) << 32) - | (((unsigned long long) res1) << 16) - | res0; -} - - -static inline long long _mpy2ir(unsigned int src1, unsigned int src2) -{ - if ((src1 == 0x8000) && (src2 == 0x80000000)) { - _overflow = 1; - return 0; - } - else { - short ls1 = src1 & 0xffff; - short hs1 = src1 >> 16; - unsigned long long hi = (((long long) hs1) * (int) src2 + (1 << 14)) >> 15; - unsigned long long lo = ((((long long) ls1) * (int) src2 + (1 << 14)) >> 15) & 0xFFFFFFFF; - return (hi << 32) | lo; - } -} - - -static inline long long _mpy2ll (int src1, int src2) { - short ls1 = src1 & 0xffff; - short hs1 = src1 >> 16; - short ls2 = src2 & 0xffff; - short hs2 = src2 >> 16; - - unsigned long long hi = hs1 * hs2; - unsigned long long lo = (ls1 * ls2) & 0xFFFFFFFF; - - return (hi << 32) | lo; - -} - - -static inline int _mpy32(int src1, int src2) -{ - return src1 * src2; -} - - -static inline long long _mpy32ll(int src1, int src2) -{ - return ((long long) src1) * src2; -} - -static inline long long _mpy32su(int src1, unsigned int src2) -{ - return ((long long) src1) * ((int) src2); -} - -static inline long long _mpy32u(unsigned int src1, unsigned int src2) -{ - return ((long long) ((int) src1)) * ((long long) ((int) src2)); -} - -static inline long long _mpy32us(unsigned int src1, int src2) -{ - return ((int) src1) * ((long long) src2); -} - -static inline int _mvd (int src2) -{ - return src2; -} - - -static inline unsigned int _norm(int src2) -{ - return norm_shift_amt_U_and_non_U(1,src2); -} - - -static inline unsigned int _pack2 (unsigned int src1, unsigned int src2) { - short s1[2], s2[2], r[2]; - int result; - *((int*)s1) = src1; - *((int*)s2) = src2; - r[0] = s2[0]; - r[1] = s1[0]; - result = *(int*)r; - return result; -} - - -static inline int _packh2 (unsigned int src1, unsigned int src2) { - unsigned v0 = src1 & 0xffff0000; - unsigned v1 = src2 >> 16; - unsigned v = v0|v1; - return v; - -} - -static inline unsigned int _packh4 (unsigned int src1, unsigned int src2) { - unsigned v3 = (src1 >> 24) & 0xff; - unsigned v2 = (src1 >> 8) & 0xff; - unsigned v1 = (src2 >> 24) & 0xff; - unsigned v0 = (src2 >> 8) & 0xff; - unsigned v = (v3<<24) | (v2<<16) | (v1 << 8) | v0; - return v; -} - -static inline unsigned int _packhl2 (unsigned int src1, unsigned int src2) { - unsigned v0 = src1 & 
0xffff0000; - unsigned v1 = src2 & 0x0000ffff; - unsigned v = v0|v1; - return v; -} - -static inline unsigned int _packlh2 (unsigned int src1, unsigned int src2) { - unsigned v0 = src1 << 16; - unsigned v1 = (src2 >> 16) & 0xffff; - unsigned v = v0|v1; - return v; -} - - - - -static inline unsigned int _packl4 (unsigned int src1, unsigned int src2) { - unsigned v3 = (src1 >> 16) & 0xff; - unsigned v2 = (src1) & 0xff; - unsigned v1 = (src2 >> 16) & 0xff; - unsigned v0 = (src2) & 0xff; - unsigned v = (v3<<24) | (v2<<16) | (v1 << 8) | v0; - return v; -} - - - - -static inline unsigned int _rpack2 (unsigned int src1, unsigned int src2) { -int s1 = (int) src1; -int s2 = (int) src2; -s1 = util_shift_right_saturate_n (s1, -1, 32); -s2 = util_shift_right_saturate_n (s2, -1, 32); -return (unsigned int) (s1 & 0xffff0000) | (unsigned int) ((s2 & 0xffff0000) >>16); -} - - -static inline unsigned int _rotl (unsigned int src1, unsigned int src2) -{ - src2 &= 0x1f; - return (src1 << src2) | (src1 >> (32 - src2)); -} - - -static inline int _sadd(int src1, int src2) { -signed long long res; -signed long long maxv, minv; -maxv = (1LL << (32-1)) - 1; -minv = (-1LL << (32-1)); -res = (long long) src1 + (long long) src2; -if (res > maxv) { - res = maxv; - _overflow = 1; - } -else if (res < minv ) { - res = minv; - _overflow = 1; - } -return (int) res; -} - -static inline long long _saddsub(unsigned int src1, unsigned int src2) { -int radd; -signed long long rsub; - -signed long long maxv, minv; -maxv = (1LL << (32-1)) - 1; -minv = (-1LL << (32-1)); - -radd = (int) src1 + (int) src2; - -// saturate on subtract, not add - - -rsub = (long long) ((int) src1) - (long long) ((int) src2); -if (rsub > maxv) { - rsub = maxv; - /* NOTE: TI c6x does NOT set the overflow register even if results saturate */ - /* _overflow = 1; */ - } -else if (rsub < minv ) { - rsub = minv; - /* NOTE: TI c6x does NOT set the overflow register even if results saturate */ - /* _overflow = 1; */ - } - -return (((unsigned long long) radd) << 32) | ( rsub & 0x00000000ffffffff ) ; -} - - - -static inline long long _saddsub2(unsigned int src1, unsigned int src2) { -signed int radd[2]; -signed int rsub[2]; -signed short s1[2], s2[2]; - -signed int maxv, minv; -maxv = (1L << (16-1)) - 1; -minv = (-1L << (16-1)); - -*((int*)s1) = src1; -*((int*)s2) = src2; - -radd[0] = (int) s1[0] + (int) s2[0]; -radd[1] = (int) s1[1] + (int) s2[1]; - -rsub[0] = (int) s1[0] - (int) s2[0]; -rsub[1] = (int) s1[1] - (int) s2[1]; - -if (radd[0] > maxv) { - radd[0] = maxv; - /* NOTE: TI c6x does NOT set the overflow register even if results saturate */ - /* _overflow = 1; */ - } -else if (radd[0] < minv ) { - radd[0] = minv; - /* NOTE: TI c6x does NOT set the overflow register even if results saturate */ - /* _overflow = 1; */ - } - -if (radd[1] > maxv) { - radd[1] = maxv; - /* NOTE: TI c6x does NOT set the overflow register even if results saturate */ - /* _overflow = 1; */ - } -else if (radd[1] < minv ) { - radd[1] = minv; - /* NOTE: TI c6x does NOT set the overflow register even if results saturate */ - /* _overflow = 1; */ - } - - -if (rsub[0] > maxv) { - rsub[0] = maxv; - /* NOTE: TI c6x does NOT set the overflow register even if results saturate */ - /* _overflow = 1; */ - } -else if (rsub[0] < minv ) { - rsub[0] = minv; - /* NOTE: TI c6x does NOT set the overflow register even if results saturate */ - /* _overflow = 1; */ - } - -if (rsub[1] > maxv) { - rsub[1] = maxv; - /* NOTE: TI c6x does NOT set the overflow register even if results saturate */ - /* _overflow 
= 1; */ - } -else if (rsub[1] < minv ) { - rsub[1] = minv; - /* NOTE: TI c6x does NOT set the overflow register even if results saturate */ - /* _overflow = 1; */ - } - - -return ((((unsigned long long) radd[1]) & 0x000000000000ffff) << 48) | - ((((unsigned long long) radd[0]) & 0x000000000000ffff) << 32) | - ((((unsigned long long) rsub[1]) & 0x000000000000ffff) << 16) | - ((((unsigned long long) rsub[0]) & 0x000000000000ffff)); -} - - - -static inline int _sadd2(int src1, int src2) { -signed short s1[2], s2[2]; -signed int r[2], maxv, minv; - -maxv = (1L << (16-1)) - 1; -minv = (-1L << (16-1)); - - -*((int*)s1) = src1; -*((int*)s2) = src2; - -r[0] = (int) s1[0] + (int) s2[0]; -r[1] = (int) s1[1] + (int) s2[1]; - -if (r[0] > maxv) { - r[0] = maxv; - /* NOTE: TI c6x does NOT set the overflow register even if results saturate */ - /* _overflow = 1; */ - } -else if (r[0] < minv ) { - r[0] = minv; - /* NOTE: TI c6x does NOT set the overflow register even if results saturate */ - /* _overflow = 1; */ - } -if (r[1] > maxv) { - r[1] = maxv; - /* NOTE: TI c6x does NOT set the overflow register even if results saturate */ - /* _overflow = 1; */ - } -else if (r[1] < minv ) { - r[1] = minv; - /* NOTE: TI c6x does NOT set the overflow register even if results saturate */ - /* _overflow = 1; */ - } - -return ((r[1] & 0xffff) << 16 ) | (r[0] & 0xffff) ; -} - - -static inline int _saddus2(unsigned int src1, int src2) { -int res0, res1; - unsigned int s1_0 = (src1 & 0xffff); - unsigned int s1_1 = (src1 & 0xffff0000) >> 16; - - short s2_0 = (src2 & 0xffff); - short s2_1 = (src2 & 0xffff0000) >> 16; - - res0 = s1_0 + s2_0; - res1 = s1_1 + s2_1; - - if (res0 >= 0x10000) - res0 = 0xffff; - else if (res0 < 0) - res0 = 0; - - if (res1 >= 0x10000) - res1 = 0xffff; - else if (res1 < 0) - res1 = 0; - - return (res1 << 16) | res0; -} - - -static inline unsigned int _saddu4(unsigned int src1, unsigned int src2) { -unsigned int res0, res1, res2, res3; - unsigned int s1_0 = (src1 & 0xff); - unsigned int s1_1 = (src1 & 0xff00) >> 8; - unsigned int s1_2 = (src1 & 0xff0000) >> 16; - unsigned int s1_3 = (src1 & 0xff000000) >> 24; - - unsigned int s2_0 = (src2 & 0xff); - unsigned int s2_1 = (src2 & 0xff00) >> 8; - unsigned int s2_2 = (src2 & 0xff0000) >> 16; - unsigned int s2_3 = (src2 & 0xff000000) >> 24; - - res0 = s1_0 + s2_0; - res1 = s1_1 + s2_1; - res2 = s1_2 + s2_2; - res3 = s1_3 + s2_3; - - if (res0 >= 0x100) - res0 = 0xff; - - if (res1 >= 0x100) - res1 = 0xff; - - if (res2 >= 0x100) - res2 = 0xff; - - if (res3 >= 0x100) - res3 = 0xff; - - return (res3 << 24) | (res2 << 16) | (res1 << 8) | res0; - -} - - - -static inline int _sat(C6X_COMPAT_LONG40 src2) -{ - long long maxv = (1LL << (32-1)) - 1; - long long minv = (-1LL << (32-1)); - - int hi = (int)(src2 >> 32); - int lo = (int)src2; - long long temp = (unsigned long long)(unsigned)lo | (unsigned long long)hi << 32; - temp = orig_L40_set(temp); - - if (temp > maxv) { - temp = maxv; - _overflow = 1; - } - else if (temp < minv) { - temp = minv; - _overflow = 1; - } - return (int) temp; -} - -static inline unsigned int _set(unsigned int src2, unsigned int csta, unsigned int cstb) -{ - csta &= 0x1f; - cstb &= 0x1f; - if (csta > cstb) - return src2; - else { - unsigned int mask = (((1 << (cstb - csta)) << 1) - 1) << csta; - return src2 | mask; - } -} - -static inline unsigned int _setr(unsigned int src2, int src1) -{ - unsigned int csta = (src1 >> 5) & 0x1f; - unsigned int cstb = src1 & 0x1f; - if (csta > cstb) - return src2; - else { - unsigned int mask = (((1 << 
(cstb - csta)) << 1) - 1) << csta; - return src2 | mask; - } -} - - -static inline unsigned int _shfl (unsigned int src2) -{ - unsigned short lo = src2; - unsigned short hi = src2 >> 16; - unsigned int result = 0; - int i; - for (i = 0; i < 32; i+= 2) { - result >>= 1; - result |= (lo & 0x1) << 31; - lo >>= 1; - result >>= 1; - result |= (hi & 0x1) << 31; - hi >>= 1; - } - return result; -} - -static inline long long _shfl3 (unsigned int src1, unsigned int src2) -{ - unsigned short lo = src2; - unsigned short hi = src1 >> 16; - unsigned short mid = src1; - unsigned long long result = 0; - int i; - for (i = 0; i < 32; i+= 2) { - result >>= 1; - result |= ((unsigned long long) (lo & 0x1)) << 47; - lo >>= 1; - result >>= 1; - result |= ((unsigned long long) (mid & 0x1)) << 47; - mid >>= 1; - result >>= 1; - result |= ((unsigned long long) (hi & 0x1)) << 47; - hi >>= 1; - } - return result; -} - - - -static inline unsigned int _shlmb (unsigned int src1, unsigned int src2) -{ - return (src2 << 8) | (src1 >> 24); -} - -static inline unsigned int _shrmb (unsigned int src1, unsigned int src2) -{ - return (src2 >> 8) | (src1 << 24); -} - - -static inline unsigned int _shru2 (unsigned int src1, unsigned int src2) { -unsigned short hs1 = src1 >> 16; - unsigned short ls1 = src1 & 0xFFFF; - hs1 >>= src2; - ls1 >>= src2; - return (hs1 << 16) | ls1; -} - - -static inline int _shr2 (int src1, unsigned int src2) { - short s1[2], result[2]; - *((int*)s1) = src1; - src2 = src2 & 31; - result[0] = (int)s1[0] >> src2; - result[1] = (int)s1[1] >> src2; - - return *(int*)result; -} - - -static inline int _smpy (int src1, int src2) { -unsigned long long result; -result = (((short) src1 * (short) src2) << 1); - -if ((result & 0xffffffff) == 0x80000000){ - result = 0x7fffffff; - _overflow = 1; - } -return (int) (result); -} - -static inline int _smpyh (int src1, int src2) { -unsigned long long result; -result = ((short) (src1 >> 16) * (short) (src2 >> 16)) << 1; -if ((result & 0xffffffff) == 0x80000000){ - result = 0x7fffffff; - _overflow = 1; - } -return (int) (result); -} - -static inline int _smpyhl (int src1, int src2) { -unsigned long long result; -result = ((short) (src1 >> 16) * (short) (src2)) << 1; -if ((result & 0xffffffff) == 0x80000000){ - result = 0x7fffffff; - _overflow = 1; - } -return (int) (result); -} - -static inline int _smpylh (int src1, int src2) { -unsigned long long result; -result = ((short) (src1) * (short) (src2 >> 16)) << 1; -if ((result & 0xffffffff) == 0x80000000){ - result = 0x7fffffff; - _overflow = 1; - } -return (int) (result); -} - -static inline long long _smpy2ll (int src1, int src2) { - short ls1 = src1 & 0XFFFF; - short hs1 = src1 >> 16; - short ls2 = src2 & 0XFFFF; - short hs2 = src2 >> 16; - - unsigned long long hi = (hs1 * hs2) << 1; - unsigned long long lo = ((ls1 * ls2) << 1) & 0xFFFFFFFF; - if ((hi & 0xffffffff) == 0x80000000){ - hi = 0x7fffffff; - _overflow = 1; - } - - if ((lo & 0xffffffff) == 0x80000000){ - lo = 0x7fffffff; - _overflow = 1; - } - - return (hi << 32) | lo; -} - - - - -static inline int _smpy32(int src1, int src2) -{ - long long res = (long long) src1 * src2; - res <<= 1; - res >>= 32; - return res; -} - -static inline unsigned char TEN_satu8 (short src) -{ - if (src > 0xff) - return 0xff; - else if (src < 0) - return 0; - else - return src; -} - -static inline int _spack2 (int src1, int src2) { -short s1 = (short) util_saturate_n_no_state(src1,16); -short s2 = (short) util_saturate_n_no_state(src2,16); -return ( (unsigned int) s1 << 16) | (((int) s2) & 
0xFFFF); -} - - -static inline unsigned int _spacku4 (int src1, int src2) { - short lolo = src2; - short lohi = src2 >> 16; - short hilo = src1; - short hihi = src1 >> 16; - - lolo = TEN_satu8(lolo); - lohi = TEN_satu8(lohi); - hilo = TEN_satu8(hilo); - hihi = TEN_satu8(hihi); - - return (((unsigned int) hihi) << 24) | (((unsigned int) hilo) << 16) | (lohi << 8) | lolo; -} - - - -static inline int _sshl (int src1, unsigned int src2) { -short local2 = (short)(src2 & 0x7FFF); -return (int) util_shift_right_saturate_n(src1, -local2, 32); -} - - - - -static inline int _sshvl (int src2, int src1) { - short s1; - if (src1 > 31) - s1 = 31; - else if (src1 < -31) - s1 = -31; - else - s1 = src1; - - return (int) util_shift_right_saturate_n(src2, -s1, 32); -} - - - - - -static inline int _sshvr (int src2, int src1) { -short s1; - if (src1 > 31) - s1 = 31; - else if (src1 < -31) - s1 = -31; - else - s1 = src1; - return (int) util_shift_right_saturate_n(src2, s1, 32); -} - - - - -static inline int _ssub(int src1, int src2) { -signed long long res; -signed long long maxv, minv; -maxv = (1LL << (32-1)) - 1; -minv = (-1LL << (32-1)); -res = (long long) src1 - (long long) src2; -if (res > maxv) { - res = maxv; - _overflow = 1; - } -else if (res < minv ) { - res = minv; - _overflow = 1; - } -return (int) res; -} - -static inline int _ssub2(int src1, int src2) { -signed short s1[2], s2[2]; -signed int r[2], maxv, minv; - -maxv = (1L << (16-1)) - 1; -minv = (-1L << (16-1)); - - -*((int*)s1) = src1; -*((int*)s2) = src2; - -r[0] = (int) s1[0] - (int) s2[0]; -r[1] = (int) s1[1] - (int) s2[1]; - -if (r[0] > maxv) { - r[0] = maxv; - /* NOTE: TI c6x does NOT set the overflow register even if results saturate */ - /* _overflow = 1; */ - } -else if (r[0] < minv ) { - r[0] = minv; - /* NOTE: TI c6x does NOT set the overflow register even if results saturate */ - /* _overflow = 1; */ - } -if (r[1] > maxv) { - r[1] = maxv; - /* NOTE: TI c6x does NOT set the overflow register even if results saturate */ - /* _overflow = 1; */ - } -else if (r[1] < minv ) { - r[1] = minv; - /* NOTE: TI c6x does NOT set the overflow register even if results saturate */ - /* _overflow = 1; */ - } - -return ((r[1] & 0xffff) << 16 ) | (r[0] & 0xffff) ; -} - - -static inline int _subabs4 (int src1, int src2) { - int res0, res1, res2, res3; - unsigned int s1_0 = (src1 & 0xff); - unsigned int s1_1 = (src1 & 0xff00) >> 8; - unsigned int s1_2 = (src1 & 0xff0000) >> 16; - unsigned int s1_3 = (src1 & 0xff000000) >> 24; - - unsigned int s2_0 = (src2 & 0xff); - unsigned int s2_1 = (src2 & 0xff00) >> 8; - unsigned int s2_2 = (src2 & 0xff0000) >> 16; - unsigned int s2_3 = (src2 & 0xff000000) >> 24; - - res0 = s1_0 - s2_0; - res1 = s1_1 - s2_1; - res2 = s1_2 - s2_2; - res3 = s1_3 - s2_3; - - if (res0 < 0) - res0 = -res0; - - if (res1 < 0) - res1 = -res1; - - if (res2 < 0) - res2 = -res2; - - if (res3 < 0) - res3 = -res3; - - return (res3 << 24) | (res2 << 16) | (res1 << 8) | res0; -} - - -static inline unsigned int _subc (unsigned int src1, unsigned int src2) -{ - if ( src1 >= src2) - return ((src1 - src2) << 1) + 1; - else - return src1 << 1; -} - - - -static inline int _sub2(int src1, int src2) { - short s1[2], s2[2], r[2]; - int result; - *((int*)s1) = src1; - *((int*)s2) = src2; - r[0] = s1[0] - s2[0]; - r[1] = s1[1] - s2[1]; - result = *(int*)r; - return result; -} - - -static inline int _sub4(int src1, int src2) { - char c1[4], c2[4], r[4]; - int result; - *((int*)c1) = src1; - *((int*)c2) = src2; - r[0] = c1[0] - c2[0]; - r[1] = c1[1] - c2[1]; - 
r[2] = c1[2] - c2[2]; - r[3] = c1[3] - c2[3]; - result = *(int*)r; - return result; -} - - -static inline int _swap4 (unsigned int src1) { - unsigned char v0 = src1; - unsigned char v1 = src1 >> 8; - unsigned char v2 = src1 >> 16; - unsigned char v3 = src1 >> 24; - unsigned v = v0<<8 | v1 | v2<<24 | v3<<16; - return v; -} - -static inline unsigned int _unpkhu4 (unsigned int src1) { - unsigned v0 = src1>>24; - unsigned v1 = (src1>>16) & 0xff; - return (v0<<16) | v1; -} - -static inline unsigned int _unpklu4 (unsigned int src1) { - unsigned v1 = (src1>>8) & 0xff; - unsigned v0 = (src1) & 0xff; - return (v1<<16) | v0; -} - - - - -static inline unsigned int _xpnd2 (unsigned int src1) { - int v0 = (src1 & 0x1) ? 0x0000ffff : 0x00000000; - int v1 = (src1 & 0x2) ? 0xffff0000 : 0x00000000; - return v0|v1; -} - -static inline unsigned int _xpnd4 (unsigned int src1) { - int v0 = (src1 & 0x1) ? 0x000000ff : 0x00000000; - int v1 = (src1 & 0x2) ? 0x0000ff00 : 0x00000000; - int v2 = (src1 & 0x4) ? 0x00ff0000 : 0x00000000; - int v3 = (src1 & 0x8) ? 0xff000000 : 0x00000000; - int r = v0|v1|v2|v3; - return r; -} - - - -// end of Implemented in alphabetical order - - -#endif /* __C6X_COMPAT__H */ +/* + * Copyright (c) 2006-2010 Tensilica Inc. ALL RIGHTS RESERVED. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef __C6X_COMPAT__H +#define __C6X_COMPAT__H + +/* Unimplemented functions _gmpy, _gmpy4, _xormpy, _lssub, _cmpy, _cmpyr, + _cmpyr1, _ddotpl2r, _ddotph2r */ + + +typedef long long C6X_COMPAT_LONG40; + + +#define _memd8(a) (*((double*)(a))) +#define _memd8_const(a) (*((const double*)(a))) + +#define _amemd8(a) (*((double*)(a))) +#define _amemd8_const(a) (*((const double*)(a))) + +#define _mem8(a) (*((unsigned long long*)(a))) +#define _mem8_const(a) (*((const unsigned long long*)(a))) + +#define _mem4(a) (*((unsigned*)(a))) +#define _mem4_const(a) (*((const unsigned*)(a))) +#define _amem4_const(a) (*((const unsigned*)(a))) + +/* NOTE: To emulate a C6X properly you should define global variables + for your Xtensa with these names. Some of the emulation routines + will set these values. */ + +extern int _carry; +extern int _overflow; + +// Utility routines + + +#define TESTBIT(x,n) (((x) >> (n)) & 1) + +#define NSA_BITS 32 + +static inline unsigned int norm_shift_amt_U_and_non_U(int is_signed, int inp) { +int j=0, k=0; +int x=inp; +if (is_signed) { + /* Invert signed val if negative */ + x= TESTBIT(x,(NSA_BITS-1))? 
~x: x; + x= (x&1)|(x<<1); /* Shift up to return count-1 */ + if (x ==0) + return NSA_BITS-1; + } + if (x ==0) + return NSA_BITS; + /* Now count leading zeros */ + for (j=0, k=NSA_BITS-1; k>=0; j++, k--) { + if (TESTBIT(x,k)) + return j; + } + return NSA_BITS; +} + + + +static inline long long +orig_L40_set( long long L40_var1) { + long long L40_var_out; + + L40_var_out = L40_var1 & 0x000000ffffffffffLL; + + if( L40_var1 & 0x8000000000LL) + L40_var_out = L40_var_out | 0xffffff0000000000LL; + + return( L40_var_out); +} + + + +static inline signed long long +util_saturate_n_no_state(signed long long t, int n) +{ + signed long long maxv, minv; + maxv = (1LL << (n-1)) - 1; + minv = (-1LL << (n-1)); + if (t > maxv) { + t = maxv; + } else if (t < minv) { + t = minv; + } + return t; +} + + +static inline signed long long +util_saturate_n_sgn(signed long long t, int n) +{ + signed long long result; + signed long long maxv, minv; + maxv = (1LL << (n-1)) - 1; + minv = (-1LL << (n-1)); + if (t > 0) { + result = maxv; + _overflow = 1; + } else if (t < 0) { + result = minv; + _overflow = 1; + } else { + result = 0; + } + return result; +} + + + + +/* well-behaved signed shift right (left on negative) with + saturation */ +static inline signed long long +util_shift_right_saturate_n(signed long long t, int shval, int n) +{ + /* n should be <= 62 */ + long long result; + + signed long long mask; + int actual_shift = shval; + long long shft = actual_shift > 0 ? actual_shift : -actual_shift; + + if (t == 0 || actual_shift == 0) + return t; + + if (actual_shift >= n) { + return (t < 0) ? -1 : 0; + } + if (actual_shift <= -n) { + return util_saturate_n_sgn(t, n); + } + if (actual_shift > 0) { + return t >> actual_shift; + } + /* actual_shift < 0. Check for saturation after shift. 
*/ + mask = (-1LL << (n-shft-1)); + if (t > 0 && ((mask & t) != 0)) { + return util_saturate_n_sgn(t, n); + } + if (t < 0 && ((mask & t) != mask)) { + return util_saturate_n_sgn(t, n); + } + result = t << shft; + + return result; +} + + +/* Implemented c6x standard C compatibility functions (alphabetical + order) */ + + +static inline int _abs(int src1) { + if ((unsigned) src1 == (unsigned) 0x80000000) { + return 0x7fffffff; + } + return abs(src1); +} + + +static inline int _abs2(int src1) { + short s1[2],r[2]; + int result; + *((int*)s1) = src1; + if ((unsigned short) s1[1] == (unsigned short) 0x8000) r[1] = 0x7fff; + else r[1] = abs(s1[1]); + if ((unsigned short) s1[0] == (unsigned short) 0x8000) r[0] = 0x7fff; + else r[0] = abs(s1[0]); + result = *(int*)r; + return result; + } + + + + +static inline int _add2(int src1, int src2) { + short s1[2], s2[2], r[2]; + int result; + *((int*)s1) = src1; + *((int*)s2) = src2; + r[0] = s1[0] + s2[0]; + r[1] = s1[1] + s2[1]; + result = *(int*)r; + return result; +} + +static inline int _add4(int src1, int src2) { + char c1[4], c2[4], r[4]; + int result; + *((int*)c1) = src1; + *((int*)c2) = src2; + r[0] = c1[0] + c2[0]; + r[1] = c1[1] + c2[1]; + r[2] = c1[2] + c2[2]; + r[3] = c1[3] + c2[3]; + result = *(int*)r; + return result; +} + + + +static inline long long _addsub(unsigned int src1, unsigned int src2) +{ + + int res_lo; + int res_hi; + + res_hi = src1+src2; + res_lo = src1-src2; + return (((unsigned long long) res_hi) << 32) | ((unsigned int) res_lo) ; +} + + +static inline long long _addsub2(unsigned int src1, unsigned int src2) +{ + short s1[2], s2[2], ra[2], rs[2]; + int res_lo; + int res_hi; + + *((int*)s1) = src1; + *((int*)s2) = src2; + ra[0] = s1[0] + s2[0]; + ra[1] = s1[1] + s2[1]; + rs[0] = s1[0] - s2[0]; + rs[1] = s1[1] - s2[1]; + + res_hi = *(int*)ra; + res_lo = *(int*)rs; + return (((unsigned long long) res_hi) << 32) | ((unsigned int) res_lo) ; +} + + +static inline int _avg2(int src1, int src2) { + int low = (((int)1 + (short) src1 + (short) src2) >> 1) & 0XFFFF; + int high1 = src1 >> 16; + int high2 = src2 >> 16; + int high = ((high1 + high2 + 1) >> 1)<< 16; + return high | low; +} + + + +static inline unsigned int _avgu4(unsigned int src1, unsigned int src2) { +unsigned int res0 = ((src1 & 0xFF) + (src2 & 0xFF) + 1) >> 1; + unsigned int res1 = (((src1 & 0xFF00) >> 8) + ((src2 & 0xFF00) >> 8) + 1) >> 1; + unsigned int res2 = (((src1 & 0xFF0000) >> 16) + ((src2 & 0xFF0000) >> 16) + 1) >> 1; + unsigned int res3 = (((src1 & 0xFF000000) >> 24) + ((src2 & 0xFF000000) >> 24) + 1) >> 1; + return (res3 << 24) | (res2 << 16) | (res1 << 8) | res0; +} + + +static inline int TEN_popc (unsigned char b) +{ + int i, result = 0; + for (i = 0; i < 8; i++){ + if (b & 0x1) + result++; + b >>= 1; + } + return result; +} + +static inline unsigned int _bitc4(unsigned int src1) +{ + unsigned int res0 = TEN_popc(src1 & 0xFF); + unsigned int res1 = TEN_popc((src1 & 0xFF00) >> 8); + unsigned int res2 = TEN_popc((src1 & 0xFF0000) >> 16); + unsigned int res3 = TEN_popc((src1 & 0xFF000000) >> 24); + return (res3 << 24) | (res2 << 16) | (res1 << 8) | res0; +} + +static inline unsigned int _bitr(unsigned int src) { + int i; + unsigned r = 0; + for (i = 0; i< 32; ++i) { + r = r | (((src >> i) & 1)<<(31-i)); + } + return r; +} + + +static inline unsigned int _clr(unsigned int src2, int csta, int cstb) +{ + csta &= 0x1f; + cstb &= 0x1f; + if (csta > cstb) + return src2; + else { + unsigned int mask = (((1 << (cstb - csta)) << 1) - 1) << csta; + return src2 & 
(~mask); + } +} + +static inline unsigned int _clrr(unsigned int src2, int src1) +{ + unsigned int csta = (src1 >> 5) & 0x1f; + unsigned int cstb = src1 & 0x1f; + if (csta > cstb) + return src2; + else { + unsigned int mask = (((1 << (cstb - csta)) << 1) - 1) << csta; + return src2 & (~mask); + } +} + + + + +static inline int _cmpeq2(int src1, int src2) { + short s1[2], s2[2]; + int r0, r1; + int result; + *((int*)s1) = src1; + *((int*)s2) = src2; + r0 = s1[0] == s2[0] ? 1 : 0; + r1 = s1[1] == s2[1] ? 1 : 0; + result = (r1 << 1) | r0; + return result; +} + +static inline int _cmpeq4(int src1, int src2) { + char s1[4], s2[4]; + int r0, r1, r2, r3; + int result; + *((int*)s1) = src1; + *((int*)s2) = src2; + r0 = s1[0] == s2[0] ? 1 : 0; + r1 = s1[1] == s2[1] ? 1 : 0; + r2 = s1[2] == s2[2] ? 1 : 0; + r3 = s1[3] == s2[3] ? 1 : 0; + result = (r3 << 3) | (r2 << 2) | (r1 << 1) | r0; + return result; +} + + +static inline int _cmpgt2(int src1, int src2) { + short s1[2], s2[2]; + int r1, r0; + int result; + *((int*)s1) = src1; + *((int*)s2) = src2; + r0 = s1[0] > s2[0] ? 1 : 0; + r1 = s1[1] > s2[1] ? 1 : 0; + result = (r1<<1) | r0; + return result; +} + + +static inline unsigned int _cmpgtu4(unsigned int src1, unsigned int src2) { + unsigned int s1_0 = (src1 & 0xFF); + unsigned int s1_1 = (src1 & 0xFF00) >> 8; + unsigned int s1_2 = (src1 & 0xFF0000) >> 16; + unsigned int s1_3 = (src1 & 0xFF000000) >> 24; + + unsigned int s2_0 = (src2 & 0xFF); + unsigned int s2_1 = (src2 & 0xFF00) >> 8; + unsigned int s2_2 = (src2 & 0xFF0000) >> 16; + unsigned int s2_3 = (src2 & 0xFF000000) >> 24; + + unsigned int result = 0; + + if (s1_0 > s2_0) + result |= 0x1; + + if (s1_1 > s2_1) + result |= 0x2; + + if (s1_2 > s2_2) + result |= 0x4; + + if (s1_3 > s2_3) + result |= 0x8; + + return result; +} + + + + +static inline long long _ddotp4(unsigned int src1, unsigned int src2) { + unsigned int res0, res1; + short s1_0 = (src1 & 0xffff); + short s1_1 = (src1 & 0xfff0000) >> 16; + + unsigned short s2_0 = (src2 & 0xff); + unsigned short s2_1 = (src2 & 0xff00) >> 8; + unsigned short s2_2 = (src2 & 0xff0000) >> 16; + unsigned short s2_3 = (src2 & 0xff000000) >> 24; + + res0 = ((int)s1_0) * s2_0 + ((int)s1_1) * s2_1; + res1 = ((int)s1_0) * s2_2 + ((int)s1_1) * s2_3; + + return (res1 << 16) | res0; +} + + +static inline long long _ddotph2(long long src1_o_src1_e, unsigned int src2) +{ + + unsigned int src1_o = src1_o_src1_e >> 32; + unsigned int src1_e = src1_o_src1_e & 0xFFFFFFFF; + short ls1_o = src1_o & 0XFFFF; + short hs1_o = src1_o >> 16; +// short ls1_e = src1_e & 0XFFFF; + short hs1_e = src1_e >> 16; + short ls2 = src2 & 0XFFFF; + short hs2 = src2 >> 16; + + unsigned long long res_hi = ls2 * ls1_o + hs2 * hs1_o; + unsigned int res_lo = ls1_o * hs2 + hs1_e * ls2; + return (res_hi << 32) | res_lo; +} + + +static inline long long _ddotpl2(long long src1_o_src1_e, unsigned int src2) +{ + unsigned int src1_o = src1_o_src1_e >> 32; + unsigned int src1_e = src1_o_src1_e & 0xFFFFFFFF; + short ls1_o = src1_o & 0XFFFF; +// short hs1_o = src1_o >> 16; + short ls1_e = src1_e & 0XFFFF; + short hs1_e = src1_e >> 16; + short ls2 = src2 & 0XFFFF; + short hs2 = src2 >> 16; + + unsigned long long res_hi = ls2 * hs1_e + hs2 * ls1_o; + unsigned res_lo = hs1_e * hs2 + ls1_e * ls2; + return (res_hi << 32) | res_lo; +} + + +static inline unsigned int _deal(unsigned int src) +{ + int i; + unsigned short lo = 0, hi = 0; + for (i = 0; i < 32; i+= 2) { + lo >>= 1; + lo |= (src & 0x1) << 15; + src >>= 1; + hi >>= 1; + hi |= (src & 0x1) << 15; + 
src >>= 1; + } + return (hi << 16) | lo; +} + + +static inline long long _dmv(unsigned int src1, unsigned int src2) +{ + return (((long long) src1) << 32) | src2; +} + + +static inline int _dotpn2(int src1, int src2) { +short int s1_h = src1>>16; + short int s1_l = src1; + short int s2_h = src2>>16; + short int s2_l = src2; + return s1_h * s2_h - s1_l * s2_l; +} + + +static inline int _dotp2(int src1, int src2) { + short int s1_h = src1>>16; + short int s1_l = src1; + short int s2_h = src2>>16; + short int s2_l = src2; + return s1_h * s2_h + s1_l * s2_l; +} + + + +static inline int _dotpnrsu2(int src1, unsigned int src2) +{ + short ls1 = src1 & 0XFFFF; + unsigned short ls2 = src2 & 0XFFFF; + short hs1 = src1 >> 16; + unsigned short hs2 = src2 >> 16; + + int result = (((long long) (int)(hs1 * hs2)) - ((long long) (int)(ls1 * ls2)) + (1 << 15)) >> 16; + return result; +} + + + +static inline int _dotprsu2(int src1, unsigned int src2) { + short ls1 = src1 & 0XFFFF; + unsigned short ls2 = (src2 & 0XFFFF); + short hs1 = src1 >> 16; + unsigned short hs2 = (src2 >> 16); + + int result = (((long long) (int) (ls1 * ls2)) + ((long long) (int) (hs1 * hs2)) + (1LL << 15)) >> 16; + return result; +} + + + + + + + +static inline int _dotpsu4(int src1, unsigned int src2) { + int result; + signed char s1_0 = (src1 & 0xff); + signed char s1_1 = (src1 & 0xff00) >> 8; + signed char s1_2 = (src1 & 0xff0000) >> 16; + signed char s1_3 = (src1 & 0xff000000) >> 24; + + unsigned int s2_0 = (src2 & 0xff); + unsigned int s2_1 = (src2 & 0xff00) >> 8; + unsigned int s2_2 = (src2 & 0xff0000) >> 16; + unsigned int s2_3 = (src2 & 0xff000000) >> 24; + + result = s1_0 * s2_0 + s1_1 * s2_1 + s1_2 * s2_2 + s1_3 * s2_3; + return result; +} + + +static inline unsigned int _dotpu4(unsigned int src1, unsigned int src2) { + unsigned char v1_0 = src1 & 0xff; + unsigned char v1_1 = (src1>>8) & 0xff; + unsigned char v1_2 = (src1>>16) & 0xff; + unsigned char v1_3 = (src1>>24) & 0xff; + + unsigned char v2_0 = src2 & 0xff; + unsigned char v2_1 = (src2>>8) & 0xff; + unsigned char v2_2 = (src2>>16) & 0xff; + unsigned char v2_3 = (src2>>24) & 0xff; + + unsigned v = v1_0 * v2_0 + v1_1 * v2_1 + v1_2 * v2_2 + v1_3 * v2_3; + return v; +} + + +static inline long long _dpack2(unsigned int src1, unsigned int src2){ +unsigned short s1[2], s2[2]; +*((int*)s1) = src1; +*((int*)s2) = src2; +return ((unsigned long long) s1[1] << 48) | ((unsigned long long) s2[1] << 32) | ((unsigned long long) s1[0] << 16) | ((unsigned long long) s2[0]); +} + + +static inline long long _dpackx2(unsigned int src1, unsigned int src2){ +unsigned short s1[2], s2[2]; +*((int*)s1) = src1; +*((int*)s2) = src2; +return ((unsigned long long) s2[0] << 48) | ((unsigned long long) s1[1] << 32) | ((unsigned long long) s1[0] << 16) | ((unsigned long long) s2[1]); +} + +static inline int _ext(int src2, unsigned int csta, unsigned int cstb) +{ + return (src2 << csta) >> cstb; +} + +static inline int _extr(int src2, int src1) +{ + unsigned int csta = (src1 >> 5) & 0x1f; + unsigned int cstb = src1 & 0x1f; + return (src2 << csta) >> cstb; +} + +static inline unsigned int _extu(unsigned int src2, unsigned int csta, unsigned int cstb) +{ + return (src2 << csta) >> cstb; +} + +static inline unsigned int _extur(unsigned int src2, int src1) +{ + unsigned int csta = (src1 >> 5) & 0x1f; + unsigned int cstb = src1 & 0x1f; + return (src2 << csta) >> cstb; +} + + +static inline unsigned long long _hi(double src) { + unsigned long long v; + *(double*)&v = src; + return v>>32; +} + +static inline 
unsigned int _hill (long long src) +{ + return (unsigned int) (src >> 32); +} + + + +static inline double _itod(unsigned hi, unsigned lo) { + double v; + unsigned long long ll = ((((unsigned long long)(hi))<<32) | (unsigned long long)((unsigned)lo)); + *((unsigned long long *)&v) = ll; + return v; +} + + +static inline long long _itoll(unsigned int src2, unsigned int src1) +{ + return (((long long) src2) << 32) | src1; +} + + +static inline C6X_COMPAT_LONG40 _labs(C6X_COMPAT_LONG40 src2) +{ + long long maxv = (1LL << (40 -1)) - 1; + long long minv = (-1LL << (40 - 1)); + C6X_COMPAT_LONG40 lres = orig_L40_set(src2); + + lres = lres < 0 ? -lres : lres; + if (lres > maxv) lres = maxv; + else if (lres < minv) lres = minv; + + return lres; +} + + +static inline C6X_COMPAT_LONG40 _ldotp2(int src1, int src2) { +return (C6X_COMPAT_LONG40) _dotp2(src1, src2); +} + + +static inline unsigned int _lmbd(unsigned int src1, unsigned int src2) +{ + return norm_shift_amt_U_and_non_U(0,(((int) (src1 << 31)) >> 31) ^ (~src2)); +} + + +static inline unsigned int _lnorm(C6X_COMPAT_LONG40 src2) { +if (src2 == 0) + return 39; + else { + int hi = (int)(src2 >> 32); + int lo = (int)src2; + + + long long temp = (unsigned long long)(unsigned)lo | (unsigned long long)hi << 32; + temp = orig_L40_set(temp); + + if (temp == 0) return 0; + int cnt = 0; + while (((temp >> 39) & 1) == ((temp >> 38) & 1)) { + temp <<= 1; + cnt++; + } + return cnt; + } +} + + +static inline unsigned long long _lo(double src) { + unsigned long long v; + *(double*)&v = src; + return v; +} + + +static inline unsigned int _loll (long long src) +{ + return (unsigned int) src; +} + + +static inline C6X_COMPAT_LONG40 _lsadd(int src1, C6X_COMPAT_LONG40 src2) +{ + long long maxv = (1LL << (40 -1)) - 1; + long long minv = (-1LL << (40 - 1)); + int hi = (int)(src2 >> 32); + int lo = (int)src2; + long long src2_int = (unsigned long long)(unsigned)lo | (unsigned long long)hi << 32; + + + long long src2_int2 = orig_L40_set(src2_int); + + long long res = src1 + src2_int2; + + if (res > maxv) { + res = maxv; + _overflow = 1; + } + else if (res < minv) { + res = minv; + _overflow = 1; + } + + long long res2 = orig_L40_set(res); + + res2 = (signed char)(res2 >> 32); + + C6X_COMPAT_LONG40 lres = (((C6X_COMPAT_LONG40) res2) << 32) | ((unsigned int)res); + return lres; +} + + + +static inline int _max2 (int src1, int src2) { + short s1[2], s2[2], r[2]; + int result; + *((int*)s1) = src1; + *((int*)s2) = src2; + r[0] = s1[0] > s2[0] ? s1[0] : s2[0]; + r[1] = s1[1] > s2[1] ? s1[1] : s2[1]; + result = *(int*)r; + return result; +} + + + + + + +static inline unsigned int _maxu4(unsigned int src1, unsigned int src2) { + unsigned int res0, res1, res2, res3; + unsigned int s1_0 = res0 = (src1 & 0xFF); + unsigned int s1_1 = res1 = (src1 & 0xFF00) >> 8; + unsigned int s1_2 = res2 = (src1 & 0xFF0000) >> 16; + unsigned int s1_3 = res3 = (src1 & 0xFF000000) >> 24; + + unsigned int s2_0 = (src2 & 0xFF); + unsigned int s2_1 = (src2 & 0xFF00) >> 8; + unsigned int s2_2 = (src2 & 0xFF0000) >> 16; + unsigned int s2_3 = (src2 & 0xFF000000) >> 24; + +// unsigned int res = 0; + + if (s1_0 < s2_0) + res0 = s2_0; + + if (s1_1 < s2_1) + res1 = s2_1; + + if (s1_2 < s2_2) + res2 = s2_2; + + if (s1_3 < s2_3) + res3 = s2_3; + + return (res3 << 24) | (res2 << 16) | (res1 << 8) | res0; + + +} + +static inline int _min2(int src1, int src2) { + short s1[2], s2[2], r[2]; + int result; + *((int*)s1) = src1; + *((int*)s2) = src2; + r[0] = s1[0] < s2[0] ? s1[0] : s2[0]; + r[1] = s1[1] < s2[1] ? 
s1[1] : s2[1]; + result = *(int*)r; + return result; +} + + +static inline unsigned int _minu4(unsigned int src1, unsigned int src2) { +unsigned int res0, res1, res2, res3; + unsigned int s1_0 = res0 = (src1 & 0xFF); + unsigned int s1_1 = res1 = (src1 & 0xFF00) >> 8; + unsigned int s1_2 = res2 = (src1 & 0xFF0000) >> 16; + unsigned int s1_3 = res3 = (src1 & 0xFF000000) >> 24; + + unsigned int s2_0 = (src2 & 0xFF); + unsigned int s2_1 = (src2 & 0xFF00) >> 8; + unsigned int s2_2 = (src2 & 0xFF0000) >> 16; + unsigned int s2_3 = (src2 & 0xFF000000) >> 24; + +// unsigned int res = 0; + + if (s1_0 > s2_0) + res0 = s2_0; + + if (s1_1 > s2_1) + res1 = s2_1; + + if (s1_2 > s2_2) + res2 = s2_2; + + if (s1_3 > s2_3) + res3 = s2_3; + + return (res3 << 24) | (res2 << 16) | (res1 << 8) | res0; +} + + +static inline int _mpy(int src1, int src2) { +return (short) src1 * (short) src2; +} + + +static inline int _mpyh(int src1, int src2) { +return (short) (src1 >> 16) * (short) (src2 >> 16); +} + + +static inline long long _mpyhill (int src1, int src2) +{ + short s1 = src1 >> 16; + return ((long long) src2) * s1; +} + +static inline int _mpyhir(int src1, int src2) +{ + short s1 = src1 >> 16; + long long result = ((long long) src2) * s1 + (1 << 14); + result >>= 15; + return result; +} + + +static inline int _mpyhl(int src1, int src2) { +return (short) (src1 >> 16) * (short) (src2); +} + +static inline unsigned int _mpyhlu(unsigned int src1, unsigned int src2) { +return (unsigned short) (src1 >> 16) * (unsigned short) (src2); +} + +static inline int _mpyhslu(int src1, unsigned int src2) { +return (short) (src1 >> 16) * (unsigned short) src2; +} + + +static inline int _mpyhsu(int src1, unsigned int src2) { +return (short) (src1 >>16) * (unsigned short) (src2 >>16); +} + + +static inline unsigned int _mpyhu(unsigned int src1, unsigned int src2) { +return (unsigned short) (src1 >>16) * (unsigned short) (src2 >> 16); +} + + +static inline int _mpyhuls(unsigned int src1, int src2) { +return (unsigned short) (src1 >>16) * (signed short) (src2); +} + + +static inline int _mpyhus(unsigned int src1, int src2) { +return (unsigned short) (src1 >> 16) * (short) (src2 >>16); +} + + + +static inline long long _mpyidll (int src1, int src2) +{ + return (long long) src1 * src2; +} + + +static inline int _mpylh(int src1, int src2) { +return (signed short) (src1 & 0xffff) * (signed short) (src2 >> 16); +} + +static inline unsigned int _mpylhu(unsigned int src1, unsigned int src2) { +return (unsigned short) src1 * (unsigned short) (src2 >> 16); +} + + +static inline long long _mpylill (int src1, int src2) +{ + return ((long long) src2) * ((short)src1); +} + + + +static inline int _mpylir(int src1, int src2) +{ + short s1 = src1; + long long result = ((long long) src2) * s1 + (1 << 14); + result >>= 15; + return result; +} + + +static inline int _mpylshu(int src1, unsigned int src2) { +return (short) src1 * (unsigned short) (src2 >> 16); +} + + +static inline int _mpyluhs(unsigned int src1, int src2) { +return (unsigned short) src1 * (short) (src2 >> 16); +} + + + +static inline int _mpysu(int src1, unsigned int src2) { +return (short) src1 * (unsigned short) src2; +} + + + +static inline long long _mpysu4ll (int src1, unsigned int src2) { + unsigned short res0, res1, res2, res3; + signed char s1_0 = (src1 & 0xff); + signed char s1_1 = (src1 & 0xff00) >> 8; + signed char s1_2 = (src1 & 0xff0000) >> 16; + signed char s1_3 = (src1 & 0xff000000) >> 24; + + unsigned short s2_0 = (src2 & 0xff); + unsigned short s2_1 = (src2 & 0xff00) 
>> 8; + unsigned short s2_2 = (src2 & 0xff0000) >> 16; + unsigned short s2_3 = (src2 & 0xff000000) >> 24; + + res0 = s1_0 * s2_0; + res1 = s1_1 * s2_1; + res2 = s1_2 * s2_2; + res3 = s1_3 * s2_3; + + return (((unsigned long long) res3) << 48) + | (((unsigned long long) res2) << 32) + | (((unsigned long long) res1) << 16) + | res0; +} + +static inline unsigned int _mpyu(unsigned int src1, unsigned int src2) { + unsigned v = (unsigned short)src1 * (unsigned short)src2; + return v; +} + +static inline int _mpyus(unsigned int src1, int src2) { +return (unsigned short) src1 * (short) src2; +} + +static inline long long _mpyu4ll (unsigned int src1, unsigned int src2) { + unsigned short res0, res1, res2, res3; + unsigned char s1_0 = (src1 & 0xff); + unsigned char s1_1 = (src1 & 0xff00) >> 8; + unsigned char s1_2 = (src1 & 0xff0000) >> 16; + unsigned char s1_3 = (src1 & 0xff000000) >> 24; + + unsigned short s2_0 = (src2 & 0xff); + unsigned short s2_1 = (src2 & 0xff00) >> 8; + unsigned short s2_2 = (src2 & 0xff0000) >> 16; + unsigned short s2_3 = (src2 & 0xff000000) >> 24; + + res0 = s1_0 * s2_0; + res1 = s1_1 * s2_1; + res2 = s1_2 * s2_2; + res3 = s1_3 * s2_3; + + return (((unsigned long long) res3) << 48) + | (((unsigned long long) res2) << 32) + | (((unsigned long long) res1) << 16) + | res0; +} + + +static inline long long _mpy2ir(unsigned int src1, unsigned int src2) +{ + if ((src1 == 0x8000) && (src2 == 0x80000000)) { + _overflow = 1; + return 0; + } + else { + short ls1 = src1 & 0xffff; + short hs1 = src1 >> 16; + unsigned long long hi = (((long long) hs1) * (int) src2 + (1 << 14)) >> 15; + unsigned long long lo = ((((long long) ls1) * (int) src2 + (1 << 14)) >> 15) & 0xFFFFFFFF; + return (hi << 32) | lo; + } +} + + +static inline long long _mpy2ll (int src1, int src2) { + short ls1 = src1 & 0xffff; + short hs1 = src1 >> 16; + short ls2 = src2 & 0xffff; + short hs2 = src2 >> 16; + + unsigned long long hi = hs1 * hs2; + unsigned long long lo = (ls1 * ls2) & 0xFFFFFFFF; + + return (hi << 32) | lo; + +} + + +static inline int _mpy32(int src1, int src2) +{ + return src1 * src2; +} + + +static inline long long _mpy32ll(int src1, int src2) +{ + return ((long long) src1) * src2; +} + +static inline long long _mpy32su(int src1, unsigned int src2) +{ + return ((long long) src1) * ((int) src2); +} + +static inline long long _mpy32u(unsigned int src1, unsigned int src2) +{ + return ((long long) ((int) src1)) * ((long long) ((int) src2)); +} + +static inline long long _mpy32us(unsigned int src1, int src2) +{ + return ((int) src1) * ((long long) src2); +} + +static inline int _mvd (int src2) +{ + return src2; +} + + +static inline unsigned int _norm(int src2) +{ + return norm_shift_amt_U_and_non_U(1,src2); +} + + +static inline unsigned int _pack2 (unsigned int src1, unsigned int src2) { + short s1[2], s2[2], r[2]; + int result; + *((int*)s1) = src1; + *((int*)s2) = src2; + r[0] = s2[0]; + r[1] = s1[0]; + result = *(int*)r; + return result; +} + + +static inline int _packh2 (unsigned int src1, unsigned int src2) { + unsigned v0 = src1 & 0xffff0000; + unsigned v1 = src2 >> 16; + unsigned v = v0|v1; + return v; + +} + +static inline unsigned int _packh4 (unsigned int src1, unsigned int src2) { + unsigned v3 = (src1 >> 24) & 0xff; + unsigned v2 = (src1 >> 8) & 0xff; + unsigned v1 = (src2 >> 24) & 0xff; + unsigned v0 = (src2 >> 8) & 0xff; + unsigned v = (v3<<24) | (v2<<16) | (v1 << 8) | v0; + return v; +} + +static inline unsigned int _packhl2 (unsigned int src1, unsigned int src2) { + unsigned v0 = src1 & 
0xffff0000; + unsigned v1 = src2 & 0x0000ffff; + unsigned v = v0|v1; + return v; +} + +static inline unsigned int _packlh2 (unsigned int src1, unsigned int src2) { + unsigned v0 = src1 << 16; + unsigned v1 = (src2 >> 16) & 0xffff; + unsigned v = v0|v1; + return v; +} + + + + +static inline unsigned int _packl4 (unsigned int src1, unsigned int src2) { + unsigned v3 = (src1 >> 16) & 0xff; + unsigned v2 = (src1) & 0xff; + unsigned v1 = (src2 >> 16) & 0xff; + unsigned v0 = (src2) & 0xff; + unsigned v = (v3<<24) | (v2<<16) | (v1 << 8) | v0; + return v; +} + + + + +static inline unsigned int _rpack2 (unsigned int src1, unsigned int src2) { +int s1 = (int) src1; +int s2 = (int) src2; +s1 = util_shift_right_saturate_n (s1, -1, 32); +s2 = util_shift_right_saturate_n (s2, -1, 32); +return (unsigned int) (s1 & 0xffff0000) | (unsigned int) ((s2 & 0xffff0000) >>16); +} + + +static inline unsigned int _rotl (unsigned int src1, unsigned int src2) +{ + src2 &= 0x1f; + return (src1 << src2) | (src1 >> (32 - src2)); +} + + +static inline int _sadd(int src1, int src2) { +signed long long res; +signed long long maxv, minv; +maxv = (1LL << (32-1)) - 1; +minv = (-1LL << (32-1)); +res = (long long) src1 + (long long) src2; +if (res > maxv) { + res = maxv; + _overflow = 1; + } +else if (res < minv ) { + res = minv; + _overflow = 1; + } +return (int) res; +} + +static inline long long _saddsub(unsigned int src1, unsigned int src2) { +int radd; +signed long long rsub; + +signed long long maxv, minv; +maxv = (1LL << (32-1)) - 1; +minv = (-1LL << (32-1)); + +radd = (int) src1 + (int) src2; + +// saturate on subtract, not add + + +rsub = (long long) ((int) src1) - (long long) ((int) src2); +if (rsub > maxv) { + rsub = maxv; + /* NOTE: TI c6x does NOT set the overflow register even if results saturate */ + /* _overflow = 1; */ + } +else if (rsub < minv ) { + rsub = minv; + /* NOTE: TI c6x does NOT set the overflow register even if results saturate */ + /* _overflow = 1; */ + } + +return (((unsigned long long) radd) << 32) | ( rsub & 0x00000000ffffffff ) ; +} + + + +static inline long long _saddsub2(unsigned int src1, unsigned int src2) { +signed int radd[2]; +signed int rsub[2]; +signed short s1[2], s2[2]; + +signed int maxv, minv; +maxv = (1L << (16-1)) - 1; +minv = (-1L << (16-1)); + +*((int*)s1) = src1; +*((int*)s2) = src2; + +radd[0] = (int) s1[0] + (int) s2[0]; +radd[1] = (int) s1[1] + (int) s2[1]; + +rsub[0] = (int) s1[0] - (int) s2[0]; +rsub[1] = (int) s1[1] - (int) s2[1]; + +if (radd[0] > maxv) { + radd[0] = maxv; + /* NOTE: TI c6x does NOT set the overflow register even if results saturate */ + /* _overflow = 1; */ + } +else if (radd[0] < minv ) { + radd[0] = minv; + /* NOTE: TI c6x does NOT set the overflow register even if results saturate */ + /* _overflow = 1; */ + } + +if (radd[1] > maxv) { + radd[1] = maxv; + /* NOTE: TI c6x does NOT set the overflow register even if results saturate */ + /* _overflow = 1; */ + } +else if (radd[1] < minv ) { + radd[1] = minv; + /* NOTE: TI c6x does NOT set the overflow register even if results saturate */ + /* _overflow = 1; */ + } + + +if (rsub[0] > maxv) { + rsub[0] = maxv; + /* NOTE: TI c6x does NOT set the overflow register even if results saturate */ + /* _overflow = 1; */ + } +else if (rsub[0] < minv ) { + rsub[0] = minv; + /* NOTE: TI c6x does NOT set the overflow register even if results saturate */ + /* _overflow = 1; */ + } + +if (rsub[1] > maxv) { + rsub[1] = maxv; + /* NOTE: TI c6x does NOT set the overflow register even if results saturate */ + /* _overflow 
= 1; */ + } +else if (rsub[1] < minv ) { + rsub[1] = minv; + /* NOTE: TI c6x does NOT set the overflow register even if results saturate */ + /* _overflow = 1; */ + } + + +return ((((unsigned long long) radd[1]) & 0x000000000000ffff) << 48) | + ((((unsigned long long) radd[0]) & 0x000000000000ffff) << 32) | + ((((unsigned long long) rsub[1]) & 0x000000000000ffff) << 16) | + ((((unsigned long long) rsub[0]) & 0x000000000000ffff)); +} + + + +static inline int _sadd2(int src1, int src2) { +signed short s1[2], s2[2]; +signed int r[2], maxv, minv; + +maxv = (1L << (16-1)) - 1; +minv = (-1L << (16-1)); + + +*((int*)s1) = src1; +*((int*)s2) = src2; + +r[0] = (int) s1[0] + (int) s2[0]; +r[1] = (int) s1[1] + (int) s2[1]; + +if (r[0] > maxv) { + r[0] = maxv; + /* NOTE: TI c6x does NOT set the overflow register even if results saturate */ + /* _overflow = 1; */ + } +else if (r[0] < minv ) { + r[0] = minv; + /* NOTE: TI c6x does NOT set the overflow register even if results saturate */ + /* _overflow = 1; */ + } +if (r[1] > maxv) { + r[1] = maxv; + /* NOTE: TI c6x does NOT set the overflow register even if results saturate */ + /* _overflow = 1; */ + } +else if (r[1] < minv ) { + r[1] = minv; + /* NOTE: TI c6x does NOT set the overflow register even if results saturate */ + /* _overflow = 1; */ + } + +return ((r[1] & 0xffff) << 16 ) | (r[0] & 0xffff) ; +} + + +static inline int _saddus2(unsigned int src1, int src2) { +int res0, res1; + unsigned int s1_0 = (src1 & 0xffff); + unsigned int s1_1 = (src1 & 0xffff0000) >> 16; + + short s2_0 = (src2 & 0xffff); + short s2_1 = (src2 & 0xffff0000) >> 16; + + res0 = s1_0 + s2_0; + res1 = s1_1 + s2_1; + + if (res0 >= 0x10000) + res0 = 0xffff; + else if (res0 < 0) + res0 = 0; + + if (res1 >= 0x10000) + res1 = 0xffff; + else if (res1 < 0) + res1 = 0; + + return (res1 << 16) | res0; +} + + +static inline unsigned int _saddu4(unsigned int src1, unsigned int src2) { +unsigned int res0, res1, res2, res3; + unsigned int s1_0 = (src1 & 0xff); + unsigned int s1_1 = (src1 & 0xff00) >> 8; + unsigned int s1_2 = (src1 & 0xff0000) >> 16; + unsigned int s1_3 = (src1 & 0xff000000) >> 24; + + unsigned int s2_0 = (src2 & 0xff); + unsigned int s2_1 = (src2 & 0xff00) >> 8; + unsigned int s2_2 = (src2 & 0xff0000) >> 16; + unsigned int s2_3 = (src2 & 0xff000000) >> 24; + + res0 = s1_0 + s2_0; + res1 = s1_1 + s2_1; + res2 = s1_2 + s2_2; + res3 = s1_3 + s2_3; + + if (res0 >= 0x100) + res0 = 0xff; + + if (res1 >= 0x100) + res1 = 0xff; + + if (res2 >= 0x100) + res2 = 0xff; + + if (res3 >= 0x100) + res3 = 0xff; + + return (res3 << 24) | (res2 << 16) | (res1 << 8) | res0; + +} + + + +static inline int _sat(C6X_COMPAT_LONG40 src2) +{ + long long maxv = (1LL << (32-1)) - 1; + long long minv = (-1LL << (32-1)); + + int hi = (int)(src2 >> 32); + int lo = (int)src2; + long long temp = (unsigned long long)(unsigned)lo | (unsigned long long)hi << 32; + temp = orig_L40_set(temp); + + if (temp > maxv) { + temp = maxv; + _overflow = 1; + } + else if (temp < minv) { + temp = minv; + _overflow = 1; + } + return (int) temp; +} + +static inline unsigned int _set(unsigned int src2, unsigned int csta, unsigned int cstb) +{ + csta &= 0x1f; + cstb &= 0x1f; + if (csta > cstb) + return src2; + else { + unsigned int mask = (((1 << (cstb - csta)) << 1) - 1) << csta; + return src2 | mask; + } +} + +static inline unsigned int _setr(unsigned int src2, int src1) +{ + unsigned int csta = (src1 >> 5) & 0x1f; + unsigned int cstb = src1 & 0x1f; + if (csta > cstb) + return src2; + else { + unsigned int mask = (((1 << 
(cstb - csta)) << 1) - 1) << csta; + return src2 | mask; + } +} + + +static inline unsigned int _shfl (unsigned int src2) +{ + unsigned short lo = src2; + unsigned short hi = src2 >> 16; + unsigned int result = 0; + int i; + for (i = 0; i < 32; i+= 2) { + result >>= 1; + result |= (lo & 0x1) << 31; + lo >>= 1; + result >>= 1; + result |= (hi & 0x1) << 31; + hi >>= 1; + } + return result; +} + +static inline long long _shfl3 (unsigned int src1, unsigned int src2) +{ + unsigned short lo = src2; + unsigned short hi = src1 >> 16; + unsigned short mid = src1; + unsigned long long result = 0; + int i; + for (i = 0; i < 32; i+= 2) { + result >>= 1; + result |= ((unsigned long long) (lo & 0x1)) << 47; + lo >>= 1; + result >>= 1; + result |= ((unsigned long long) (mid & 0x1)) << 47; + mid >>= 1; + result >>= 1; + result |= ((unsigned long long) (hi & 0x1)) << 47; + hi >>= 1; + } + return result; +} + + + +static inline unsigned int _shlmb (unsigned int src1, unsigned int src2) +{ + return (src2 << 8) | (src1 >> 24); +} + +static inline unsigned int _shrmb (unsigned int src1, unsigned int src2) +{ + return (src2 >> 8) | (src1 << 24); +} + + +static inline unsigned int _shru2 (unsigned int src1, unsigned int src2) { +unsigned short hs1 = src1 >> 16; + unsigned short ls1 = src1 & 0xFFFF; + hs1 >>= src2; + ls1 >>= src2; + return (hs1 << 16) | ls1; +} + + +static inline int _shr2 (int src1, unsigned int src2) { + short s1[2], result[2]; + *((int*)s1) = src1; + src2 = src2 & 31; + result[0] = (int)s1[0] >> src2; + result[1] = (int)s1[1] >> src2; + + return *(int*)result; +} + + +static inline int _smpy (int src1, int src2) { +unsigned long long result; +result = (((short) src1 * (short) src2) << 1); + +if ((result & 0xffffffff) == 0x80000000){ + result = 0x7fffffff; + _overflow = 1; + } +return (int) (result); +} + +static inline int _smpyh (int src1, int src2) { +unsigned long long result; +result = ((short) (src1 >> 16) * (short) (src2 >> 16)) << 1; +if ((result & 0xffffffff) == 0x80000000){ + result = 0x7fffffff; + _overflow = 1; + } +return (int) (result); +} + +static inline int _smpyhl (int src1, int src2) { +unsigned long long result; +result = ((short) (src1 >> 16) * (short) (src2)) << 1; +if ((result & 0xffffffff) == 0x80000000){ + result = 0x7fffffff; + _overflow = 1; + } +return (int) (result); +} + +static inline int _smpylh (int src1, int src2) { +unsigned long long result; +result = ((short) (src1) * (short) (src2 >> 16)) << 1; +if ((result & 0xffffffff) == 0x80000000){ + result = 0x7fffffff; + _overflow = 1; + } +return (int) (result); +} + +static inline long long _smpy2ll (int src1, int src2) { + short ls1 = src1 & 0XFFFF; + short hs1 = src1 >> 16; + short ls2 = src2 & 0XFFFF; + short hs2 = src2 >> 16; + + unsigned long long hi = (hs1 * hs2) << 1; + unsigned long long lo = ((ls1 * ls2) << 1) & 0xFFFFFFFF; + if ((hi & 0xffffffff) == 0x80000000){ + hi = 0x7fffffff; + _overflow = 1; + } + + if ((lo & 0xffffffff) == 0x80000000){ + lo = 0x7fffffff; + _overflow = 1; + } + + return (hi << 32) | lo; +} + + + + +static inline int _smpy32(int src1, int src2) +{ + long long res = (long long) src1 * src2; + res <<= 1; + res >>= 32; + return res; +} + +static inline unsigned char TEN_satu8 (short src) +{ + if (src > 0xff) + return 0xff; + else if (src < 0) + return 0; + else + return src; +} + +static inline int _spack2 (int src1, int src2) { +short s1 = (short) util_saturate_n_no_state(src1,16); +short s2 = (short) util_saturate_n_no_state(src2,16); +return ( (unsigned int) s1 << 16) | (((int) s2) & 
0xFFFF); +} + + +static inline unsigned int _spacku4 (int src1, int src2) { + short lolo = src2; + short lohi = src2 >> 16; + short hilo = src1; + short hihi = src1 >> 16; + + lolo = TEN_satu8(lolo); + lohi = TEN_satu8(lohi); + hilo = TEN_satu8(hilo); + hihi = TEN_satu8(hihi); + + return (((unsigned int) hihi) << 24) | (((unsigned int) hilo) << 16) | (lohi << 8) | lolo; +} + + + +static inline int _sshl (int src1, unsigned int src2) { +short local2 = (short)(src2 & 0x7FFF); +return (int) util_shift_right_saturate_n(src1, -local2, 32); +} + + + + +static inline int _sshvl (int src2, int src1) { + short s1; + if (src1 > 31) + s1 = 31; + else if (src1 < -31) + s1 = -31; + else + s1 = src1; + + return (int) util_shift_right_saturate_n(src2, -s1, 32); +} + + + + + +static inline int _sshvr (int src2, int src1) { +short s1; + if (src1 > 31) + s1 = 31; + else if (src1 < -31) + s1 = -31; + else + s1 = src1; + return (int) util_shift_right_saturate_n(src2, s1, 32); +} + + + + +static inline int _ssub(int src1, int src2) { +signed long long res; +signed long long maxv, minv; +maxv = (1LL << (32-1)) - 1; +minv = (-1LL << (32-1)); +res = (long long) src1 - (long long) src2; +if (res > maxv) { + res = maxv; + _overflow = 1; + } +else if (res < minv ) { + res = minv; + _overflow = 1; + } +return (int) res; +} + +static inline int _ssub2(int src1, int src2) { +signed short s1[2], s2[2]; +signed int r[2], maxv, minv; + +maxv = (1L << (16-1)) - 1; +minv = (-1L << (16-1)); + + +*((int*)s1) = src1; +*((int*)s2) = src2; + +r[0] = (int) s1[0] - (int) s2[0]; +r[1] = (int) s1[1] - (int) s2[1]; + +if (r[0] > maxv) { + r[0] = maxv; + /* NOTE: TI c6x does NOT set the overflow register even if results saturate */ + /* _overflow = 1; */ + } +else if (r[0] < minv ) { + r[0] = minv; + /* NOTE: TI c6x does NOT set the overflow register even if results saturate */ + /* _overflow = 1; */ + } +if (r[1] > maxv) { + r[1] = maxv; + /* NOTE: TI c6x does NOT set the overflow register even if results saturate */ + /* _overflow = 1; */ + } +else if (r[1] < minv ) { + r[1] = minv; + /* NOTE: TI c6x does NOT set the overflow register even if results saturate */ + /* _overflow = 1; */ + } + +return ((r[1] & 0xffff) << 16 ) | (r[0] & 0xffff) ; +} + + +static inline int _subabs4 (int src1, int src2) { + int res0, res1, res2, res3; + unsigned int s1_0 = (src1 & 0xff); + unsigned int s1_1 = (src1 & 0xff00) >> 8; + unsigned int s1_2 = (src1 & 0xff0000) >> 16; + unsigned int s1_3 = (src1 & 0xff000000) >> 24; + + unsigned int s2_0 = (src2 & 0xff); + unsigned int s2_1 = (src2 & 0xff00) >> 8; + unsigned int s2_2 = (src2 & 0xff0000) >> 16; + unsigned int s2_3 = (src2 & 0xff000000) >> 24; + + res0 = s1_0 - s2_0; + res1 = s1_1 - s2_1; + res2 = s1_2 - s2_2; + res3 = s1_3 - s2_3; + + if (res0 < 0) + res0 = -res0; + + if (res1 < 0) + res1 = -res1; + + if (res2 < 0) + res2 = -res2; + + if (res3 < 0) + res3 = -res3; + + return (res3 << 24) | (res2 << 16) | (res1 << 8) | res0; +} + + +static inline unsigned int _subc (unsigned int src1, unsigned int src2) +{ + if ( src1 >= src2) + return ((src1 - src2) << 1) + 1; + else + return src1 << 1; +} + + + +static inline int _sub2(int src1, int src2) { + short s1[2], s2[2], r[2]; + int result; + *((int*)s1) = src1; + *((int*)s2) = src2; + r[0] = s1[0] - s2[0]; + r[1] = s1[1] - s2[1]; + result = *(int*)r; + return result; +} + + +static inline int _sub4(int src1, int src2) { + char c1[4], c2[4], r[4]; + int result; + *((int*)c1) = src1; + *((int*)c2) = src2; + r[0] = c1[0] - c2[0]; + r[1] = c1[1] - c2[1]; + 
r[2] = c1[2] - c2[2]; + r[3] = c1[3] - c2[3]; + result = *(int*)r; + return result; +} + + +static inline int _swap4 (unsigned int src1) { + unsigned char v0 = src1; + unsigned char v1 = src1 >> 8; + unsigned char v2 = src1 >> 16; + unsigned char v3 = src1 >> 24; + unsigned v = v0<<8 | v1 | v2<<24 | v3<<16; + return v; +} + +static inline unsigned int _unpkhu4 (unsigned int src1) { + unsigned v0 = src1>>24; + unsigned v1 = (src1>>16) & 0xff; + return (v0<<16) | v1; +} + +static inline unsigned int _unpklu4 (unsigned int src1) { + unsigned v1 = (src1>>8) & 0xff; + unsigned v0 = (src1) & 0xff; + return (v1<<16) | v0; +} + + + + +static inline unsigned int _xpnd2 (unsigned int src1) { + int v0 = (src1 & 0x1) ? 0x0000ffff : 0x00000000; + int v1 = (src1 & 0x2) ? 0xffff0000 : 0x00000000; + return v0|v1; +} + +static inline unsigned int _xpnd4 (unsigned int src1) { + int v0 = (src1 & 0x1) ? 0x000000ff : 0x00000000; + int v1 = (src1 & 0x2) ? 0x0000ff00 : 0x00000000; + int v2 = (src1 & 0x4) ? 0x00ff0000 : 0x00000000; + int v3 = (src1 & 0x8) ? 0xff000000 : 0x00000000; + int r = v0|v1|v2|v3; + return r; +} + + + +// end of Implemented in alphabetical order + + +#endif /* __C6X_COMPAT__H */
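
For reference only (not part of the patch): a minimal usage sketch of the compatibility header re-added above, assuming it is installed as "c6x_compat.h" (the include-guard name suggests this, but the file name is an assumption) and that <stdlib.h> is included first so abs() is declared for _abs()/_abs2(). As the header's NOTE says, it only declares _carry/_overflow extern, so the application must define them once. The half-word array tricks used by intrinsics such as _pack2() assume a little-endian host; the printed expectations below are illustrative.

#include <stdio.h>
#include <stdlib.h>        /* abs() is needed by _abs()/_abs2() in the header */
#include "c6x_compat.h"    /* assumed install name of the header in this patch */

/* The header only declares these; exactly one translation unit must define them. */
int _carry;
int _overflow;

int main(void)
{
    /* _sadd() clamps to INT32_MAX/INT32_MIN and latches _overflow on saturation. */
    _overflow = 0;
    printf("_sadd(0x7fffffff, 1) = 0x%08x, overflow=%d\n",
           _sadd(0x7fffffff, 1), _overflow);            /* expect 0x7fffffff, 1 */

    /* _norm() returns the number of redundant sign bits: 30 for the value 1. */
    printf("_norm(1) = %u\n", _norm(1));

    /* _pack2() packs the two low half-words: (lo16(src1) << 16) | lo16(src2),
     * i.e. 0x22224444 here on a little-endian host. */
    printf("_pack2(0x11112222, 0x33334444) = 0x%08x\n",
           _pack2(0x11112222, 0x33334444));
    return 0;
}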
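
A second stand-alone sketch, under the same assumptions, of the 40-bit accumulator emulation: C6X_COMPAT_LONG40 values are carried in a long long whose bits 39:32 are sign-extended by orig_L40_set(), _lsadd() accumulates with saturation at 40 bits, _sat() clamps the 40-bit value back to 32 bits (setting _overflow), and _labs() saturates at the 40-bit extremes.

#include <stdio.h>
#include <stdlib.h>
#include "c6x_compat.h"    /* assumed install name, as above */

int _carry;
int _overflow;

int main(void)
{
    C6X_COMPAT_LONG40 acc = 0;
    int i;

    /* 64 * 2^30 = 2^36 still fits in 40 bits, so _lsadd() never saturates here. */
    _overflow = 0;
    for (i = 0; i < 64; i++)
        acc = _lsadd(0x40000000, acc);

    /* _sat() clamps the 40-bit value 2^36 to 0x7fffffff and latches _overflow. */
    printf("_sat(acc) = 0x%08x, overflow=%d\n", _sat(acc), _overflow);

    /* _labs() of the most negative 40-bit value clamps to the most positive one
     * (0x7fffffffff), mirroring the maxv/minv bounds used inside the header. */
    printf("_labs(min40) = 0x%010llx\n",
           (unsigned long long)_labs((C6X_COMPAT_LONG40)-1LL << 39));
    return 0;
}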