-
Notifications
You must be signed in to change notification settings - Fork 349
coherent: tighten up the API #5285
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -20,11 +20,37 @@ | |
| #define __coherent __attribute__((packed, aligned(DCACHE_LINE_SIZE))) | ||
|
|
||
| /* | ||
| * The coherent API allows optimized access to memory by multiple cores, using | ||
| * cache, taking care about coherence. The intended use is to let cores acquire | ||
| * ownership of such shared objects, use them, and then release them, possibly | ||
| * to be re-acquired by other cores. Such shared objects must only be accessed | ||
| * via this API. It's designed to be primarily used with dynamically allocated | ||
| * objects because of their well-defined life span. It can also be used with | ||
| * objects from .data or .bss sections but greater care must be taken with them | ||
| * to strictly follow the API flow. | ||
| * | ||
| * The API assumes, that in the beginning no core has cache lines associated | ||
| * with the memory area, used with it. That is true for dynamically allocated | ||
| * memory, because when such memory is freed, its cache is invalidated - as long | ||
| * as that memory was never accessed by other cores, except by using this API. | ||
| * The first call must be coherent_init(), which initializes the header. If the | ||
| * object will be used by multiple cores, next coherent_shared() must be called. | ||
| * After that to use that memory, coherent_acquire_irq() or coherent_acquire() | ||
| * must be called, which acquires ownership of the object and returns a cached | ||
| * address of the memory. After that the user can perform cached access to the | ||
| * memory. To release the memory, one of coherent_release_irq() or | ||
| * coherent_release() must be called. The only time when the memory is accessed | ||
| * using cache is between those two calls, so only when releasing the memory we | ||
| * have to write back and invalidate caches to make sure, that next time we | ||
| * acquire this memory, our uncached header access will not be overwritten! When | ||
| * memory is not needed any more, typically before freeing the memory, | ||
| * coherent_free() should be called. | ||
| * | ||
| * This structure needs to be embedded at the start of any container to ensure | ||
| * container object cache alignment and to minimise non cache access when | ||
| * acquiring ownership. | ||
| * | ||
| * This structure should not be accessed outside of these APIs. | ||
| * This structure must not be accessed outside of these APIs. | ||
| * The shared flag is only set at coherent init and thereafter it's RO. | ||
| */ | ||
| struct coherent { | ||
|
|
@@ -58,12 +84,8 @@ struct coherent { | |
| #define CHECK_COHERENT_IRQ(_c) | ||
| #endif | ||
|
|
||
| /* | ||
| * Incoherent devices require manual cache invalidation and writeback as | ||
| * well as locking to manage shared access. | ||
| */ | ||
|
|
||
| #if CONFIG_INCOHERENT | ||
| /* When coherent_acquire() is called, we are sure not to have cache for this memory */ | ||
| __must_check static inline struct coherent *coherent_acquire(struct coherent *c, | ||
| const size_t size) | ||
| { | ||
|
|
@@ -149,46 +171,46 @@ static inline struct coherent *coherent_release_irq(struct coherent *c, const si | |
| return cache_to_uncache(c); | ||
| } | ||
|
|
||
| #define coherent_init(object, member) \ | ||
| do { \ | ||
| #define coherent_init(object, member) \ | ||
| do { \ | ||
| /* assert if someone passes a cache/local address in here. */ \ | ||
| ADDR_IS_COHERENT(object); \ | ||
| /* TODO static assert if we are not cache aligned */ \ | ||
| spinlock_init(&object->member.lock); \ | ||
| object->member.shared = false; \ | ||
| object->member.shared = false; \ | ||
| object->member.core = cpu_get_id(); \ | ||
| list_init(&object->member.list); \ | ||
| /* wtb and inv local data to coherent object */ \ | ||
| dcache_writeback_invalidate_region(uncache_to_cache(object), sizeof(*object)); \ | ||
| /* inv local data to coherent object */ \ | ||
| dcache_invalidate_region(uncache_to_cache(object), sizeof(*object)); \ | ||
| } while (0) | ||
|
|
||
| #define coherent_free(object, member) \ | ||
| do { \ | ||
| /* assert if someone passes a cache address in here. */ \ | ||
| #define coherent_free(object, member) \ | ||
| do { \ | ||
| /* assert if someone passes a cache address in here. */ \ | ||
| ADDR_IS_COHERENT(object); \ | ||
| /* wtb and inv local data to coherent object */ \ | ||
| dcache_writeback_invalidate_region(uncache_to_cache(object), sizeof(*object)); \ | ||
|
||
| } while (0) | ||
|
|
||
| /* set the object to shared mode with coherency managed by SW */ | ||
| #define coherent_shared(object, member) \ | ||
| do { \ | ||
| #define coherent_shared(object, member) \ | ||
| do { \ | ||
| /* assert if someone passes a cache/local address in here. */ \ | ||
| ADDR_IS_COHERENT(object); \ | ||
| spin_lock(&(object)->member.lock); \ | ||
| (object)->member.shared = true; \ | ||
| dcache_writeback_invalidate_region(object, sizeof(*object)); \ | ||
| spin_unlock(&(object)->member.lock); \ | ||
| dcache_invalidate_region(object, sizeof(*object)); \ | ||
| spin_unlock(&(object)->member.lock); \ | ||
| } while (0) | ||
|
|
||
| /* set the object to shared mode with coherency managed by SW */ | ||
| #define coherent_shared_irq(object, member) \ | ||
| do { \ | ||
| #define coherent_shared_irq(object, member) \ | ||
| do { \ | ||
| /* assert if someone passes a cache/local address in here. */ \ | ||
| ADDR_IS_COHERENT(object); \ | ||
| spin_lock_irq(&(object)->member.lock, &(object)->member.flags); \ | ||
| (object)->member.shared = true; \ | ||
| dcache_writeback_invalidate_region(object, sizeof(*object)); \ | ||
| dcache_invalidate_region(object, sizeof(*object)); \ | ||
| spin_unlock_irq(&(object)->member.lock, &(object)->member.flags); \ | ||
| } while (0) | ||
| #else | ||
|
|
@@ -245,8 +267,8 @@ static inline struct coherent *coherent_release_irq(struct coherent *c, const si | |
| return c; | ||
| } | ||
|
|
||
| #define coherent_init(object, member) \ | ||
| do { \ | ||
| #define coherent_init(object, member) \ | ||
| do { \ | ||
| /* TODO static assert if we are not cache aligned */ \ | ||
| spinlock_init(&object->member.lock); \ | ||
| object->member.shared = 0; \ | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The WB/INV is still needed here, the only change that would be valid is that uncache_to_cache be removed since the object being passed in here to release() will be the cache alias.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
why is it needed? We are just initialising the coherent API, so we're guaranteed not to have caches for it, because we either haven't accessed it yet, or we accessed it, released it and dropped all caches for it. Note that we might or might not have used the coherent API for it before, e.g. in the heap case. But `rfree()` drops caches anyway. And we must not write back caches here. We just wrote data to that memory bypassing cache, using the uncached ("coherent") alias. So, if we really do happen to have cached data for it here and we write it back here - we have just overwritten our initialisation.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We don't know who used the memory, and how, before we use it. This fixed issues that were validated.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@lgirdwood I think this is actually wrong. Again: we wrote to SRAM bypassing cache at lines 170-173 above and now we're (potentially) overwriting what we just have written with some stale data from cache - this is a bug. It only doesn't bite us because we actually don't have any dirty caches at this point, so, this is a NOP. But (1) it makes an impression that we might have dirty caches at this point and (2) if we did happen to have any, it would corrupt our data.
Again, I think this API cannot be used in the "we don't know" mode. It can only work if absolutely enforced. Any memory, designated to be used with this API must only be accessed via it. If this is ever violated, it can corrupt data. This cannot be made to work without a certainty, that the memory is only accessed via this API.
One seeming exception is heap memory, where of course the same memory could have been allocated before and used without this API. And that indeed can be a problem: if such memory was only allocated, used and released on the same core - we're fine, `rfree()` will invalidate caches. But if that memory was accessed via its cached aliases on different cores - we have a problem. E.g. it could well happen that memory got allocated on core 0 or 1, then got used only by core 1 via cache - so data is safe - but then got freed by core 0. Then we have a problem: core 1 can still hold dirty caches for that memory. When we then allocate it for coherent API use on core 0, nothing can prevent cache from core 1 from being written back at any moment in time, overwriting our data. So, we must always guarantee that dynamically allocated (heap) memory is either (1) only allocated, used and freed by the same core, or (2) shared via this API, or (3) shared but only accessed via uncached aliases.