-
Notifications
You must be signed in to change notification settings - Fork 349
Add new mtrace_printf() and use it for DMA init error handling and demoting banner to INFO level #4389
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add new mtrace_printf() and use it for DMA init error handling and demoting banner to INFO level #4389
Changes from all commits
4f9ad88
12ff901
930b813
c9c177b
3bb5060
8d26d43
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -126,17 +126,42 @@ static enum task_state trace_work(void *data) | |
| */ | ||
| int dma_trace_init_early(struct sof *sof) | ||
| { | ||
| int ret; | ||
|
|
||
| /* If this assert is wrong then traces have been corrupting | ||
| * random parts of memory. Some functions run before _and_ after | ||
| * DMA trace initialization and we don't want to ask them to | ||
| * never trace. So dma_trace_initialized() must be either | ||
| * clearly false/NULL or clearly true, we can't tolerate random | ||
| * uninitialized values in sof->dmat etc. | ||
| */ | ||
| assert(!dma_trace_initialized(sof->dmat)); | ||
|
|
||
| sof->dmat = rzalloc(SOF_MEM_ZONE_SYS_SHARED, 0, SOF_MEM_CAPS_RAM, sizeof(*sof->dmat)); | ||
|
|
||
| dma_sg_init(&sof->dmat->config.elem_array); | ||
| spinlock_init(&sof->dmat->lock); | ||
|
|
||
| ipc_build_trace_posn(&sof->dmat->posn); | ||
| sof->dmat->msg = ipc_msg_init(sof->dmat->posn.rhdr.hdr.cmd, | ||
| sizeof(sof->dmat->posn)); | ||
| if (!sof->dmat->msg) | ||
| return -ENOMEM; | ||
| if (!sof->dmat->msg) { | ||
| ret = -ENOMEM; | ||
| goto err; | ||
| } | ||
|
|
||
| return 0; | ||
|
|
||
| err: | ||
| mtrace_printf(LOG_LEVEL_ERROR, | ||
| "dma_trace_init_early() failed: %d", ret); | ||
|
|
||
| /* Cannot rfree(sof->dmat) from the system memory pool, see | ||
| * comments in lib/alloc.c | ||
| */ | ||
| sof->dmat = NULL; | ||
|
||
|
|
||
| return ret; | ||
| } | ||
|
|
||
| /** Run after dma_trace_init_early() and before dma_trace_enable() */ | ||
|
|
@@ -146,18 +171,26 @@ int dma_trace_init_complete(struct dma_trace_data *d) | |
|
|
||
| tr_info(&dt_tr, "dma_trace_init_complete()"); | ||
|
|
||
| if (!d) { | ||
| mtrace_printf(LOG_LEVEL_ERROR, | ||
| "dma_trace_init_complete(): failed, no dma_trace_data"); | ||
| return -ENOMEM; | ||
| } | ||
|
|
||
| /* init DMA copy context */ | ||
| ret = dma_copy_new(&d->dc); | ||
| if (ret < 0) { | ||
| tr_err(&dt_tr, "dma_trace_init_complete(): dma_copy_new() failed"); | ||
| mtrace_printf(LOG_LEVEL_ERROR, | ||
| "dma_trace_init_complete(): dma_copy_new() failed: %d", ret); | ||
| goto out; | ||
| } | ||
|
|
||
| ret = dma_get_attribute(d->dc.dmac, DMA_ATTR_COPY_ALIGNMENT, | ||
| &d->dma_copy_align); | ||
|
|
||
| if (ret < 0) { | ||
| tr_err(&dt_tr, "dma_trace_init_complete(): dma_get_attribute()"); | ||
| mtrace_printf(LOG_LEVEL_ERROR, | ||
| "dma_trace_init_complete(): dma_get_attribute() failed: %d", ret); | ||
|
|
||
| goto out; | ||
| } | ||
|
|
@@ -331,16 +364,23 @@ int dma_trace_enable(struct dma_trace_data *d) | |
|
|
||
| /* initialize dma trace buffer */ | ||
| err = dma_trace_buffer_init(d); | ||
| if (err < 0) | ||
|
|
||
| if (err < 0) { | ||
| mtrace_printf(LOG_LEVEL_ERROR, "dma_trace_enable: buffer_init failed"); | ||
| goto out; | ||
| } | ||
|
|
||
| /* | ||
| * It should be the very first sent log for easily identification. | ||
| * Use tr_err to have this initial message also in error logs and assert | ||
| * traces works well. | ||
| */ | ||
| tr_err(&dt_tr, "FW ABI 0x%x DBG ABI 0x%x tag " SOF_GIT_TAG " src hash 0x%08x (ldc hash " META_QUOTE(SOF_SRC_HASH) ")", | ||
| SOF_ABI_VERSION, SOF_ABI_DBG_VERSION, SOF_SRC_HASH); | ||
| /* It should be the very first sent log for easy identification. */ | ||
| mtrace_printf(LOG_LEVEL_INFO, | ||
| "SHM: FW ABI 0x%x DBG ABI 0x%x tag " SOF_GIT_TAG " src hash 0x%08x (ldc hash " | ||
| META_QUOTE(SOF_SRC_HASH) ")", | ||
| SOF_ABI_VERSION, SOF_ABI_DBG_VERSION, SOF_SRC_HASH); | ||
|
|
||
| /* Use a different, DMA: prefix to ease identification of log files */ | ||
| tr_info(&dt_tr, | ||
| "DMA: FW ABI 0x%x DBG ABI 0x%x tag " SOF_GIT_TAG " src hash 0x%08x (ldc hash " | ||
| META_QUOTE(SOF_SRC_HASH) ")", | ||
| SOF_ABI_VERSION, SOF_ABI_DBG_VERSION, SOF_SRC_HASH); | ||
|
|
||
| #if CONFIG_DMA_GW | ||
| /* | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Would it not be better to make the default version safe? (and return time 0 to indicate that no timer is running)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe, I don't know. You tell me. So far I've been spending at least 80% of my time trying to fix issues fairly distant from my actual objectives so this time I tried to minimize the changes but maybe I shouldn't have?
There is a variety of platform_timer_get(timer) implementations. Some implementations don't even look at their timer argument: so these platforms would then lose even more timestamps for no real reason.
At least one platform performs an assert(timer), maybe because it was considered valuable not to keep running with broken timers? Tracing is still useful without timestamps but I wouldn't be so sure about other areas: I can easily imagine some threads stuck forever.
Considering such "diversity" I chose not to open that can of worms and to change only the area in this PR. As of now the new "safe" function is meant to be used only in areas with tricky boot dependencies; like tracing.
As explained in the comment, I'm not returning zero but a small value on purpose: because zero looks like something is wrong.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
0 looks perfectly fine to me. Look at dmesg soon after a system boot - the first messages are marked with time 0. Looks much more logical than any random "small value" to me.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This non-zero value is useful to tell it apart from
platform_timer_get(timer) returning zero (for good or bad reasons). When everyone and everything returns zero, including (but not only) buggy code, then there's no way to tell where it came from.