-
-
Notifications
You must be signed in to change notification settings - Fork 579
Shorten CWD path #854
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Shorten CWD path #854
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
| @@ -0,0 +1,164 @@ | ||||||
| #include "config.h" // IWYU pragma: keep | ||||||
|
|
||||||
| #include "CwdUtils.h" | ||||||
|
|
||||||
| #include <stdbool.h> | ||||||
| #include <stdlib.h> | ||||||
| #include <wchar.h> | ||||||
|
|
||||||
| #include "XUtils.h" | ||||||
|
|
||||||
|
|
||||||
| typedef struct ShortenCwdContext { | ||||||
| size_t maxLength; | ||||||
| size_t len; | ||||||
| wchar_t **parts; | ||||||
| size_t partsLen; | ||||||
| size_t *partLengths; | ||||||
|
Comment on lines
+14
to
+17
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The fields To mitigate this, using a naming scheme like
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should be |
||||||
| } ShortenCwdContext; | ||||||
|
|
||||||
| static void ShortenCwdContext_init(ShortenCwdContext *ctx, wchar_t *cwd, size_t maxLength) { | ||||||
| ctx->maxLength = maxLength; | ||||||
| ctx->len = wcslen(cwd); | ||||||
| ctx->parts = Wstring_split(cwd, L'/', &ctx->partsLen); | ||||||
| ctx->partLengths = xCalloc(ctx->partsLen, sizeof(size_t)); | ||||||
| for (size_t i = 0; i < ctx->partsLen; i++) | ||||||
| ctx->partLengths[i] = wcslen(ctx->parts[i]); | ||||||
| } | ||||||
|
|
||||||
| static void ShortenCwdContext_destroy(ShortenCwdContext *ctx) { | ||||||
| for (size_t i = 0; i < ctx->partsLen; i++) | ||||||
| free(ctx->parts[i]); | ||||||
| free(ctx->parts); | ||||||
| free(ctx->partLengths); | ||||||
| *ctx = (ShortenCwdContext){0}; | ||||||
| } | ||||||
|
|
||||||
| static void shortenCwdParts(ShortenCwdContext *ctx) { | ||||||
| for (size_t i = ctx->partsLen - 2; i != (size_t)-1 && ctx->len > ctx->maxLength; i--) { | ||||||
| if (ctx->partLengths[i] < 3) | ||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
Also, documenting where the 3 comes from (initial, separator, trailer) would be nice. |
||||||
| continue; | ||||||
|
|
||||||
| size_t extraChars = ctx->len - ctx->maxLength; | ||||||
| size_t maxRemovableChars = ctx->partLengths[i] - 2; | ||||||
| size_t charsToRemove = extraChars < maxRemovableChars ? extraChars : maxRemovableChars; | ||||||
|
|
||||||
| ctx->partLengths[i] -= charsToRemove; | ||||||
| ctx->len -= charsToRemove; | ||||||
|
|
||||||
| Wstring_safeWcsncpy(ctx->parts[i] + (ctx->partLengths[i] - 1), L"~", 2); | ||||||
| } | ||||||
| } | ||||||
|
|
||||||
| static size_t collapseCwdParts(ShortenCwdContext *ctx, bool doActualWork) { | ||||||
| if (ctx->len <= ctx->maxLength || ctx->partsLen <= 3) | ||||||
| return 0; | ||||||
|
|
||||||
| size_t len = ctx->len; | ||||||
|
|
||||||
| size_t i; | ||||||
| for (i = ctx->partsLen - 2; i > 1; i--) { | ||||||
| if (len + (3 - ctx->partLengths[i]) <= ctx->maxLength) | ||||||
| break; | ||||||
|
|
||||||
| len -= ctx->partLengths[i] + 1; | ||||||
|
|
||||||
| if (doActualWork) { | ||||||
| ctx->partLengths[i] = 0; | ||||||
| free(ctx->parts[i]); | ||||||
| ctx->parts[i] = NULL; | ||||||
| } | ||||||
| } | ||||||
|
|
||||||
| len += 3 - ctx->partLengths[i]; | ||||||
| size_t diff = ctx->len - len; | ||||||
|
|
||||||
| if (doActualWork) { | ||||||
| wchar_t newPart[] = L"~~~"; | ||||||
| newPart[0] = ctx->parts[i][0]; | ||||||
| free(ctx->parts[i]); | ||||||
| ctx->parts[i] = xWcsdup(newPart); | ||||||
| ctx->partLengths[i] = 3; | ||||||
| ctx->len = len; | ||||||
| } | ||||||
|
|
||||||
| return diff; | ||||||
| } | ||||||
|
|
||||||
| static size_t shortenCwdLastPart(ShortenCwdContext *ctx, bool doActualWork) { | ||||||
| if (ctx->len <= ctx->maxLength) | ||||||
| return 0; | ||||||
|
|
||||||
| size_t lastPartLen = ctx->partLengths[ctx->partsLen - 1]; | ||||||
| if (lastPartLen <= 3) | ||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Unexplained magic number 3.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Minimal length of directory/part strings (after shortening, if not shorter before). |
||||||
| return 0; | ||||||
|
|
||||||
| wchar_t *lastPart = ctx->parts[ctx->partsLen - 1]; | ||||||
| size_t extraChars = ctx->len - ctx->maxLength + 1; | ||||||
| size_t maxRemovableChars = lastPartLen - 2; | ||||||
| size_t charsToRemove = extraChars < maxRemovableChars ? extraChars : maxRemovableChars; | ||||||
|
|
||||||
| if (doActualWork) { | ||||||
| size_t charsAtBeginning = (lastPartLen - charsToRemove + 1) / 2; | ||||||
| size_t charsAtEnd = lastPartLen - charsToRemove - charsAtBeginning; | ||||||
| lastPart[charsAtBeginning] = '~'; | ||||||
| wmemmove(lastPart + charsAtBeginning + 1, lastPart + lastPartLen - charsAtEnd, charsAtEnd); | ||||||
| lastPart[charsAtBeginning + charsAtEnd + 1] = '\0'; | ||||||
| ctx->partLengths[ctx->partsLen - 1] = lastPartLen - charsToRemove + 1; | ||||||
| ctx->len -= charsToRemove - 1; | ||||||
| } | ||||||
|
|
||||||
| return charsToRemove - 1; | ||||||
| } | ||||||
|
|
||||||
| static wchar_t* buildCwdFromParts(ShortenCwdContext *ctx) { | ||||||
| size_t len = ctx->partsLen - 1; | ||||||
| for (size_t i = 0; i < ctx->partsLen; i++) | ||||||
| len += ctx->partLengths[i]; | ||||||
|
|
||||||
| wchar_t *newCwd = xCalloc(len + 1, sizeof(wchar_t)); | ||||||
|
|
||||||
| newCwd[0] = '\0'; | ||||||
| for (size_t i = 0, writeIndex = 0; i < ctx->partsLen; i++) { | ||||||
| if (!ctx->parts[i]) | ||||||
| continue; | ||||||
|
|
||||||
| Wstring_safeWcsncpy(newCwd + writeIndex, ctx->parts[i], ctx->partLengths[i] + 1); | ||||||
| writeIndex += ctx->partLengths[i]; | ||||||
| if (i < ctx->partsLen - 1) | ||||||
| newCwd[writeIndex++] = L'/'; | ||||||
| } | ||||||
|
|
||||||
| return newCwd; | ||||||
| } | ||||||
|
|
||||||
| char* CwdUtils_shortenCwd(const char *cwd, const size_t maxLength) { | ||||||
| wchar_t *wcwd = xMbstowcs(cwd); | ||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I quick pre-test could be: if (strnlen(cwd, maxLength) < maxLength) {
return xStrdup(cwd);
}
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Shouldn't the comparison be
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No, because You could do |
||||||
| size_t len = wcslen(wcwd); | ||||||
| if (len <= maxLength) { | ||||||
| free(wcwd); | ||||||
| return xStrdup(cwd); | ||||||
| } | ||||||
|
|
||||||
| ShortenCwdContext ctx; | ||||||
| ShortenCwdContext_init(&ctx, wcwd, maxLength); | ||||||
| free(wcwd); | ||||||
| wcwd = NULL; | ||||||
|
|
||||||
| shortenCwdParts(&ctx); | ||||||
| if (shortenCwdLastPart(&ctx, false) > collapseCwdParts(&ctx, false)) { | ||||||
| shortenCwdLastPart(&ctx, true); | ||||||
| collapseCwdParts(&ctx, true); | ||||||
| } else { | ||||||
| collapseCwdParts(&ctx, true); | ||||||
| shortenCwdLastPart(&ctx, true); | ||||||
| } | ||||||
|
Comment on lines
+148
to
+155
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The rules/logic in this area is arcane and hard to read. What's the reasoning behind these function calls and conditionals?
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. A quick comment about the rationale of the check would be fine. |
||||||
|
|
||||||
| wchar_t *newWcwd = buildCwdFromParts(&ctx); | ||||||
| char *newCwd = xWcstombs(newWcwd); | ||||||
| free(newWcwd); | ||||||
|
|
||||||
| ShortenCwdContext_destroy(&ctx); | ||||||
|
|
||||||
| return newCwd; | ||||||
| } | ||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,8 @@ | ||
| #ifndef HEADER_CwdUtils | ||
| #define HEADER_CwdUtils | ||
|
|
||
| #include <stddef.h> | ||
|
|
||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Second blank line after the header includes. |
||
| char* CwdUtils_shortenCwd(const char* cwd, size_t maxLength); | ||
|
|
||
| #endif | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -402,3 +402,84 @@ unsigned int countTrailingZeros(unsigned int x) { | |
| return mod37BitPosition[(-x & x) % 37]; | ||
| } | ||
| #endif | ||
|
|
||
| wchar_t* xMbstowcs(const char *mbs) { | ||
| size_t len = strlen(mbs); | ||
| wchar_t *wcs = xCalloc(len + 1, sizeof(wchar_t)); | ||
| mbstate_t mbstate = {0}; | ||
| mbsrtowcs(wcs, &mbs, len + 1, &mbstate); | ||
| if (mbs) | ||
| fail(); | ||
| return wcs; | ||
| } | ||
|
|
||
| char* xWcstombs(const wchar_t *wcs) { | ||
| size_t len = wcslen(wcs); | ||
| if (SIZE_MAX / MB_CUR_MAX <= len) | ||
| fail(); | ||
| size_t mbsSize = len * MB_CUR_MAX + 1; | ||
| char *mbs = xMalloc(mbsSize); | ||
| mbstate_t mbstate = {0}; | ||
| wcsrtombs(mbs, &wcs, mbsSize, &mbstate); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think this will fail for multi-byte string, where a sizable number of characters is more than 4 bytes. This may happen with strings containing many emoji or other characters that use more than 4 bytes for UTF-8 encoding.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @BenBE For your info, emoji sequences are considered multiple characters in Unicode. It is only that the display of the characters would look combined together. The real problem is we can't ensure the The right element size to use is
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
I was aware of this (after all 🐈⬛ is f0 9f 90 88 e2 80 8d e2 ac 9b, which is 3 Unicode codepoints), but through a mix-up on my side I kinda had in mind that some of the non-BMP characters needed 5/6 bytes. Further fueled by MySQL, who had a utf8mb4 charset, which they added to support things beyond the BMP. But …
Exactly. Or encode twice and use the first encoding to query how much space is actually needed. |
||
| if (wcs) | ||
| fail(); | ||
| return mbs; | ||
| } | ||
|
|
||
| wchar_t* xWcsdup(const wchar_t* str) { | ||
| wchar_t* data = wcsdup(str); | ||
| if (!data) | ||
| fail(); | ||
| return data; | ||
| } | ||
|
|
||
| wchar_t* xWcsndup(const wchar_t* str, size_t len) { | ||
| wchar_t* data = xCalloc(len + 1, sizeof(wchar_t)); | ||
| if (!data) | ||
| fail(); | ||
|
|
||
| Wstring_safeWcsncpy(data, str, len + 1); | ||
|
|
||
| return data; | ||
| } | ||
|
|
||
| wchar_t** Wstring_split(const wchar_t* s, wchar_t sep, size_t* n) { | ||
| const unsigned int rate = 10; | ||
| wchar_t** out = xCalloc(rate, sizeof(wchar_t*)); | ||
| size_t ctr = 0; | ||
| unsigned int blocks = rate; | ||
| const wchar_t* where; | ||
| while ((where = wcschr(s, sep)) != NULL) { | ||
| size_t size = (size_t)(where - s); | ||
| out[ctr] = xWcsndup(s, size); | ||
| ctr++; | ||
| if (ctr == blocks) { | ||
| blocks += rate; | ||
| out = xReallocArray(out, blocks, sizeof(wchar_t*)); | ||
| } | ||
| s += size + 1; | ||
| } | ||
| if (s[0] != L'\0') { | ||
| out[ctr] = xWcsdup(s); | ||
| ctr++; | ||
| } | ||
| out = xRealloc(out, sizeof(wchar_t*) * (ctr + 1)); | ||
| out[ctr] = NULL; | ||
|
|
||
| if (n) | ||
| *n = ctr; | ||
|
|
||
| return out; | ||
| } | ||
|
|
||
| size_t Wstring_safeWcsncpy(wchar_t* restrict dest, const wchar_t* restrict src, size_t size) { | ||
| assert(size > 0); | ||
|
|
||
| size_t i = 0; | ||
| for (; i < size - 1 && src[i]; i++) | ||
| dest[i] = src[i]; | ||
|
|
||
| dest[i] = L'\0'; | ||
|
|
||
| return i; | ||
| } | ||
Uh oh!
There was an error while loading. Please reload this page.