Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
164 changes: 164 additions & 0 deletions CwdUtils.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
#include "config.h" // IWYU pragma: keep

#include "CwdUtils.h"

#include <stdbool.h>
#include <stdlib.h>
#include <wchar.h>

#include "XUtils.h"


typedef struct ShortenCwdContext {
size_t maxLength;
size_t len;
wchar_t **parts;
size_t partsLen;
size_t *partLengths;
Comment on lines +14 to +17
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The fields partsLen and partLengths have quite similar names which may cause confusion.

To mitigate this, using a naming scheme like strSize/partCount/partPtr/partSize might be somewhat more easy to disambiguate.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should be partPtrs and partSizes because the latter two are arrays.

} ShortenCwdContext;

static void ShortenCwdContext_init(ShortenCwdContext *ctx, wchar_t *cwd, size_t maxLength) {
ctx->maxLength = maxLength;
ctx->len = wcslen(cwd);
ctx->parts = Wstring_split(cwd, L'/', &ctx->partsLen);
ctx->partLengths = xCalloc(ctx->partsLen, sizeof(size_t));
for (size_t i = 0; i < ctx->partsLen; i++)
ctx->partLengths[i] = wcslen(ctx->parts[i]);
}

static void ShortenCwdContext_destroy(ShortenCwdContext *ctx) {
for (size_t i = 0; i < ctx->partsLen; i++)
free(ctx->parts[i]);
free(ctx->parts);
free(ctx->partLengths);
*ctx = (ShortenCwdContext){0};
}

static void shortenCwdParts(ShortenCwdContext *ctx) {
for (size_t i = ctx->partsLen - 2; i != (size_t)-1 && ctx->len > ctx->maxLength; i--) {
if (ctx->partLengths[i] < 3)
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
if (ctx->partLengths[i] < 3)
if (ctx->partLengths[i] <= 3)

Also, documenting where the 3 comes from (initial, separator, trailer) would be nice.

continue;

size_t extraChars = ctx->len - ctx->maxLength;
size_t maxRemovableChars = ctx->partLengths[i] - 2;
size_t charsToRemove = extraChars < maxRemovableChars ? extraChars : maxRemovableChars;

ctx->partLengths[i] -= charsToRemove;
ctx->len -= charsToRemove;

Wstring_safeWcsncpy(ctx->parts[i] + (ctx->partLengths[i] - 1), L"~", 2);
}
}

static size_t collapseCwdParts(ShortenCwdContext *ctx, bool doActualWork) {
if (ctx->len <= ctx->maxLength || ctx->partsLen <= 3)
return 0;

size_t len = ctx->len;

size_t i;
for (i = ctx->partsLen - 2; i > 1; i--) {
if (len + (3 - ctx->partLengths[i]) <= ctx->maxLength)
break;

len -= ctx->partLengths[i] + 1;

if (doActualWork) {
ctx->partLengths[i] = 0;
free(ctx->parts[i]);
ctx->parts[i] = NULL;
}
}

len += 3 - ctx->partLengths[i];
size_t diff = ctx->len - len;

if (doActualWork) {
wchar_t newPart[] = L"~~~";
newPart[0] = ctx->parts[i][0];
free(ctx->parts[i]);
ctx->parts[i] = xWcsdup(newPart);
ctx->partLengths[i] = 3;
ctx->len = len;
}

return diff;
}

static size_t shortenCwdLastPart(ShortenCwdContext *ctx, bool doActualWork) {
if (ctx->len <= ctx->maxLength)
return 0;

size_t lastPartLen = ctx->partLengths[ctx->partsLen - 1];
if (lastPartLen <= 3)
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unexplained magic number 3.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Minimal length of directory/part strings (after shortening, if not shorter before).

return 0;

wchar_t *lastPart = ctx->parts[ctx->partsLen - 1];
size_t extraChars = ctx->len - ctx->maxLength + 1;
size_t maxRemovableChars = lastPartLen - 2;
size_t charsToRemove = extraChars < maxRemovableChars ? extraChars : maxRemovableChars;

if (doActualWork) {
size_t charsAtBeginning = (lastPartLen - charsToRemove + 1) / 2;
size_t charsAtEnd = lastPartLen - charsToRemove - charsAtBeginning;
lastPart[charsAtBeginning] = '~';
wmemmove(lastPart + charsAtBeginning + 1, lastPart + lastPartLen - charsAtEnd, charsAtEnd);
lastPart[charsAtBeginning + charsAtEnd + 1] = '\0';
ctx->partLengths[ctx->partsLen - 1] = lastPartLen - charsToRemove + 1;
ctx->len -= charsToRemove - 1;
}

return charsToRemove - 1;
}

static wchar_t* buildCwdFromParts(ShortenCwdContext *ctx) {
size_t len = ctx->partsLen - 1;
for (size_t i = 0; i < ctx->partsLen; i++)
len += ctx->partLengths[i];

wchar_t *newCwd = xCalloc(len + 1, sizeof(wchar_t));

newCwd[0] = '\0';
for (size_t i = 0, writeIndex = 0; i < ctx->partsLen; i++) {
if (!ctx->parts[i])
continue;

Wstring_safeWcsncpy(newCwd + writeIndex, ctx->parts[i], ctx->partLengths[i] + 1);
writeIndex += ctx->partLengths[i];
if (i < ctx->partsLen - 1)
newCwd[writeIndex++] = L'/';
}

return newCwd;
}

char* CwdUtils_shortenCwd(const char *cwd, const size_t maxLength) {
wchar_t *wcwd = xMbstowcs(cwd);
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I quick pre-test could be:

   if (strnlen(cwd, maxLength) < maxLength) {
      return xStrdup(cwd);
   }

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shouldn't the comparison be <=?

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, because == would mean that maxLength bytes were read and no NUL byte was encountered. You couldn't distinguish the case of exactly maxLength bytes from the one with more bytes. This doesn't hurt though, as the == case is caught below in the propr check (with conversion) …

You could do strnlen(cwd, maxLength + 1) < maxLength if you wanted to catch that, but strictly speaking you'd then also have to include an overflow check for the maxLength + 1

size_t len = wcslen(wcwd);
if (len <= maxLength) {
free(wcwd);
return xStrdup(cwd);
}

ShortenCwdContext ctx;
ShortenCwdContext_init(&ctx, wcwd, maxLength);
free(wcwd);
wcwd = NULL;

shortenCwdParts(&ctx);
if (shortenCwdLastPart(&ctx, false) > collapseCwdParts(&ctx, false)) {
shortenCwdLastPart(&ctx, true);
collapseCwdParts(&ctx, true);
} else {
collapseCwdParts(&ctx, true);
shortenCwdLastPart(&ctx, true);
}
Comment on lines +148 to +155
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The rules/logic in this area is arcane and hard to read. What's the reasoning behind these function calls and conditionals?

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A quick comment about the rationale of the check would be fine.


wchar_t *newWcwd = buildCwdFromParts(&ctx);
char *newCwd = xWcstombs(newWcwd);
free(newWcwd);

ShortenCwdContext_destroy(&ctx);

return newCwd;
}
8 changes: 8 additions & 0 deletions CwdUtils.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#ifndef HEADER_CwdUtils
#define HEADER_CwdUtils

#include <stddef.h>

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Second blank line after the header includes.

char* CwdUtils_shortenCwd(const char* cwd, size_t maxLength);

#endif
2 changes: 2 additions & 0 deletions Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ myhtopsources = \
Compat.c \
CPUMeter.c \
CRT.c \
CwdUtils.c \
DateMeter.c \
DateTimeMeter.c \
DiskIOMeter.c \
Expand Down Expand Up @@ -109,6 +110,7 @@ myhtopheaders = \
CommandLine.h \
CommandScreen.h \
Compat.h \
CwdUtils.h \
DateMeter.h \
DateTimeMeter.h \
DiskIOMeter.h \
Expand Down
9 changes: 8 additions & 1 deletion Process.c
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ in the source distribution for its full text.
#include <sys/resource.h>

#include "CRT.h"
#include "CwdUtils.h"
#include "Hashtable.h"
#include "Machine.h"
#include "Macros.h"
Expand Down Expand Up @@ -630,7 +631,7 @@ void Process_writeField(const Process* this, RichString* str, RowField field) {
attr = CRT_colors[PROCESS_SHADOW];
cwd = "main thread terminated";
} else {
cwd = this->procCwd;
cwd = this->procCwdShort;
}
Row_printLeftAlignedField(str, attr, cwd, 25);
return;
Expand Down Expand Up @@ -770,6 +771,7 @@ void Process_done(Process* this) {
free(this->procComm);
free(this->procExe);
free(this->procCwd);
free(this->procCwdShort);
free(this->mergedCommand.str);
free(this->tty_name);
}
Expand Down Expand Up @@ -1083,6 +1085,11 @@ void Process_updateCPUFieldWidths(float percentage) {
Row_updateFieldWidth(PERCENT_NORM_CPU, width);
}

void Process_updateShortCwd(Process* this) {
free(this->procCwdShort);
this->procCwdShort = this->procCwd ? CwdUtils_shortenCwd(this->procCwd, 25) : NULL;
}

const ProcessClass Process_class = {
.super = {
.super = {
Expand Down
4 changes: 4 additions & 0 deletions Process.h
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,9 @@ typedef struct Process_ {
/* The process/thread working directory */
char* procCwd;

/* Shortened process/thread working directory */
char* procCwdShort;

/* Offset in procExe of the process basename */
int procExeBasenameOffset;

Expand Down Expand Up @@ -328,6 +331,7 @@ const char* Process_getCommand(const Process* this);
void Process_updateComm(Process* this, const char* comm);
void Process_updateCmdline(Process* this, const char* cmdline, int basenameStart, int basenameEnd);
void Process_updateExe(Process* this, const char* exe);
void Process_updateShortCwd(Process* this);

/* This function constructs the string that is displayed by
* Process_writeCommand and also returned by Process_getCommand */
Expand Down
2 changes: 2 additions & 0 deletions ProcessTable.c
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,8 @@ static void ProcessTable_cleanupEntries(Table* super) {

// tidy up Process state after refreshing the ProcessTable table
Process_makeCommandStr(p, settings);
if (p->super.host->settings->ss->flags & PROCESS_FLAG_CWD)
Process_updateShortCwd(p);

// keep track of the highest UID for column scaling
if (p->st_uid > host->maxUserId)
Expand Down
81 changes: 81 additions & 0 deletions XUtils.c
Original file line number Diff line number Diff line change
Expand Up @@ -402,3 +402,84 @@ unsigned int countTrailingZeros(unsigned int x) {
return mod37BitPosition[(-x & x) % 37];
}
#endif

wchar_t* xMbstowcs(const char *mbs) {
size_t len = strlen(mbs);
wchar_t *wcs = xCalloc(len + 1, sizeof(wchar_t));
mbstate_t mbstate = {0};
mbsrtowcs(wcs, &mbs, len + 1, &mbstate);
if (mbs)
fail();
return wcs;
}

char* xWcstombs(const wchar_t *wcs) {
size_t len = wcslen(wcs);
if (SIZE_MAX / MB_CUR_MAX <= len)
fail();
size_t mbsSize = len * MB_CUR_MAX + 1;
char *mbs = xMalloc(mbsSize);
mbstate_t mbstate = {0};
wcsrtombs(mbs, &wcs, mbsSize, &mbstate);
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this will fail for multi-byte string, where a sizable number of characters is more than 4 bytes. This may happen with strings containing many emoji or other characters that use more than 4 bytes for UTF-8 encoding.

Copy link
Copy Markdown
Contributor

@Explorer09 Explorer09 Jan 19, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@BenBE For your info, emoji sequences are considered multiple characters in Unicode. It is only that the display of the characters would look combined together.

The real problem is we can't ensure the sizeof(wchar_t) is at least 4. For (hypothetical) systems that use 2 bytes for wchar_t (thus only be able to handle the Unicode BMP), this would surely overflow the mbs buffer.

The right element size to use is MB_CUR_MAX, not sizeof(wchar_t).

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@BenBE For your info, emoji sequences are considered multiple characters in Unicode. It is only that the display of the characters would look combined together.

I was aware of this (after all 🐈‍⬛ is f0 9f 90 88 e2 80 8d e2 ac 9b, which is 3 Unicode codepoints), but through a mix-up on my side I kinda had in mind that some of the non-BMP characters needed 5/6 bytes. Further fueled by MySQL, who had a utf8mb4 charset, which they added to support things beyond the BMP.

But …

The real problem is we can't ensure the sizeof(wchar_t) is at least 4. For (hypothetical) systems that use 2 bytes for wchar_t (thus only be able to handle the Unicode BMP), this would surely overflow the mbs buffer.

The right element size to use is MB_CUR_MAX, not sizeof(wchar_t).

Exactly. Or encode twice and use the first encoding to query how much space is actually needed.

if (wcs)
fail();
return mbs;
}

wchar_t* xWcsdup(const wchar_t* str) {
wchar_t* data = wcsdup(str);
if (!data)
fail();
return data;
}

wchar_t* xWcsndup(const wchar_t* str, size_t len) {
wchar_t* data = xCalloc(len + 1, sizeof(wchar_t));
if (!data)
fail();

Wstring_safeWcsncpy(data, str, len + 1);

return data;
}

wchar_t** Wstring_split(const wchar_t* s, wchar_t sep, size_t* n) {
const unsigned int rate = 10;
wchar_t** out = xCalloc(rate, sizeof(wchar_t*));
size_t ctr = 0;
unsigned int blocks = rate;
const wchar_t* where;
while ((where = wcschr(s, sep)) != NULL) {
size_t size = (size_t)(where - s);
out[ctr] = xWcsndup(s, size);
ctr++;
if (ctr == blocks) {
blocks += rate;
out = xReallocArray(out, blocks, sizeof(wchar_t*));
}
s += size + 1;
}
if (s[0] != L'\0') {
out[ctr] = xWcsdup(s);
ctr++;
}
out = xRealloc(out, sizeof(wchar_t*) * (ctr + 1));
out[ctr] = NULL;

if (n)
*n = ctr;

return out;
}

size_t Wstring_safeWcsncpy(wchar_t* restrict dest, const wchar_t* restrict src, size_t size) {
assert(size > 0);

size_t i = 0;
for (; i < size - 1 && src[i]; i++)
dest[i] = src[i];

dest[i] = L'\0';

return i;
}
7 changes: 7 additions & 0 deletions XUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -174,4 +174,11 @@ static inline int xDirfd(DIR* dirp) {
return r;
}

wchar_t* xMbstowcs(const char *mbs);
char* xWcstombs(const wchar_t *wcs);
wchar_t* xWcsdup(const wchar_t* str);
wchar_t* xWcsndup(const wchar_t* str, size_t len);
wchar_t** Wstring_split(const wchar_t* s, wchar_t sep, size_t* n);
size_t Wstring_safeWcsncpy(wchar_t* restrict dest, const wchar_t* restrict src, size_t size);

#endif