Closed
Conversation
Member
|
What do you think about this alternate implementation that uses only 1 allocation for all the fields? (It passes the test case) // SPDX-License-Identifier: MIT
// Copyright (c) 2015-2021 Zig Contributors
// This file is part of [zig](https://ziglang.org/), which is MIT licensed.
// The MIT license requires this copyright notice to be included in all copies
// and substantial portions of the software.
const std = @import("std.zig");
const assert = std.debug.assert;
const meta = std.meta;
const mem = std.mem;
const Allocator = mem.Allocator;
pub fn MultiArrayList(comptime S: type) type {
return struct {
const Self = @This();
pub const Field = meta.FieldEnum(S);
bytes: [*]align(@alignOf(S)) u8 = undefined,
len: usize = 0,
capacity: usize = 0,
const fields = meta.fields(S);
/// `sizes.bytes` is an array of @sizeOf each S field. Sorted by alignment, descending.
/// `sizes.indexes` is an array mapping from field to its index in the `sizes.bytes` array.
const sizes = blk: {
const Data = struct {
size: usize,
size_index: usize,
alignment: usize,
};
var data: [fields.len]Data = undefined;
for (fields) |field_info, i| {
data[i] = .{
.size = @sizeOf(field_info.field_type),
.size_index = i,
.alignment = field_info.alignment,
};
}
const Sort = struct {
fn lessThan(trash: *i32, lhs: Data, rhs: Data) bool {
return lhs.alignment >= rhs.alignment;
}
};
var trash: i32 = undefined; // workaround for stage1 compiler bug
std.sort.sort(Data, &data, &trash, Sort.lessThan);
var sizes_bytes: [fields.len]usize = undefined;
var sizes_indexes: [fields.len]usize = undefined;
for (data) |elem, i| {
sizes_bytes[i] = elem.size;
sizes_indexes[elem.size_index] = i;
}
break :blk .{
.bytes = sizes_bytes,
.indexes = sizes_indexes,
};
};
/// Release all allocated memory.
pub fn deinit(self: *Self, gpa: *Allocator) void {
gpa.free(self.allocatedBytes());
self.* = undefined;
}
pub fn items(self: *Self, comptime field: Field) []FieldType(field) {
const T = FieldType(field);
var offset: usize = 0;
const size_index = sizes.indexes[@enumToInt(field)];
for (sizes.bytes[0..size_index]) |field_size| {
offset += field_size * self.capacity;
}
const ptr = @ptrCast([*]T, @alignCast(@alignOf(T), self.bytes + offset));
return ptr[0..self.len];
}
/// Extend the list by 1 element. Allocates more memory as necessary.
pub fn append(self: *Self, gpa: *Allocator, elem: S) !void {
try self.ensureCapacity(gpa, self.len + 1);
self.appendAssumeCapacity(elem);
}
/// Extend the list by 1 element, but asserting `self.capacity`
/// is sufficient to hold an additional item.
pub fn appendAssumeCapacity(self: *Self, elem: S) void {
assert(self.len < self.capacity);
self.len += 1;
inline for (fields) |field_info, i| {
self.items(@intToEnum(Field, i))[self.len - 1] = @field(elem, field_info.name);
}
}
/// Adjust the list's length to `new_len`.
/// Does not initialize added items, if any.
pub fn resize(self: *Self, gpa: *Allocator, new_len: usize) !void {
try self.ensureCapacity(gpa, new_len);
self.items.len = new_len;
}
/// Attempt to reduce allocated capacity to `new_len`.
/// If `new_len` is greater than zero, this may fail to reduce the capacity,
/// but the data remains intact and the length is updated to new_len.
pub fn shrinkAndFree(self: *Self, gpa: *Allocator, new_len: usize) void {
if (new_len == 0) {
gpa.free(self.allocatedBytes());
self.* = .{};
return;
}
assert(new_len <= self.capacity);
assert(new_len <= self.len);
var other = Self{};
other.setCapacity(gpa, new_len) catch {
self.len = new_len;
// TODO memset the invalidated items to undefined
return;
};
other.len = new_len;
inline for (fields) |field_info, i| {
const field = @intToEnum(Field, i);
mem.copy(field_info.field_type, other.items(field), self.items(field));
}
gpa.free(self.allocatedBytes());
self.* = other;
}
/// Reduce length to `new_len`.
/// Invalidates pointers to elements `items[new_len..]`.
/// Keeps capacity the same.
pub fn shrinkRetainingCapacity(self: *Self, new_len: usize) void {
self.len = new_len;
}
/// Modify the array so that it can hold at least `new_capacity` items.
/// Implements super-linear growth to achieve amortized O(1) append operations.
/// Invalidates pointers if additional memory is needed.
pub fn ensureCapacity(self: *Self, gpa: *Allocator, new_capacity: usize) !void {
var better_capacity = self.capacity;
if (better_capacity >= new_capacity) return;
while (true) {
better_capacity += better_capacity / 2 + 8;
if (better_capacity >= new_capacity) break;
}
return self.setCapacity(gpa, better_capacity);
}
/// Modify the array so that it can hold exactly `new_capacity` items.
/// Invalidates pointers if additional memory is needed.
pub fn setCapacity(self: *Self, gpa: *Allocator, new_capacity: usize) !void {
const bytes = try gpa.reallocAdvanced(
self.allocatedBytes(),
@alignOf(S),
capacityInBytes(new_capacity),
.exact,
);
self.bytes = bytes.ptr;
self.capacity = new_capacity;
}
fn capacityInBytes(capacity: usize) usize {
// TODO this looks like a job for SIMD (need that @reduce builtin tho)
var bytes_count: usize = 0;
for (sizes.bytes) |field_size| {
bytes_count += capacity * field_size;
}
return bytes_count;
}
fn allocatedBytes(self: Self) []align(@alignOf(S)) u8 {
return self.bytes[0..capacityInBytes(self.capacity)];
}
fn FieldType(field: Field) type {
return meta.fieldInfo(S, field).field_type;
}
};
}
test "basic usage" {
const testing = std.testing;
const ally = testing.allocator;
const Foo = struct {
a: u32,
b: []const u8,
c: u8,
};
var list = MultiArrayList(Foo){};
defer list.deinit(ally);
try list.ensureCapacity(ally, 2);
list.appendAssumeCapacity(.{
.a = 1,
.b = "foobar",
.c = 'a',
});
list.appendAssumeCapacity(.{
.a = 2,
.b = "zigzag",
.c = 'b',
});
testing.expectEqualSlices(u32, list.items(.a), &[_]u32{ 1, 2 });
testing.expectEqualSlices(u8, list.items(.c), &[_]u8{ 'a', 'b' });
testing.expectEqual(@as(usize, 2), list.items(.b).len);
testing.expectEqualStrings("foobar", list.items(.b)[0]);
testing.expectEqualStrings("zigzag", list.items(.b)[1]);
try list.append(ally, .{
.a = 3,
.b = "fizzbuzz",
.c = 'c',
});
testing.expectEqualSlices(u32, list.items(.a), &[_]u32{ 1, 2, 3 });
testing.expectEqualSlices(u8, list.items(.c), &[_]u8{ 'a', 'b', 'c' });
testing.expectEqual(@as(usize, 3), list.items(.b).len);
testing.expectEqualStrings("foobar", list.items(.b)[0]);
testing.expectEqualStrings("zigzag", list.items(.b)[1]);
testing.expectEqualStrings("fizzbuzz", list.items(.b)[2]);
} |
Member
|
@ifreund you might want to ping me on IRC before you do any more work on this because I started adding more functionality to my fork of this and using it locally and I think you might like it |
andrewrk
added a commit
that referenced
this pull request
Jan 31, 2021
Also known as "Struct-Of-Arrays" or "SOA". The purpose of this data structure is to provide a similar API to ArrayList but instead of the element type being a struct, the fields of the struct are in N different arrays, all with the same length and capacity. Having this abstraction means we can put them in the same allocation, avoiding overhead with the allocator. It also saves a tiny bit of overhead from the redundant capacity and length fields, since each struct element shares the same value. This is an alternate implementation to #7854.
058f687 to
9eb7f4f
Compare
9eb7f4f to
d9c1ce4
Compare
Member
|
I think the next step here is to swap out the master branch implementation with this one, and run some benchmarks to see which one has better perf / peak memory usage. The main difference is single allocation vs one allocation per field. Depending on the results of those benchmarks, we will know which implementation to keep. |
dgbuckley
pushed a commit
to dgbuckley/zig
that referenced
this pull request
Mar 9, 2021
Also known as "Struct-Of-Arrays" or "SOA". The purpose of this data structure is to provide a similar API to ArrayList but instead of the element type being a struct, the fields of the struct are in N different arrays, all with the same length and capacity. Having this abstraction means we can put them in the same allocation, avoiding overhead with the allocator. It also saves a tiny bit of overhead from the redundant capacity and length fields, since each struct element shares the same value. This is an alternate implementation to ziglang#7854.
Member
|
Extracted into #8415. |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit.This suggestion is invalid because no changes were made to the code.Suggestions cannot be applied while the pull request is closed.Suggestions cannot be applied while viewing a subset of changes.Only one suggestion per line can be applied in a batch.Add this suggestion to a batch that can be applied as a single commit.Applying suggestions on deleted lines is not supported.You must change the existing code in this line in order to create a valid suggestion.Outdated suggestions cannot be applied.This suggestion has been applied or marked resolved.Suggestions cannot be applied from pending reviews.Suggestions cannot be applied on multi-line comments.Suggestions cannot be applied while the pull request is queued to merge.Suggestion cannot be applied right now. Please check back later.
This was requested on IRC by @andrewrk for use in the self hosted compiler. This is a draft PR as I'm not 100% settled on the API yet, which could also use expansion to support more operations similar to ArrayList.