From 161eee16c495398a166c862748f252a58d6ee93a Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Thu, 17 May 2018 01:56:20 +0900 Subject: [PATCH 01/19] Reduce PyGC_Head size --- Include/objimpl.h | 57 +++++----- Modules/gcmodule.c | 274 ++++++++++++++++++++++++++++++--------------- Objects/object.c | 4 +- 3 files changed, 213 insertions(+), 122 deletions(-) diff --git a/Include/objimpl.h b/Include/objimpl.h index 057bb50cbda9e2..6e714c43d6fd3f 100644 --- a/Include/objimpl.h +++ b/Include/objimpl.h @@ -251,9 +251,8 @@ PyAPI_FUNC(PyVarObject *) _PyObject_GC_Resize(PyVarObject *, Py_ssize_t); #ifndef Py_LIMITED_API typedef union _gc_head { struct { - union _gc_head *gc_next; - union _gc_head *gc_prev; - Py_ssize_t gc_refs; + union _gc_head *gc_next; // NULL means the object is not tracked + uintptr_t gc_prev; } gc; double dummy; /* force worst-case alignment */ } PyGC_Head; @@ -263,44 +262,39 @@ extern PyGC_Head *_PyGC_generation0; #define _Py_AS_GC(o) ((PyGC_Head *)(o)-1) /* Bit 0 is set when tp_finalize is called */ -#define _PyGC_REFS_MASK_FINALIZED (1 << 0) -/* The (N-1) most significant bits contain the gc state / refcount */ -#define _PyGC_REFS_SHIFT (1) -#define _PyGC_REFS_MASK (((size_t) -1) << _PyGC_REFS_SHIFT) - -#define _PyGCHead_REFS(g) ((g)->gc.gc_refs >> _PyGC_REFS_SHIFT) -#define _PyGCHead_SET_REFS(g, v) do { \ - (g)->gc.gc_refs = ((g)->gc.gc_refs & ~_PyGC_REFS_MASK) \ - | (((size_t)(v)) << _PyGC_REFS_SHIFT); \ +#define _PyGC_PREV_MASK_FINALIZED (1 << 0) +/* Bit 1 and 2 is used in gcmodule.c */ +/* The (N-3) most significant bits contain the real address. */ +#define _PyGC_PREV_SHIFT (3) +#define _PyGC_PREV_MASK (((uintptr_t) -1) << _PyGC_PREV_SHIFT) + +#define _PyGCHead_PREV(g) ((PyGC_Head*)((g)->gc.gc_prev & _PyGC_PREV_MASK)) +#define _PyGCHead_SET_PREV(g, p) do { \ + assert(((uintptr_t)p & ~_PyGC_PREV_MASK) == 0); \ + (g)->gc.gc_prev = ((g)->gc.gc_prev & ~_PyGC_PREV_MASK) \ + | ((uintptr_t)(p)); \ } while (0) -#define _PyGCHead_DECREF(g) ((g)->gc.gc_refs -= 1 << _PyGC_REFS_SHIFT) -#define _PyGCHead_FINALIZED(g) (((g)->gc.gc_refs & _PyGC_REFS_MASK_FINALIZED) != 0) +#define _PyGCHead_FINALIZED(g) (((g)->gc.gc_prev & _PyGC_PREV_MASK_FINALIZED) != 0) #define _PyGCHead_SET_FINALIZED(g, v) do { \ - (g)->gc.gc_refs = ((g)->gc.gc_refs & ~_PyGC_REFS_MASK_FINALIZED) \ + (g)->gc.gc_prev = ((g)->gc.gc_prev & ~_PyGC_PREV_MASK_FINALIZED) \ | (v != 0); \ } while (0) #define _PyGC_FINALIZED(o) _PyGCHead_FINALIZED(_Py_AS_GC(o)) #define _PyGC_SET_FINALIZED(o, v) _PyGCHead_SET_FINALIZED(_Py_AS_GC(o), v) -#define _PyGC_REFS(o) _PyGCHead_REFS(_Py_AS_GC(o)) - -#define _PyGC_REFS_UNTRACKED (-2) -#define _PyGC_REFS_REACHABLE (-3) -#define _PyGC_REFS_TENTATIVELY_UNREACHABLE (-4) - /* Tell the GC to track this object. NB: While the object is tracked the * collector it must be safe to call the ob_traverse method. */ #define _PyObject_GC_TRACK(o) do { \ PyGC_Head *g = _Py_AS_GC(o); \ - if (_PyGCHead_REFS(g) != _PyGC_REFS_UNTRACKED) \ + if (g->gc.gc_next != NULL) \ Py_FatalError("GC object already tracked"); \ - _PyGCHead_SET_REFS(g, _PyGC_REFS_REACHABLE); \ + assert((g->gc.gc_prev & 6) == 0); \ g->gc.gc_next = _PyGC_generation0; \ - g->gc.gc_prev = _PyGC_generation0->gc.gc_prev; \ - g->gc.gc_prev->gc.gc_next = g; \ - _PyGC_generation0->gc.gc_prev = g; \ + _PyGCHead_SET_PREV(g, _PyGC_generation0->gc.gc_prev); \ + _PyGCHead_PREV(_PyGC_generation0)->gc.gc_next = g; \ + _PyGC_generation0->gc.gc_prev = (uintptr_t)g; \ } while (0); /* Tell the GC to stop tracking this object. 
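The hunk above is the core of the change: the old three-word header (gc_next, gc_prev, gc_refs) shrinks to two words by storing the refcount copy and the flag bits in the low bits of gc_prev, which are free because PyGC_Head is aligned to at least 8 bytes (the double member forces worst-case alignment). A minimal self-contained sketch of this pointer-tagging idea — illustrative names only, not the CPython API:

#include <assert.h>
#include <stdint.h>

#define PREV_SHIFT 3                                /* low 3 bits carry flags */
#define PREV_MASK  (((uintptr_t)-1) << PREV_SHIFT)  /* high bits carry the address */

struct tagged_node {
    uintptr_t prev;  /* previous-node address plus flag bits */
};

static struct tagged_node *
tagged_prev(struct tagged_node *n)
{
    /* mask off the flag bits to recover the real pointer */
    return (struct tagged_node *)(n->prev & PREV_MASK);
}

static void
tagged_set_prev(struct tagged_node *n, struct tagged_node *p)
{
    assert(((uintptr_t)p & ~PREV_MASK) == 0);         /* p is 8-byte aligned */
    n->prev = (n->prev & ~PREV_MASK) | (uintptr_t)p;  /* keep the flag bits */
}

Tagging the prev pointer rather than the next pointer keeps gc_next clean for ordinary traversal, so iterating a generation list stays a plain pointer chase.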
@@ -309,16 +303,15 @@ extern PyGC_Head *_PyGC_generation0; */ #define _PyObject_GC_UNTRACK(o) do { \ PyGC_Head *g = _Py_AS_GC(o); \ - assert(_PyGCHead_REFS(g) != _PyGC_REFS_UNTRACKED); \ - _PyGCHead_SET_REFS(g, _PyGC_REFS_UNTRACKED); \ - g->gc.gc_prev->gc.gc_next = g->gc.gc_next; \ - g->gc.gc_next->gc.gc_prev = g->gc.gc_prev; \ + assert(g->gc.gc_next != NULL); \ + _PyGCHead_PREV(g)->gc.gc_next = g->gc.gc_next; \ + _PyGCHead_SET_PREV(g->gc.gc_next, _PyGCHead_PREV(g)); \ g->gc.gc_next = NULL; \ + g->gc.gc_prev &= _PyGC_PREV_MASK_FINALIZED; \ } while (0); /* True if the object is currently tracked by the GC. */ -#define _PyObject_GC_IS_TRACKED(o) \ - (_PyGC_REFS(o) != _PyGC_REFS_UNTRACKED) +#define _PyObject_GC_IS_TRACKED(o) (_Py_AS_GC(o)->gc.gc_next != NULL) /* True if the object may be tracked by the GC in the future, or already is. This can be useful to implement some optimizations. */ diff --git a/Modules/gcmodule.c b/Modules/gcmodule.c index 4d701cb72e8c5d..fb41279aa156cf 100644 --- a/Modules/gcmodule.c +++ b/Modules/gcmodule.c @@ -31,6 +31,57 @@ #include "pydtrace.h" #include "pytime.h" /* for _PyTime_GetMonotonicClock() */ +#define GC_DEBUG (0) /* More asserts */ + +// Bit 0 is used for _PyGC_PREV_MASK_FINALIZED in objimpl.h +#define MASK_COLLECTING (1 << 1) +#define MASK_TENTATIVELY_UNREACHABLE (1 << 2) + +#define IS_TRACKED(o) (_Py_AS_GC(o)->gc.gc_next != NULL) +#define IS_TENTATIVELY_UNREACHABLE(o) ( \ + (_Py_AS_GC(o)->gc.gc_prev & MASK_TENTATIVELY_UNREACHABLE) != 0) + +static inline int +gc_is_collecting(PyGC_Head *g) +{ + return (g->gc.gc_prev & MASK_COLLECTING) != 0; +} + +static inline Py_ssize_t +gc_get_refs(PyGC_Head *g) +{ + return (Py_ssize_t)(g->gc.gc_prev >> _PyGC_PREV_SHIFT); +} + +static inline void +gc_set_refs(PyGC_Head *g, Py_ssize_t v) +{ + g->gc.gc_prev = (g->gc.gc_prev & ~_PyGC_PREV_MASK) + | ((uintptr_t)(v) << _PyGC_PREV_SHIFT); +} + +static inline void +gc_reset_refs(PyGC_Head *g, Py_ssize_t v) +{ + g->gc.gc_prev = (g->gc.gc_prev & _PyGC_PREV_MASK_FINALIZED) + | MASK_COLLECTING + | ((uintptr_t)(v) << _PyGC_PREV_SHIFT); +} + +static inline void +gc_set_prev(PyGC_Head *g, PyGC_Head *v) +{ + g->gc.gc_prev = (g->gc.gc_prev & ~_PyGC_PREV_MASK) + | ((uintptr_t)(v) & _PyGC_PREV_MASK); +} + +static inline void +gc_decref(PyGC_Head *g) +{ + assert(gc_get_refs(g) > 0); + g->gc.gc_prev -= 1 << _PyGC_PREV_SHIFT; +} + /*[clinic input] module gc [clinic start generated code]*/ @@ -64,21 +115,23 @@ _PyGC_Initialize(struct _gc_runtime_state *state) #define _GEN_HEAD(n) (&state->generations[n].head) struct gc_generation generations[NUM_GENERATIONS] = { /* PyGC_Head, threshold, count */ - {{{_GEN_HEAD(0), _GEN_HEAD(0), 0}}, 700, 0}, - {{{_GEN_HEAD(1), _GEN_HEAD(1), 0}}, 10, 0}, - {{{_GEN_HEAD(2), _GEN_HEAD(2), 0}}, 10, 0}, + {{{_GEN_HEAD(0), (uintptr_t)_GEN_HEAD(0)}}, 700, 0}, + {{{_GEN_HEAD(1), (uintptr_t)_GEN_HEAD(1)}}, 10, 0}, + {{{_GEN_HEAD(2), (uintptr_t)_GEN_HEAD(2)}}, 10, 0}, }; for (int i = 0; i < NUM_GENERATIONS; i++) { state->generations[i] = generations[i]; }; state->generation0 = GEN_HEAD(0); struct gc_generation permanent_generation = { - {{&state->permanent_generation.head, &state->permanent_generation.head, 0}}, 0, 0 + {{&state->permanent_generation.head, (uintptr_t)&state->permanent_generation.head}}, 0, 0 }; state->permanent_generation = permanent_generation; } /*-------------------------------------------------------------------------- +TODO: Rewrite this section. + gc_refs values. 
Between collections, every gc'ed object has one of two gc_refs values: @@ -117,21 +170,13 @@ GC_TENTATIVELY_UNREACHABLE it has a __del__ method), its gc_refs is restored to GC_REACHABLE again. ---------------------------------------------------------------------------- */ -#define GC_UNTRACKED _PyGC_REFS_UNTRACKED -#define GC_REACHABLE _PyGC_REFS_REACHABLE -#define GC_TENTATIVELY_UNREACHABLE _PyGC_REFS_TENTATIVELY_UNREACHABLE - -#define IS_TRACKED(o) (_PyGC_REFS(o) != GC_UNTRACKED) -#define IS_REACHABLE(o) (_PyGC_REFS(o) == GC_REACHABLE) -#define IS_TENTATIVELY_UNREACHABLE(o) ( \ - _PyGC_REFS(o) == GC_TENTATIVELY_UNREACHABLE) /*** list functions ***/ static void gc_list_init(PyGC_Head *list) { - list->gc.gc_prev = list; + list->gc.gc_prev = (uintptr_t)list; list->gc.gc_next = list; } @@ -141,25 +186,23 @@ gc_list_is_empty(PyGC_Head *list) return (list->gc.gc_next == list); } -#if 0 -/* This became unused after gc_list_move() was introduced. */ /* Append `node` to `list`. */ static void gc_list_append(PyGC_Head *node, PyGC_Head *list) { node->gc.gc_next = list; - node->gc.gc_prev = list->gc.gc_prev; - node->gc.gc_prev->gc.gc_next = node; - list->gc.gc_prev = node; + _PyGCHead_SET_PREV(node, list->gc.gc_prev); + _PyGCHead_PREV(node)->gc.gc_next = node; + list->gc.gc_prev = (uintptr_t)node; } -#endif /* Remove `node` from the gc list it's currently in. */ static void gc_list_remove(PyGC_Head *node) { - node->gc.gc_prev->gc.gc_next = node->gc.gc_next; - node->gc.gc_next->gc.gc_prev = node->gc.gc_prev; + PyGC_Head *prev = _PyGCHead_PREV(node); + prev->gc.gc_next = node->gc.gc_next; + _PyGCHead_SET_PREV(node->gc.gc_next, prev); node->gc.gc_next = NULL; /* object is not currently tracked */ } @@ -170,15 +213,16 @@ gc_list_remove(PyGC_Head *node) static void gc_list_move(PyGC_Head *node, PyGC_Head *list) { - PyGC_Head *new_prev; - PyGC_Head *current_prev = node->gc.gc_prev; + PyGC_Head *current_prev = _PyGCHead_PREV(node); PyGC_Head *current_next = node->gc.gc_next; /* Unlink from current list. */ current_prev->gc.gc_next = current_next; - current_next->gc.gc_prev = current_prev; + _PyGCHead_SET_PREV(current_next, current_prev); /* Relink at end of new list. 
*/ - new_prev = node->gc.gc_prev = list->gc.gc_prev; - new_prev->gc.gc_next = list->gc.gc_prev = node; + PyGC_Head *new_prev = (PyGC_Head*)list->gc.gc_prev; + _PyGCHead_SET_PREV(node, new_prev); + new_prev->gc.gc_next = node; + list->gc.gc_prev = (uintptr_t)node; node->gc.gc_next = list; } @@ -186,14 +230,19 @@ gc_list_move(PyGC_Head *node, PyGC_Head *list) static void gc_list_merge(PyGC_Head *from, PyGC_Head *to) { - PyGC_Head *tail; assert(from != to); if (!gc_list_is_empty(from)) { - tail = to->gc.gc_prev; - tail->gc.gc_next = from->gc.gc_next; - tail->gc.gc_next->gc.gc_prev = tail; - to->gc.gc_prev = from->gc.gc_prev; - to->gc.gc_prev->gc.gc_next = to; + PyGC_Head *to_tail = _PyGCHead_PREV(to); + PyGC_Head *from_head = from->gc.gc_next; + PyGC_Head *from_tail = _PyGCHead_PREV(from); + assert(from_head != from); + assert(from_tail != from); + + to_tail->gc.gc_next = from_head; + _PyGCHead_SET_PREV(from_head, to_tail); + + from_tail->gc.gc_next = to; + to->gc.gc_prev = (uintptr_t)from_tail; } gc_list_init(from); } @@ -227,20 +276,38 @@ append_objects(PyObject *py_list, PyGC_Head *gc_list) return 0; } +#if GC_DEBUG +static void +validate_list(PyGC_Head *head, uintptr_t expected_mask) +{ + PyGC_Head *prev = head; + PyGC_Head *gc = head->gc.gc_next; + while (gc != head) { + assert(gc->gc.gc_next != NULL); + assert(_PyGCHead_PREV(gc) == prev); + assert((gc->gc.gc_prev & (MASK_COLLECTING | MASK_TENTATIVELY_UNREACHABLE)) + == expected_mask); + prev = gc; + gc = gc->gc.gc_next; + } + assert(prev == (PyGC_Head*)head->gc.gc_prev); +} +#else +#define validate_list(x,y) do{}while(0) +#endif + /*** end of list stuff ***/ /* Set all gc_refs = ob_refcnt. After this, gc_refs is > 0 for all objects - * in containers, and is GC_REACHABLE for all tracked gc objects not in - * containers. + * in containers. */ static void update_refs(PyGC_Head *containers) { PyGC_Head *gc = containers->gc.gc_next; for (; gc != containers; gc = gc->gc.gc_next) { - assert(_PyGCHead_REFS(gc) == GC_REACHABLE); - _PyGCHead_SET_REFS(gc, Py_REFCNT(FROM_GC(gc))); + gc_reset_refs(gc, Py_REFCNT(FROM_GC(gc))); /* Python's cyclic gc should never see an incoming refcount * of 0: if something decref'ed to 0, it should have been * deallocated immediately at that time. @@ -259,7 +326,7 @@ update_refs(PyGC_Head *containers) * so serious that maybe this should be a release-build * check instead of an assert? */ - assert(_PyGCHead_REFS(gc) != 0); + assert(gc_get_refs(gc) != 0); } } @@ -274,9 +341,9 @@ visit_decref(PyObject *op, void *data) * generation being collected, which can be recognized * because only they have positive gc_refs. */ - assert(_PyGCHead_REFS(gc) != 0); /* else refcount was too small */ - if (_PyGCHead_REFS(gc) > 0) - _PyGCHead_DECREF(gc); + if (gc_is_collecting(gc)) { + gc_decref(gc); + } } return 0; } @@ -305,39 +372,37 @@ visit_reachable(PyObject *op, PyGC_Head *reachable) { if (PyObject_IS_GC(op)) { PyGC_Head *gc = AS_GC(op); - const Py_ssize_t gc_refs = _PyGCHead_REFS(gc); + const Py_ssize_t gc_refs = gc_get_refs(gc); - if (gc_refs == 0) { - /* This is in move_unreachable's 'young' list, but - * the traversal hasn't yet gotten to it. All - * we need to do is tell move_unreachable that it's - * reachable. - */ - _PyGCHead_SET_REFS(gc, 1); + if (gc->gc.gc_next == NULL || !gc_is_collecting(gc)) { + return 0; } - else if (gc_refs == GC_TENTATIVELY_UNREACHABLE) { + if (gc->gc.gc_prev & MASK_TENTATIVELY_UNREACHABLE) { /* This had gc_refs = 0 when move_unreachable got * to it, but turns out it's reachable after all. 
* Move it back to move_unreachable's 'young' list, * and move_unreachable will eventually get to it * again. */ + gc->gc.gc_prev &= ~MASK_TENTATIVELY_UNREACHABLE; gc_list_move(gc, reachable); - _PyGCHead_SET_REFS(gc, 1); + gc_set_refs(gc, 1); + } + else if (gc_refs == 0) { + /* This is in move_unreachable's 'young' list, but + * the traversal hasn't yet gotten to it. All + * we need to do is tell move_unreachable that it's + * reachable. + */ + gc_set_refs(gc, 1); } /* Else there's nothing to do. * If gc_refs > 0, it must be in move_unreachable's 'young' * list, and move_unreachable will eventually get to it. - * If gc_refs == GC_REACHABLE, it's either in some other - * generation so we don't care about it, or move_unreachable - * already dealt with it. - * If gc_refs == GC_UNTRACKED, it must be ignored. */ - else { - assert(gc_refs > 0 - || gc_refs == GC_REACHABLE - || gc_refs == GC_UNTRACKED); - } + else { + assert(gc_refs > 0); + } } return 0; } @@ -353,6 +418,7 @@ visit_reachable(PyObject *op, PyGC_Head *reachable) static void move_unreachable(PyGC_Head *young, PyGC_Head *unreachable) { + PyGC_Head *prev = young; PyGC_Head *gc = young->gc.gc_next; /* Invariants: all objects "to the left" of us in young have gc_refs @@ -367,7 +433,7 @@ move_unreachable(PyGC_Head *young, PyGC_Head *unreachable) while (gc != young) { PyGC_Head *next; - if (_PyGCHead_REFS(gc)) { + if (gc_get_refs(gc)) { /* gc is definitely reachable from outside the * original 'young'. Mark it as such, and traverse * its pointers to find any other objects that may @@ -378,15 +444,14 @@ move_unreachable(PyGC_Head *young, PyGC_Head *unreachable) */ PyObject *op = FROM_GC(gc); traverseproc traverse = Py_TYPE(op)->tp_traverse; - assert(_PyGCHead_REFS(gc) > 0); - _PyGCHead_SET_REFS(gc, GC_REACHABLE); + assert(gc_get_refs(gc) > 0); (void) traverse(op, - (visitproc)visit_reachable, - (void *)young); + (visitproc)visit_reachable, + (void *)young); + gc_set_prev(gc, prev); + gc->gc.gc_prev &= ~MASK_COLLECTING; + prev = gc; next = gc->gc.gc_next; - if (PyTuple_CheckExact(op)) { - _PyTuple_MaybeUntrack(op); - } } else { /* This *may* be unreachable. To make progress, @@ -396,9 +461,24 @@ move_unreachable(PyGC_Head *young, PyGC_Head *unreachable) * visit_reachable will eventually move gc back into * young if that's so, and we'll see it again. 
*/ - next = gc->gc.gc_next; - gc_list_move(gc, unreachable); - _PyGCHead_SET_REFS(gc, GC_TENTATIVELY_UNREACHABLE); + gc->gc.gc_prev |= MASK_TENTATIVELY_UNREACHABLE; + prev->gc.gc_next = next = gc->gc.gc_next; + gc_list_append(gc, unreachable); + } + gc = next; + } + young->gc.gc_prev = (uintptr_t)prev; +} + +static void +untrack_tuples(PyGC_Head *head) +{ + PyGC_Head *next, *gc = head->gc.gc_next; + while (gc != head) { + PyObject *op = FROM_GC(gc); + next = gc->gc.gc_next; + if (PyTuple_CheckExact(op)) { + _PyTuple_MaybeUntrack(op); } gc = next; } @@ -446,7 +526,7 @@ move_legacy_finalizers(PyGC_Head *unreachable, PyGC_Head *finalizers) if (has_legacy_finalizer(op)) { gc_list_move(gc, finalizers); - _PyGCHead_SET_REFS(gc, GC_REACHABLE); + gc->gc.gc_prev &= ~(MASK_TENTATIVELY_UNREACHABLE | MASK_COLLECTING); } } } @@ -459,7 +539,7 @@ visit_move(PyObject *op, PyGC_Head *tolist) if (IS_TENTATIVELY_UNREACHABLE(op)) { PyGC_Head *gc = AS_GC(op); gc_list_move(gc, tolist); - _PyGCHead_SET_REFS(gc, GC_REACHABLE); + gc->gc.gc_prev &= ~(MASK_TENTATIVELY_UNREACHABLE | MASK_COLLECTING); } } return 0; @@ -574,7 +654,6 @@ handle_weakrefs(PyGC_Head *unreachable, PyGC_Head *old) */ if (IS_TENTATIVELY_UNREACHABLE(wr)) continue; - assert(IS_REACHABLE(wr)); /* Create a new reference so that wr can't go away * before we can process it again. @@ -599,7 +678,6 @@ handle_weakrefs(PyGC_Head *unreachable, PyGC_Head *old) gc = wrcb_to_call.gc.gc_next; op = FROM_GC(gc); - assert(IS_REACHABLE(op)); assert(PyWeakref_Check(op)); wr = (PyWeakReference *)op; callback = wr->wr_callback; @@ -713,20 +791,26 @@ finalize_garbage(PyGC_Head *collectable) static int check_garbage(PyGC_Head *collectable) { + int ret = 0; PyGC_Head *gc; for (gc = collectable->gc.gc_next; gc != collectable; gc = gc->gc.gc_next) { - _PyGCHead_SET_REFS(gc, Py_REFCNT(FROM_GC(gc))); - assert(_PyGCHead_REFS(gc) != 0); + gc_set_refs(gc, Py_REFCNT(FROM_GC(gc))); + assert(gc_get_refs(gc) != 0); } subtract_refs(collectable); + PyGC_Head *prev = collectable; for (gc = collectable->gc.gc_next; gc != collectable; gc = gc->gc.gc_next) { - assert(_PyGCHead_REFS(gc) >= 0); - if (_PyGCHead_REFS(gc) != 0) - return -1; + assert(gc_get_refs(gc) >= 0); + if (gc_get_refs(gc) != 0) { + ret = -1; + } + _PyGCHead_SET_PREV(gc, prev); + gc->gc.gc_prev &= ~(MASK_TENTATIVELY_UNREACHABLE | MASK_COLLECTING); + prev = gc; } - return 0; + return ret; } static void @@ -735,7 +819,7 @@ revive_garbage(PyGC_Head *collectable) PyGC_Head *gc; for (gc = collectable->gc.gc_next; gc != collectable; gc = gc->gc.gc_next) { - _PyGCHead_SET_REFS(gc, GC_REACHABLE); + gc->gc.gc_prev &= ~(MASK_TENTATIVELY_UNREACHABLE | MASK_COLLECTING); } } @@ -752,6 +836,8 @@ delete_garbage(PyGC_Head *collectable, PyGC_Head *old) PyGC_Head *gc = collectable->gc.gc_next; PyObject *op = FROM_GC(gc); + assert(Py_REFCNT(FROM_GC(gc)) > 0); + if (_PyRuntime.gc.debug & DEBUG_SAVEALL) { PyList_Append(_PyRuntime.gc.garbage, op); } @@ -765,7 +851,6 @@ delete_garbage(PyGC_Head *collectable, PyGC_Head *old) if (collectable->gc.gc_next == gc) { /* object is still alive, move it, it may die later */ gc_list_move(gc, old); - _PyGCHead_SET_REFS(gc, GC_REACHABLE); } } } @@ -844,12 +929,14 @@ collect(int generation, Py_ssize_t *n_collected, Py_ssize_t *n_uncollectable, else old = young; + validate_list(young, 0); + validate_list(old, 0); /* Using ob_refcnt and gc_refs, calculate which objects in the * container set are reachable from outside the set (i.e., have a * refcount greater than 0 when all the references within the * set 
are taken into account). */ - update_refs(young); + update_refs(young); // gc_prev is used for gc_refs subtract_refs(young); /* Leave everything reachable from outside young in young, and move @@ -859,8 +946,11 @@ collect(int generation, Py_ssize_t *n_collected, Py_ssize_t *n_uncollectable, * so it's more efficient to move the unreachable things. */ gc_list_init(&unreachable); - move_unreachable(young, &unreachable); + move_unreachable(young, &unreachable); // gc_prev is pointer again + validate_list(young, 0); + validate_list(&unreachable, MASK_COLLECTING | MASK_TENTATIVELY_UNREACHABLE); + untrack_tuples(young); /* Move reachable objects to next generation. */ if (young != old) { if (generation == NUM_GENERATIONS - 2) { @@ -887,6 +977,9 @@ collect(int generation, Py_ssize_t *n_collected, Py_ssize_t *n_uncollectable, */ move_legacy_finalizer_reachable(&finalizers); + validate_list(&finalizers, 0); + validate_list(&unreachable, MASK_COLLECTING | MASK_TENTATIVELY_UNREACHABLE); + /* Collect statistics on collectable objects found and print * debugging information. */ @@ -899,12 +992,15 @@ collect(int generation, Py_ssize_t *n_collected, Py_ssize_t *n_uncollectable, } /* Clear weakrefs and invoke callbacks as necessary. */ - m += handle_weakrefs(&unreachable, old); + m += handle_weakrefs(&unreachable, old); // clears masks + + validate_list(old, 0); + validate_list(&unreachable, MASK_COLLECTING | MASK_TENTATIVELY_UNREACHABLE); /* Call tp_finalize on objects which have one. */ finalize_garbage(&unreachable); - if (check_garbage(&unreachable)) { + if (check_garbage(&unreachable)) { // clears MASKs revive_garbage(&unreachable); gc_list_merge(&unreachable, old); } @@ -943,6 +1039,7 @@ collect(int generation, Py_ssize_t *n_collected, Py_ssize_t *n_uncollectable, * this if they insist on creating this type of structure. 
*/ handle_legacy_finalizers(&finalizers, old); + validate_list(old, 0); /* Clear free list only during the collection of the highest * generation */ @@ -1696,8 +1793,8 @@ _PyObject_GC_Alloc(int use_calloc, size_t basicsize) g = (PyGC_Head *)PyObject_Malloc(size); if (g == NULL) return PyErr_NoMemory(); - g->gc.gc_refs = 0; - _PyGCHead_SET_REFS(g, GC_UNTRACKED); + g->gc.gc_next = NULL; + g->gc.gc_prev = 0; _PyRuntime.gc.generations[0].count++; /* number of allocated GC objects */ if (_PyRuntime.gc.generations[0].count > _PyRuntime.gc.generations[0].threshold && _PyRuntime.gc.enabled && @@ -1754,6 +1851,7 @@ PyVarObject * _PyObject_GC_Resize(PyVarObject *op, Py_ssize_t nitems) { const size_t basicsize = _PyObject_VAR_SIZE(Py_TYPE(op), nitems); + assert(!_PyObject_GC_IS_TRACKED(op)); PyGC_Head *g = AS_GC(op); assert(!IS_TRACKED(op)); if (basicsize > PY_SSIZE_T_MAX - sizeof(PyGC_Head)) diff --git a/Objects/object.c b/Objects/object.c index 3eb4810bd9acb4..c3e0450f3756b6 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -2095,7 +2095,7 @@ _PyTrash_deposit_object(PyObject *op) assert(PyObject_IS_GC(op)); assert(!_PyObject_GC_IS_TRACKED(op)); assert(op->ob_refcnt == 0); - _Py_AS_GC(op)->gc.gc_prev = (PyGC_Head *)_PyRuntime.gc.trash_delete_later; + _PyGCHead_SET_PREV(_Py_AS_GC(op), _PyRuntime.gc.trash_delete_later); _PyRuntime.gc.trash_delete_later = op; } @@ -2107,7 +2107,7 @@ _PyTrash_thread_deposit_object(PyObject *op) assert(PyObject_IS_GC(op)); assert(!_PyObject_GC_IS_TRACKED(op)); assert(op->ob_refcnt == 0); - _Py_AS_GC(op)->gc.gc_prev = (PyGC_Head *) tstate->trash_delete_later; + _PyGCHead_SET_PREV(_Py_AS_GC(op), tstate->trash_delete_later); tstate->trash_delete_later = op; } From 561e2d55b1d9d3ab96679e69057841681c879811 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Wed, 23 May 2018 00:24:00 +0900 Subject: [PATCH 02/19] Update comments --- Modules/gcmodule.c | 47 +++++++++++++++++++--------------------------- 1 file changed, 19 insertions(+), 28 deletions(-) diff --git a/Modules/gcmodule.c b/Modules/gcmodule.c index fb41279aa156cf..76ffb6965505e7 100644 --- a/Modules/gcmodule.c +++ b/Modules/gcmodule.c @@ -33,7 +33,7 @@ #define GC_DEBUG (0) /* More asserts */ -// Bit 0 is used for _PyGC_PREV_MASK_FINALIZED in objimpl.h +// Bit 0 of gc_prev is used for _PyGC_PREV_MASK_FINALIZED in objimpl.h #define MASK_COLLECTING (1 << 1) #define MASK_TENTATIVELY_UNREACHABLE (1 << 2) @@ -130,25 +130,17 @@ _PyGC_Initialize(struct _gc_runtime_state *state) } /*-------------------------------------------------------------------------- -TODO: Rewrite this section. +gc_prev values. -gc_refs values. +Between collections, gc_prev is used for doubly linked list. -Between collections, every gc'ed object has one of two gc_refs values: +Lowest three bits of gc_prev are used for flags. +MASK_COLLECTING and MASK_TENTATIVELY_UNREACHABLE are used only while collecting. -GC_UNTRACKED - The initial state; objects returned by PyObject_GC_Malloc are in this - state. The object doesn't live in any generation list, and its - tp_traverse slot must not be called. +During a collection, gc_prev is temporary used for gc_refs, and the gc list +is singly linked until gc_prev is restored. -GC_REACHABLE - The object lives in some generation list, and its tp_traverse is safe to - call. An object transitions to GC_REACHABLE when PyObject_GC_Track - is called. 
- -During a collection, gc_refs can temporarily take on other states: - ->= 0 +gc_refs At the start of a collection, update_refs() copies the true refcount to gc_refs, for each object in the generation being collected. subtract_refs() then adjusts gc_refs so that it equals the number of @@ -156,18 +148,17 @@ During a collection, gc_refs can temporarily take on other states: being collected. gc_refs remains >= 0 throughout these steps. -GC_TENTATIVELY_UNREACHABLE +MASK_TENTATIVELY_UNREACHABLE move_unreachable() then moves objects not reachable (whether directly or - indirectly) from outside the generation into an "unreachable" set. - Objects that are found to be reachable have gc_refs set to GC_REACHABLE - again. Objects that are found to be unreachable have gc_refs set to - GC_TENTATIVELY_UNREACHABLE. It's "tentatively" because the pass doing - this can't be sure until it ends, and GC_TENTATIVELY_UNREACHABLE may - transition back to GC_REACHABLE. - - Only objects with GC_TENTATIVELY_UNREACHABLE still set are candidates - for collection. If it's decided not to collect such an object (e.g., - it has a __del__ method), its gc_refs is restored to GC_REACHABLE again. + indirectly) from outside the generation into an "unreachable" set and + set MASK_TENTATIVELY_UNREACHABLE flag. + + Objects that are found to be reachable have gc_refs set to 1. + When MASK_TENTATIVELY_UNREACHABLE flag is set for the reachable object, + the flag is unset and the object is moved back to "reachable" set. + + Only objects with MASK_TENTATIVELY_UNREACHABLE still set are candidates + for collection. ---------------------------------------------------------------------------- */ @@ -992,7 +983,7 @@ collect(int generation, Py_ssize_t *n_collected, Py_ssize_t *n_uncollectable, } /* Clear weakrefs and invoke callbacks as necessary. 
*/ - m += handle_weakrefs(&unreachable, old); // clears masks + m += handle_weakrefs(&unreachable, old); validate_list(old, 0); validate_list(&unreachable, MASK_COLLECTING | MASK_TENTATIVELY_UNREACHABLE); From 184841a44f3614cd10d46c36aa5ddd9f83c036d6 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Wed, 23 May 2018 00:26:04 +0900 Subject: [PATCH 03/19] Update _PyObject_GC_UNTRACK --- Include/objimpl.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Include/objimpl.h b/Include/objimpl.h index 6e714c43d6fd3f..cc93c21c60be5a 100644 --- a/Include/objimpl.h +++ b/Include/objimpl.h @@ -303,9 +303,10 @@ extern PyGC_Head *_PyGC_generation0; */ #define _PyObject_GC_UNTRACK(o) do { \ PyGC_Head *g = _Py_AS_GC(o); \ + PyGC_Head *prev = _PyGCHead_PREV(g); \ assert(g->gc.gc_next != NULL); \ - _PyGCHead_PREV(g)->gc.gc_next = g->gc.gc_next; \ - _PyGCHead_SET_PREV(g->gc.gc_next, _PyGCHead_PREV(g)); \ + prev->gc.gc_next = g->gc.gc_next; \ + _PyGCHead_SET_PREV(g->gc.gc_next, prev); \ g->gc.gc_next = NULL; \ g->gc.gc_prev &= _PyGC_PREV_MASK_FINALIZED; \ } while (0); From 16c66ca64fde4a086dcfbab441225210db3ae324 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Mon, 28 May 2018 21:17:34 +0900 Subject: [PATCH 04/19] Add NEWS entry --- .../Core and Builtins/2018-05-28-21-17-31.bpo-33597.r0ToM4.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2018-05-28-21-17-31.bpo-33597.r0ToM4.rst diff --git a/Misc/NEWS.d/next/Core and Builtins/2018-05-28-21-17-31.bpo-33597.r0ToM4.rst b/Misc/NEWS.d/next/Core and Builtins/2018-05-28-21-17-31.bpo-33597.r0ToM4.rst new file mode 100644 index 00000000000000..b6baab2526ada0 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2018-05-28-21-17-31.bpo-33597.r0ToM4.rst @@ -0,0 +1 @@ +Reduce ``PyGC_Head`` size from 3 words to 2 words. From cedc2e55ed7da6fc9476bcbd40a150cdcaefeb3f Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Wed, 30 May 2018 13:46:33 +0900 Subject: [PATCH 05/19] update --- Modules/gcmodule.c | 24 +++++++++--------------- 1 file changed, 9 insertions(+), 15 deletions(-) diff --git a/Modules/gcmodule.c b/Modules/gcmodule.c index 76ffb6965505e7..3b383867934856 100644 --- a/Modules/gcmodule.c +++ b/Modules/gcmodule.c @@ -47,6 +47,12 @@ gc_is_collecting(PyGC_Head *g) return (g->gc.gc_prev & MASK_COLLECTING) != 0; } +static inline void +gc_clear_masks(PyGC_Head *g) +{ + g->gc.gc_prev &= ~(MASK_COLLECTING | MASK_TENTATIVELY_UNREACHABLE); +} + static inline Py_ssize_t gc_get_refs(PyGC_Head *g) { @@ -146,7 +152,6 @@ gc_refs subtract_refs() then adjusts gc_refs so that it equals the number of times an object is referenced directly from outside the generation being collected. - gc_refs remains >= 0 throughout these steps. 
MASK_TENTATIVELY_UNREACHABLE move_unreachable() then moves objects not reachable (whether directly or @@ -517,7 +522,7 @@ move_legacy_finalizers(PyGC_Head *unreachable, PyGC_Head *finalizers) if (has_legacy_finalizer(op)) { gc_list_move(gc, finalizers); - gc->gc.gc_prev &= ~(MASK_TENTATIVELY_UNREACHABLE | MASK_COLLECTING); + gc_clear_masks(gc); } } } @@ -530,7 +535,7 @@ visit_move(PyObject *op, PyGC_Head *tolist) if (IS_TENTATIVELY_UNREACHABLE(op)) { PyGC_Head *gc = AS_GC(op); gc_list_move(gc, tolist); - gc->gc.gc_prev &= ~(MASK_TENTATIVELY_UNREACHABLE | MASK_COLLECTING); + gc_clear_masks(gc); } } return 0; @@ -798,22 +803,12 @@ check_garbage(PyGC_Head *collectable) ret = -1; } _PyGCHead_SET_PREV(gc, prev); - gc->gc.gc_prev &= ~(MASK_TENTATIVELY_UNREACHABLE | MASK_COLLECTING); + gc_clear_masks(gc); prev = gc; } return ret; } -static void -revive_garbage(PyGC_Head *collectable) -{ - PyGC_Head *gc; - for (gc = collectable->gc.gc_next; gc != collectable; - gc = gc->gc.gc_next) { - gc->gc.gc_prev &= ~(MASK_TENTATIVELY_UNREACHABLE | MASK_COLLECTING); - } -} - /* Break reference cycles by clearing the containers involved. This is * tricky business as the lists can be changing and we don't know which * objects may be freed. It is possible I screwed something up here. @@ -992,7 +987,6 @@ collect(int generation, Py_ssize_t *n_collected, Py_ssize_t *n_uncollectable, finalize_garbage(&unreachable); if (check_garbage(&unreachable)) { // clears MASKs - revive_garbage(&unreachable); gc_list_merge(&unreachable, old); } else { From 3b23da2615ce95a0cd2a5d2e0cbf0cb290b66fe3 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Wed, 30 May 2018 13:59:09 +0900 Subject: [PATCH 06/19] Simplify _PyGC_SET_FINALIZED --- Include/objimpl.h | 8 +++----- Modules/gcmodule.c | 2 +- Objects/object.c | 5 +++-- 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/Include/objimpl.h b/Include/objimpl.h index cc93c21c60be5a..a0e2d73c0641f6 100644 --- a/Include/objimpl.h +++ b/Include/objimpl.h @@ -261,6 +261,7 @@ extern PyGC_Head *_PyGC_generation0; #define _Py_AS_GC(o) ((PyGC_Head *)(o)-1) +/* Bit flags for gc_prev */ /* Bit 0 is set when tp_finalize is called */ #define _PyGC_PREV_MASK_FINALIZED (1 << 0) /* Bit 1 and 2 is used in gcmodule.c */ @@ -276,13 +277,10 @@ extern PyGC_Head *_PyGC_generation0; } while (0) #define _PyGCHead_FINALIZED(g) (((g)->gc.gc_prev & _PyGC_PREV_MASK_FINALIZED) != 0) -#define _PyGCHead_SET_FINALIZED(g, v) do { \ - (g)->gc.gc_prev = ((g)->gc.gc_prev & ~_PyGC_PREV_MASK_FINALIZED) \ - | (v != 0); \ - } while (0) +#define _PyGCHead_SET_FINALIZED(g) ((g)->gc.gc_prev |= _PyGC_PREV_MASK_FINALIZED) #define _PyGC_FINALIZED(o) _PyGCHead_FINALIZED(_Py_AS_GC(o)) -#define _PyGC_SET_FINALIZED(o, v) _PyGCHead_SET_FINALIZED(_Py_AS_GC(o), v) +#define _PyGC_SET_FINALIZED(o) _PyGCHead_SET_FINALIZED(_Py_AS_GC(o)) /* Tell the GC to track this object. NB: While the object is tracked the * collector it must be safe to call the ob_traverse method. 
*/ diff --git a/Modules/gcmodule.c b/Modules/gcmodule.c index 3b383867934856..e2f90e7866b86e 100644 --- a/Modules/gcmodule.c +++ b/Modules/gcmodule.c @@ -772,7 +772,7 @@ finalize_garbage(PyGC_Head *collectable) if (!_PyGCHead_FINALIZED(gc) && PyType_HasFeature(Py_TYPE(op), Py_TPFLAGS_HAVE_FINALIZE) && (finalize = Py_TYPE(op)->tp_finalize) != NULL) { - _PyGCHead_SET_FINALIZED(gc, 1); + _PyGCHead_SET_FINALIZED(gc); Py_INCREF(op); finalize(op); Py_DECREF(op); diff --git a/Objects/object.c b/Objects/object.c index c3e0450f3756b6..5f6c2161ec087d 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -284,8 +284,9 @@ PyObject_CallFinalizer(PyObject *self) return; tp->tp_finalize(self); - if (PyType_IS_GC(tp)) - _PyGC_SET_FINALIZED(self, 1); + if (PyType_IS_GC(tp)) { + _PyGC_SET_FINALIZED(self); + } } int From 59f05fa929ffd287e752b71e057dba20d329928f Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Sat, 2 Jun 2018 19:18:42 +0900 Subject: [PATCH 07/19] Update comment in objimpl.h --- Include/objimpl.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/Include/objimpl.h b/Include/objimpl.h index a0e2d73c0641f6..9d9ef7d281d096 100644 --- a/Include/objimpl.h +++ b/Include/objimpl.h @@ -263,8 +263,9 @@ extern PyGC_Head *_PyGC_generation0; /* Bit flags for gc_prev */ /* Bit 0 is set when tp_finalize is called */ -#define _PyGC_PREV_MASK_FINALIZED (1 << 0) +#define _PyGC_PREV_MASK_FINALIZED (1) /* Bit 1 and 2 is used in gcmodule.c */ +#define _PyGC_PREV_MASK_INTERNAL (2 | 4) /* The (N-3) most significant bits contain the real address. */ #define _PyGC_PREV_SHIFT (3) #define _PyGC_PREV_MASK (((uintptr_t) -1) << _PyGC_PREV_SHIFT) @@ -288,7 +289,7 @@ extern PyGC_Head *_PyGC_generation0; PyGC_Head *g = _Py_AS_GC(o); \ if (g->gc.gc_next != NULL) \ Py_FatalError("GC object already tracked"); \ - assert((g->gc.gc_prev & 6) == 0); \ + assert((g->gc.gc_prev & _PyGC_PREV_MASK_INTERNAL) == 0); \ g->gc.gc_next = _PyGC_generation0; \ _PyGCHead_SET_PREV(g, _PyGC_generation0->gc.gc_prev); \ _PyGCHead_PREV(_PyGC_generation0)->gc.gc_next = g; \ @@ -296,8 +297,8 @@ extern PyGC_Head *_PyGC_generation0; } while (0); /* Tell the GC to stop tracking this object. - * gc_next doesn't need to be set to NULL, but doing so is a good - * way to provoke memory errors if calling code is confused. + * NOTE: This may be called while GC. So _PyGC_PREV_MASK_INTERNAL must be + * cleared. 
*/ #define _PyObject_GC_UNTRACK(o) do { \ PyGC_Head *g = _Py_AS_GC(o); \ From 952a69d466258a951b9db1cfc22213f7eb1f0c3c Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Sat, 2 Jun 2018 19:37:20 +0900 Subject: [PATCH 08/19] Remove gc_set_prev() --- Modules/gcmodule.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/Modules/gcmodule.c b/Modules/gcmodule.c index e2f90e7866b86e..503e3dd5951d0a 100644 --- a/Modules/gcmodule.c +++ b/Modules/gcmodule.c @@ -74,13 +74,6 @@ gc_reset_refs(PyGC_Head *g, Py_ssize_t v) | ((uintptr_t)(v) << _PyGC_PREV_SHIFT); } -static inline void -gc_set_prev(PyGC_Head *g, PyGC_Head *v) -{ - g->gc.gc_prev = (g->gc.gc_prev & ~_PyGC_PREV_MASK) - | ((uintptr_t)(v) & _PyGC_PREV_MASK); -} - static inline void gc_decref(PyGC_Head *g) { @@ -444,7 +437,7 @@ move_unreachable(PyGC_Head *young, PyGC_Head *unreachable) (void) traverse(op, (visitproc)visit_reachable, (void *)young); - gc_set_prev(gc, prev); + _PyGCHead_SET_PREV(gc, prev); gc->gc.gc_prev &= ~MASK_COLLECTING; prev = gc; next = gc->gc.gc_next; From 6132b628afd0e5f66ec079b7450bb8de6ae9daae Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Sat, 2 Jun 2018 19:42:05 +0900 Subject: [PATCH 09/19] Update comment --- Modules/gcmodule.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/Modules/gcmodule.c b/Modules/gcmodule.c index 503e3dd5951d0a..6fa89426ba4064 100644 --- a/Modules/gcmodule.c +++ b/Modules/gcmodule.c @@ -134,7 +134,8 @@ gc_prev values. Between collections, gc_prev is used for doubly linked list. Lowest three bits of gc_prev are used for flags. -MASK_COLLECTING and MASK_TENTATIVELY_UNREACHABLE are used only while collecting. +MASK_COLLECTING and MASK_TENTATIVELY_UNREACHABLE are used only while +collecting and cleared before GC ends or _PyObject_GC_UNTRACK() is called. During a collection, gc_prev is temporary used for gc_refs, and the gc list is singly linked until gc_prev is restored. @@ -146,6 +147,10 @@ gc_refs times an object is referenced directly from outside the generation being collected. +MASK_COLLECTING + Objects in generation being collected are marked MASK_COLLECTING in + update_refs(). + MASK_TENTATIVELY_UNREACHABLE move_unreachable() then moves objects not reachable (whether directly or indirectly) from outside the generation into an "unreachable" set and From d86144367ca35fefd7c058b29ce7fe89bdfe99a5 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Sat, 2 Jun 2018 19:50:56 +0900 Subject: [PATCH 10/19] Add more comments --- Include/objimpl.h | 3 ++- Modules/gcmodule.c | 4 ++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/Include/objimpl.h b/Include/objimpl.h index 9d9ef7d281d096..8d7e621c47e61b 100644 --- a/Include/objimpl.h +++ b/Include/objimpl.h @@ -252,7 +252,8 @@ PyAPI_FUNC(PyVarObject *) _PyObject_GC_Resize(PyVarObject *, Py_ssize_t); typedef union _gc_head { struct { union _gc_head *gc_next; // NULL means the object is not tracked - uintptr_t gc_prev; + uintptr_t gc_prev; // Pointer to previous object in the list. + // Lowest three bits are used for flags. } gc; double dummy; /* force worst-case alignment */ } PyGC_Head; diff --git a/Modules/gcmodule.c b/Modules/gcmodule.c index 6fa89426ba4064..d7e1ac1a28cf6d 100644 --- a/Modules/gcmodule.c +++ b/Modules/gcmodule.c @@ -408,6 +408,9 @@ visit_reachable(PyObject *op, PyGC_Head *reachable) * All objects in young after this are directly or indirectly reachable * from outside the original young; and all objects in unreachable are * not. + * + * This function restores gc_prev pointer. 
young and unreachable are
+ * doubly linked lists after this function.
  */
 static void
 move_unreachable(PyGC_Head *young, PyGC_Head *unreachable)

From d83ef20644c0d125b4679052ebe8510c10a37230 Mon Sep 17 00:00:00 2001
From: Inada Naoki
Date: Sun, 3 Jun 2018 01:16:30 +0900
Subject: [PATCH 11/19] Add more comments, simplify move_unreachable

---
 Modules/gcmodule.c | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/Modules/gcmodule.c b/Modules/gcmodule.c
index d7e1ac1a28cf6d..ce00c862d7fe87 100644
--- a/Modules/gcmodule.c
+++ b/Modules/gcmodule.c
@@ -415,6 +415,7 @@ visit_reachable(PyObject *op, PyGC_Head *reachable)
 static void
 move_unreachable(PyGC_Head *young, PyGC_Head *unreachable)
 {
+    // previous elem in the young list, used to restore gc_prev.
     PyGC_Head *prev = young;
     PyGC_Head *gc = young->gc.gc_next;
 
@@ -428,8 +429,6 @@ move_unreachable(PyGC_Head *young, PyGC_Head *unreachable)
      */
 
     while (gc != young) {
-        PyGC_Head *next;
-
         if (gc_get_refs(gc)) {
            /* gc is definitely reachable from outside the
             * original 'young'.  Mark it as such, and traverse
@@ -442,14 +441,16 @@ move_unreachable(PyGC_Head *young, PyGC_Head *unreachable)
             PyObject *op = FROM_GC(gc);
             traverseproc traverse = Py_TYPE(op)->tp_traverse;
             assert(gc_get_refs(gc) > 0);
+            // NOTE: visit_reachable may change gc->gc.gc_next when
+            // young->gc.gc_prev == gc.
            (void) traverse(op,
                            (visitproc)visit_reachable,
                            (void *)young);
+            // relink gc_prev to the prev element.
             _PyGCHead_SET_PREV(gc, prev);
+            // gc is no longer in the COLLECTING state after here.
             gc->gc.gc_prev &= ~MASK_COLLECTING;
             prev = gc;
         }
         else {
            /* This *may* be unreachable.  To make progress,
@@ -460,11 +461,14 @@ move_unreachable(PyGC_Head *young, PyGC_Head *unreachable)
             * visit_reachable will eventually move gc back into
             * young if that's so, and we'll see it again.
             */
             gc->gc.gc_prev |= MASK_TENTATIVELY_UNREACHABLE;
-            prev->gc.gc_next = next = gc->gc.gc_next;
+            // Move gc to unreachable.
+            // No need to set gc->next->prev = prev, because the next
+            // pointers are only singly linked at this point.
+            prev->gc.gc_next = gc->gc.gc_next;
             gc_list_append(gc, unreachable);
         }
-        gc = next;
+        gc = prev->gc.gc_next;
     }
+    // young->gc.gc_prev must point to the last element remaining in the list.
     young->gc.gc_prev = (uintptr_t)prev;
 }
 
@@ -793,6 +797,7 @@ check_garbage(PyGC_Head *collectable)
     PyGC_Head *gc;
     for (gc = collectable->gc.gc_next; gc != collectable;
          gc = gc->gc.gc_next) {
+        // Reuse gc_prev for gc_refs, breaking the gc_prev pointers again.
         gc_set_refs(gc, Py_REFCNT(FROM_GC(gc)));
         assert(gc_get_refs(gc) != 0);
     }
@@ -804,6 +809,7 @@ check_garbage(PyGC_Head *collectable)
         assert(gc_get_refs(gc) >= 0);
         if (gc_get_refs(gc) != 0) {
             ret = -1;
         }
+        // Restore gc_prev here.
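+        // While collecting, gc_prev held gc_refs and the list was only
+        // singly linked, so the back pointers are rebuilt one node at a time.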
_PyGCHead_SET_PREV(gc, prev); gc_clear_masks(gc); prev = gc; @@ -1838,7 +1844,6 @@ PyVarObject * _PyObject_GC_Resize(PyVarObject *op, Py_ssize_t nitems) { const size_t basicsize = _PyObject_VAR_SIZE(Py_TYPE(op), nitems); - assert(!_PyObject_GC_IS_TRACKED(op)); PyGC_Head *g = AS_GC(op); assert(!IS_TRACKED(op)); if (basicsize > PY_SSIZE_T_MAX - sizeof(PyGC_Head)) From 7c2688f9dd6b775e2f08fc72048051fe214eac1d Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Tue, 19 Jun 2018 16:26:26 +0900 Subject: [PATCH 12/19] Small update comments --- Include/objimpl.h | 15 +++++++++++---- Modules/gcmodule.c | 1 + 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/Include/objimpl.h b/Include/objimpl.h index 8d7e621c47e61b..7e654bf332bf0b 100644 --- a/Include/objimpl.h +++ b/Include/objimpl.h @@ -284,8 +284,14 @@ extern PyGC_Head *_PyGC_generation0; #define _PyGC_FINALIZED(o) _PyGCHead_FINALIZED(_Py_AS_GC(o)) #define _PyGC_SET_FINALIZED(o) _PyGCHead_SET_FINALIZED(_Py_AS_GC(o)) -/* Tell the GC to track this object. NB: While the object is tracked the - * collector it must be safe to call the ob_traverse method. */ +/* Tell the GC to track this object. + * + * NB: While the object is tracked the collector it must be safe to call the + * ob_traverse method. + * + * Internal note: _PyGC_generation0->gc.gc_prev doesn't have any bit flags + * because it's not object header. Wo we skip using _PyGCHead_SET_PREV() here. + */ #define _PyObject_GC_TRACK(o) do { \ PyGC_Head *g = _Py_AS_GC(o); \ if (g->gc.gc_next != NULL) \ @@ -298,8 +304,9 @@ extern PyGC_Head *_PyGC_generation0; } while (0); /* Tell the GC to stop tracking this object. - * NOTE: This may be called while GC. So _PyGC_PREV_MASK_INTERNAL must be - * cleared. + * + * Internal note: This may be called while GC. So _PyGC_PREV_MASK_INTERNAL must + * be cleared. Only _PyGC_PREV_MASK_FINALIZED bit is kept. */ #define _PyObject_GC_UNTRACK(o) do { \ PyGC_Head *g = _Py_AS_GC(o); \ diff --git a/Modules/gcmodule.c b/Modules/gcmodule.c index ce00c862d7fe87..833c8d41056b9d 100644 --- a/Modules/gcmodule.c +++ b/Modules/gcmodule.c @@ -34,6 +34,7 @@ #define GC_DEBUG (0) /* More asserts */ // Bit 0 of gc_prev is used for _PyGC_PREV_MASK_FINALIZED in objimpl.h +// These two mask bits are used only between GC. #define MASK_COLLECTING (1 << 1) #define MASK_TENTATIVELY_UNREACHABLE (1 << 2) From 07329a8946ea575fb42585b1d7bab78ecb02e39a Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Wed, 20 Jun 2018 01:21:12 +0900 Subject: [PATCH 13/19] Fix comments --- Include/objimpl.h | 5 +++-- Modules/gcmodule.c | 7 +++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/Include/objimpl.h b/Include/objimpl.h index 7e654bf332bf0b..a73e17d9dd8b2e 100644 --- a/Include/objimpl.h +++ b/Include/objimpl.h @@ -286,11 +286,12 @@ extern PyGC_Head *_PyGC_generation0; /* Tell the GC to track this object. * - * NB: While the object is tracked the collector it must be safe to call the + * NB: While the object is tracked by the collector, it must be safe to call the * ob_traverse method. * * Internal note: _PyGC_generation0->gc.gc_prev doesn't have any bit flags - * because it's not object header. Wo we skip using _PyGCHead_SET_PREV() here. + * because it's not object header. That's why we don't use _PyGCHead_SET_PREV() + * for it. 
 */
 #define _PyObject_GC_TRACK(o) do { \
     PyGC_Head *g = _Py_AS_GC(o); \
diff --git a/Modules/gcmodule.c b/Modules/gcmodule.c
index 833c8d41056b9d..2319c357786892 100644
--- a/Modules/gcmodule.c
+++ b/Modules/gcmodule.c
@@ -38,7 +38,6 @@
 #define MASK_COLLECTING (1 << 1)
 #define MASK_TENTATIVELY_UNREACHABLE (1 << 2)
 
-#define IS_TRACKED(o) (_Py_AS_GC(o)->gc.gc_next != NULL)
 #define IS_TENTATIVELY_UNREACHABLE(o) ( \
     (_Py_AS_GC(o)->gc.gc_prev & MASK_TENTATIVELY_UNREACHABLE) != 0)
 
@@ -294,8 +293,8 @@
 
 /*** end of list stuff ***/
 
-/* Set all gc_refs = ob_refcnt.  After this, gc_refs is > 0 for all objects
- * in containers.
+/* Set all gc_refs = ob_refcnt.  After this, gc_refs is > 0 and
+ * MASK_COLLECTING bit is set for all objects in containers.
  */
 static void
 update_refs(PyGC_Head *containers)
@@ -1496,7 +1495,7 @@ gc_is_tracked(PyObject *module, PyObject *obj)
 {
     PyObject *result;
 
-    if (PyObject_IS_GC(obj) && IS_TRACKED(obj))
+    if (PyObject_IS_GC(obj) && _PyObject_GC_IS_TRACKED(obj))
         result = Py_True;
     else
         result = Py_False;

From 09360cc4286b8f204fd4e90764ea384cd905b053 Mon Sep 17 00:00:00 2001
From: INADA Naoki
Date: Wed, 20 Jun 2018 01:30:08 +0900
Subject: [PATCH 14/19] GC pointer must be aligned to 8 bytes

---
 Modules/gcmodule.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Modules/gcmodule.c b/Modules/gcmodule.c
index 2319c357786892..c449f35849f8fa 100644
--- a/Modules/gcmodule.c
+++ b/Modules/gcmodule.c
@@ -1786,6 +1786,7 @@ _PyObject_GC_Alloc(int use_calloc, size_t basicsize)
     g = (PyGC_Head *)PyObject_Malloc(size);
     if (g == NULL)
         return PyErr_NoMemory();
+    assert(((uintptr_t)g & 7) == 0);  // g must be aligned to an 8-byte boundary
     g->gc.gc_next = NULL;
     g->gc.gc_prev = 0;
     _PyRuntime.gc.generations[0].count++; /* number of allocated GC objects */

From df4f3743535a6c0814f6fd4b1757448bf9f1f497 Mon Sep 17 00:00:00 2001
From: INADA Naoki
Date: Wed, 20 Jun 2018 01:59:55 +0900
Subject: [PATCH 15/19] s/IS_TRACKED/_PyObject_GC_IS_TRACKED/

---
 Modules/gcmodule.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/Modules/gcmodule.c b/Modules/gcmodule.c
index c449f35849f8fa..7ed93887a5c5a9 100644
--- a/Modules/gcmodule.c
+++ b/Modules/gcmodule.c
@@ -1767,7 +1767,7 @@ PyObject_GC_UnTrack(void *op)
     /* Obscure:  the Py_TRASHCAN mechanism requires that we be able to
      * call PyObject_GC_UnTrack twice on an object.
 */
-    if (IS_TRACKED(op))
+    if (_PyObject_GC_IS_TRACKED(op))
         _PyObject_GC_UNTRACK(op);
 }
 
@@ -1846,7 +1846,7 @@ _PyObject_GC_Resize(PyVarObject *op, Py_ssize_t nitems)
 {
     const size_t basicsize = _PyObject_VAR_SIZE(Py_TYPE(op), nitems);
     PyGC_Head *g = AS_GC(op);
-    assert(!IS_TRACKED(op));
+    assert(!_PyObject_GC_IS_TRACKED(op));
     if (basicsize > PY_SSIZE_T_MAX - sizeof(PyGC_Head))
         return (PyVarObject *)PyErr_NoMemory();
     g = (PyGC_Head *)PyObject_REALLOC(g, sizeof(PyGC_Head) + basicsize);
@@ -1861,7 +1861,7 @@ void
 PyObject_GC_Del(void *op)
 {
     PyGC_Head *g = AS_GC(op);
-    if (IS_TRACKED(op))
+    if (_PyObject_GC_IS_TRACKED(op))
         gc_list_remove(g);
     if (_PyRuntime.gc.generations[0].count > 0) {
         _PyRuntime.gc.generations[0].count--;

From ac3d5a815b8d03a8e12d2978859b4160b9cc2675 Mon Sep 17 00:00:00 2001
From: INADA Naoki
Date: Wed, 27 Jun 2018 01:58:05 +0900
Subject: [PATCH 16/19] Move clinic header

---
 Modules/gcmodule.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/Modules/gcmodule.c b/Modules/gcmodule.c
index 7ed93887a5c5a9..e21189cff83b9a 100644
--- a/Modules/gcmodule.c
+++ b/Modules/gcmodule.c
@@ -31,6 +31,11 @@
 #include "pydtrace.h"
 #include "pytime.h" /* for _PyTime_GetMonotonicClock() */
 
+/*[clinic input]
+module gc
+[clinic start generated code]*/
+/*[clinic end generated code: output=da39a3ee5e6b4b0d input=b5c9690ecc842d79]*/
+
 #define GC_DEBUG (0) /* More asserts */
 
 // Bit 0 of gc_prev is used for _PyGC_PREV_MASK_FINALIZED in objimpl.h
@@ -81,11 +86,6 @@ gc_decref(PyGC_Head *g)
     g->gc.gc_prev -= 1 << _PyGC_PREV_SHIFT;
 }
 
-/*[clinic input]
-module gc
-[clinic start generated code]*/
-/*[clinic end generated code: output=da39a3ee5e6b4b0d input=b5c9690ecc842d79]*/
-
 /* Get an object's GC head */
 #define AS_GC(o) ((PyGC_Head *)(o)-1)

From f0c305ce934d962f73f26d13480c4c22034f7038 Mon Sep 17 00:00:00 2001
From: INADA Naoki
Date: Wed, 27 Jun 2018 14:46:26 +0900
Subject: [PATCH 17/19] Use only lowest two bits of gc_prev.

---
 Include/objimpl.h  |  76 ++++----
 Modules/gcmodule.c | 434 ++++++++++++++++++++++++++------------------
 Objects/object.c   |   6 +-
 3 files changed, 293 insertions(+), 223 deletions(-)

diff --git a/Include/objimpl.h b/Include/objimpl.h
index a73e17d9dd8b2e..ee6706cc762f2a 100644
--- a/Include/objimpl.h
+++ b/Include/objimpl.h
@@ -249,37 +249,44 @@ PyAPI_FUNC(PyVarObject *) _PyObject_GC_Resize(PyVarObject *, Py_ssize_t);
 /* GC information is stored BEFORE the object structure. */
 #ifndef Py_LIMITED_API
-typedef union _gc_head {
-    struct {
-        union _gc_head *gc_next; // NULL means the object is not tracked
-        uintptr_t gc_prev;       // Pointer to previous object in the list.
-                                 // Lowest three bits are used for flags.
-    } gc;
-    double dummy;  /* force worst-case alignment */
+typedef struct {
+    // Pointer to next object in the list.
+    // 0 means the object is not tracked.
+    uintptr_t _gc_next;
+
+    // Pointer to previous object in the list.
+    // Lowest two bits are used for flags documented later.
+    uintptr_t _gc_prev;
 } PyGC_Head;
 
 extern PyGC_Head *_PyGC_generation0;
 
 #define _Py_AS_GC(o) ((PyGC_Head *)(o)-1)
 
-/* Bit flags for gc_prev */
+/* Bit flags for _gc_prev */
 /* Bit 0 is set when tp_finalize is called */
 #define _PyGC_PREV_MASK_FINALIZED (1)
-/* Bit 1 and 2 is used in gcmodule.c */
-#define _PyGC_PREV_MASK_INTERNAL (2 | 4)
+/* Bit 1 is set when the object is in the generation that is currently being collected.
*/ +#define _PyGC_PREV_MASK_COLLECTING (2) +/* The (N-2) most significant bits contain the real address. */ +#define _PyGC_PREV_SHIFT (2) #define _PyGC_PREV_MASK (((uintptr_t) -1) << _PyGC_PREV_SHIFT) -#define _PyGCHead_PREV(g) ((PyGC_Head*)((g)->gc.gc_prev & _PyGC_PREV_MASK)) +// Lowest bit of _gc_next is used for flags only in GC. +// But it is always 0 for normal code. +#define _PyGCHead_NEXT(g) ((PyGC_Head*)(g)->_gc_next) +#define _PyGCHead_SET_NEXT(g, p) ((g)->_gc_next = (uintptr_t)(p)) + +// Lowest two bits of _gc_prev is used for flags described below. +#define _PyGCHead_PREV(g) ((PyGC_Head*)((g)->_gc_prev & _PyGC_PREV_MASK)) #define _PyGCHead_SET_PREV(g, p) do { \ assert(((uintptr_t)p & ~_PyGC_PREV_MASK) == 0); \ - (g)->gc.gc_prev = ((g)->gc.gc_prev & ~_PyGC_PREV_MASK) \ + (g)->_gc_prev = ((g)->_gc_prev & ~_PyGC_PREV_MASK) \ | ((uintptr_t)(p)); \ } while (0) -#define _PyGCHead_FINALIZED(g) (((g)->gc.gc_prev & _PyGC_PREV_MASK_FINALIZED) != 0) -#define _PyGCHead_SET_FINALIZED(g) ((g)->gc.gc_prev |= _PyGC_PREV_MASK_FINALIZED) +#define _PyGCHead_FINALIZED(g) (((g)->_gc_prev & _PyGC_PREV_MASK_FINALIZED) != 0) +#define _PyGCHead_SET_FINALIZED(g) ((g)->_gc_prev |= _PyGC_PREV_MASK_FINALIZED) #define _PyGC_FINALIZED(o) _PyGCHead_FINALIZED(_Py_AS_GC(o)) #define _PyGC_SET_FINALIZED(o) _PyGCHead_SET_FINALIZED(_Py_AS_GC(o)) @@ -289,38 +296,41 @@ extern PyGC_Head *_PyGC_generation0; * NB: While the object is tracked by the collector, it must be safe to call the * ob_traverse method. * - * Internal note: _PyGC_generation0->gc.gc_prev doesn't have any bit flags - * because it's not object header. That's why we don't use _PyGCHead_SET_PREV() - * for it. + * Internal note: _PyGC_generation0->_gc_prev doesn't have any bit flags + * because it's not object header. So we don't use _PyGCHead_PREV() and + * _PyGCHead_SET_PREV() for it to avoid unnecessary bitwise operations. */ #define _PyObject_GC_TRACK(o) do { \ PyGC_Head *g = _Py_AS_GC(o); \ - if (g->gc.gc_next != NULL) \ + if (g->_gc_next != 0) { \ Py_FatalError("GC object already tracked"); \ - assert((g->gc.gc_prev & _PyGC_PREV_MASK_INTERNAL) == 0); \ - g->gc.gc_next = _PyGC_generation0; \ - _PyGCHead_SET_PREV(g, _PyGC_generation0->gc.gc_prev); \ - _PyGCHead_PREV(_PyGC_generation0)->gc.gc_next = g; \ - _PyGC_generation0->gc.gc_prev = (uintptr_t)g; \ + } \ + assert((g->_gc_prev & _PyGC_PREV_MASK_COLLECTING) == 0); \ + PyGC_Head *last = (PyGC_Head*)(_PyGC_generation0->_gc_prev); \ + _PyGCHead_SET_NEXT(last, g); \ + _PyGCHead_SET_PREV(g, last); \ + _PyGCHead_SET_NEXT(g, _PyGC_generation0); \ + _PyGC_generation0->_gc_prev = (uintptr_t)g; \ } while (0); /* Tell the GC to stop tracking this object. * - * Internal note: This may be called while GC. So _PyGC_PREV_MASK_INTERNAL must - * be cleared. Only _PyGC_PREV_MASK_FINALIZED bit is kept. + * Internal note: This may be called while GC. So _PyGC_PREV_MASK_COLLECTING must + * be cleared. But _PyGC_PREV_MASK_FINALIZED bit is kept. */ #define _PyObject_GC_UNTRACK(o) do { \ PyGC_Head *g = _Py_AS_GC(o); \ PyGC_Head *prev = _PyGCHead_PREV(g); \ - assert(g->gc.gc_next != NULL); \ - prev->gc.gc_next = g->gc.gc_next; \ - _PyGCHead_SET_PREV(g->gc.gc_next, prev); \ - g->gc.gc_next = NULL; \ - g->gc.gc_prev &= _PyGC_PREV_MASK_FINALIZED; \ + PyGC_Head *next = _PyGCHead_NEXT(g); \ + assert(next != NULL); \ + _PyGCHead_SET_NEXT(prev, next); \ + _PyGCHead_SET_PREV(next, prev); \ + g->_gc_next = 0; \ + g->_gc_prev &= _PyGC_PREV_MASK_FINALIZED; \ } while (0); /* True if the object is currently tracked by the GC. 
 */
-#define _PyObject_GC_IS_TRACKED(o) (_Py_AS_GC(o)->gc.gc_next != NULL)
+#define _PyObject_GC_IS_TRACKED(o) (_Py_AS_GC(o)->_gc_next != 0)
 
 /* True if the object may be tracked by the GC in the future, or already is.
    This can be useful to implement some optimizations. */
diff --git a/Modules/gcmodule.c b/Modules/gcmodule.c
index e21189cff83b9a..e9a2db903b7a36 100644
--- a/Modules/gcmodule.c
+++ b/Modules/gcmodule.c
@@ -38,52 +38,62 @@ module gc
 
 #define GC_DEBUG (0) /* More asserts */
 
-// Bit 0 of gc_prev is used for _PyGC_PREV_MASK_FINALIZED in objimpl.h
-// These two mask bits are used only between GC.
-#define MASK_COLLECTING (1 << 1)
-#define MASK_TENTATIVELY_UNREACHABLE (1 << 2)
-
-#define IS_TENTATIVELY_UNREACHABLE(o) ( \
-    (_Py_AS_GC(o)->gc.gc_prev & MASK_TENTATIVELY_UNREACHABLE) != 0)
+#define GC_NEXT _PyGCHead_NEXT
+#define GC_PREV _PyGCHead_PREV
+
+// Bit 0 of _gc_prev is used for _PyGC_PREV_MASK_FINALIZED in objimpl.h
+#define PREV_MASK_COLLECTING _PyGC_PREV_MASK_COLLECTING
+
+// The lowest bit of _gc_next is used for the UNREACHABLE flag.
+//
+// This flag indicates that the object is in the unreachable list built by
+// move_unreachable().
+//
+// Although this flag is used only in move_unreachable(), move_unreachable()
+// doesn't clear it itself, to avoid an unnecessary extra iteration;
+// move_legacy_finalizers() removes the flag instead.
+// Between those two calls, the unreachable list is not a normal list and
+// most gc_list_* functions cannot be used on it, so those two functions
+// must tweak the unreachable list manually.
+#define NEXT_MASK_UNREACHABLE (1)
 
 static inline int
 gc_is_collecting(PyGC_Head *g)
 {
-    return (g->gc.gc_prev & MASK_COLLECTING) != 0;
+    return (g->_gc_prev & PREV_MASK_COLLECTING) != 0;
 }
 
 static inline void
-gc_clear_masks(PyGC_Head *g)
+gc_clear_collecting(PyGC_Head *g)
 {
-    g->gc.gc_prev &= ~(MASK_COLLECTING | MASK_TENTATIVELY_UNREACHABLE);
+    g->_gc_prev &= ~PREV_MASK_COLLECTING;
 }
 
 static inline Py_ssize_t
 gc_get_refs(PyGC_Head *g)
 {
-    return (Py_ssize_t)(g->gc.gc_prev >> _PyGC_PREV_SHIFT);
+    return (Py_ssize_t)(g->_gc_prev >> _PyGC_PREV_SHIFT);
 }
 
 static inline void
-gc_set_refs(PyGC_Head *g, Py_ssize_t v)
+gc_set_refs(PyGC_Head *g, Py_ssize_t refs)
 {
-    g->gc.gc_prev = (g->gc.gc_prev & ~_PyGC_PREV_MASK)
-        | ((uintptr_t)(v) << _PyGC_PREV_SHIFT);
+    g->_gc_prev = (g->_gc_prev & ~_PyGC_PREV_MASK)
+        | ((uintptr_t)(refs) << _PyGC_PREV_SHIFT);
 }
 
 static inline void
-gc_reset_refs(PyGC_Head *g, Py_ssize_t v)
+gc_reset_refs(PyGC_Head *g, Py_ssize_t refs)
 {
-    g->gc.gc_prev = (g->gc.gc_prev & _PyGC_PREV_MASK_FINALIZED)
-        | MASK_COLLECTING
-        | ((uintptr_t)(v) << _PyGC_PREV_SHIFT);
+    g->_gc_prev = (g->_gc_prev & _PyGC_PREV_MASK_FINALIZED)
+        | PREV_MASK_COLLECTING
+        | ((uintptr_t)(refs) << _PyGC_PREV_SHIFT);
 }
 
 static inline void
 gc_decref(PyGC_Head *g)
 {
     assert(gc_get_refs(g) > 0);
-    g->gc.gc_prev -= 1 << _PyGC_PREV_SHIFT;
+    g->_gc_prev -= 1 << _PyGC_PREV_SHIFT;
 }
 
 /* Get an object's GC head */
@@ -113,32 +123,34 @@ _PyGC_Initialize(struct _gc_runtime_state *state)
 
 #define _GEN_HEAD(n) (&state->generations[n].head)
     struct gc_generation generations[NUM_GENERATIONS] = {
-        /* PyGC_Head, threshold, count */
-        {{{_GEN_HEAD(0), (uintptr_t)_GEN_HEAD(0)}}, 700, 0},
-        {{{_GEN_HEAD(1), (uintptr_t)_GEN_HEAD(1)}}, 10, 0},
-        {{{_GEN_HEAD(2), (uintptr_t)_GEN_HEAD(2)}}, 10, 0},
+        /* PyGC_Head, threshold, count */
+        {{(uintptr_t)_GEN_HEAD(0), (uintptr_t)_GEN_HEAD(0)}, 700, 0},
+        {{(uintptr_t)_GEN_HEAD(1), (uintptr_t)_GEN_HEAD(1)}, 10, 0},
+        {{(uintptr_t)_GEN_HEAD(2), (uintptr_t)_GEN_HEAD(2)}, 10, 0},
     };
     for (int i = 0; i < 
NUM_GENERATIONS; i++) {
         state->generations[i] = generations[i];
     };
     state->generation0 = GEN_HEAD(0);
     struct gc_generation permanent_generation = {
-          {{&state->permanent_generation.head, (uintptr_t)&state->permanent_generation.head}}, 0, 0
+          {(uintptr_t)&state->permanent_generation.head,
+           (uintptr_t)&state->permanent_generation.head}, 0, 0
     };
     state->permanent_generation = permanent_generation;
 }
 
-/*--------------------------------------------------------------------------
-gc_prev values.
+/*
+_gc_prev values
+---------------
 
-Between collections, gc_prev is used for doubly linked list.
+Between collections, _gc_prev is used for the doubly linked list.
 
-Lowest three bits of gc_prev are used for flags.
-MASK_COLLECTING and MASK_TENTATIVELY_UNREACHABLE are used only while
-collecting and cleared before GC ends or _PyObject_GC_UNTRACK() is called.
+The lowest two bits of _gc_prev are used for flags.
+PREV_MASK_COLLECTING is used only while collecting, and is cleared before GC
+ends or _PyObject_GC_UNTRACK() is called.
 
-During a collection, gc_prev is temporary used for gc_refs, and the gc list
-is singly linked until gc_prev is restored.
+During a collection, _gc_prev is temporarily used for gc_refs, and the gc
+list is singly linked until _gc_prev is restored.
 
 gc_refs
     At the start of a collection, update_refs() copies the true refcount
     to gc_refs, for each object in the generation being collected.
     subtract_refs() then adjusts gc_refs so that it equals the number of
     times an object is referenced directly from outside the generation
     being collected.
 
-MASK_COLLECTING
-    Objects in generation being collected are marked MASK_COLLECTING in
+PREV_MASK_COLLECTING
+    Objects in the generation being collected are marked PREV_MASK_COLLECTING in
     update_refs().
 
-MASK_TENTATIVELY_UNREACHABLE
-    move_unreachable() then moves objects not reachable (whether directly or
-    indirectly) from outside the generation into an "unreachable" set and
-    set MASK_TENTATIVELY_UNREACHABLE flag.
-
-    Objects that are found to be reachable have gc_refs set to 1.
-    When MASK_TENTATIVELY_UNREACHABLE flag is set for the reachable object,
-    the flag is unset and the object is moved back to "reachable" set.
-
-    Only objects with MASK_TENTATIVELY_UNREACHABLE still set are candidates
-    for collection.
-----------------------------------------------------------------------------
+
+_gc_next values
+---------------
+
+_gc_next takes these values:
+
+0
+    The object is not tracked.
+
+!= 0
+    Pointer to the next object in the GC list.
+    Additionally, the lowest bit is used temporarily for the
+    NEXT_MASK_UNREACHABLE flag described below.
+
+NEXT_MASK_UNREACHABLE
+    move_unreachable() then moves objects not reachable (whether directly or
+    indirectly) from outside the generation into an "unreachable" set and
+    sets this flag.
+
+    Objects that are found to be reachable have gc_refs set to 1.
+    If this flag is set on an object that turns out to be reachable, the
+    object must be in the "unreachable" set; the flag is unset and the
+    object is moved back to the "reachable" set.
+
+    move_legacy_finalizers() will remove this flag from the "unreachable" set.
 */
 
 /*** list functions ***/
 
-static void
+static inline void
 gc_list_init(PyGC_Head *list)
 {
-    list->gc.gc_prev = (uintptr_t)list;
-    list->gc.gc_next = list;
+    // The list header must not have flags,
+    // so we can assign the pointer with a simple cast.
+    list->_gc_prev = (uintptr_t)list;
+    list->_gc_next = (uintptr_t)list;
 }
 
-static int
+static inline int
 gc_list_is_empty(PyGC_Head *list)
 {
-    return (list->gc.gc_next == list);
+    return (list->_gc_next == (uintptr_t)list);
 }
 
 /* Append `node` to `list`.
 /* Append `node` to `list`. */
-static void
+static inline void
 gc_list_append(PyGC_Head *node, PyGC_Head *list)
 {
-    node->gc.gc_next = list;
-    _PyGCHead_SET_PREV(node, list->gc.gc_prev);
-    _PyGCHead_PREV(node)->gc.gc_next = node;
-    list->gc.gc_prev = (uintptr_t)node;
+    PyGC_Head *last = (PyGC_Head *)list->_gc_prev;
+
+    // last <-> node
+    _PyGCHead_SET_PREV(node, last);
+    _PyGCHead_SET_NEXT(last, node);
+
+    // node <-> list
+    _PyGCHead_SET_NEXT(node, list);
+    list->_gc_prev = (uintptr_t)node;
 }
 
 /* Remove `node` from the gc list it's currently in. */
-static void
+static inline void
 gc_list_remove(PyGC_Head *node)
 {
-    PyGC_Head *prev = _PyGCHead_PREV(node);
-    prev->gc.gc_next = node->gc.gc_next;
-    _PyGCHead_SET_PREV(node->gc.gc_next, prev);
-    node->gc.gc_next = NULL; /* object is not currently tracked */
+    PyGC_Head *prev = GC_PREV(node);
+    PyGC_Head *next = GC_NEXT(node);
+
+    _PyGCHead_SET_NEXT(prev, next);
+    _PyGCHead_SET_PREV(next, prev);
+
+    node->_gc_next = 0; /* object is not currently tracked */
 }
 
 /* Move `node` from the gc list it's currently in (which is not explicitly
@@ -207,17 +242,19 @@ gc_list_remove(PyGC_Head *node)
 static void
 gc_list_move(PyGC_Head *node, PyGC_Head *list)
 {
-    PyGC_Head *current_prev = _PyGCHead_PREV(node);
-    PyGC_Head *current_next = node->gc.gc_next;
     /* Unlink from current list. */
-    current_prev->gc.gc_next = current_next;
-    _PyGCHead_SET_PREV(current_next, current_prev);
+    PyGC_Head *from_prev = GC_PREV(node);
+    PyGC_Head *from_next = GC_NEXT(node);
+    _PyGCHead_SET_NEXT(from_prev, from_next);
+    _PyGCHead_SET_PREV(from_next, from_prev);
+
     /* Relink at end of new list. */
-    PyGC_Head *new_prev = (PyGC_Head*)list->gc.gc_prev;
-    _PyGCHead_SET_PREV(node, new_prev);
-    new_prev->gc.gc_next = node;
-    list->gc.gc_prev = (uintptr_t)node;
-    node->gc.gc_next = list;
+    // The list head must not have flag bits set, so we can skip the macros.
+    PyGC_Head *to_prev = (PyGC_Head*)list->_gc_prev;
+    _PyGCHead_SET_PREV(node, to_prev);
+    _PyGCHead_SET_NEXT(to_prev, node);
+    list->_gc_prev = (uintptr_t)node;
+    _PyGCHead_SET_NEXT(node, list);
 }
 
 /* append list `from` onto list `to`; `from` becomes an empty list */
@@ -226,17 +263,17 @@ gc_list_merge(PyGC_Head *from, PyGC_Head *to)
 {
     assert(from != to);
     if (!gc_list_is_empty(from)) {
-        PyGC_Head *to_tail = _PyGCHead_PREV(to);
-        PyGC_Head *from_head = from->gc.gc_next;
-        PyGC_Head *from_tail = _PyGCHead_PREV(from);
+        PyGC_Head *to_tail = GC_PREV(to);
+        PyGC_Head *from_head = GC_NEXT(from);
+        PyGC_Head *from_tail = GC_PREV(from);
         assert(from_head != from);
         assert(from_tail != from);
 
-        to_tail->gc.gc_next = from_head;
+        _PyGCHead_SET_NEXT(to_tail, from_head);
         _PyGCHead_SET_PREV(from_head, to_tail);
 
-        from_tail->gc.gc_next = to;
-        to->gc.gc_prev = (uintptr_t)from_tail;
+        _PyGCHead_SET_NEXT(from_tail, to);
+        _PyGCHead_SET_PREV(to, from_tail);
     }
     gc_list_init(from);
 }
 
@@ -246,7 +283,7 @@ gc_list_size(PyGC_Head *list)
 {
     PyGC_Head *gc;
     Py_ssize_t n = 0;
-    for (gc = list->gc.gc_next; gc != list; gc = gc->gc.gc_next) {
+    for (gc = GC_NEXT(list); gc != list; gc = GC_NEXT(gc)) {
         n++;
     }
     return n;
 }
 
@@ -259,7 +296,7 @@ static int
 append_objects(PyObject *py_list, PyGC_Head *gc_list)
 {
     PyGC_Head *gc;
-    for (gc = gc_list->gc.gc_next; gc != gc_list; gc = gc->gc.gc_next) {
+    for (gc = GC_NEXT(gc_list); gc != gc_list; gc = GC_NEXT(gc)) {
         PyObject *op = FROM_GC(gc);
         if (op != py_list) {
             if (PyList_Append(py_list, op)) {
@@ -271,20 +308,21 @@ append_objects(PyObject *py_list, PyGC_Head *gc_list)
 }
 
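Editor's note: for readers who have not seen intrusive circular lists before,
here is a standalone sketch of the list discipline used by the functions
above, with both links stored as uintptr_t precisely so the low bits can
later carry flags.  The node type and helpers are illustrative, not CPython's:

    /* list_sketch.c -- illustrative only, not CPython code */
    #include <assert.h>
    #include <stdint.h>

    typedef struct node {
        uintptr_t _next;   /* really a struct node*; low bits reserved */
        uintptr_t _prev;
    } node;

    #define NEXT(n) ((node *)(n)->_next)
    #define PREV(n) ((node *)(n)->_prev)

    static void list_init(node *head)
    {
        head->_next = head->_prev = (uintptr_t)head;  /* empty ring */
    }

    static void list_append(node *n, node *head)
    {
        node *last = PREV(head);
        n->_prev = (uintptr_t)last;    /* last <-> n */
        last->_next = (uintptr_t)n;
        n->_next = (uintptr_t)head;    /* n <-> head */
        head->_prev = (uintptr_t)n;
    }

    static void list_remove(node *n)
    {
        node *prev = PREV(n), *next = NEXT(n);
        prev->_next = (uintptr_t)next;
        next->_prev = (uintptr_t)prev;
        n->_next = 0;                  /* 0 means "not in any list" */
    }

    int main(void)
    {
        node head, a, b;
        list_init(&head);
        list_append(&a, &head);
        list_append(&b, &head);
        assert(NEXT(&head) == &a && NEXT(&a) == &b && PREV(&head) == &b);
        list_remove(&a);
        assert(NEXT(&head) == &b && PREV(&b) == &head && a._next == 0);
        return 0;
    }

Note how `_gc_next == 0` doubles as the "untracked" marker in the real code;
the sketch mirrors that in list_remove().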
 #if GC_DEBUG
+// validate_list() checks list consistency.  It also serves as documentation
+// of when expected_mask is set and cleared.
 static void
 validate_list(PyGC_Head *head, uintptr_t expected_mask)
 {
     PyGC_Head *prev = head;
-    PyGC_Head *gc = head->gc.gc_next;
+    PyGC_Head *gc = GC_NEXT(head);
     while (gc != head) {
-        assert(gc->gc.gc_next != NULL);
-        assert(_PyGCHead_PREV(gc) == prev);
-        assert((gc->gc.gc_prev & (MASK_COLLECTING | MASK_TENTATIVELY_UNREACHABLE))
-               == expected_mask);
+        assert(GC_NEXT(gc) != NULL);
+        assert(GC_PREV(gc) == prev);
+        assert((gc->_gc_prev & PREV_MASK_COLLECTING) == expected_mask);
         prev = gc;
-        gc = gc->gc.gc_next;
+        gc = GC_NEXT(gc);
     }
-    assert(prev == (PyGC_Head*)head->gc.gc_prev);
+    assert(prev == GC_PREV(head));
 }
 #else
 #define validate_list(x,y) do{}while(0)
 #endif
@@ -294,13 +332,13 @@ validate_list(PyGC_Head *head, uintptr_t expected_mask)
 
 /* Set all gc_refs = ob_refcnt.  After this, gc_refs is > 0 and
- * MASK_COLLECTING bit is set for all objects in containers.
+ * the PREV_MASK_COLLECTING bit is set for all objects in containers.
  */
 static void
 update_refs(PyGC_Head *containers)
 {
-    PyGC_Head *gc = containers->gc.gc_next;
-    for (; gc != containers; gc = gc->gc.gc_next) {
+    PyGC_Head *gc = GC_NEXT(containers);
+    for (; gc != containers; gc = GC_NEXT(gc)) {
         gc_reset_refs(gc, Py_REFCNT(FROM_GC(gc)));
         /* Python's cyclic gc should never see an incoming refcount
          * of 0:  if something decref'ed to 0, it should have been
@@ -351,8 +389,8 @@ static void
 subtract_refs(PyGC_Head *containers)
 {
     traverseproc traverse;
-    PyGC_Head *gc = containers->gc.gc_next;
-    for (; gc != containers; gc=gc->gc.gc_next) {
+    PyGC_Head *gc = GC_NEXT(containers);
+    for (; gc != containers; gc = GC_NEXT(gc)) {
         traverse = Py_TYPE(FROM_GC(gc))->tp_traverse;
         (void) traverse(FROM_GC(gc),
                        (visitproc)visit_decref,
@@ -364,66 +402,78 @@ subtract_refs(PyGC_Head *containers)
 static int
 visit_reachable(PyObject *op, PyGC_Head *reachable)
 {
-    if (PyObject_IS_GC(op)) {
-        PyGC_Head *gc = AS_GC(op);
-        const Py_ssize_t gc_refs = gc_get_refs(gc);
+    if (!PyObject_IS_GC(op)) {
+        return 0;
+    }
 
-        if (gc->gc.gc_next == NULL || !gc_is_collecting(gc)) {
-            return 0;
-        }
-        if (gc->gc.gc_prev & MASK_TENTATIVELY_UNREACHABLE) {
-            /* This had gc_refs = 0 when move_unreachable got
-             * to it, but turns out it's reachable after all.
-             * Move it back to move_unreachable's 'young' list,
-             * and move_unreachable will eventually get to it
-             * again.
-             */
-            gc->gc.gc_prev &= ~MASK_TENTATIVELY_UNREACHABLE;
-            gc_list_move(gc, reachable);
-            gc_set_refs(gc, 1);
-        }
-        else if (gc_refs == 0) {
-            /* This is in move_unreachable's 'young' list, but
-             * the traversal hasn't yet gotten to it.  All
-             * we need to do is tell move_unreachable that it's
-             * reachable.
-             */
-            gc_set_refs(gc, 1);
-        }
-        /* Else there's nothing to do.
-         * If gc_refs > 0, it must be in move_unreachable's 'young'
-         * list, and move_unreachable will eventually get to it.
+    PyGC_Head *gc = AS_GC(op);
+    const Py_ssize_t gc_refs = gc_get_refs(gc);
+
+    // Ignore untracked objects and objects in other generations.
+    if (gc->_gc_next == 0 || !gc_is_collecting(gc)) {
+        return 0;
+    }
+
+    if (gc->_gc_next & NEXT_MASK_UNREACHABLE) {
+        /* This had gc_refs = 0 when move_unreachable got
+         * to it, but turns out it's reachable after all.
+         * Move it back to move_unreachable's 'young' list,
+         * and move_unreachable will eventually get to it
+         * again.
         */
-        else {
-            assert(gc_refs > 0);
-        }
+        // Manually unlink gc from the unreachable list; gc_list_* functions
+        // don't work on the list while NEXT_MASK_UNREACHABLE is set.
+        PyGC_Head *prev = GC_PREV(gc);
+        PyGC_Head *next = (PyGC_Head*)(gc->_gc_next & ~NEXT_MASK_UNREACHABLE);
+        assert(prev->_gc_next & NEXT_MASK_UNREACHABLE);
+        assert(next->_gc_next & NEXT_MASK_UNREACHABLE);
+        prev->_gc_next = gc->_gc_next;  // copy NEXT_MASK_UNREACHABLE
+        _PyGCHead_SET_PREV(next, prev);
+
+        gc_list_append(gc, reachable);
+        gc_set_refs(gc, 1);
+    }
+    else if (gc_refs == 0) {
+        /* This is in move_unreachable's 'young' list, but
+         * the traversal hasn't yet gotten to it.  All
+         * we need to do is tell move_unreachable that it's
+         * reachable.
+         */
+        gc_set_refs(gc, 1);
+    }
+    /* Else there's nothing to do.
+     * If gc_refs > 0, it must be in move_unreachable's 'young'
+     * list, and move_unreachable will eventually get to it.
+     */
+    else {
+        assert(gc_refs > 0);
+    }
     return 0;
 }
 
 /* Move the unreachable objects from young to unreachable.  After this,
- * all objects in young have gc_refs = GC_REACHABLE, and all objects in
- * unreachable have gc_refs = GC_TENTATIVELY_UNREACHABLE.  All tracked
- * gc objects not in young or unreachable still have gc_refs = GC_REACHABLE.
+ * all objects in young no longer have the PREV_MASK_COLLECTING flag and
+ * all objects in unreachable still have it.
  * All objects in young after this are directly or indirectly reachable
  * from outside the original young; and all objects in unreachable are
 * not.
 *
- * This function restores gc_prev pointer.  young and unreachable are
+ * This function restores the _gc_prev pointers.  young and unreachable are
 * doubly linked lists after this function.
+ * But _gc_next in the unreachable list still has the NEXT_MASK_UNREACHABLE
+ * flag, so we cannot use gc_list_* functions on unreachable until the flag
+ * is removed.
 */
 static void
 move_unreachable(PyGC_Head *young, PyGC_Head *unreachable)
 {
     // previous elem in the young list, used to restore gc_prev.
     PyGC_Head *prev = young;
-    PyGC_Head *gc = young->gc.gc_next;
+    PyGC_Head *gc = GC_NEXT(young);
 
-    /* Invariants:  all objects "to the left" of us in young have gc_refs
-     * = GC_REACHABLE, and are indeed reachable (directly or indirectly)
-     * from outside the young list as it was at entry.  All other objects
-     * from the original young "to the left" of us are in unreachable now,
-     * and have gc_refs = GC_TENTATIVELY_UNREACHABLE.  All objects to the
+    /* Invariants:  all objects "to the left" of us in young are reachable
+     * (directly or indirectly) from outside the young list as it was at entry.
+     *
+     * All other objects from the original young "to the left" of us are in
+     * unreachable now, and have NEXT_MASK_UNREACHABLE.  All objects to the
     * left of us in 'young' now have been scanned, and no objects here
     * or to the right have been scanned yet.
     */
 
@@ -441,15 +491,15 @@ move_unreachable(PyGC_Head *young, PyGC_Head *unreachable)
             PyObject *op = FROM_GC(gc);
             traverseproc traverse = Py_TYPE(op)->tp_traverse;
             assert(gc_get_refs(gc) > 0);
-            // NOTE: visit_reachable may change gc->gc.gc_next when
-            // young->gc.gc_prev == gc.
+            // NOTE: visit_reachable may change gc->_gc_next when
+            // young->_gc_prev == gc.  Don't do gc = GC_NEXT(gc) before!
             (void) traverse(op,
                     (visitproc)visit_reachable,
                     (void *)young);
             // relink gc_prev to prev element.
             _PyGCHead_SET_PREV(gc, prev);
             // gc is not in COLLECTING state after this point.
-            gc->gc.gc_prev &= ~MASK_COLLECTING;
+            gc_clear_collecting(gc);
             prev = gc;
         }
         else {
@@ -460,25 +510,35 @@ move_unreachable(PyGC_Head *young, PyGC_Head *unreachable)
              * visit_reachable will eventually move gc back into
             * young if that's so, and we'll see it again.
             */
-            gc->gc.gc_prev |= MASK_TENTATIVELY_UNREACHABLE;
             // Move gc to unreachable.
-            // No need to gc->next->prev = prev because next is single linked.
-            prev->gc.gc_next = gc->gc.gc_next;
-            gc_list_append(gc, unreachable);
+            // No need for gc->next->prev = prev because the list is singly
+            // linked here.
+            prev->_gc_next = gc->_gc_next;
+
+            // We can't use gc_list_append() here because we need to tag
+            // the pointers with NEXT_MASK_UNREACHABLE.
+            PyGC_Head *last = GC_PREV(unreachable);
+            // NOTE: Since all objects in the unreachable set have the
+            // NEXT_MASK_UNREACHABLE flag, we set it unconditionally.
+            // But this may also set the flag on the list head (unreachable);
+            // move_legacy_finalizers() takes care of that.
+            last->_gc_next = (NEXT_MASK_UNREACHABLE | (uintptr_t)gc);
+            _PyGCHead_SET_PREV(gc, last);
+            gc->_gc_next = (NEXT_MASK_UNREACHABLE | (uintptr_t)unreachable);
+            unreachable->_gc_prev = (uintptr_t)gc;
         }
-        gc = prev->gc.gc_next;
+        gc = (PyGC_Head*)prev->_gc_next;
     }
-    // young->gc.gc_prev must be last element remained in the list.
-    young->gc.gc_prev = (uintptr_t)prev;
+    // young->_gc_prev must be the last element remaining in the list.
+    young->_gc_prev = (uintptr_t)prev;
 }
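Editor's note: the manual unlinking above only works because of the
tagged-pointer representation of _gc_next.  A minimal standalone sketch of
that representation, assuming only that nodes are at least 2-byte aligned so
bit 0 of their address is free (names are illustrative, not CPython's):

    /* tagged_ptr_sketch.c -- illustrative only, not CPython code */
    #include <assert.h>
    #include <stdint.h>

    #define UNREACHABLE_BIT ((uintptr_t)1)

    typedef struct node { uintptr_t _next; } node;

    int main(void)
    {
        static node a, b;

        /* link a -> b and tag the link as "unreachable" */
        a._next = (uintptr_t)&b | UNREACHABLE_BIT;

        /* flag and pointer can be recovered independently */
        assert(a._next & UNREACHABLE_BIT);
        assert((node *)(a._next & ~UNREACHABLE_BIT) == &b);

        /* clearing the flag restores a plain pointer */
        a._next &= ~UNREACHABLE_BIT;
        assert((node *)a._next == &b);
        return 0;
    }

This is why the code must mask with ~NEXT_MASK_UNREACHABLE before
dereferencing, and why the ordinary gc_list_* helpers cannot walk the
unreachable list while the flag is present.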
 
 static void
 untrack_tuples(PyGC_Head *head)
 {
-    PyGC_Head *next, *gc = head->gc.gc_next;
+    PyGC_Head *next, *gc = GC_NEXT(head);
     while (gc != head) {
         PyObject *op = FROM_GC(gc);
-        next = gc->gc.gc_next;
+        next = GC_NEXT(gc);
         if (PyTuple_CheckExact(op)) {
             _PyTuple_MaybeUntrack(op);
         }
@@ -490,12 +550,13 @@ untrack_tuples(PyGC_Head *head)
 static void
 untrack_dicts(PyGC_Head *head)
 {
-    PyGC_Head *next, *gc = head->gc.gc_next;
+    PyGC_Head *next, *gc = GC_NEXT(head);
     while (gc != head) {
         PyObject *op = FROM_GC(gc);
-        next = gc->gc.gc_next;
-        if (PyDict_CheckExact(op))
+        next = GC_NEXT(gc);
+        if (PyDict_CheckExact(op)) {
             _PyDict_MaybeUntrack(op);
+        }
         gc = next;
     }
 }
@@ -508,27 +569,29 @@ has_legacy_finalizer(PyObject *op)
 }
 
 /* Move the objects in unreachable with tp_del slots into `finalizers`.
- * Objects moved into `finalizers` have gc_refs set to GC_REACHABLE; the
- * objects remaining in unreachable are left at GC_TENTATIVELY_UNREACHABLE.
+ *
+ * This function also removes the NEXT_MASK_UNREACHABLE flag
+ * from _gc_next in unreachable.
 */
 static void
 move_legacy_finalizers(PyGC_Head *unreachable, PyGC_Head *finalizers)
 {
-    PyGC_Head *gc;
-    PyGC_Head *next;
+    PyGC_Head *gc, *next;
+    unreachable->_gc_next &= ~NEXT_MASK_UNREACHABLE;
 
     /* March over unreachable.  Move objects with finalizers into
     * `finalizers`.
*/ - for (gc = unreachable->gc.gc_next; gc != unreachable; gc = next) { + for (gc = GC_NEXT(unreachable); gc != unreachable; gc = next) { PyObject *op = FROM_GC(gc); - assert(IS_TENTATIVELY_UNREACHABLE(op)); - next = gc->gc.gc_next; + assert(gc->_gc_next & NEXT_MASK_UNREACHABLE); + gc->_gc_next &= ~NEXT_MASK_UNREACHABLE; + next = (PyGC_Head*)gc->_gc_next; if (has_legacy_finalizer(op)) { + gc_clear_collecting(gc); gc_list_move(gc, finalizers); - gc_clear_masks(gc); } } } @@ -538,10 +601,10 @@ static int visit_move(PyObject *op, PyGC_Head *tolist) { if (PyObject_IS_GC(op)) { - if (IS_TENTATIVELY_UNREACHABLE(op)) { - PyGC_Head *gc = AS_GC(op); + PyGC_Head *gc = AS_GC(op); + if (gc_is_collecting(gc)) { gc_list_move(gc, tolist); - gc_clear_masks(gc); + gc_clear_collecting(gc); } } return 0; @@ -554,8 +617,8 @@ static void move_legacy_finalizer_reachable(PyGC_Head *finalizers) { traverseproc traverse; - PyGC_Head *gc = finalizers->gc.gc_next; - for (; gc != finalizers; gc = gc->gc.gc_next) { + PyGC_Head *gc = GC_NEXT(finalizers); + for (; gc != finalizers; gc = GC_NEXT(gc)) { /* Note that the finalizers list may grow during this. */ traverse = Py_TYPE(FROM_GC(gc))->tp_traverse; (void) traverse(FROM_GC(gc), @@ -595,12 +658,11 @@ handle_weakrefs(PyGC_Head *unreachable, PyGC_Head *old) * make another pass over wrcb_to_call, invoking callbacks, after this * pass completes. */ - for (gc = unreachable->gc.gc_next; gc != unreachable; gc = next) { + for (gc = GC_NEXT(unreachable); gc != unreachable; gc = next) { PyWeakReference **wrlist; op = FROM_GC(gc); - assert(IS_TENTATIVELY_UNREACHABLE(op)); - next = gc->gc.gc_next; + next = GC_NEXT(gc); if (! PyType_SUPPORTS_WEAKREFS(Py_TYPE(op))) continue; @@ -654,8 +716,9 @@ handle_weakrefs(PyGC_Head *unreachable, PyGC_Head *old) * to imagine how calling it later could create a problem for us. wr * is moved to wrcb_to_call in this case. */ - if (IS_TENTATIVELY_UNREACHABLE(wr)) + if (gc_is_collecting(AS_GC(wr))) { continue; + } /* Create a new reference so that wr can't go away * before we can process it again. @@ -678,7 +741,7 @@ handle_weakrefs(PyGC_Head *unreachable, PyGC_Head *old) PyObject *temp; PyObject *callback; - gc = wrcb_to_call.gc.gc_next; + gc = (PyGC_Head*)wrcb_to_call._gc_next; op = FROM_GC(gc); assert(PyWeakref_Check(op)); wr = (PyWeakReference *)op; @@ -704,12 +767,13 @@ handle_weakrefs(PyGC_Head *unreachable, PyGC_Head *old) * ours). 
 */
             Py_DECREF(op);
-            if (wrcb_to_call.gc.gc_next == gc) {
+            if (wrcb_to_call._gc_next == (uintptr_t)gc) {
                 /* object is still alive -- move it */
                 gc_list_move(gc, old);
             }
-            else
+            else {
                 ++num_freed;
+            }
     }
 
     return num_freed;
@@ -732,14 +796,14 @@ debug_cycle(const char *msg, PyObject *op)
 static void
 handle_legacy_finalizers(PyGC_Head *finalizers, PyGC_Head *old)
 {
-    PyGC_Head *gc = finalizers->gc.gc_next;
+    PyGC_Head *gc = GC_NEXT(finalizers);
 
     if (_PyRuntime.gc.garbage == NULL) {
         _PyRuntime.gc.garbage = PyList_New(0);
         if (_PyRuntime.gc.garbage == NULL)
             Py_FatalError("gc couldn't create gc.garbage list");
     }
-    for (; gc != finalizers; gc = gc->gc.gc_next) {
+    for (; gc != finalizers; gc = GC_NEXT(gc)) {
         PyObject *op = FROM_GC(gc);
 
         if ((_PyRuntime.gc.debug & DEBUG_SAVEALL) || has_legacy_finalizer(op)) {
@@ -772,7 +836,7 @@ finalize_garbage(PyGC_Head *collectable)
     gc_list_init(&seen);
 
     while (!gc_list_is_empty(collectable)) {
-        PyGC_Head *gc = collectable->gc.gc_next;
+        PyGC_Head *gc = GC_NEXT(collectable);
         PyObject *op = FROM_GC(gc);
         gc_list_move(gc, &seen);
         if (!_PyGCHead_FINALIZED(gc) &&
@@ -795,23 +859,21 @@ check_garbage(PyGC_Head *collectable)
 {
     int ret = 0;
     PyGC_Head *gc;
-    for (gc = collectable->gc.gc_next; gc != collectable;
-         gc = gc->gc.gc_next) {
+    for (gc = GC_NEXT(collectable); gc != collectable; gc = GC_NEXT(gc)) {
         // Use gc_refs again, temporarily breaking gc_prev.
         gc_set_refs(gc, Py_REFCNT(FROM_GC(gc)));
         assert(gc_get_refs(gc) != 0);
     }
     subtract_refs(collectable);
     PyGC_Head *prev = collectable;
-    for (gc = collectable->gc.gc_next; gc != collectable;
-         gc = gc->gc.gc_next) {
+    for (gc = GC_NEXT(collectable); gc != collectable; gc = GC_NEXT(gc)) {
         assert(gc_get_refs(gc) >= 0);
         if (gc_get_refs(gc) != 0) {
             ret = -1;
         }
         // Restore gc_prev here.
         _PyGCHead_SET_PREV(gc, prev);
-        gc_clear_masks(gc);
+        gc_clear_collecting(gc);
         prev = gc;
     }
     return ret;
@@ -827,7 +889,7 @@ delete_garbage(PyGC_Head *collectable, PyGC_Head *old)
     inquiry clear;
 
     while (!gc_list_is_empty(collectable)) {
-        PyGC_Head *gc = collectable->gc.gc_next;
+        PyGC_Head *gc = GC_NEXT(collectable);
         PyObject *op = FROM_GC(gc);
 
         assert(Py_REFCNT(FROM_GC(gc)) > 0);
@@ -842,7 +904,7 @@ delete_garbage(PyGC_Head *collectable, PyGC_Head *old)
                 Py_DECREF(op);
             }
         }
-        if (collectable->gc.gc_next == gc) {
+        if (GC_NEXT(collectable) == gc) {
             /* object is still alive, move it, it may die later */
             gc_list_move(gc, old);
         }
@@ -942,7 +1004,6 @@ collect(int generation, Py_ssize_t *n_collected, Py_ssize_t *n_uncollectable,
     gc_list_init(&unreachable);
     move_unreachable(young, &unreachable);  // gc_prev is a pointer again
     validate_list(young, 0);
-    validate_list(&unreachable, MASK_COLLECTING | MASK_TENTATIVELY_UNREACHABLE);
 
     untrack_tuples(young);
     /* Move reachable objects to next generation. */
@@ -964,6 +1025,8 @@ collect(int generation, Py_ssize_t *n_collected, Py_ssize_t *n_uncollectable,
     * legacy finalizers (e.g. tp_del) can't safely be deleted.
     */
     gc_list_init(&finalizers);
+    // NEXT_MASK_UNREACHABLE is cleared here.
+    // After move_legacy_finalizers(), unreachable is a normal list again.
move_legacy_finalizers(&unreachable, &finalizers); /* finalizers contains the unreachable objects with a legacy finalizer; * unreachable objects reachable *from* those are also uncollectable, @@ -972,13 +1035,12 @@ collect(int generation, Py_ssize_t *n_collected, Py_ssize_t *n_uncollectable, move_legacy_finalizer_reachable(&finalizers); validate_list(&finalizers, 0); - validate_list(&unreachable, MASK_COLLECTING | MASK_TENTATIVELY_UNREACHABLE); + validate_list(&unreachable, PREV_MASK_COLLECTING); /* Collect statistics on collectable objects found and print * debugging information. */ - for (gc = unreachable.gc.gc_next; gc != &unreachable; - gc = gc->gc.gc_next) { + for (gc = GC_NEXT(&unreachable); gc != &unreachable; gc = GC_NEXT(gc)) { m++; if (_PyRuntime.gc.debug & DEBUG_COLLECTABLE) { debug_cycle("collectable", FROM_GC(gc)); @@ -989,12 +1051,12 @@ collect(int generation, Py_ssize_t *n_collected, Py_ssize_t *n_uncollectable, m += handle_weakrefs(&unreachable, old); validate_list(old, 0); - validate_list(&unreachable, MASK_COLLECTING | MASK_TENTATIVELY_UNREACHABLE); + validate_list(&unreachable, PREV_MASK_COLLECTING); /* Call tp_finalize on objects which have one. */ finalize_garbage(&unreachable); - if (check_garbage(&unreachable)) { // clears MASKs + if (check_garbage(&unreachable)) { // clear PREV_MASK_COLLECTING here gc_list_merge(&unreachable, old); } else { @@ -1007,9 +1069,7 @@ collect(int generation, Py_ssize_t *n_collected, Py_ssize_t *n_uncollectable, /* Collect statistics on uncollectable objects found and print * debugging information. */ - for (gc = finalizers.gc.gc_next; - gc != &finalizers; - gc = gc->gc.gc_next) { + for (gc = GC_NEXT(&finalizers); gc != &finalizers; gc = GC_NEXT(gc)) { n++; if (_PyRuntime.gc.debug & DEBUG_UNCOLLECTABLE) debug_cycle("uncollectable", FROM_GC(gc)); @@ -1335,7 +1395,7 @@ gc_referrers_for(PyObject *objs, PyGC_Head *list, PyObject *resultlist) PyGC_Head *gc; PyObject *obj; traverseproc traverse; - for (gc = list->gc.gc_next; gc != list; gc = gc->gc.gc_next) { + for (gc = GC_NEXT(list); gc != list; gc = GC_NEXT(gc)) { obj = FROM_GC(gc); traverse = Py_TYPE(obj)->tp_traverse; if (obj == objs || obj == resultlist) @@ -1767,8 +1827,9 @@ PyObject_GC_UnTrack(void *op) /* Obscure: the Py_TRASHCAN mechanism requires that we be able to * call PyObject_GC_UnTrack twice on an object. 
 */
-    if (_PyObject_GC_IS_TRACKED(op))
+    if (_PyObject_GC_IS_TRACKED(op)) {
         _PyObject_GC_UNTRACK(op);
+    }
 }
 
 static PyObject *
@@ -1786,9 +1847,9 @@ _PyObject_GC_Alloc(int use_calloc, size_t basicsize)
         g = (PyGC_Head *)PyObject_Malloc(size);
     if (g == NULL)
         return PyErr_NoMemory();
-    assert(((uintptr_t)g & 7) == 0);  // g must be aligned 8bytes boundary
-    g->gc.gc_next = NULL;
-    g->gc.gc_prev = 0;
+    assert(((uintptr_t)g & 3) == 0);  // g must be aligned on a 4-byte boundary
+    g->_gc_next = 0;
+    g->_gc_prev = 0;
     _PyRuntime.gc.generations[0].count++; /* number of allocated GC objects */
     if (_PyRuntime.gc.generations[0].count > _PyRuntime.gc.generations[0].threshold &&
         _PyRuntime.gc.enabled &&
@@ -1861,8 +1922,9 @@ void
 PyObject_GC_Del(void *op)
 {
     PyGC_Head *g = AS_GC(op);
-    if (_PyObject_GC_IS_TRACKED(op))
+    if (_PyObject_GC_IS_TRACKED(op)) {
         gc_list_remove(g);
+    }
     if (_PyRuntime.gc.generations[0].count > 0) {
         _PyRuntime.gc.generations[0].count--;
     }
diff --git a/Objects/object.c b/Objects/object.c
index 5f6c2161ec087d..600bdf0441a38c 100644
--- a/Objects/object.c
+++ b/Objects/object.c
@@ -2122,8 +2122,7 @@ _PyTrash_destroy_chain(void)
         PyObject *op = _PyRuntime.gc.trash_delete_later;
         destructor dealloc = Py_TYPE(op)->tp_dealloc;
 
-        _PyRuntime.gc.trash_delete_later =
-            (PyObject*) _Py_AS_GC(op)->gc.gc_prev;
+        _PyRuntime.gc.trash_delete_later = (PyObject*) _Py_AS_GC(op)->_gc_prev;
 
         /* Call the deallocator directly.  This used to try to
          * fool Py_DECREF into calling it indirectly, but
@@ -2160,8 +2159,7 @@ _PyTrash_thread_destroy_chain(void)
         PyObject *op = tstate->trash_delete_later;
         destructor dealloc = Py_TYPE(op)->tp_dealloc;
 
-        tstate->trash_delete_later =
-            (PyObject*) _Py_AS_GC(op)->gc.gc_prev;
+        tstate->trash_delete_later = (PyObject*) _Py_AS_GC(op)->_gc_prev;
 
         /* Call the deallocator directly.  This used to try to
          * fool Py_DECREF into calling it indirectly, but
 
From d7755a82e2e8977d6c3a24644649846ed014acfd Mon Sep 17 00:00:00 2001
From: INADA Naoki
Date: Fri, 29 Jun 2018 02:02:50 +0900
Subject: [PATCH 18/19] fix comment

---
 Include/objimpl.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Include/objimpl.h b/Include/objimpl.h
index ee6706cc762f2a..5284ea6ec488ec 100644
--- a/Include/objimpl.h
+++ b/Include/objimpl.h
@@ -277,7 +277,7 @@ extern PyGC_Head *_PyGC_generation0;
 #define _PyGCHead_NEXT(g) ((PyGC_Head*)(g)->_gc_next)
 #define _PyGCHead_SET_NEXT(g, p) ((g)->_gc_next = (uintptr_t)(p))
 
-// Lowest two bits of _gc_prev is used for flags described below.
+// Lowest two bits of _gc_prev are used for the _PyGC_PREV_MASK_* flags.
 #define _PyGCHead_PREV(g) ((PyGC_Head*)((g)->_gc_prev & _PyGC_PREV_MASK))
 #define _PyGCHead_SET_PREV(g, p) do { \
     assert(((uintptr_t)p & ~_PyGC_PREV_MASK) == 0); \
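Editor's note: between these two fixups it is worth spelling out the invariant
that _PyGCHead_PREV / _PyGCHead_SET_PREV rely on: any PyGC_Head is aligned to
at least 4 bytes, so the two lowest bits of its address are always zero and
can hold the _PyGC_PREV_MASK_* flags.  A standalone sketch under that
assumption (constants and names illustrative, not CPython's):

    /* prev_mask_sketch.c -- illustrative only, not CPython code */
    #include <assert.h>
    #include <stdint.h>

    #define PREV_SHIFT 2
    #define PREV_MASK  (((uintptr_t)-1) << PREV_SHIFT)

    typedef struct head { uintptr_t _prev; } head;

    int main(void)
    {
        static head a, b;                   /* statics are word-aligned */
        assert(((uintptr_t)&a & ~PREV_MASK) == 0);  /* the SET_PREV assert */

        b._prev |= (uintptr_t)1;            /* pretend FINALIZED is set */

        /* like _PyGCHead_SET_PREV: replace the pointer, keep the flags */
        b._prev = (b._prev & ~PREV_MASK) | (uintptr_t)&a;

        /* like _PyGCHead_PREV: mask the flags off to get the pointer */
        assert((head *)(b._prev & PREV_MASK) == &a);
        assert(b._prev & 1);                /* the flag survived */
        return 0;
    }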
From 38c55a02b666585ffb0c93391bc2eac064b4e42f Mon Sep 17 00:00:00 2001
From: Inada Naoki
Date: Fri, 29 Jun 2018 11:58:40 +0900
Subject: [PATCH 19/19] fixup

---
 Modules/gcmodule.c | 14 ++++++++++----
 Objects/object.c   |  6 ++++--
 2 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/Modules/gcmodule.c b/Modules/gcmodule.c
index e9a2db903b7a36..d61616b9409e1b 100644
--- a/Modules/gcmodule.c
+++ b/Modules/gcmodule.c
@@ -36,12 +36,19 @@ module gc
 [clinic start generated code]*/
 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=b5c9690ecc842d79]*/
 
-#define GC_DEBUG (0) /* More asserts */
+#define GC_DEBUG (0)  /* Enable more asserts */
 
 #define GC_NEXT _PyGCHead_NEXT
 #define GC_PREV _PyGCHead_PREV
 
-// Bit 0 of _gc_prev is used for _PyGC_PREV_MASK_FINALIZED in objimpl.h
+// update_refs() sets this bit for all objects in the generation being
+// collected.  subtract_refs() and move_unreachable() use it to tell
+// whether a visited object is part of that generation or not.
+//
+// move_unreachable() removes this flag from reachable objects.
+// Only unreachable objects have this flag.
+//
+// No object in the interpreter has this flag set after GC ends.
 #define PREV_MASK_COLLECTING _PyGC_PREV_MASK_COLLECTING
 
 // The lowest bit of _gc_next is used for the UNREACHABLE flag.
@@ -52,8 +59,7 @@ module gc
 // doesn't clear it, to avoid an extra pass over the list.
 // move_legacy_finalizers() removes this flag instead.
 // Between them, the unreachable list is not a normal list and we cannot use
-// most gc_list_* functions for it.  These two functions must tweak the
-// unreachable list by hand.
+// most gc_list_* functions for it.
 #define NEXT_MASK_UNREACHABLE  (1)
 
 static inline int
diff --git a/Objects/object.c b/Objects/object.c
index 600bdf0441a38c..2471f6b0588c9c 100644
--- a/Objects/object.c
+++ b/Objects/object.c
@@ -2122,7 +2122,8 @@ _PyTrash_destroy_chain(void)
         PyObject *op = _PyRuntime.gc.trash_delete_later;
         destructor dealloc = Py_TYPE(op)->tp_dealloc;
 
-        _PyRuntime.gc.trash_delete_later = (PyObject*) _Py_AS_GC(op)->_gc_prev;
+        _PyRuntime.gc.trash_delete_later =
+            (PyObject*) _PyGCHead_PREV(_Py_AS_GC(op));
 
         /* Call the deallocator directly.  This used to try to
          * fool Py_DECREF into calling it indirectly, but
@@ -2160,7 +2159,8 @@ _PyTrash_thread_destroy_chain(void)
         PyObject *op = tstate->trash_delete_later;
         destructor dealloc = Py_TYPE(op)->tp_dealloc;
 
-        tstate->trash_delete_later = (PyObject*) _Py_AS_GC(op)->_gc_prev;
+        tstate->trash_delete_later =
+            (PyObject*) _PyGCHead_PREV(_Py_AS_GC(op));
 
         /* Call the deallocator directly.  This used to try to
          * fool Py_DECREF into calling it indirectly, but
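Editor's note: taken together, the series shrinks the GC header from three
machine words to two, which is the point of the whole exercise.  A standalone
sketch of the before/after layouts, simplified from the structures shown in
these patches:

    /* head_size_sketch.c -- simplified reconstruction, not CPython code */
    #include <stdint.h>
    #include <stdio.h>

    typedef union old_gc_head {           /* before: three words */
        struct {
            union old_gc_head *gc_next;
            union old_gc_head *gc_prev;
            intptr_t gc_refs;             /* separate word for refs/state */
        } gc;
        double dummy;                     /* force worst-case alignment */
    } old_gc_head;

    typedef struct new_gc_head {          /* after: two words */
        uintptr_t _gc_next;               /* pointer; bit 0 = UNREACHABLE */
        uintptr_t _gc_prev;               /* pointer or refs; bits 0-1 = flags */
    } new_gc_head;

    int main(void)
    {
        /* on a typical 64-bit build: 24 bytes vs 16 bytes per object */
        printf("old: %zu bytes, new: %zu bytes\n",
               sizeof(old_gc_head), sizeof(new_gc_head));
        return 0;
    }

One word saved per tracked object, at the cost of the flag masking seen
throughout the series.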