From 2f90337c5f2d58ffde7fa063e662b9ad4b136258 Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Thu, 18 May 2017 18:18:37 -0400 Subject: [PATCH] Remove unnecessary Py_INCREF in PyBuffer causing memory leak Change-Id: Ic00f1a989b84d36eccf3c7769251303c849921f9 --- cpp/src/arrow/python/common.cc | 1 - python/scripts/test_leak.py | 27 ++++++++++++++++++++++++++- 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/cpp/src/arrow/python/common.cc b/cpp/src/arrow/python/common.cc index ba7b6cf202e..a248db3abca 100644 --- a/cpp/src/arrow/python/common.cc +++ b/cpp/src/arrow/python/common.cc @@ -55,7 +55,6 @@ PyBuffer::PyBuffer(PyObject* obj) : Buffer(nullptr, 0), obj_(nullptr) { size_ = buffer->len; capacity_ = buffer->len; is_mutable_ = false; - Py_INCREF(obj_); } } diff --git a/python/scripts/test_leak.py b/python/scripts/test_leak.py index 2b197b6c130..0b12fb5cd25 100644 --- a/python/scripts/test_leak.py +++ b/python/scripts/test_leak.py @@ -21,6 +21,7 @@ import numpy as np import memory_profiler import gc +import io def leak(): @@ -32,4 +33,28 @@ def leak(): table.to_pandas() gc.collect() -leak() +# leak() + + +def leak2(): + data = [pa.array(np.concatenate([np.random.randn(100000)] * 10))] + table = pa.Table.from_arrays(data, ['foo']) + while True: + print('calling to_pandas') + print('memory_usage: {0}'.format(memory_profiler.memory_usage())) + df = table.to_pandas() + + batch = pa.RecordBatch.from_pandas(df) + + sink = io.BytesIO() + writer = pa.RecordBatchFileWriter(sink, batch.schema) + writer.write_batch(batch) + writer.close() + + buf_reader = pa.BufferReader(sink.getvalue()) + reader = pa.open_file(buf_reader) + reader.read_all() + + gc.collect() + +leak2()