From 416c876ad34d279792459ccdaa6e1076c6f037ec Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Mon, 15 May 2017 16:51:33 -0400 Subject: [PATCH] Consolidated changelog for the website with new features/improvements shown first. Include git shortlog Change-Id: Ib261fc4caa04a4ec16f9cd965fb7ae3bb3dae71b --- dev/make_changelog.py | 51 ++++-- site/_release/0.1.0.md | 276 +++++++++++++++-------------- site/_release/0.2.0.md | 261 ++++++++++++++------------- site/_release/0.3.0.md | 392 ++++++++++++++++++++++------------------- 4 files changed, 533 insertions(+), 447 deletions(-) diff --git a/dev/make_changelog.py b/dev/make_changelog.py index 0ad1607c793..47127903b7b 100644 --- a/dev/make_changelog.py +++ b/dev/make_changelog.py @@ -52,34 +52,65 @@ def get_issues_for_version(version): LINK_TEMPLATE = '[{0}](https://issues.apache.org/jira/browse/{0})' -def format_changelog_markdown(issues, out, links=False): +def format_changelog_markdown(issues, out): issues_by_type = defaultdict(list) for issue in issues: issues_by_type[issue.fields.issuetype.name].append(issue) - for issue_type, issue_group in sorted(issues_by_type.items()): issue_group.sort(key=lambda x: x.key) out.write('## {0}\n\n'.format(issue_type)) for issue in issue_group: - if links: - name = LINK_TEMPLATE.format(issue.key) - else: - name = issue.key - out.write('* {0} - {1}\n'.format(name, + out.write('* {0} - {1}\n'.format(issue.key, issue.fields.summary)) out.write('\n') +def format_changelog_website(issues, out): + NEW_FEATURE = 'New Features and Improvements' + BUGFIX = 'Bug Fixes' + + CATEGORIES = { + 'New Feature': NEW_FEATURE, + 'Improvement': NEW_FEATURE, + 'Task': NEW_FEATURE, + 'Test': NEW_FEATURE, + 'Bug': BUGFIX + } + + issues_by_category = defaultdict(list) + for issue in issues: + issue_type = issue.fields.issuetype.name + website_category = CATEGORIES[issue_type] + issues_by_category[website_category].append(issue) + + WEBSITE_ORDER = [NEW_FEATURE, BUGFIX] + + for issue_category in WEBSITE_ORDER: + issue_group = issues_by_category[issue_category] + issue_group.sort(key=lambda x: x.key) + + out.write('## {0}\n\n'.format(issue_category)) + for issue in issue_group: + name = LINK_TEMPLATE.format(issue.key) + out.write('* {0} - {1}\n'.format(name, issue.fields.summary)) + out.write('\n') + + if __name__ == '__main__': if len(sys.argv) < 2: - print('Usage: make_changelog.py $FIX_VERSION [$LINKS]') + print('Usage: make_changelog.py $FIX_VERSION [$IS_WEBSITE]') buf = StringIO() - links = len(sys.argv) > 2 and sys.argv[2] == '1' + for_website = len(sys.argv) > 2 and sys.argv[2] == '1' issues_for_version = get_issues_for_version(sys.argv[1]) - format_changelog_markdown(issues_for_version, buf, links=links) + + if for_website: + format_changelog_website(issues_for_version, buf) + else: + format_changelog_markdown(issues_for_version, buf) + print(buf.getvalue()) diff --git a/site/_release/0.1.0.md b/site/_release/0.1.0.md index e668e569865..cf30fde9e62 100644 --- a/site/_release/0.1.0.md +++ b/site/_release/0.1.0.md @@ -31,153 +31,114 @@ limitations under the License. # Changelog -## Bug - -* [ARROW-103](https://issues.apache.org/jira/browse/ARROW-103) - Missing patterns from .gitignore -* [ARROW-104](https://issues.apache.org/jira/browse/ARROW-104) - Update Layout.md based on discussion on the mailing list -* [ARROW-105](https://issues.apache.org/jira/browse/ARROW-105) - Unit tests fail if assertions are disabled -* [ARROW-113](https://issues.apache.org/jira/browse/ARROW-113) - TestValueVector test fails if cannot allocate 2GB of memory -* [ARROW-16](https://issues.apache.org/jira/browse/ARROW-16) - Building cpp issues on XCode 7.2.1 -* [ARROW-17](https://issues.apache.org/jira/browse/ARROW-17) - Set some vector fields to default access level for Drill compatibility -* [ARROW-18](https://issues.apache.org/jira/browse/ARROW-18) - Fix bug with decimal precision and scale -* [ARROW-185](https://issues.apache.org/jira/browse/ARROW-185) - [C++] Make sure alignment and memory padding conform to spec -* [ARROW-188](https://issues.apache.org/jira/browse/ARROW-188) - Python: Add numpy as install requirement -* [ARROW-193](https://issues.apache.org/jira/browse/ARROW-193) - For the instruction, typos "int his" should be "in this" -* [ARROW-194](https://issues.apache.org/jira/browse/ARROW-194) - C++: Allow read-only memory mapped source -* [ARROW-200](https://issues.apache.org/jira/browse/ARROW-200) - [Python] Convert Values String looks like it has incorrect error handling -* [ARROW-209](https://issues.apache.org/jira/browse/ARROW-209) - [C++] Broken builds: llvm.org apt repos are unavailable -* [ARROW-210](https://issues.apache.org/jira/browse/ARROW-210) - [C++] Tidy up the type system a little bit -* [ARROW-211](https://issues.apache.org/jira/browse/ARROW-211) - Several typos/errors in Layout.md examples -* [ARROW-217](https://issues.apache.org/jira/browse/ARROW-217) - Fix Travis w.r.t conda 4.1.0 changes -* [ARROW-219](https://issues.apache.org/jira/browse/ARROW-219) - [C++] Passed CMAKE_CXX_FLAGS are being dropped, fix compiler warnings -* [ARROW-223](https://issues.apache.org/jira/browse/ARROW-223) - Do not link against libpython -* [ARROW-225](https://issues.apache.org/jira/browse/ARROW-225) - [C++/Python] master Travis CI build is broken -* [ARROW-244](https://issues.apache.org/jira/browse/ARROW-244) - [C++] Some global APIs of IPC module should be visible to the outside -* [ARROW-246](https://issues.apache.org/jira/browse/ARROW-246) - [Java] UnionVector doesn't call allocateNew() when creating it's vectorType -* [ARROW-247](https://issues.apache.org/jira/browse/ARROW-247) - [C++] Missing explicit destructor in RowBatchReader causes an incomplete type error -* [ARROW-250](https://issues.apache.org/jira/browse/ARROW-250) - Fix for ARROW-246 may cause memory leaks -* [ARROW-259](https://issues.apache.org/jira/browse/ARROW-259) - Use flatbuffer fields in java implementation -* [ARROW-265](https://issues.apache.org/jira/browse/ARROW-265) - Negative decimal values have wrong padding -* [ARROW-266](https://issues.apache.org/jira/browse/ARROW-266) - [C++] Fix the broken build -* [ARROW-274](https://issues.apache.org/jira/browse/ARROW-274) - Make the MapVector nullable -* [ARROW-278](https://issues.apache.org/jira/browse/ARROW-278) - [Format] Struct type name consistency in implementations and metadata -* [ARROW-283](https://issues.apache.org/jira/browse/ARROW-283) - [C++] Update arrow_parquet to account for API changes in PARQUET-573 -* [ARROW-284](https://issues.apache.org/jira/browse/ARROW-284) - [C++] Triage builds by disabling Arrow-Parquet module -* [ARROW-287](https://issues.apache.org/jira/browse/ARROW-287) - [java] Make nullable vectors use a BitVecor instead of UInt1Vector for bits -* [ARROW-297](https://issues.apache.org/jira/browse/ARROW-297) - Fix Arrow pom for release -* [ARROW-304](https://issues.apache.org/jira/browse/ARROW-304) - NullableMapReaderImpl.isSet() always returns true -* [ARROW-308](https://issues.apache.org/jira/browse/ARROW-308) - UnionListWriter.setPosition() should not call startList() -* [ARROW-309](https://issues.apache.org/jira/browse/ARROW-309) - Types.getMinorTypeForArrowType() does not work for Union type -* [ARROW-313](https://issues.apache.org/jira/browse/ARROW-313) - XCode 8.0 breaks builds -* [ARROW-314](https://issues.apache.org/jira/browse/ARROW-314) - JSONScalar is unnecessary and unused. -* [ARROW-320](https://issues.apache.org/jira/browse/ARROW-320) - ComplexCopier.copy(FieldReader, FieldWriter) should not start a list if reader is not set -* [ARROW-321](https://issues.apache.org/jira/browse/ARROW-321) - Fix Arrow licences -* [ARROW-36](https://issues.apache.org/jira/browse/ARROW-36) - Remove fixVersions from patch tool (until we have them) -* [ARROW-46](https://issues.apache.org/jira/browse/ARROW-46) - Port DRILL-4410 to Arrow -* [ARROW-5](https://issues.apache.org/jira/browse/ARROW-5) - Error when run maven install -* [ARROW-51](https://issues.apache.org/jira/browse/ARROW-51) - Move ValueVector test from Drill project -* [ARROW-55](https://issues.apache.org/jira/browse/ARROW-55) - Python: fix legacy Python (2.7) tests and add to Travis CI -* [ARROW-62](https://issues.apache.org/jira/browse/ARROW-62) - Format: Are the nulls bits 0 or 1 for null values? -* [ARROW-63](https://issues.apache.org/jira/browse/ARROW-63) - C++: ctest fails if Python 3 is the active Python interpreter -* [ARROW-65](https://issues.apache.org/jira/browse/ARROW-65) - Python: FindPythonLibsNew does not work in a virtualenv -* [ARROW-69](https://issues.apache.org/jira/browse/ARROW-69) - Change permissions for assignable users -* [ARROW-72](https://issues.apache.org/jira/browse/ARROW-72) - FindParquet searches for non-existent header -* [ARROW-75](https://issues.apache.org/jira/browse/ARROW-75) - C++: Fix handling of empty strings -* [ARROW-77](https://issues.apache.org/jira/browse/ARROW-77) - C++: conform null bit interpretation to match ARROW-62 -* [ARROW-80](https://issues.apache.org/jira/browse/ARROW-80) - Segmentation fault on len(Array) for empty arrays -* [ARROW-88](https://issues.apache.org/jira/browse/ARROW-88) - C++: Refactor given PARQUET-572 -* [ARROW-93](https://issues.apache.org/jira/browse/ARROW-93) - XCode 7.3 breaks builds -* [ARROW-94](https://issues.apache.org/jira/browse/ARROW-94) - Expand list example to clarify null vs empty list - -## Improvement +## Contributors + +```shell +$ git shortlog -sn d5aa7c46..apache-arrow-0.1.0 + 49 Wes McKinney + 27 Uwe L. Korn + 25 Julien Le Dem + 13 Micah Kornfield + 11 Steven Phillips + 6 Jihoon Son + 5 Laurent Goujon + 5 adeneche + 4 Dan Robinson + 4 proflin + 2 Jacques Nadeau + 1 Christopher C. Aycock + 1 Edmon Begoli + 1 Kai Zheng + 1 MechCoder + 1 Minji Kim + 1 Philipp Moritz + 1 Smyatkin Maxim + 1 fengguangyuan + 1 hyukjinkwon + 1 hzhang2 + 1 lfzCarlosC +``` + +## New Features and Improvements +* [ARROW-1](https://issues.apache.org/jira/browse/ARROW-1) - Import Initial Codebase * [ARROW-10](https://issues.apache.org/jira/browse/ARROW-10) - Fix mismatch of javadoc names and method parameters -* [ARROW-15](https://issues.apache.org/jira/browse/ARROW-15) - Fix a naming typo for memory.AllocationManager.AllocationOutcome -* [ARROW-190](https://issues.apache.org/jira/browse/ARROW-190) - Python: Provide installable sdist builds -* [ARROW-199](https://issues.apache.org/jira/browse/ARROW-199) - [C++] Refine third party dependency -* [ARROW-206](https://issues.apache.org/jira/browse/ARROW-206) - [C++] Expose an equality API for arrays that compares a range of slots on two arrays -* [ARROW-212](https://issues.apache.org/jira/browse/ARROW-212) - [C++] Clarify the fact that PrimitiveArray is now abstract class -* [ARROW-213](https://issues.apache.org/jira/browse/ARROW-213) - Exposing static arrow build -* [ARROW-218](https://issues.apache.org/jira/browse/ARROW-218) - Add option to use GitHub API token via environment variable when merging PRs -* [ARROW-234](https://issues.apache.org/jira/browse/ARROW-234) - [C++] Build with libhdfs support in arrow_io in conda builds -* [ARROW-238](https://issues.apache.org/jira/browse/ARROW-238) - C++: InternalMemoryPool::Free() should throw an error when there is insufficient allocated memory -* [ARROW-245](https://issues.apache.org/jira/browse/ARROW-245) - [Format] Clarify Arrow's relationship with big endian platforms -* [ARROW-252](https://issues.apache.org/jira/browse/ARROW-252) - Add implementation guidelines to the documentation -* [ARROW-253](https://issues.apache.org/jira/browse/ARROW-253) - Int types should only have width of 8*2^n (8, 16, 32, 64) -* [ARROW-254](https://issues.apache.org/jira/browse/ARROW-254) - Remove Bit type as it is redundant with boolean -* [ARROW-255](https://issues.apache.org/jira/browse/ARROW-255) - Finalize Dictionary representation -* [ARROW-256](https://issues.apache.org/jira/browse/ARROW-256) - Add versioning to the arrow spec. -* [ARROW-257](https://issues.apache.org/jira/browse/ARROW-257) - Add a typeids Vector to Union type -* [ARROW-264](https://issues.apache.org/jira/browse/ARROW-264) - Create an Arrow File format -* [ARROW-270](https://issues.apache.org/jira/browse/ARROW-270) - [Format] Define more generic Interval logical type -* [ARROW-271](https://issues.apache.org/jira/browse/ARROW-271) - Update Field structure to be more explicit -* [ARROW-279](https://issues.apache.org/jira/browse/ARROW-279) - rename vector module to arrow-vector for consistency -* [ARROW-280](https://issues.apache.org/jira/browse/ARROW-280) - [C++] Consolidate file and shared memory IO interfaces -* [ARROW-285](https://issues.apache.org/jira/browse/ARROW-285) - Allow for custom flatc compiler -* [ARROW-286](https://issues.apache.org/jira/browse/ARROW-286) - Build thirdparty dependencies in parallel -* [ARROW-289](https://issues.apache.org/jira/browse/ARROW-289) - Install test-util.h -* [ARROW-290](https://issues.apache.org/jira/browse/ARROW-290) - Specialize alloc() in ArrowBuf -* [ARROW-292](https://issues.apache.org/jira/browse/ARROW-292) - [Java] Upgrade Netty to 4.041 -* [ARROW-299](https://issues.apache.org/jira/browse/ARROW-299) - Use absolute namespace in macros -* [ARROW-305](https://issues.apache.org/jira/browse/ARROW-305) - Add compression and use_dictionary options to Parquet interface -* [ARROW-306](https://issues.apache.org/jira/browse/ARROW-306) - Add option to pass cmake arguments via environment variable -* [ARROW-315](https://issues.apache.org/jira/browse/ARROW-315) - Finalize timestamp type -* [ARROW-319](https://issues.apache.org/jira/browse/ARROW-319) - Add canonical Arrow Schema json representation -* [ARROW-324](https://issues.apache.org/jira/browse/ARROW-324) - Update arrow metadata diagram -* [ARROW-325](https://issues.apache.org/jira/browse/ARROW-325) - make TestArrowFile not dependent on timezone -* [ARROW-50](https://issues.apache.org/jira/browse/ARROW-50) - C++: Enable library builds for 3rd-party users without having to build thirdparty googletest -* [ARROW-54](https://issues.apache.org/jira/browse/ARROW-54) - Python: rename package to "pyarrow" -* [ARROW-64](https://issues.apache.org/jira/browse/ARROW-64) - Add zsh support to C++ build scripts -* [ARROW-66](https://issues.apache.org/jira/browse/ARROW-66) - Maybe some missing steps in installation guide -* [ARROW-68](https://issues.apache.org/jira/browse/ARROW-68) - Update setup_build_env and third-party script to be more userfriendly -* [ARROW-71](https://issues.apache.org/jira/browse/ARROW-71) - C++: Add script to run clang-tidy on codebase -* [ARROW-73](https://issues.apache.org/jira/browse/ARROW-73) - Support CMake 2.8 -* [ARROW-78](https://issues.apache.org/jira/browse/ARROW-78) - C++: Add constructor for DecimalType -* [ARROW-79](https://issues.apache.org/jira/browse/ARROW-79) - Python: Add benchmarks -* [ARROW-8](https://issues.apache.org/jira/browse/ARROW-8) - Set up Travis CI -* [ARROW-85](https://issues.apache.org/jira/browse/ARROW-85) - C++: memcmp can be avoided in Equal when comparing with the same Buffer -* [ARROW-86](https://issues.apache.org/jira/browse/ARROW-86) - Python: Implement zero-copy Arrow-to-Pandas conversion -* [ARROW-87](https://issues.apache.org/jira/browse/ARROW-87) - Implement Decimal schema conversion for all ways supported in Parquet -* [ARROW-89](https://issues.apache.org/jira/browse/ARROW-89) - Python: Add benchmarks for Arrow<->Pandas conversion -* [ARROW-9](https://issues.apache.org/jira/browse/ARROW-9) - Rename some unchanged "Drill" to "Arrow" -* [ARROW-91](https://issues.apache.org/jira/browse/ARROW-91) - C++: First draft of an adapter class for parquet-cpp's ParquetFileReader that produces Arrow table/row batch objects - -## New Feature - * [ARROW-100](https://issues.apache.org/jira/browse/ARROW-100) - [C++] Computing RowBatch size +* [ARROW-101](https://issues.apache.org/jira/browse/ARROW-101) - Fix java warnings emitted by java compiler +* [ARROW-102](https://issues.apache.org/jira/browse/ARROW-102) - travis-ci support for java project * [ARROW-106](https://issues.apache.org/jira/browse/ARROW-106) - Add IPC round trip for string types (string, char, varchar, binary) * [ARROW-107](https://issues.apache.org/jira/browse/ARROW-107) - [C++] add ipc round trip for struct types +* [ARROW-11](https://issues.apache.org/jira/browse/ARROW-11) - Mirror JIRA activity to dev@arrow.apache.org * [ARROW-13](https://issues.apache.org/jira/browse/ARROW-13) - Add PR merge tool similar to that used in Parquet +* [ARROW-14](https://issues.apache.org/jira/browse/ARROW-14) - Add JIRA components +* [ARROW-15](https://issues.apache.org/jira/browse/ARROW-15) - Fix a naming typo for memory.AllocationManager.AllocationOutcome * [ARROW-19](https://issues.apache.org/jira/browse/ARROW-19) - C++: Externalize memory allocations and add a MemoryPool abstract interface to builder classes +* [ARROW-190](https://issues.apache.org/jira/browse/ARROW-190) - Python: Provide installable sdist builds * [ARROW-197](https://issues.apache.org/jira/browse/ARROW-197) - [Python] Add conda dev recipe for pyarrow +* [ARROW-199](https://issues.apache.org/jira/browse/ARROW-199) - [C++] Refine third party dependency * [ARROW-2](https://issues.apache.org/jira/browse/ARROW-2) - Post Simple Website * [ARROW-20](https://issues.apache.org/jira/browse/ARROW-20) - C++: Add null count member to Array containers, remove nullable member * [ARROW-201](https://issues.apache.org/jira/browse/ARROW-201) - C++: Initial ParquetWriter implementation * [ARROW-203](https://issues.apache.org/jira/browse/ARROW-203) - Python: Basic filename based Parquet read/write * [ARROW-204](https://issues.apache.org/jira/browse/ARROW-204) - [Python] Automate uploading conda build artifacts for libarrow and pyarrow +* [ARROW-206](https://issues.apache.org/jira/browse/ARROW-206) - [C++] Expose an equality API for arrays that compares a range of slots on two arrays * [ARROW-21](https://issues.apache.org/jira/browse/ARROW-21) - C++: Add in-memory schema metadata container +* [ARROW-212](https://issues.apache.org/jira/browse/ARROW-212) - [C++] Clarify the fact that PrimitiveArray is now abstract class +* [ARROW-213](https://issues.apache.org/jira/browse/ARROW-213) - Exposing static arrow build * [ARROW-214](https://issues.apache.org/jira/browse/ARROW-214) - C++: Add String support to Parquet I/O * [ARROW-215](https://issues.apache.org/jira/browse/ARROW-215) - C++: Support other integer types in Parquet I/O +* [ARROW-218](https://issues.apache.org/jira/browse/ARROW-218) - Add option to use GitHub API token via environment variable when merging PRs * [ARROW-22](https://issues.apache.org/jira/browse/ARROW-22) - C++: Add schema adapter routines for converting flat Parquet schemas to in-memory Arrow schemas * [ARROW-222](https://issues.apache.org/jira/browse/ARROW-222) - [C++] Create prototype file-like interface to HDFS (via libhdfs) and begin defining more general IO interface for Arrow data adapters * [ARROW-23](https://issues.apache.org/jira/browse/ARROW-23) - C++: Add logical "Column" container for chunked data * [ARROW-233](https://issues.apache.org/jira/browse/ARROW-233) - [C++] Add visibility defines for limiting shared library symbol visibility +* [ARROW-234](https://issues.apache.org/jira/browse/ARROW-234) - [C++] Build with libhdfs support in arrow_io in conda builds * [ARROW-236](https://issues.apache.org/jira/browse/ARROW-236) - [Python] Enable Parquet read/write to work with HDFS file objects * [ARROW-237](https://issues.apache.org/jira/browse/ARROW-237) - [C++] Create Arrow specializations of Parquet allocator and read interfaces +* [ARROW-238](https://issues.apache.org/jira/browse/ARROW-238) - C++: InternalMemoryPool::Free() should throw an error when there is insufficient allocated memory * [ARROW-24](https://issues.apache.org/jira/browse/ARROW-24) - C++: Add logical "Table" container * [ARROW-242](https://issues.apache.org/jira/browse/ARROW-242) - C++/Python: Support Timestamp Data Type +* [ARROW-245](https://issues.apache.org/jira/browse/ARROW-245) - [Format] Clarify Arrow's relationship with big endian platforms +* [ARROW-251](https://issues.apache.org/jira/browse/ARROW-251) - [C++] Expose APIs for getting code and message of the status +* [ARROW-252](https://issues.apache.org/jira/browse/ARROW-252) - Add implementation guidelines to the documentation +* [ARROW-253](https://issues.apache.org/jira/browse/ARROW-253) - Int types should only have width of 8*2^n (8, 16, 32, 64) +* [ARROW-254](https://issues.apache.org/jira/browse/ARROW-254) - Remove Bit type as it is redundant with boolean +* [ARROW-255](https://issues.apache.org/jira/browse/ARROW-255) - Finalize Dictionary representation +* [ARROW-256](https://issues.apache.org/jira/browse/ARROW-256) - Add versioning to the arrow spec. +* [ARROW-257](https://issues.apache.org/jira/browse/ARROW-257) - Add a typeids Vector to Union type * [ARROW-26](https://issues.apache.org/jira/browse/ARROW-26) - C++: Add developer instructions for building parquet-cpp integration +* [ARROW-260](https://issues.apache.org/jira/browse/ARROW-260) - TestValueVector.testFixedVectorReallocation and testVariableVectorReallocation are flaky * [ARROW-262](https://issues.apache.org/jira/browse/ARROW-262) - [Format] Add a new format document for metadata and logical types for messaging and IPC / on-wire/file representations +* [ARROW-264](https://issues.apache.org/jira/browse/ARROW-264) - Create an Arrow File format * [ARROW-267](https://issues.apache.org/jira/browse/ARROW-267) - [C++] C++ implementation of file-like layout for RPC / IPC +* [ARROW-270](https://issues.apache.org/jira/browse/ARROW-270) - [Format] Define more generic Interval logical type +* [ARROW-271](https://issues.apache.org/jira/browse/ARROW-271) - Update Field structure to be more explicit +* [ARROW-272](https://issues.apache.org/jira/browse/ARROW-272) - Arrow release 0.1 +* [ARROW-279](https://issues.apache.org/jira/browse/ARROW-279) - rename vector module to arrow-vector for consistency * [ARROW-28](https://issues.apache.org/jira/browse/ARROW-28) - C++: Add google/benchmark to the 3rd-party build toolchain +* [ARROW-280](https://issues.apache.org/jira/browse/ARROW-280) - [C++] Consolidate file and shared memory IO interfaces +* [ARROW-285](https://issues.apache.org/jira/browse/ARROW-285) - Allow for custom flatc compiler +* [ARROW-286](https://issues.apache.org/jira/browse/ARROW-286) - Build thirdparty dependencies in parallel +* [ARROW-289](https://issues.apache.org/jira/browse/ARROW-289) - Install test-util.h +* [ARROW-290](https://issues.apache.org/jira/browse/ARROW-290) - Specialize alloc() in ArrowBuf +* [ARROW-292](https://issues.apache.org/jira/browse/ARROW-292) - [Java] Upgrade Netty to 4.041 * [ARROW-293](https://issues.apache.org/jira/browse/ARROW-293) - [C++] Implementations of IO interfaces for operating system files * [ARROW-296](https://issues.apache.org/jira/browse/ARROW-296) - [C++] Remove arrow_parquet C++ module and related parts of build system +* [ARROW-298](https://issues.apache.org/jira/browse/ARROW-298) - create release scripts +* [ARROW-299](https://issues.apache.org/jira/browse/ARROW-299) - Use absolute namespace in macros * [ARROW-3](https://issues.apache.org/jira/browse/ARROW-3) - Post Initial Arrow Format Spec * [ARROW-30](https://issues.apache.org/jira/browse/ARROW-30) - Python: pandas/NumPy to/from Arrow conversion routines * [ARROW-301](https://issues.apache.org/jira/browse/ARROW-301) - [Format] Add some form of user field metadata to IPC schemas * [ARROW-302](https://issues.apache.org/jira/browse/ARROW-302) - [Python] Add support to use the Arrow file format with file-like objects +* [ARROW-305](https://issues.apache.org/jira/browse/ARROW-305) - Add compression and use_dictionary options to Parquet interface +* [ARROW-306](https://issues.apache.org/jira/browse/ARROW-306) - Add option to pass cmake arguments via environment variable * [ARROW-31](https://issues.apache.org/jira/browse/ARROW-31) - Python: basic PyList <-> Arrow marshaling code +* [ARROW-315](https://issues.apache.org/jira/browse/ARROW-315) - Finalize timestamp type * [ARROW-318](https://issues.apache.org/jira/browse/ARROW-318) - [Python] Revise README to reflect current state of project +* [ARROW-319](https://issues.apache.org/jira/browse/ARROW-319) - Add canonical Arrow Schema json representation +* [ARROW-324](https://issues.apache.org/jira/browse/ARROW-324) - Update arrow metadata diagram +* [ARROW-325](https://issues.apache.org/jira/browse/ARROW-325) - make TestArrowFile not dependent on timezone +* [ARROW-35](https://issues.apache.org/jira/browse/ARROW-35) - Add a short call-to-action / how-to-get-involved to the main README.md * [ARROW-37](https://issues.apache.org/jira/browse/ARROW-37) - C++: Represent boolean array data in bit-packed form * [ARROW-4](https://issues.apache.org/jira/browse/ARROW-4) - Initial Arrow CPP Implementation * [ARROW-42](https://issues.apache.org/jira/browse/ARROW-42) - Python: Add to Travis CI build @@ -185,38 +146,91 @@ limitations under the License. * [ARROW-44](https://issues.apache.org/jira/browse/ARROW-44) - Python: Implement basic object model for scalar values (i.e. results of arrow_arr[i]) * [ARROW-48](https://issues.apache.org/jira/browse/ARROW-48) - Python: Add Schema object wrapper * [ARROW-49](https://issues.apache.org/jira/browse/ARROW-49) - Python: Add Column and Table wrapper interface +* [ARROW-50](https://issues.apache.org/jira/browse/ARROW-50) - C++: Enable library builds for 3rd-party users without having to build thirdparty googletest * [ARROW-53](https://issues.apache.org/jira/browse/ARROW-53) - Python: Fix RPATH and add source installation instructions +* [ARROW-54](https://issues.apache.org/jira/browse/ARROW-54) - Python: rename package to "pyarrow" * [ARROW-56](https://issues.apache.org/jira/browse/ARROW-56) - Format: Specify LSB bit ordering in bit arrays * [ARROW-57](https://issues.apache.org/jira/browse/ARROW-57) - Format: Draft data headers IDL for data interchange * [ARROW-58](https://issues.apache.org/jira/browse/ARROW-58) - Format: Draft type metadata ("schemas") IDL * [ARROW-59](https://issues.apache.org/jira/browse/ARROW-59) - Python: Boolean data support for builtin data structures * [ARROW-60](https://issues.apache.org/jira/browse/ARROW-60) - C++: Struct type builder API +* [ARROW-64](https://issues.apache.org/jira/browse/ARROW-64) - Add zsh support to C++ build scripts +* [ARROW-66](https://issues.apache.org/jira/browse/ARROW-66) - Maybe some missing steps in installation guide * [ARROW-67](https://issues.apache.org/jira/browse/ARROW-67) - C++: Draft type metadata conversion to/from IPC representation +* [ARROW-68](https://issues.apache.org/jira/browse/ARROW-68) - Update setup_build_env and third-party script to be more userfriendly * [ARROW-7](https://issues.apache.org/jira/browse/ARROW-7) - Add Python library build toolchain * [ARROW-70](https://issues.apache.org/jira/browse/ARROW-70) - C++: Add "lite" DCHECK macros used in parquet-cpp +* [ARROW-71](https://issues.apache.org/jira/browse/ARROW-71) - C++: Add script to run clang-tidy on codebase +* [ARROW-73](https://issues.apache.org/jira/browse/ARROW-73) - Support CMake 2.8 * [ARROW-76](https://issues.apache.org/jira/browse/ARROW-76) - Revise format document to include null count, defer non-nullable arrays to the domain of metadata +* [ARROW-78](https://issues.apache.org/jira/browse/ARROW-78) - C++: Add constructor for DecimalType +* [ARROW-79](https://issues.apache.org/jira/browse/ARROW-79) - Python: Add benchmarks +* [ARROW-8](https://issues.apache.org/jira/browse/ARROW-8) - Set up Travis CI * [ARROW-82](https://issues.apache.org/jira/browse/ARROW-82) - C++: Implement IPC exchange for List types +* [ARROW-83](https://issues.apache.org/jira/browse/ARROW-83) - Add basic test infrastructure for DecimalType +* [ARROW-85](https://issues.apache.org/jira/browse/ARROW-85) - C++: memcmp can be avoided in Equal when comparing with the same Buffer +* [ARROW-86](https://issues.apache.org/jira/browse/ARROW-86) - Python: Implement zero-copy Arrow-to-Pandas conversion +* [ARROW-87](https://issues.apache.org/jira/browse/ARROW-87) - Implement Decimal schema conversion for all ways supported in Parquet +* [ARROW-89](https://issues.apache.org/jira/browse/ARROW-89) - Python: Add benchmarks for Arrow<->Pandas conversion +* [ARROW-9](https://issues.apache.org/jira/browse/ARROW-9) - Rename some unchanged "Drill" to "Arrow" * [ARROW-90](https://issues.apache.org/jira/browse/ARROW-90) - Apache Arrow cpp code does not support power architecture +* [ARROW-91](https://issues.apache.org/jira/browse/ARROW-91) - C++: First draft of an adapter class for parquet-cpp's ParquetFileReader that produces Arrow table/row batch objects * [ARROW-92](https://issues.apache.org/jira/browse/ARROW-92) - C++: Arrow to Parquet Schema conversion -## Task - -* [ARROW-1](https://issues.apache.org/jira/browse/ARROW-1) - Import Initial Codebase -* [ARROW-101](https://issues.apache.org/jira/browse/ARROW-101) - Fix java warnings emitted by java compiler -* [ARROW-102](https://issues.apache.org/jira/browse/ARROW-102) - travis-ci support for java project -* [ARROW-11](https://issues.apache.org/jira/browse/ARROW-11) - Mirror JIRA activity to dev@arrow.apache.org -* [ARROW-14](https://issues.apache.org/jira/browse/ARROW-14) - Add JIRA components -* [ARROW-251](https://issues.apache.org/jira/browse/ARROW-251) - [C++] Expose APIs for getting code and message of the status -* [ARROW-272](https://issues.apache.org/jira/browse/ARROW-272) - Arrow release 0.1 -* [ARROW-298](https://issues.apache.org/jira/browse/ARROW-298) - create release scripts -* [ARROW-35](https://issues.apache.org/jira/browse/ARROW-35) - Add a short call-to-action / how-to-get-involved to the main README.md +## Bug Fixes -## Test - -* [ARROW-260](https://issues.apache.org/jira/browse/ARROW-260) - TestValueVector.testFixedVectorReallocation and testVariableVectorReallocation are flaky -* [ARROW-83](https://issues.apache.org/jira/browse/ARROW-83) - Add basic test infrastructure for DecimalType - -[2]: https://github.com/apache/arrow/releases/tag/apache-arrow-0.1.0 -[3]: https://dist.apache.org/repos/dist/release/arrow/arrow-0.1.0/apache-arrow-0.1.0.tar.gz.md5 -[6]: https://dist.apache.org/repos/dist/release/arrow/arrow-0.1.0/apache-arrow-0.1.0.tar.gz -[7]: https://dist.apache.org/repos/dist/release/arrow/arrow-0.1.0/apache-arrow-0.1.0.tar.gz.asc +* [ARROW-103](https://issues.apache.org/jira/browse/ARROW-103) - Missing patterns from .gitignore +* [ARROW-104](https://issues.apache.org/jira/browse/ARROW-104) - Update Layout.md based on discussion on the mailing list +* [ARROW-105](https://issues.apache.org/jira/browse/ARROW-105) - Unit tests fail if assertions are disabled +* [ARROW-113](https://issues.apache.org/jira/browse/ARROW-113) - TestValueVector test fails if cannot allocate 2GB of memory +* [ARROW-16](https://issues.apache.org/jira/browse/ARROW-16) - Building cpp issues on XCode 7.2.1 +* [ARROW-17](https://issues.apache.org/jira/browse/ARROW-17) - Set some vector fields to default access level for Drill compatibility +* [ARROW-18](https://issues.apache.org/jira/browse/ARROW-18) - Fix bug with decimal precision and scale +* [ARROW-185](https://issues.apache.org/jira/browse/ARROW-185) - [C++] Make sure alignment and memory padding conform to spec +* [ARROW-188](https://issues.apache.org/jira/browse/ARROW-188) - Python: Add numpy as install requirement +* [ARROW-193](https://issues.apache.org/jira/browse/ARROW-193) - For the instruction, typos "int his" should be "in this" +* [ARROW-194](https://issues.apache.org/jira/browse/ARROW-194) - C++: Allow read-only memory mapped source +* [ARROW-200](https://issues.apache.org/jira/browse/ARROW-200) - [Python] Convert Values String looks like it has incorrect error handling +* [ARROW-209](https://issues.apache.org/jira/browse/ARROW-209) - [C++] Broken builds: llvm.org apt repos are unavailable +* [ARROW-210](https://issues.apache.org/jira/browse/ARROW-210) - [C++] Tidy up the type system a little bit +* [ARROW-211](https://issues.apache.org/jira/browse/ARROW-211) - Several typos/errors in Layout.md examples +* [ARROW-217](https://issues.apache.org/jira/browse/ARROW-217) - Fix Travis w.r.t conda 4.1.0 changes +* [ARROW-219](https://issues.apache.org/jira/browse/ARROW-219) - [C++] Passed CMAKE_CXX_FLAGS are being dropped, fix compiler warnings +* [ARROW-223](https://issues.apache.org/jira/browse/ARROW-223) - Do not link against libpython +* [ARROW-225](https://issues.apache.org/jira/browse/ARROW-225) - [C++/Python] master Travis CI build is broken +* [ARROW-244](https://issues.apache.org/jira/browse/ARROW-244) - [C++] Some global APIs of IPC module should be visible to the outside +* [ARROW-246](https://issues.apache.org/jira/browse/ARROW-246) - [Java] UnionVector doesn't call allocateNew() when creating it's vectorType +* [ARROW-247](https://issues.apache.org/jira/browse/ARROW-247) - [C++] Missing explicit destructor in RowBatchReader causes an incomplete type error +* [ARROW-250](https://issues.apache.org/jira/browse/ARROW-250) - Fix for ARROW-246 may cause memory leaks +* [ARROW-259](https://issues.apache.org/jira/browse/ARROW-259) - Use flatbuffer fields in java implementation +* [ARROW-265](https://issues.apache.org/jira/browse/ARROW-265) - Negative decimal values have wrong padding +* [ARROW-266](https://issues.apache.org/jira/browse/ARROW-266) - [C++] Fix the broken build +* [ARROW-274](https://issues.apache.org/jira/browse/ARROW-274) - Make the MapVector nullable +* [ARROW-278](https://issues.apache.org/jira/browse/ARROW-278) - [Format] Struct type name consistency in implementations and metadata +* [ARROW-283](https://issues.apache.org/jira/browse/ARROW-283) - [C++] Update arrow_parquet to account for API changes in PARQUET-573 +* [ARROW-284](https://issues.apache.org/jira/browse/ARROW-284) - [C++] Triage builds by disabling Arrow-Parquet module +* [ARROW-287](https://issues.apache.org/jira/browse/ARROW-287) - [java] Make nullable vectors use a BitVecor instead of UInt1Vector for bits +* [ARROW-297](https://issues.apache.org/jira/browse/ARROW-297) - Fix Arrow pom for release +* [ARROW-304](https://issues.apache.org/jira/browse/ARROW-304) - NullableMapReaderImpl.isSet() always returns true +* [ARROW-308](https://issues.apache.org/jira/browse/ARROW-308) - UnionListWriter.setPosition() should not call startList() +* [ARROW-309](https://issues.apache.org/jira/browse/ARROW-309) - Types.getMinorTypeForArrowType() does not work for Union type +* [ARROW-313](https://issues.apache.org/jira/browse/ARROW-313) - XCode 8.0 breaks builds +* [ARROW-314](https://issues.apache.org/jira/browse/ARROW-314) - JSONScalar is unnecessary and unused. +* [ARROW-320](https://issues.apache.org/jira/browse/ARROW-320) - ComplexCopier.copy(FieldReader, FieldWriter) should not start a list if reader is not set +* [ARROW-321](https://issues.apache.org/jira/browse/ARROW-321) - Fix Arrow licences +* [ARROW-36](https://issues.apache.org/jira/browse/ARROW-36) - Remove fixVersions from patch tool (until we have them) +* [ARROW-46](https://issues.apache.org/jira/browse/ARROW-46) - Port DRILL-4410 to Arrow +* [ARROW-5](https://issues.apache.org/jira/browse/ARROW-5) - Error when run maven install +* [ARROW-51](https://issues.apache.org/jira/browse/ARROW-51) - Move ValueVector test from Drill project +* [ARROW-55](https://issues.apache.org/jira/browse/ARROW-55) - Python: fix legacy Python (2.7) tests and add to Travis CI +* [ARROW-62](https://issues.apache.org/jira/browse/ARROW-62) - Format: Are the nulls bits 0 or 1 for null values? +* [ARROW-63](https://issues.apache.org/jira/browse/ARROW-63) - C++: ctest fails if Python 3 is the active Python interpreter +* [ARROW-65](https://issues.apache.org/jira/browse/ARROW-65) - Python: FindPythonLibsNew does not work in a virtualenv +* [ARROW-69](https://issues.apache.org/jira/browse/ARROW-69) - Change permissions for assignable users +* [ARROW-72](https://issues.apache.org/jira/browse/ARROW-72) - FindParquet searches for non-existent header +* [ARROW-75](https://issues.apache.org/jira/browse/ARROW-75) - C++: Fix handling of empty strings +* [ARROW-77](https://issues.apache.org/jira/browse/ARROW-77) - C++: conform null bit interpretation to match ARROW-62 +* [ARROW-80](https://issues.apache.org/jira/browse/ARROW-80) - Segmentation fault on len(Array) for empty arrays +* [ARROW-88](https://issues.apache.org/jira/browse/ARROW-88) - C++: Refactor given PARQUET-572 +* [ARROW-93](https://issues.apache.org/jira/browse/ARROW-93) - XCode 7.3 breaks builds +* [ARROW-94](https://issues.apache.org/jira/browse/ARROW-94) - Expand list example to clarify null vs empty list \ No newline at end of file diff --git a/site/_release/0.2.0.md b/site/_release/0.2.0.md index d66afd066d7..ddac64e33f5 100644 --- a/site/_release/0.2.0.md +++ b/site/_release/0.2.0.md @@ -31,7 +31,146 @@ limitations under the License. # Changelog -## Bug +## Contributors + +```shell +$ git shortlog -sn apache-arrow-0.1.0..apache-arrow-0.2.0 + 73 Wes McKinney + 55 Uwe L. Korn + 16 Julien Le Dem + 4 Bryan Cutler + 4 Nong Li + 2 Christopher C. Aycock + 2 Jingyuan Wang + 2 Kouhei Sutou + 2 Laurent Goujon + 2 Leif Walsh + 1 Emilio Lahr-Vivaz + 1 Holden Karau + 1 Li Jin + 1 Mohamed Zenadi + 1 Peter Hoffmann + 1 Steven Phillips + 1 adeneche + 1 ahnj + 1 vkorukanti +``` + +## New Features and Improvements + +* [ARROW-108](https://issues.apache.org/jira/browse/ARROW-108) - [C++] Add IPC round trip for union types +* [ARROW-189](https://issues.apache.org/jira/browse/ARROW-189) - C++: Use ExternalProject to build thirdparty dependencies +* [ARROW-191](https://issues.apache.org/jira/browse/ARROW-191) - Python: Provide infrastructure for manylinux1 wheels +* [ARROW-221](https://issues.apache.org/jira/browse/ARROW-221) - Add switch for writing Parquet 1.0 compatible logical types +* [ARROW-227](https://issues.apache.org/jira/browse/ARROW-227) - [C++/Python] Hook arrow_io generic reader / writer interface into arrow_parquet +* [ARROW-228](https://issues.apache.org/jira/browse/ARROW-228) - [Python] Create an Arrow-cpp-compatible interface for reading bytes from Python file-like objects +* [ARROW-243](https://issues.apache.org/jira/browse/ARROW-243) - [C++] Add "driver" option to HdfsClient to choose between libhdfs and libhdfs3 at runtime +* [ARROW-268](https://issues.apache.org/jira/browse/ARROW-268) - [C++] Flesh out union implementation to have all required methods for IPC +* [ARROW-303](https://issues.apache.org/jira/browse/ARROW-303) - [C++] Also build static libraries for leaf libraries +* [ARROW-312](https://issues.apache.org/jira/browse/ARROW-312) - [Python] Provide Python API to read/write the Arrow IPC file format +* [ARROW-317](https://issues.apache.org/jira/browse/ARROW-317) - [C++] Implement zero-copy Slice method on arrow::Buffer that retains reference to parent +* [ARROW-327](https://issues.apache.org/jira/browse/ARROW-327) - [Python] Remove conda builds from Travis CI processes +* [ARROW-328](https://issues.apache.org/jira/browse/ARROW-328) - [C++] Return shared_ptr by value instead of const-ref? +* [ARROW-33](https://issues.apache.org/jira/browse/ARROW-33) - C++: Implement zero-copy array slicing +* [ARROW-330](https://issues.apache.org/jira/browse/ARROW-330) - [C++] CMake functions to simplify shared / static library configuration +* [ARROW-332](https://issues.apache.org/jira/browse/ARROW-332) - [Python] Add helper function to convert RecordBatch to pandas.DataFrame +* [ARROW-333](https://issues.apache.org/jira/browse/ARROW-333) - Make writers update their internal schema even when no data is written. +* [ARROW-335](https://issues.apache.org/jira/browse/ARROW-335) - Improve Type apis and toString() by encapsulating flatbuffers better +* [ARROW-336](https://issues.apache.org/jira/browse/ARROW-336) - Run Apache Rat in Travis builds +* [ARROW-338](https://issues.apache.org/jira/browse/ARROW-338) - [C++] Refactor IPC vector "loading" and "unloading" to be based on cleaner visitor pattern +* [ARROW-350](https://issues.apache.org/jira/browse/ARROW-350) - Add Kerberos support to HDFS shim +* [ARROW-353](https://issues.apache.org/jira/browse/ARROW-353) - Arrow release 0.2 +* [ARROW-355](https://issues.apache.org/jira/browse/ARROW-355) - Add tests for serialising arrays of empty strings to Parquet +* [ARROW-356](https://issues.apache.org/jira/browse/ARROW-356) - Add documentation about reading Parquet +* [ARROW-359](https://issues.apache.org/jira/browse/ARROW-359) - Need to document ARROW_LIBHDFS_DIR +* [ARROW-360](https://issues.apache.org/jira/browse/ARROW-360) - C++: Add method to shrink PoolBuffer using realloc +* [ARROW-361](https://issues.apache.org/jira/browse/ARROW-361) - Python: Support reading a column-selection from Parquet files +* [ARROW-363](https://issues.apache.org/jira/browse/ARROW-363) - Set up Java/C++ integration test harness +* [ARROW-365](https://issues.apache.org/jira/browse/ARROW-365) - Python: Provide Array.to_pandas() +* [ARROW-366](https://issues.apache.org/jira/browse/ARROW-366) - [java] implement Dictionary vector +* [ARROW-367](https://issues.apache.org/jira/browse/ARROW-367) - [java] converter csv/json <=> Arrow file format for Integration tests +* [ARROW-368](https://issues.apache.org/jira/browse/ARROW-368) - Document use of LD_LIBRARY_PATH when using Python +* [ARROW-369](https://issues.apache.org/jira/browse/ARROW-369) - [Python] Add ability to convert multiple record batches at once to pandas +* [ARROW-372](https://issues.apache.org/jira/browse/ARROW-372) - Create JSON arrow file format for integration tests +* [ARROW-373](https://issues.apache.org/jira/browse/ARROW-373) - [C++] Implement C++ version of JSON file format for testing +* [ARROW-374](https://issues.apache.org/jira/browse/ARROW-374) - Python: clarify unicode vs. binary in API +* [ARROW-377](https://issues.apache.org/jira/browse/ARROW-377) - Python: Add support for conversion of Pandas.Categorical +* [ARROW-379](https://issues.apache.org/jira/browse/ARROW-379) - Python: Use setuptools_scm/setuptools_scm_git_archive to provide the version number +* [ARROW-380](https://issues.apache.org/jira/browse/ARROW-380) - [Java] optimize null count when serializing vectors. +* [ARROW-381](https://issues.apache.org/jira/browse/ARROW-381) - [C++] Simplify primitive array type builders to use a default type singleton +* [ARROW-382](https://issues.apache.org/jira/browse/ARROW-382) - Python: Extend API documentation +* [ARROW-383](https://issues.apache.org/jira/browse/ARROW-383) - [C++] Implement C++ version of ARROW-367 integration test validator +* [ARROW-389](https://issues.apache.org/jira/browse/ARROW-389) - Python: Write Parquet files to pyarrow.io.NativeFile objects +* [ARROW-394](https://issues.apache.org/jira/browse/ARROW-394) - Add integration tests for boolean, list, struct, and other basic types +* [ARROW-396](https://issues.apache.org/jira/browse/ARROW-396) - Python: Add pyarrow.schema.Schema.equals +* [ARROW-409](https://issues.apache.org/jira/browse/ARROW-409) - Python: Change pyarrow.Table.dataframe_from_batches API to create Table instead +* [ARROW-410](https://issues.apache.org/jira/browse/ARROW-410) - [C++] Add Flush method to arrow::io::OutputStream +* [ARROW-411](https://issues.apache.org/jira/browse/ARROW-411) - [Java] Move Intergration.compare and Intergration.compareSchemas to a public utils class +* [ARROW-415](https://issues.apache.org/jira/browse/ARROW-415) - C++: Add Equals implementation to compare Tables +* [ARROW-416](https://issues.apache.org/jira/browse/ARROW-416) - C++: Add Equals implementation to compare Columns +* [ARROW-417](https://issues.apache.org/jira/browse/ARROW-417) - C++: Add Equals implementation to compare ChunkedArrays +* [ARROW-418](https://issues.apache.org/jira/browse/ARROW-418) - [C++] Consolidate array container and builder code, remove arrow/types +* [ARROW-419](https://issues.apache.org/jira/browse/ARROW-419) - [C++] Promote util/{status.h, buffer.h, memory-pool.h} to top level of arrow/ source directory +* [ARROW-423](https://issues.apache.org/jira/browse/ARROW-423) - C++: Define BUILD_BYPRODUCTS in external project to support non-make CMake generators +* [ARROW-425](https://issues.apache.org/jira/browse/ARROW-425) - Python: Expose a C function to convert arrow::Table to pyarrow.Table +* [ARROW-426](https://issues.apache.org/jira/browse/ARROW-426) - Python: Conversion from pyarrow.Array to a Python list +* [ARROW-427](https://issues.apache.org/jira/browse/ARROW-427) - [C++] Implement dictionary-encoded array container +* [ARROW-428](https://issues.apache.org/jira/browse/ARROW-428) - [Python] Deserialize from Arrow record batches to pandas in parallel using a thread pool +* [ARROW-430](https://issues.apache.org/jira/browse/ARROW-430) - Python: Better version handling +* [ARROW-432](https://issues.apache.org/jira/browse/ARROW-432) - [Python] Avoid unnecessary memory copy in to_pandas conversion by using low-level pandas internals APIs +* [ARROW-438](https://issues.apache.org/jira/browse/ARROW-438) - [Python] Concatenate Table instances with equal schemas +* [ARROW-440](https://issues.apache.org/jira/browse/ARROW-440) - [C++] Support pkg-config +* [ARROW-441](https://issues.apache.org/jira/browse/ARROW-441) - [Python] Expose Arrow's file and memory map classes as NativeFile subclasses +* [ARROW-442](https://issues.apache.org/jira/browse/ARROW-442) - [Python] Add public Python API to inspect Parquet file metadata +* [ARROW-444](https://issues.apache.org/jira/browse/ARROW-444) - [Python] Avoid unnecessary memory copies from use of PyBytes_* C APIs +* [ARROW-449](https://issues.apache.org/jira/browse/ARROW-449) - Python: Conversion from pyarrow.{Table,RecordBatch} to a Python dict +* [ARROW-450](https://issues.apache.org/jira/browse/ARROW-450) - Python: Fixes for PARQUET-818 +* [ARROW-456](https://issues.apache.org/jira/browse/ARROW-456) - C++: Add jemalloc based MemoryPool +* [ARROW-457](https://issues.apache.org/jira/browse/ARROW-457) - Python: Better control over memory pool +* [ARROW-458](https://issues.apache.org/jira/browse/ARROW-458) - Python: Expose jemalloc MemoryPool +* [ARROW-461](https://issues.apache.org/jira/browse/ARROW-461) - [Python] Implement conversion between arrow::DictionaryArray and pandas.Categorical +* [ARROW-463](https://issues.apache.org/jira/browse/ARROW-463) - C++: Support jemalloc 4.x +* [ARROW-466](https://issues.apache.org/jira/browse/ARROW-466) - C++: ExternalProject for jemalloc +* [ARROW-467](https://issues.apache.org/jira/browse/ARROW-467) - [Python] Run parquet-cpp unit tests in Travis CI +* [ARROW-468](https://issues.apache.org/jira/browse/ARROW-468) - Python: Conversion of nested data in pd.DataFrames to/from Arrow structures +* [ARROW-470](https://issues.apache.org/jira/browse/ARROW-470) - [Python] Add "FileSystem" abstraction to access directories of files in a uniform way +* [ARROW-471](https://issues.apache.org/jira/browse/ARROW-471) - [Python] Enable ParquetFile to pass down separately-obtained file metadata +* [ARROW-472](https://issues.apache.org/jira/browse/ARROW-472) - [Python] Expose parquet::{SchemaDescriptor, ColumnDescriptor}::Equals +* [ARROW-474](https://issues.apache.org/jira/browse/ARROW-474) - Create an Arrow streaming file fomat +* [ARROW-475](https://issues.apache.org/jira/browse/ARROW-475) - [Python] High level support for reading directories of Parquet files (as a single Arrow table) from supported file system interfaces +* [ARROW-476](https://issues.apache.org/jira/browse/ARROW-476) - [Integration] Add integration tests for Binary / Varbytes type +* [ARROW-477](https://issues.apache.org/jira/browse/ARROW-477) - [Java] Add support for second/microsecond/nanosecond timestamps in-memory and in IPC/JSON layer +* [ARROW-478](https://issues.apache.org/jira/browse/ARROW-478) - [Python] Accept a PyBytes object in the pyarrow.io.BufferReader ctor +* [ARROW-479](https://issues.apache.org/jira/browse/ARROW-479) - Python: Test for expected schema in Pandas conversion +* [ARROW-484](https://issues.apache.org/jira/browse/ARROW-484) - Add more detail about what of technology can be found in the Arrow implementations to README +* [ARROW-485](https://issues.apache.org/jira/browse/ARROW-485) - [Java] Users are required to initialize VariableLengthVectors.offsetVector before calling VariableLengthVectors.mutator.getSafe +* [ARROW-490](https://issues.apache.org/jira/browse/ARROW-490) - Python: Update manylinux1 build scripts +* [ARROW-495](https://issues.apache.org/jira/browse/ARROW-495) - [C++] Add C++ implementation of streaming serialized format +* [ARROW-497](https://issues.apache.org/jira/browse/ARROW-497) - [Java] Integration test harness for streaming format +* [ARROW-498](https://issues.apache.org/jira/browse/ARROW-498) - [C++] Integration test harness for streaming format +* [ARROW-503](https://issues.apache.org/jira/browse/ARROW-503) - [Python] Interface to streaming binary format +* [ARROW-506](https://issues.apache.org/jira/browse/ARROW-506) - Implement Arrow Echo server for integration testing +* [ARROW-508](https://issues.apache.org/jira/browse/ARROW-508) - [C++] Make file/memory-mapped file interfaces threadsafe +* [ARROW-509](https://issues.apache.org/jira/browse/ARROW-509) - [Python] Add support for PARQUET-835 (parallel column reads) +* [ARROW-512](https://issues.apache.org/jira/browse/ARROW-512) - C++: Add method to check for primitive types +* [ARROW-514](https://issues.apache.org/jira/browse/ARROW-514) - [Python] Accept pyarrow.io.Buffer as input to StreamReader, FileReader classes +* [ARROW-515](https://issues.apache.org/jira/browse/ARROW-515) - [Python] Add StreamReader/FileReader methods that read all record batches as a Table +* [ARROW-521](https://issues.apache.org/jira/browse/ARROW-521) - [C++/Python] Track peak memory use in default MemoryPool +* [ARROW-524](https://issues.apache.org/jira/browse/ARROW-524) - [java] provide apis to access nested vectors and buffers +* [ARROW-525](https://issues.apache.org/jira/browse/ARROW-525) - Python: Add more documentation to the package +* [ARROW-527](https://issues.apache.org/jira/browse/ARROW-527) - clean drill-module.conf file +* [ARROW-529](https://issues.apache.org/jira/browse/ARROW-529) - Python: Add jemalloc and Python 3.6 to manylinux1 build +* [ARROW-531](https://issues.apache.org/jira/browse/ARROW-531) - Python: Document jemalloc, extend Pandas section, add Getting Involved +* [ARROW-538](https://issues.apache.org/jira/browse/ARROW-538) - [C++] Set up AddressSanitizer (ASAN) builds +* [ARROW-546](https://issues.apache.org/jira/browse/ARROW-546) - Python: Account for changes in PARQUET-867 +* [ARROW-547](https://issues.apache.org/jira/browse/ARROW-547) - [Python] Expose Array::Slice and RecordBatch::Slice +* [ARROW-553](https://issues.apache.org/jira/browse/ARROW-553) - C++: Faster valid bitmap building +* [ARROW-558](https://issues.apache.org/jira/browse/ARROW-558) - Add KEYS files +* [ARROW-81](https://issues.apache.org/jira/browse/ARROW-81) - [Format] Add a Category logical type (distinct from dictionary-encoding) +* [ARROW-96](https://issues.apache.org/jira/browse/ARROW-96) - C++: API documentation using Doxygen +* [ARROW-97](https://issues.apache.org/jira/browse/ARROW-97) - Python: API documentation via sphinx-apidoc + +## Bug Fixes * [ARROW-112](https://issues.apache.org/jira/browse/ARROW-112) - [C++] Style fix for constants/enums * [ARROW-202](https://issues.apache.org/jira/browse/ARROW-202) - [C++] Integrate with appveyor ci for windows support and get arrow building on windows @@ -112,126 +251,6 @@ limitations under the License. * [ARROW-556](https://issues.apache.org/jira/browse/ARROW-556) - [Integration] Can not run Integration tests if different cpp build path * [ARROW-561](https://issues.apache.org/jira/browse/ARROW-561) - Update java & python dependencies to improve downstream packaging experience -## Improvement - -* [ARROW-189](https://issues.apache.org/jira/browse/ARROW-189) - C++: Use ExternalProject to build thirdparty dependencies -* [ARROW-191](https://issues.apache.org/jira/browse/ARROW-191) - Python: Provide infrastructure for manylinux1 wheels -* [ARROW-328](https://issues.apache.org/jira/browse/ARROW-328) - [C++] Return shared_ptr by value instead of const-ref? -* [ARROW-330](https://issues.apache.org/jira/browse/ARROW-330) - [C++] CMake functions to simplify shared / static library configuration -* [ARROW-333](https://issues.apache.org/jira/browse/ARROW-333) - Make writers update their internal schema even when no data is written. -* [ARROW-335](https://issues.apache.org/jira/browse/ARROW-335) - Improve Type apis and toString() by encapsulating flatbuffers better -* [ARROW-336](https://issues.apache.org/jira/browse/ARROW-336) - Run Apache Rat in Travis builds -* [ARROW-338](https://issues.apache.org/jira/browse/ARROW-338) - [C++] Refactor IPC vector "loading" and "unloading" to be based on cleaner visitor pattern -* [ARROW-350](https://issues.apache.org/jira/browse/ARROW-350) - Add Kerberos support to HDFS shim -* [ARROW-355](https://issues.apache.org/jira/browse/ARROW-355) - Add tests for serialising arrays of empty strings to Parquet -* [ARROW-356](https://issues.apache.org/jira/browse/ARROW-356) - Add documentation about reading Parquet -* [ARROW-360](https://issues.apache.org/jira/browse/ARROW-360) - C++: Add method to shrink PoolBuffer using realloc -* [ARROW-361](https://issues.apache.org/jira/browse/ARROW-361) - Python: Support reading a column-selection from Parquet files -* [ARROW-365](https://issues.apache.org/jira/browse/ARROW-365) - Python: Provide Array.to_pandas() -* [ARROW-366](https://issues.apache.org/jira/browse/ARROW-366) - [java] implement Dictionary vector -* [ARROW-374](https://issues.apache.org/jira/browse/ARROW-374) - Python: clarify unicode vs. binary in API -* [ARROW-379](https://issues.apache.org/jira/browse/ARROW-379) - Python: Use setuptools_scm/setuptools_scm_git_archive to provide the version number -* [ARROW-380](https://issues.apache.org/jira/browse/ARROW-380) - [Java] optimize null count when serializing vectors. -* [ARROW-382](https://issues.apache.org/jira/browse/ARROW-382) - Python: Extend API documentation -* [ARROW-396](https://issues.apache.org/jira/browse/ARROW-396) - Python: Add pyarrow.schema.Schema.equals -* [ARROW-409](https://issues.apache.org/jira/browse/ARROW-409) - Python: Change pyarrow.Table.dataframe_from_batches API to create Table instead -* [ARROW-411](https://issues.apache.org/jira/browse/ARROW-411) - [Java] Move Intergration.compare and Intergration.compareSchemas to a public utils class -* [ARROW-423](https://issues.apache.org/jira/browse/ARROW-423) - C++: Define BUILD_BYPRODUCTS in external project to support non-make CMake generators -* [ARROW-425](https://issues.apache.org/jira/browse/ARROW-425) - Python: Expose a C function to convert arrow::Table to pyarrow.Table -* [ARROW-426](https://issues.apache.org/jira/browse/ARROW-426) - Python: Conversion from pyarrow.Array to a Python list -* [ARROW-430](https://issues.apache.org/jira/browse/ARROW-430) - Python: Better version handling -* [ARROW-432](https://issues.apache.org/jira/browse/ARROW-432) - [Python] Avoid unnecessary memory copy in to_pandas conversion by using low-level pandas internals APIs -* [ARROW-450](https://issues.apache.org/jira/browse/ARROW-450) - Python: Fixes for PARQUET-818 -* [ARROW-457](https://issues.apache.org/jira/browse/ARROW-457) - Python: Better control over memory pool -* [ARROW-458](https://issues.apache.org/jira/browse/ARROW-458) - Python: Expose jemalloc MemoryPool -* [ARROW-463](https://issues.apache.org/jira/browse/ARROW-463) - C++: Support jemalloc 4.x -* [ARROW-466](https://issues.apache.org/jira/browse/ARROW-466) - C++: ExternalProject for jemalloc -* [ARROW-468](https://issues.apache.org/jira/browse/ARROW-468) - Python: Conversion of nested data in pd.DataFrames to/from Arrow structures -* [ARROW-474](https://issues.apache.org/jira/browse/ARROW-474) - Create an Arrow streaming file fomat -* [ARROW-479](https://issues.apache.org/jira/browse/ARROW-479) - Python: Test for expected schema in Pandas conversion -* [ARROW-485](https://issues.apache.org/jira/browse/ARROW-485) - [Java] Users are required to initialize VariableLengthVectors.offsetVector before calling VariableLengthVectors.mutator.getSafe -* [ARROW-490](https://issues.apache.org/jira/browse/ARROW-490) - Python: Update manylinux1 build scripts -* [ARROW-524](https://issues.apache.org/jira/browse/ARROW-524) - [java] provide apis to access nested vectors and buffers -* [ARROW-525](https://issues.apache.org/jira/browse/ARROW-525) - Python: Add more documentation to the package -* [ARROW-529](https://issues.apache.org/jira/browse/ARROW-529) - Python: Add jemalloc and Python 3.6 to manylinux1 build -* [ARROW-546](https://issues.apache.org/jira/browse/ARROW-546) - Python: Account for changes in PARQUET-867 -* [ARROW-553](https://issues.apache.org/jira/browse/ARROW-553) - C++: Faster valid bitmap building - -## New Feature - -* [ARROW-108](https://issues.apache.org/jira/browse/ARROW-108) - [C++] Add IPC round trip for union types -* [ARROW-221](https://issues.apache.org/jira/browse/ARROW-221) - Add switch for writing Parquet 1.0 compatible logical types -* [ARROW-227](https://issues.apache.org/jira/browse/ARROW-227) - [C++/Python] Hook arrow_io generic reader / writer interface into arrow_parquet -* [ARROW-228](https://issues.apache.org/jira/browse/ARROW-228) - [Python] Create an Arrow-cpp-compatible interface for reading bytes from Python file-like objects -* [ARROW-243](https://issues.apache.org/jira/browse/ARROW-243) - [C++] Add "driver" option to HdfsClient to choose between libhdfs and libhdfs3 at runtime -* [ARROW-303](https://issues.apache.org/jira/browse/ARROW-303) - [C++] Also build static libraries for leaf libraries -* [ARROW-312](https://issues.apache.org/jira/browse/ARROW-312) - [Python] Provide Python API to read/write the Arrow IPC file format -* [ARROW-317](https://issues.apache.org/jira/browse/ARROW-317) - [C++] Implement zero-copy Slice method on arrow::Buffer that retains reference to parent -* [ARROW-33](https://issues.apache.org/jira/browse/ARROW-33) - C++: Implement zero-copy array slicing -* [ARROW-332](https://issues.apache.org/jira/browse/ARROW-332) - [Python] Add helper function to convert RecordBatch to pandas.DataFrame -* [ARROW-363](https://issues.apache.org/jira/browse/ARROW-363) - Set up Java/C++ integration test harness -* [ARROW-369](https://issues.apache.org/jira/browse/ARROW-369) - [Python] Add ability to convert multiple record batches at once to pandas -* [ARROW-373](https://issues.apache.org/jira/browse/ARROW-373) - [C++] Implement C++ version of JSON file format for testing -* [ARROW-377](https://issues.apache.org/jira/browse/ARROW-377) - Python: Add support for conversion of Pandas.Categorical -* [ARROW-381](https://issues.apache.org/jira/browse/ARROW-381) - [C++] Simplify primitive array type builders to use a default type singleton -* [ARROW-383](https://issues.apache.org/jira/browse/ARROW-383) - [C++] Implement C++ version of ARROW-367 integration test validator -* [ARROW-389](https://issues.apache.org/jira/browse/ARROW-389) - Python: Write Parquet files to pyarrow.io.NativeFile objects -* [ARROW-394](https://issues.apache.org/jira/browse/ARROW-394) - Add integration tests for boolean, list, struct, and other basic types -* [ARROW-410](https://issues.apache.org/jira/browse/ARROW-410) - [C++] Add Flush method to arrow::io::OutputStream -* [ARROW-415](https://issues.apache.org/jira/browse/ARROW-415) - C++: Add Equals implementation to compare Tables -* [ARROW-416](https://issues.apache.org/jira/browse/ARROW-416) - C++: Add Equals implementation to compare Columns -* [ARROW-417](https://issues.apache.org/jira/browse/ARROW-417) - C++: Add Equals implementation to compare ChunkedArrays -* [ARROW-418](https://issues.apache.org/jira/browse/ARROW-418) - [C++] Consolidate array container and builder code, remove arrow/types -* [ARROW-419](https://issues.apache.org/jira/browse/ARROW-419) - [C++] Promote util/{status.h, buffer.h, memory-pool.h} to top level of arrow/ source directory -* [ARROW-427](https://issues.apache.org/jira/browse/ARROW-427) - [C++] Implement dictionary-encoded array container -* [ARROW-428](https://issues.apache.org/jira/browse/ARROW-428) - [Python] Deserialize from Arrow record batches to pandas in parallel using a thread pool -* [ARROW-438](https://issues.apache.org/jira/browse/ARROW-438) - [Python] Concatenate Table instances with equal schemas -* [ARROW-440](https://issues.apache.org/jira/browse/ARROW-440) - [C++] Support pkg-config -* [ARROW-441](https://issues.apache.org/jira/browse/ARROW-441) - [Python] Expose Arrow's file and memory map classes as NativeFile subclasses -* [ARROW-442](https://issues.apache.org/jira/browse/ARROW-442) - [Python] Add public Python API to inspect Parquet file metadata -* [ARROW-444](https://issues.apache.org/jira/browse/ARROW-444) - [Python] Avoid unnecessary memory copies from use of PyBytes_* C APIs -* [ARROW-449](https://issues.apache.org/jira/browse/ARROW-449) - Python: Conversion from pyarrow.{Table,RecordBatch} to a Python dict -* [ARROW-456](https://issues.apache.org/jira/browse/ARROW-456) - C++: Add jemalloc based MemoryPool -* [ARROW-461](https://issues.apache.org/jira/browse/ARROW-461) - [Python] Implement conversion between arrow::DictionaryArray and pandas.Categorical -* [ARROW-467](https://issues.apache.org/jira/browse/ARROW-467) - [Python] Run parquet-cpp unit tests in Travis CI -* [ARROW-470](https://issues.apache.org/jira/browse/ARROW-470) - [Python] Add "FileSystem" abstraction to access directories of files in a uniform way -* [ARROW-471](https://issues.apache.org/jira/browse/ARROW-471) - [Python] Enable ParquetFile to pass down separately-obtained file metadata -* [ARROW-472](https://issues.apache.org/jira/browse/ARROW-472) - [Python] Expose parquet::{SchemaDescriptor, ColumnDescriptor}::Equals -* [ARROW-475](https://issues.apache.org/jira/browse/ARROW-475) - [Python] High level support for reading directories of Parquet files (as a single Arrow table) from supported file system interfaces -* [ARROW-476](https://issues.apache.org/jira/browse/ARROW-476) - [Integration] Add integration tests for Binary / Varbytes type -* [ARROW-477](https://issues.apache.org/jira/browse/ARROW-477) - [Java] Add support for second/microsecond/nanosecond timestamps in-memory and in IPC/JSON layer -* [ARROW-478](https://issues.apache.org/jira/browse/ARROW-478) - [Python] Accept a PyBytes object in the pyarrow.io.BufferReader ctor -* [ARROW-484](https://issues.apache.org/jira/browse/ARROW-484) - Add more detail about what of technology can be found in the Arrow implementations to README -* [ARROW-495](https://issues.apache.org/jira/browse/ARROW-495) - [C++] Add C++ implementation of streaming serialized format -* [ARROW-497](https://issues.apache.org/jira/browse/ARROW-497) - [Java] Integration test harness for streaming format -* [ARROW-498](https://issues.apache.org/jira/browse/ARROW-498) - [C++] Integration test harness for streaming format -* [ARROW-503](https://issues.apache.org/jira/browse/ARROW-503) - [Python] Interface to streaming binary format -* [ARROW-508](https://issues.apache.org/jira/browse/ARROW-508) - [C++] Make file/memory-mapped file interfaces threadsafe -* [ARROW-509](https://issues.apache.org/jira/browse/ARROW-509) - [Python] Add support for PARQUET-835 (parallel column reads) -* [ARROW-512](https://issues.apache.org/jira/browse/ARROW-512) - C++: Add method to check for primitive types -* [ARROW-514](https://issues.apache.org/jira/browse/ARROW-514) - [Python] Accept pyarrow.io.Buffer as input to StreamReader, FileReader classes -* [ARROW-515](https://issues.apache.org/jira/browse/ARROW-515) - [Python] Add StreamReader/FileReader methods that read all record batches as a Table -* [ARROW-521](https://issues.apache.org/jira/browse/ARROW-521) - [C++/Python] Track peak memory use in default MemoryPool -* [ARROW-531](https://issues.apache.org/jira/browse/ARROW-531) - Python: Document jemalloc, extend Pandas section, add Getting Involved -* [ARROW-538](https://issues.apache.org/jira/browse/ARROW-538) - [C++] Set up AddressSanitizer (ASAN) builds -* [ARROW-547](https://issues.apache.org/jira/browse/ARROW-547) - [Python] Expose Array::Slice and RecordBatch::Slice -* [ARROW-81](https://issues.apache.org/jira/browse/ARROW-81) - [Format] Add a Category logical type (distinct from dictionary-encoding) - -## Task - -* [ARROW-268](https://issues.apache.org/jira/browse/ARROW-268) - [C++] Flesh out union implementation to have all required methods for IPC -* [ARROW-327](https://issues.apache.org/jira/browse/ARROW-327) - [Python] Remove conda builds from Travis CI processes -* [ARROW-353](https://issues.apache.org/jira/browse/ARROW-353) - Arrow release 0.2 -* [ARROW-359](https://issues.apache.org/jira/browse/ARROW-359) - Need to document ARROW_LIBHDFS_DIR -* [ARROW-367](https://issues.apache.org/jira/browse/ARROW-367) - [java] converter csv/json <=> Arrow file format for Integration tests -* [ARROW-368](https://issues.apache.org/jira/browse/ARROW-368) - Document use of LD_LIBRARY_PATH when using Python -* [ARROW-372](https://issues.apache.org/jira/browse/ARROW-372) - Create JSON arrow file format for integration tests -* [ARROW-506](https://issues.apache.org/jira/browse/ARROW-506) - Implement Arrow Echo server for integration testing -* [ARROW-527](https://issues.apache.org/jira/browse/ARROW-527) - clean drill-module.conf file -* [ARROW-558](https://issues.apache.org/jira/browse/ARROW-558) - Add KEYS files -* [ARROW-96](https://issues.apache.org/jira/browse/ARROW-96) - C++: API documentation using Doxygen -* [ARROW-97](https://issues.apache.org/jira/browse/ARROW-97) - Python: API documentation via sphinx-apidoc - [2]: https://github.com/apache/arrow/releases/tag/apache-arrow-0.2.0 [3]: https://dist.apache.org/repos/dist/release/arrow/arrow-0.2.0/apache-arrow-0.2.0.tar.gz.md5 [6]: https://dist.apache.org/repos/dist/release/arrow/arrow-0.2.0/apache-arrow-0.2.0.tar.gz diff --git a/site/_release/0.3.0.md b/site/_release/0.3.0.md index d76804250ef..443bcadce78 100644 --- a/site/_release/0.3.0.md +++ b/site/_release/0.3.0.md @@ -32,161 +32,143 @@ limitations under the License. # Changelog -## Bug +## Contributors -* [ARROW-109](https://issues.apache.org/jira/browse/ARROW-109) - [C++] Investigate recursive data types limit in flatbuffers -* [ARROW-208](https://issues.apache.org/jira/browse/ARROW-208) - Add checkstyle policy to java project -* [ARROW-347](https://issues.apache.org/jira/browse/ARROW-347) - Add method to pass CallBack when creating a transfer pair -* [ARROW-413](https://issues.apache.org/jira/browse/ARROW-413) - DATE type is not specified clearly -* [ARROW-431](https://issues.apache.org/jira/browse/ARROW-431) - [Python] Review GIL release and acquisition in to_pandas conversion -* [ARROW-443](https://issues.apache.org/jira/browse/ARROW-443) - [Python] Support for converting from strided pandas data in Table.from_pandas -* [ARROW-451](https://issues.apache.org/jira/browse/ARROW-451) - [C++] Override DataType::Equals for other types with additional metadata -* [ARROW-454](https://issues.apache.org/jira/browse/ARROW-454) - pojo.Field doesn't implement hashCode() -* [ARROW-526](https://issues.apache.org/jira/browse/ARROW-526) - [Format] Update IPC.md to account for File format changes and Streaming format -* [ARROW-565](https://issues.apache.org/jira/browse/ARROW-565) - [C++] Examine "Field::dictionary" member -* [ARROW-570](https://issues.apache.org/jira/browse/ARROW-570) - Determine Java tools JAR location from project metadata -* [ARROW-584](https://issues.apache.org/jira/browse/ARROW-584) - [C++] Fix compiler warnings exposed with -Wconversion -* [ARROW-588](https://issues.apache.org/jira/browse/ARROW-588) - [C++] Fix compiler warnings on 32-bit platforms -* [ARROW-595](https://issues.apache.org/jira/browse/ARROW-595) - [Python] StreamReader.schema returns None -* [ARROW-604](https://issues.apache.org/jira/browse/ARROW-604) - Python: boxed Field instances are missing the reference to DataType -* [ARROW-613](https://issues.apache.org/jira/browse/ARROW-613) - [JS] Implement random-access file format -* [ARROW-617](https://issues.apache.org/jira/browse/ARROW-617) - Time type is not specified clearly -* [ARROW-619](https://issues.apache.org/jira/browse/ARROW-619) - Python: Fix typos in setup.py args and LD_LIBRARY_PATH -* [ARROW-623](https://issues.apache.org/jira/browse/ARROW-623) - segfault with __repr__ of empty Field -* [ARROW-624](https://issues.apache.org/jira/browse/ARROW-624) - [C++] Restore MakePrimitiveArray function -* [ARROW-627](https://issues.apache.org/jira/browse/ARROW-627) - [C++] Compatibility macros for exported extern template class declarations -* [ARROW-628](https://issues.apache.org/jira/browse/ARROW-628) - [Python] Install nomkl metapackage when building parquet-cpp for faster Travis builds -* [ARROW-630](https://issues.apache.org/jira/browse/ARROW-630) - [C++] IPC unloading for BooleanArray does not account for offset -* [ARROW-636](https://issues.apache.org/jira/browse/ARROW-636) - [C++] Add Boost / other system requirements to C++ README -* [ARROW-639](https://issues.apache.org/jira/browse/ARROW-639) - [C++] Invalid offset in slices -* [ARROW-642](https://issues.apache.org/jira/browse/ARROW-642) - [Java] Remove temporary file in java/tools -* [ARROW-644](https://issues.apache.org/jira/browse/ARROW-644) - Python: Cython should be a setup-only requirement -* [ARROW-652](https://issues.apache.org/jira/browse/ARROW-652) - Remove trailing f in merge script output -* [ARROW-654](https://issues.apache.org/jira/browse/ARROW-654) - [C++] Support timezone metadata in file/stream formats -* [ARROW-668](https://issues.apache.org/jira/browse/ARROW-668) - [Python] Convert nanosecond timestamps to pandas.Timestamp when converting from TimestampValue -* [ARROW-671](https://issues.apache.org/jira/browse/ARROW-671) - [GLib] License file isn't installed -* [ARROW-673](https://issues.apache.org/jira/browse/ARROW-673) - [Java] Support additional Time metadata -* [ARROW-677](https://issues.apache.org/jira/browse/ARROW-677) - [java] Fix checkstyle jcl-over-slf4j conflict issue -* [ARROW-678](https://issues.apache.org/jira/browse/ARROW-678) - [GLib] Fix dependenciesfff -* [ARROW-680](https://issues.apache.org/jira/browse/ARROW-680) - [C++] Multiarch support impacts user-supplied install prefix -* [ARROW-682](https://issues.apache.org/jira/browse/ARROW-682) - Add self-validation checks in integration tests -* [ARROW-683](https://issues.apache.org/jira/browse/ARROW-683) - [C++] Support date32 (DateUnit::DAY) in IPC metadata, rename date to date64 -* [ARROW-686](https://issues.apache.org/jira/browse/ARROW-686) - [C++] Account for time metadata changes, add time32 and time64 types -* [ARROW-689](https://issues.apache.org/jira/browse/ARROW-689) - [GLib] Install header files and documents to wrong directories -* [ARROW-691](https://issues.apache.org/jira/browse/ARROW-691) - [Java] Encode dictionary Int type in message format -* [ARROW-697](https://issues.apache.org/jira/browse/ARROW-697) - [Java] Raise appropriate exceptions when encountering large (> INT32_MAX) record batches -* [ARROW-699](https://issues.apache.org/jira/browse/ARROW-699) - [C++] Arrow dynamic libraries are missed on run of unit tests on Windows -* [ARROW-702](https://issues.apache.org/jira/browse/ARROW-702) - Fix BitVector.copyFromSafe to reAllocate instead of returning false -* [ARROW-703](https://issues.apache.org/jira/browse/ARROW-703) - Fix issue where setValueCount(0) doesn’t work in the case that we’ve shipped vectors across the wire -* [ARROW-704](https://issues.apache.org/jira/browse/ARROW-704) - Fix bad import caused by conflicting changes -* [ARROW-709](https://issues.apache.org/jira/browse/ARROW-709) - [C++] Restore type comparator for DecimalType -* [ARROW-713](https://issues.apache.org/jira/browse/ARROW-713) - [C++] Fix linking issue with ipc benchmark -* [ARROW-715](https://issues.apache.org/jira/browse/ARROW-715) - Python: Explicit pandas import makes it a hard requirement -* [ARROW-716](https://issues.apache.org/jira/browse/ARROW-716) - error building arrow/python -* [ARROW-720](https://issues.apache.org/jira/browse/ARROW-720) - [java] arrow should not have a dependency on slf4j bridges in compile -* [ARROW-723](https://issues.apache.org/jira/browse/ARROW-723) - Arrow freezes on write if chunk_size=0 -* [ARROW-726](https://issues.apache.org/jira/browse/ARROW-726) - [C++] PyBuffer dtor may segfault if constructor passed an object not exporting buffer protocol -* [ARROW-732](https://issues.apache.org/jira/browse/ARROW-732) - Schema comparison bugs in struct and union types -* [ARROW-736](https://issues.apache.org/jira/browse/ARROW-736) - [Python] Mixed-type object DataFrame columns should not silently coerce to an Arrow type by default -* [ARROW-738](https://issues.apache.org/jira/browse/ARROW-738) - [Python] Fix manylinux1 packaging -* [ARROW-739](https://issues.apache.org/jira/browse/ARROW-739) - Parallel build fails non-deterministically. -* [ARROW-740](https://issues.apache.org/jira/browse/ARROW-740) - FileReader fails for large objects -* [ARROW-747](https://issues.apache.org/jira/browse/ARROW-747) - [C++] Fix spurious warning caused by passing dl to add_dependencies -* [ARROW-749](https://issues.apache.org/jira/browse/ARROW-749) - [Python] Delete incomplete binary files when writing fails -* [ARROW-753](https://issues.apache.org/jira/browse/ARROW-753) - [Python] Unit tests in arrow/python fail to link on some OS X platforms -* [ARROW-756](https://issues.apache.org/jira/browse/ARROW-756) - [C++] Do not pass -fPIC when compiling with MSVC -* [ARROW-757](https://issues.apache.org/jira/browse/ARROW-757) - [C++] MSVC build fails on googletest when using NMake -* [ARROW-762](https://issues.apache.org/jira/browse/ARROW-762) - Kerberos Problem with PyArrow -* [ARROW-776](https://issues.apache.org/jira/browse/ARROW-776) - [GLib] Cast type is wrong -* [ARROW-777](https://issues.apache.org/jira/browse/ARROW-777) - [Java] Resolve getObject behavior per changes / discussion in ARROW-729 -* [ARROW-778](https://issues.apache.org/jira/browse/ARROW-778) - Modify merge tool to work on Windows -* [ARROW-781](https://issues.apache.org/jira/browse/ARROW-781) - [Python/C++] Increase reference count for base object? -* [ARROW-783](https://issues.apache.org/jira/browse/ARROW-783) - Integration tests fail for length-0 record batch -* [ARROW-787](https://issues.apache.org/jira/browse/ARROW-787) - [GLib] Fix compilation errors caused by ARROW-758 -* [ARROW-793](https://issues.apache.org/jira/browse/ARROW-793) - [GLib] Wrong indent -* [ARROW-794](https://issues.apache.org/jira/browse/ARROW-794) - [C++] Check whether data is contiguous in ipc::WriteTensor -* [ARROW-797](https://issues.apache.org/jira/browse/ARROW-797) - [Python] Add updated pyarrow.* public API listing in Sphinx docs -* [ARROW-800](https://issues.apache.org/jira/browse/ARROW-800) - [C++] Boost headers being transitively included in pyarrow -* [ARROW-805](https://issues.apache.org/jira/browse/ARROW-805) - listing empty HDFS directory returns an error instead of returning empty list -* [ARROW-809](https://issues.apache.org/jira/browse/ARROW-809) - C++: Writing sliced record batch to IPC writes the entire array -* [ARROW-812](https://issues.apache.org/jira/browse/ARROW-812) - Pip install pyarrow on mac failed. -* [ARROW-817](https://issues.apache.org/jira/browse/ARROW-817) - [C++] Fix incorrect code comment from ARROW-722 -* [ARROW-821](https://issues.apache.org/jira/browse/ARROW-821) - [Python] Extra file _table_api.h generated during Python build process -* [ARROW-822](https://issues.apache.org/jira/browse/ARROW-822) - [Python] StreamWriter fails to open with socket as sink -* [ARROW-826](https://issues.apache.org/jira/browse/ARROW-826) - Compilation error on Mac with -DARROW_PYTHON=on -* [ARROW-829](https://issues.apache.org/jira/browse/ARROW-829) - Python: Parquet: Dictionary encoding is deactivated if column-wise compression was selected -* [ARROW-830](https://issues.apache.org/jira/browse/ARROW-830) - Python: jemalloc is not anymore publicly exposed -* [ARROW-839](https://issues.apache.org/jira/browse/ARROW-839) - [C++] Portable alternative to PyDate_to_ms function -* [ARROW-847](https://issues.apache.org/jira/browse/ARROW-847) - C++: BUILD_BYPRODUCTS not specified anymore for gtest -* [ARROW-852](https://issues.apache.org/jira/browse/ARROW-852) - Python: Also set Arrow Library PATHS when detection was done through pkg-config -* [ARROW-853](https://issues.apache.org/jira/browse/ARROW-853) - [Python] It is no longer necessary to modify the RPATH of the Cython extensions on many environments -* [ARROW-858](https://issues.apache.org/jira/browse/ARROW-858) - Remove dependency on boost regex -* [ARROW-866](https://issues.apache.org/jira/browse/ARROW-866) - [Python] Error from file object destructor -* [ARROW-867](https://issues.apache.org/jira/browse/ARROW-867) - [Python] Miscellaneous pyarrow MSVC fixes -* [ARROW-875](https://issues.apache.org/jira/browse/ARROW-875) - Nullable variable length vector fillEmpties() fills an extra value -* [ARROW-879](https://issues.apache.org/jira/browse/ARROW-879) - compat with pandas 0.20.0 -* [ARROW-882](https://issues.apache.org/jira/browse/ARROW-882) - [C++] On Windows statically built lib file overwrites lib file of shared build -* [ARROW-886](https://issues.apache.org/jira/browse/ARROW-886) - VariableLengthVectors don't reAlloc offsets -* [ARROW-887](https://issues.apache.org/jira/browse/ARROW-887) - [format] For backward compatibility, new unit fields must have default values matching previous implied unit -* [ARROW-888](https://issues.apache.org/jira/browse/ARROW-888) - BitVector transfer() does not transfer ownership -* [ARROW-895](https://issues.apache.org/jira/browse/ARROW-895) - Nullable variable length vector lastSet not set correctly -* [ARROW-900](https://issues.apache.org/jira/browse/ARROW-900) - [Python] UnboundLocalError in ParquetDatasetPiece -* [ARROW-903](https://issues.apache.org/jira/browse/ARROW-903) - [GLib] Remove a needless "." -* [ARROW-914](https://issues.apache.org/jira/browse/ARROW-914) - [C++/Python] Fix Decimal ToBytes -* [ARROW-922](https://issues.apache.org/jira/browse/ARROW-922) - Allow Flatbuffers and RapidJSON to be used locally on Windows -* [ARROW-928](https://issues.apache.org/jira/browse/ARROW-928) - Update CMAKE script to detect unsupported msvc compilers versions -* [ARROW-933](https://issues.apache.org/jira/browse/ARROW-933) - [Python] arrow_python bindings have debug print statement -* [ARROW-934](https://issues.apache.org/jira/browse/ARROW-934) - [GLib] Glib sources missing from result of 02-source.sh -* [ARROW-936](https://issues.apache.org/jira/browse/ARROW-936) - Fix release README -* [ARROW-938](https://issues.apache.org/jira/browse/ARROW-938) - Fix Apache Rat errors from source release build +```shell +$ git shortlog -sn apache-arrow-0.2.0..apache-arrow-0.3.0 + 119 Wes McKinney + 55 Kouhei Sutou + 18 Uwe L. Korn + 17 Julien Le Dem + 9 Phillip Cloud + 6 Bryan Cutler + 5 Emilio Lahr-Vivaz + 5 Philipp Moritz + 4 Jeff Knupp + 4 Johan Mabille + 4 Max Risuhin + 3 Miki Tebeka + 3 Steven Phillips + 2 Brian Hulette + 2 Jeff Reback + 2 Leif Walsh + 1 Deepak Majeti + 1 Holden Karau + 1 Itai Incze + 1 Julien Lafaye + 1 Nong Li + 1 Tsuyoshi Ozawa + 1 rvernica +``` -## Improvement +## New Features and Improvements +* [ARROW-183](https://issues.apache.org/jira/browse/ARROW-183) - C++: Add storage type to DecimalType +* [ARROW-231](https://issues.apache.org/jira/browse/ARROW-231) - C++: Add typed Resize to PoolBuffer +* [ARROW-281](https://issues.apache.org/jira/browse/ARROW-281) - [C++] IPC/RPC support on Win32 platforms * [ARROW-316](https://issues.apache.org/jira/browse/ARROW-316) - Finalize Date type +* [ARROW-341](https://issues.apache.org/jira/browse/ARROW-341) - [Python] Making libpyarrow available to third parties +* [ARROW-452](https://issues.apache.org/jira/browse/ARROW-452) - [C++/Python] Merge "Feather" file format implementation +* [ARROW-459](https://issues.apache.org/jira/browse/ARROW-459) - [C++] Implement IPC round trip for DictionaryArray, dictionaries shared across record batches +* [ARROW-483](https://issues.apache.org/jira/browse/ARROW-483) - [C++/Python] Provide access to "custom_metadata" Field attribute in IPC setting +* [ARROW-491](https://issues.apache.org/jira/browse/ARROW-491) - [C++] Add FixedWidthBinary type +* [ARROW-493](https://issues.apache.org/jira/browse/ARROW-493) - [C++] Allow in-memory array over 2^31 -1 elements but require splitting at IPC / RPC boundaries +* [ARROW-502](https://issues.apache.org/jira/browse/ARROW-502) - [C++/Python] Add MemoryPool implementation that logs allocation activity to std::cout +* [ARROW-510](https://issues.apache.org/jira/browse/ARROW-510) - Add integration tests for date and time types +* [ARROW-52](https://issues.apache.org/jira/browse/ARROW-52) - Set up project blog +* [ARROW-520](https://issues.apache.org/jira/browse/ARROW-520) - [C++] Add STL-compliant allocator that hooks into an arrow::MemoryPool +* [ARROW-528](https://issues.apache.org/jira/browse/ARROW-528) - [Python] Support _metadata or _common_metadata files when reading Parquet directories +* [ARROW-534](https://issues.apache.org/jira/browse/ARROW-534) - [C++] Add IPC tests for date/time types +* [ARROW-539](https://issues.apache.org/jira/browse/ARROW-539) - [Python] Support reading Parquet datasets with standard partition directory schemes * [ARROW-542](https://issues.apache.org/jira/browse/ARROW-542) - [Java] Implement dictionaries in stream/file encoding +* [ARROW-550](https://issues.apache.org/jira/browse/ARROW-550) - [Format] Add a TensorMessage type +* [ARROW-552](https://issues.apache.org/jira/browse/ARROW-552) - [Python] Add scalar value support for Dictionary type +* [ARROW-557](https://issues.apache.org/jira/browse/ARROW-557) - [Python] Explicitly opt in to HDFS unit tests * [ARROW-563](https://issues.apache.org/jira/browse/ARROW-563) - C++: Support non-standard gcc version strings * [ARROW-566](https://issues.apache.org/jira/browse/ARROW-566) - Python: Deterministic position of libarrow in manylinux1 wheels +* [ARROW-568](https://issues.apache.org/jira/browse/ARROW-568) - [C++] Add default implementations for TypeVisitor, ArrayVisitor methods that return NotImplemented * [ARROW-569](https://issues.apache.org/jira/browse/ARROW-569) - [C++] Set version for *.pc +* [ARROW-574](https://issues.apache.org/jira/browse/ARROW-574) - Python: Add support for nested Python lists in Pandas conversion +* [ARROW-576](https://issues.apache.org/jira/browse/ARROW-576) - [C++] Complete round trip Union file/stream IPC tests * [ARROW-577](https://issues.apache.org/jira/browse/ARROW-577) - [C++] Refactor StreamWriter and FileWriter to have private implementations +* [ARROW-578](https://issues.apache.org/jira/browse/ARROW-578) - [C++] Add CMake option to add custom $CXXFLAGS * [ARROW-580](https://issues.apache.org/jira/browse/ARROW-580) - C++: Also provide jemalloc_X targets if only a static or shared version is found * [ARROW-582](https://issues.apache.org/jira/browse/ARROW-582) - [Java] Add Date/Time Support to JSON File * [ARROW-589](https://issues.apache.org/jira/browse/ARROW-589) - C++: Use system provided shared jemalloc if static is unavailable * [ARROW-593](https://issues.apache.org/jira/browse/ARROW-593) - [C++] Rename ReadableFileInterface to RandomAccessFile +* [ARROW-598](https://issues.apache.org/jira/browse/ARROW-598) - [Python] Add support for converting pyarrow.Buffer to a memoryview with zero copy +* [ARROW-603](https://issues.apache.org/jira/browse/ARROW-603) - [C++] Add RecordBatch::Validate method that at least checks that schema matches the array metadata +* [ARROW-605](https://issues.apache.org/jira/browse/ARROW-605) - [C++] Refactor generic ArrayLoader class, support work for Feather merge +* [ARROW-606](https://issues.apache.org/jira/browse/ARROW-606) - [C++] Upgrade to flatbuffers 1.6.0 +* [ARROW-608](https://issues.apache.org/jira/browse/ARROW-608) - [Format] Days since epoch date type +* [ARROW-610](https://issues.apache.org/jira/browse/ARROW-610) - [C++] Win32 compatibility in file.cc * [ARROW-612](https://issues.apache.org/jira/browse/ARROW-612) - [Java] Field toString should show nullable flag status * [ARROW-615](https://issues.apache.org/jira/browse/ARROW-615) - Move ByteArrayReadableSeekableByteChannel to vector.util package +* [ARROW-616](https://issues.apache.org/jira/browse/ARROW-616) - [C++] Remove -g flag in release builds +* [ARROW-618](https://issues.apache.org/jira/browse/ARROW-618) - [Python] Implement support for DatetimeTZ custom type from pandas +* [ARROW-620](https://issues.apache.org/jira/browse/ARROW-620) - [C++] Add date/time support to JSON reader/writer for integration testing +* [ARROW-621](https://issues.apache.org/jira/browse/ARROW-621) - [C++] Implement an "inline visitor" template that enables visitor-pattern-like code without virtual function dispatch +* [ARROW-625](https://issues.apache.org/jira/browse/ARROW-625) - [C++] Add time unit to TimeType::ToString +* [ARROW-626](https://issues.apache.org/jira/browse/ARROW-626) - [Python] Enable pyarrow.BufferReader to read from any Python object implementing the buffer/memoryview protocol * [ARROW-631](https://issues.apache.org/jira/browse/ARROW-631) - [GLib] Import C API (C++ API wrapper) based on GLib from https://github.com/kou/arrow-glib +* [ARROW-632](https://issues.apache.org/jira/browse/ARROW-632) - [Python] Add support for FixedWidthBinary type +* [ARROW-635](https://issues.apache.org/jira/browse/ARROW-635) - [C++] Add JSON read/write support for FixedWidthBinary +* [ARROW-637](https://issues.apache.org/jira/browse/ARROW-637) - [Format] Add time zone metadata to Timestamp type * [ARROW-646](https://issues.apache.org/jira/browse/ARROW-646) - Cache miniconda packages * [ARROW-647](https://issues.apache.org/jira/browse/ARROW-647) - [C++] Don't require Boost static libraries to support CentOS 7 * [ARROW-648](https://issues.apache.org/jira/browse/ARROW-648) - [C++] Support multiarch on Debian * [ARROW-650](https://issues.apache.org/jira/browse/ARROW-650) - [GLib] Follow eadableFileInterface -> RnadomAccessFile change * [ARROW-651](https://issues.apache.org/jira/browse/ARROW-651) - [C++] Set shared library version for .deb packages * [ARROW-655](https://issues.apache.org/jira/browse/ARROW-655) - Implement DecimalArray +* [ARROW-656](https://issues.apache.org/jira/browse/ARROW-656) - [C++] Implement IO interface that can read and write to a fixed-size mutable buffer +* [ARROW-657](https://issues.apache.org/jira/browse/ARROW-657) - [Python] Write and read tensors (with zero copy) into shared memory +* [ARROW-658](https://issues.apache.org/jira/browse/ARROW-658) - [C++] Implement in-memory arrow::Tensor objects +* [ARROW-659](https://issues.apache.org/jira/browse/ARROW-659) - [C++] Add multithreaded memcpy implementation (for hardware where it helps) +* [ARROW-660](https://issues.apache.org/jira/browse/ARROW-660) - [C++] Restore function that can read a complete encapsulated record batch message +* [ARROW-661](https://issues.apache.org/jira/browse/ARROW-661) - [C++] Add a Flatbuffer metadata type that supports array data over 2^31 - 1 elements * [ARROW-662](https://issues.apache.org/jira/browse/ARROW-662) - [Format] Factor Flatbuffer schema metadata into a Schema.fbs +* [ARROW-663](https://issues.apache.org/jira/browse/ARROW-663) - [Java] Support additional Time metadata + vector value accessors * [ARROW-664](https://issues.apache.org/jira/browse/ARROW-664) - Make C++ Arrow serialization deterministic +* [ARROW-669](https://issues.apache.org/jira/browse/ARROW-669) - [Python] Attach proper tzinfo when computing boxed scalars for TimestampArray +* [ARROW-670](https://issues.apache.org/jira/browse/ARROW-670) - Arrow 0.3 release +* [ARROW-672](https://issues.apache.org/jira/browse/ARROW-672) - [Format] Bump metadata version for 0.3 release * [ARROW-674](https://issues.apache.org/jira/browse/ARROW-674) - [Java] Support additional Timestamp timezone metadata * [ARROW-675](https://issues.apache.org/jira/browse/ARROW-675) - [GLib] Update package metadata * [ARROW-676](https://issues.apache.org/jira/browse/ARROW-676) - [java] move from MinorType to FieldType in ValueVectors to carry all the relevant type bits * [ARROW-679](https://issues.apache.org/jira/browse/ARROW-679) - [Format] Change RecordBatch and Field length members from int to long * [ARROW-681](https://issues.apache.org/jira/browse/ARROW-681) - [C++] Build Arrow on Windows with dynamically linked boost * [ARROW-684](https://issues.apache.org/jira/browse/ARROW-684) - Python: More informative message when parquet-cpp but not parquet-arrow is available +* [ARROW-687](https://issues.apache.org/jira/browse/ARROW-687) - [C++] Build and run full test suite in Appveyor * [ARROW-688](https://issues.apache.org/jira/browse/ARROW-688) - [C++] Use CMAKE_INSTALL_INCLUDEDIR for consistency * [ARROW-690](https://issues.apache.org/jira/browse/ARROW-690) - Only send JIRA updates to issues@arrow.apache.org +* [ARROW-698](https://issues.apache.org/jira/browse/ARROW-698) - [C++] Add options to StreamWriter/FileWriter to permit large record batches * [ARROW-700](https://issues.apache.org/jira/browse/ARROW-700) - Add headroom interface for allocator. +* [ARROW-701](https://issues.apache.org/jira/browse/ARROW-701) - [Java] Support additional Date metadata * [ARROW-706](https://issues.apache.org/jira/browse/ARROW-706) - [GLib] Add package install document * [ARROW-707](https://issues.apache.org/jira/browse/ARROW-707) - Python: All none-Pandas column should be converted to NullArray * [ARROW-708](https://issues.apache.org/jira/browse/ARROW-708) - [C++] Some IPC code simplification, perf analysis +* [ARROW-710](https://issues.apache.org/jira/browse/ARROW-710) - [Python] Enable Feather APIs to read and write using Python file-like objects * [ARROW-712](https://issues.apache.org/jira/browse/ARROW-712) - [C++] Implement Array::Accept as inline visitor +* [ARROW-717](https://issues.apache.org/jira/browse/ARROW-717) - [C++] IPC zero-copy round trips for arrow::Tensor +* [ARROW-718](https://issues.apache.org/jira/browse/ARROW-718) - [Python] Expose arrow::Tensor with conversions to/from NumPy arrays * [ARROW-719](https://issues.apache.org/jira/browse/ARROW-719) - [GLib] Support prepared source archive release +* [ARROW-722](https://issues.apache.org/jira/browse/ARROW-722) - [Python] pandas conversions for new date and time types/metadata * [ARROW-724](https://issues.apache.org/jira/browse/ARROW-724) - Add "How to Contribute" section to README * [ARROW-725](https://issues.apache.org/jira/browse/ARROW-725) - [Format] Constant length list type * [ARROW-727](https://issues.apache.org/jira/browse/ARROW-727) - [Python] Write memoryview-compatible objects in NativeFile.write with zero copy * [ARROW-728](https://issues.apache.org/jira/browse/ARROW-728) - [C++/Python] Add arrow::Table function for removing a column +* [ARROW-729](https://issues.apache.org/jira/browse/ARROW-729) - [Java] Add vector type for 32-bit date as days since UNIX epoch * [ARROW-731](https://issues.apache.org/jira/browse/ARROW-731) - [C++] Add shared library related versions to .pc +* [ARROW-733](https://issues.apache.org/jira/browse/ARROW-733) - [C++/Format] Change name of Fixed Width Binary to Fixed *Size* Binary for consistency +* [ARROW-734](https://issues.apache.org/jira/browse/ARROW-734) - [Python] Support for pyarrow on Windows / MSVC +* [ARROW-735](https://issues.apache.org/jira/browse/ARROW-735) - [C++] Developer instruction document for MSVC on Windows +* [ARROW-737](https://issues.apache.org/jira/browse/ARROW-737) - [C++] Support obtaining mutable slices of mutable buffers * [ARROW-741](https://issues.apache.org/jira/browse/ARROW-741) - [Python] Add Python 3.6 to Travis CI * [ARROW-743](https://issues.apache.org/jira/browse/ARROW-743) - [C++] Consolidate unit tests for code in array.h * [ARROW-744](https://issues.apache.org/jira/browse/ARROW-744) - [GLib] Re-add an assertion to garrow_table_new() test * [ARROW-745](https://issues.apache.org/jira/browse/ARROW-745) - [C++] Allow use of system cpplint * [ARROW-746](https://issues.apache.org/jira/browse/ARROW-746) - [GLib] Add garrow_array_get_data_type() +* [ARROW-748](https://issues.apache.org/jira/browse/ARROW-748) - [Python] Pin runtime library versions in conda-forge packages to force upgrades * [ARROW-751](https://issues.apache.org/jira/browse/ARROW-751) - [Python] Rename all Cython extensions to "private" status with leading underscore * [ARROW-752](https://issues.apache.org/jira/browse/ARROW-752) - [Python] Construct pyarrow.DictionaryArray from boxed pyarrow array objects * [ARROW-754](https://issues.apache.org/jira/browse/ARROW-754) - [GLib] Add garrow_array_is_null() @@ -195,14 +177,18 @@ limitations under the License. * [ARROW-761](https://issues.apache.org/jira/browse/ARROW-761) - [Python] Add function to compute the total size of tensor payloads, including metadata and padding * [ARROW-763](https://issues.apache.org/jira/browse/ARROW-763) - C++: Use `python-config` to find libpythonX.X.dylib * [ARROW-765](https://issues.apache.org/jira/browse/ARROW-765) - [Python] Make generic ArrowException subclass value error +* [ARROW-768](https://issues.apache.org/jira/browse/ARROW-768) - [Java] Change the "boxed" object representation of date and time types * [ARROW-769](https://issues.apache.org/jira/browse/ARROW-769) - [GLib] Support building without installed Arrow C++ * [ARROW-770](https://issues.apache.org/jira/browse/ARROW-770) - [C++] Move clang-tidy/format config files back to C++ source tree +* [ARROW-771](https://issues.apache.org/jira/browse/ARROW-771) - [Python] Add APIs for reading individual Parquet row groups +* [ARROW-773](https://issues.apache.org/jira/browse/ARROW-773) - [C++] Add function to create arrow::Table with column appended to existing table * [ARROW-774](https://issues.apache.org/jira/browse/ARROW-774) - [GLib] Remove needless LICENSE.txt copy * [ARROW-775](https://issues.apache.org/jira/browse/ARROW-775) - [Java] add simple constructors to value vectors * [ARROW-779](https://issues.apache.org/jira/browse/ARROW-779) - [C++/Python] Raise exception if old metadata encountered * [ARROW-782](https://issues.apache.org/jira/browse/ARROW-782) - [C++] Change struct to class for objects that meet the criteria in the Google style guide * [ARROW-788](https://issues.apache.org/jira/browse/ARROW-788) - Possible nondeterminism in Tensor serialization code * [ARROW-795](https://issues.apache.org/jira/browse/ARROW-795) - [C++] Combine libarrow/libarrow_io/libarrow_ipc +* [ARROW-798](https://issues.apache.org/jira/browse/ARROW-798) - [Docs] Publish Format Markdown documents somehow on arrow.apache.org * [ARROW-802](https://issues.apache.org/jira/browse/ARROW-802) - [GLib] Add read examples * [ARROW-803](https://issues.apache.org/jira/browse/ARROW-803) - [GLib] Update package repository URL * [ARROW-804](https://issues.apache.org/jira/browse/ARROW-804) - [GLib] Update build document @@ -220,6 +206,7 @@ limitations under the License. * [ARROW-828](https://issues.apache.org/jira/browse/ARROW-828) - [CPP] Document new requirement (libboost-regex-dev) in README.md * [ARROW-832](https://issues.apache.org/jira/browse/ARROW-832) - [C++] Upgrade thirdparty gtest to 1.8.0 * [ARROW-833](https://issues.apache.org/jira/browse/ARROW-833) - [Python] "Quickstart" build / environment setup guide for Python developers +* [ARROW-836](https://issues.apache.org/jira/browse/ARROW-836) - Test for timedelta compat with pandas * [ARROW-841](https://issues.apache.org/jira/browse/ARROW-841) - [Python] Add pyarrow build to Appveyor * [ARROW-844](https://issues.apache.org/jira/browse/ARROW-844) - [Format] Revise format/README.md to reflect progress reaching a more complete specification * [ARROW-845](https://issues.apache.org/jira/browse/ARROW-845) - [Python] Sync FindArrow.cmake changes from parquet-cpp @@ -232,11 +219,15 @@ limitations under the License. * [ARROW-862](https://issues.apache.org/jira/browse/ARROW-862) - [Python] Improve source build instructions in README * [ARROW-863](https://issues.apache.org/jira/browse/ARROW-863) - [GLib] Use GBytes to implement zero-copy * [ARROW-864](https://issues.apache.org/jira/browse/ARROW-864) - [GLib] Unify Array files +* [ARROW-865](https://issues.apache.org/jira/browse/ARROW-865) - [Python] Verify Parquet roundtrips for new date/time types * [ARROW-868](https://issues.apache.org/jira/browse/ARROW-868) - [GLib] Use GBytes to reduce copy +* [ARROW-869](https://issues.apache.org/jira/browse/ARROW-869) - [JS] Rename directory to js/ * [ARROW-871](https://issues.apache.org/jira/browse/ARROW-871) - [GLib] Unify DataType files * [ARROW-876](https://issues.apache.org/jira/browse/ARROW-876) - [GLib] Unify ArrayBuffer files * [ARROW-877](https://issues.apache.org/jira/browse/ARROW-877) - [GLib] Add garrow_array_get_null_bitmap() * [ARROW-878](https://issues.apache.org/jira/browse/ARROW-878) - [GLib] Add garrow_binary_array_get_buffer() +* [ARROW-880](https://issues.apache.org/jira/browse/ARROW-880) - [GLib] Add garrow_primitive_array_get_buffer() +* [ARROW-890](https://issues.apache.org/jira/browse/ARROW-890) - [GLib] Add GArrowMutableBuffer * [ARROW-892](https://issues.apache.org/jira/browse/ARROW-892) - [GLib] Fix GArrowTensor document * [ARROW-893](https://issues.apache.org/jira/browse/ARROW-893) - Add GLib document to Web site * [ARROW-894](https://issues.apache.org/jira/browse/ARROW-894) - [GLib] Add GArrowPoolBuffer @@ -252,90 +243,121 @@ limitations under the License. * [ARROW-919](https://issues.apache.org/jira/browse/ARROW-919) - [GLib] Use "id" to get type enum value from GArrowDataType * [ARROW-920](https://issues.apache.org/jira/browse/ARROW-920) - [GLib] Add Lua examples * [ARROW-925](https://issues.apache.org/jira/browse/ARROW-925) - [GLib] Fix GArrowBufferReader test +* [ARROW-926](https://issues.apache.org/jira/browse/ARROW-926) - Update KEYS to include wesm +* [ARROW-927](https://issues.apache.org/jira/browse/ARROW-927) - C++/Python: Add manylinux1 builds to Travis matrix * [ARROW-930](https://issues.apache.org/jira/browse/ARROW-930) - javadoc generation fails with java 8 * [ARROW-931](https://issues.apache.org/jira/browse/ARROW-931) - [GLib] Reconstruct input stream - -## New Feature - -* [ARROW-231](https://issues.apache.org/jira/browse/ARROW-231) - C++: Add typed Resize to PoolBuffer -* [ARROW-281](https://issues.apache.org/jira/browse/ARROW-281) - [C++] IPC/RPC support on Win32 platforms -* [ARROW-341](https://issues.apache.org/jira/browse/ARROW-341) - [Python] Making libpyarrow available to third parties -* [ARROW-452](https://issues.apache.org/jira/browse/ARROW-452) - [C++/Python] Merge "Feather" file format implementation -* [ARROW-459](https://issues.apache.org/jira/browse/ARROW-459) - [C++] Implement IPC round trip for DictionaryArray, dictionaries shared across record batches -* [ARROW-483](https://issues.apache.org/jira/browse/ARROW-483) - [C++/Python] Provide access to "custom_metadata" Field attribute in IPC setting -* [ARROW-491](https://issues.apache.org/jira/browse/ARROW-491) - [C++] Add FixedWidthBinary type -* [ARROW-493](https://issues.apache.org/jira/browse/ARROW-493) - [C++] Allow in-memory array over 2^31 -1 elements but require splitting at IPC / RPC boundaries -* [ARROW-502](https://issues.apache.org/jira/browse/ARROW-502) - [C++/Python] Add MemoryPool implementation that logs allocation activity to std::cout -* [ARROW-510](https://issues.apache.org/jira/browse/ARROW-510) - Add integration tests for date and time types -* [ARROW-520](https://issues.apache.org/jira/browse/ARROW-520) - [C++] Add STL-compliant allocator that hooks into an arrow::MemoryPool -* [ARROW-528](https://issues.apache.org/jira/browse/ARROW-528) - [Python] Support _metadata or _common_metadata files when reading Parquet directories -* [ARROW-534](https://issues.apache.org/jira/browse/ARROW-534) - [C++] Add IPC tests for date/time types -* [ARROW-539](https://issues.apache.org/jira/browse/ARROW-539) - [Python] Support reading Parquet datasets with standard partition directory schemes -* [ARROW-550](https://issues.apache.org/jira/browse/ARROW-550) - [Format] Add a TensorMessage type -* [ARROW-552](https://issues.apache.org/jira/browse/ARROW-552) - [Python] Add scalar value support for Dictionary type -* [ARROW-557](https://issues.apache.org/jira/browse/ARROW-557) - [Python] Explicitly opt in to HDFS unit tests -* [ARROW-568](https://issues.apache.org/jira/browse/ARROW-568) - [C++] Add default implementations for TypeVisitor, ArrayVisitor methods that return NotImplemented -* [ARROW-574](https://issues.apache.org/jira/browse/ARROW-574) - Python: Add support for nested Python lists in Pandas conversion -* [ARROW-576](https://issues.apache.org/jira/browse/ARROW-576) - [C++] Complete round trip Union file/stream IPC tests -* [ARROW-578](https://issues.apache.org/jira/browse/ARROW-578) - [C++] Add CMake option to add custom $CXXFLAGS -* [ARROW-598](https://issues.apache.org/jira/browse/ARROW-598) - [Python] Add support for converting pyarrow.Buffer to a memoryview with zero copy -* [ARROW-603](https://issues.apache.org/jira/browse/ARROW-603) - [C++] Add RecordBatch::Validate method that at least checks that schema matches the array metadata -* [ARROW-605](https://issues.apache.org/jira/browse/ARROW-605) - [C++] Refactor generic ArrayLoader class, support work for Feather merge -* [ARROW-606](https://issues.apache.org/jira/browse/ARROW-606) - [C++] Upgrade to flatbuffers 1.6.0 -* [ARROW-608](https://issues.apache.org/jira/browse/ARROW-608) - [Format] Days since epoch date type -* [ARROW-610](https://issues.apache.org/jira/browse/ARROW-610) - [C++] Win32 compatibility in file.cc -* [ARROW-616](https://issues.apache.org/jira/browse/ARROW-616) - [C++] Remove -g flag in release builds -* [ARROW-618](https://issues.apache.org/jira/browse/ARROW-618) - [Python] Implement support for DatetimeTZ custom type from pandas -* [ARROW-620](https://issues.apache.org/jira/browse/ARROW-620) - [C++] Add date/time support to JSON reader/writer for integration testing -* [ARROW-621](https://issues.apache.org/jira/browse/ARROW-621) - [C++] Implement an "inline visitor" template that enables visitor-pattern-like code without virtual function dispatch -* [ARROW-625](https://issues.apache.org/jira/browse/ARROW-625) - [C++] Add time unit to TimeType::ToString -* [ARROW-626](https://issues.apache.org/jira/browse/ARROW-626) - [Python] Enable pyarrow.BufferReader to read from any Python object implementing the buffer/memoryview protocol -* [ARROW-632](https://issues.apache.org/jira/browse/ARROW-632) - [Python] Add support for FixedWidthBinary type -* [ARROW-635](https://issues.apache.org/jira/browse/ARROW-635) - [C++] Add JSON read/write support for FixedWidthBinary -* [ARROW-637](https://issues.apache.org/jira/browse/ARROW-637) - [Format] Add time zone metadata to Timestamp type -* [ARROW-656](https://issues.apache.org/jira/browse/ARROW-656) - [C++] Implement IO interface that can read and write to a fixed-size mutable buffer -* [ARROW-657](https://issues.apache.org/jira/browse/ARROW-657) - [Python] Write and read tensors (with zero copy) into shared memory -* [ARROW-658](https://issues.apache.org/jira/browse/ARROW-658) - [C++] Implement in-memory arrow::Tensor objects -* [ARROW-659](https://issues.apache.org/jira/browse/ARROW-659) - [C++] Add multithreaded memcpy implementation (for hardware where it helps) -* [ARROW-660](https://issues.apache.org/jira/browse/ARROW-660) - [C++] Restore function that can read a complete encapsulated record batch message -* [ARROW-661](https://issues.apache.org/jira/browse/ARROW-661) - [C++] Add a Flatbuffer metadata type that supports array data over 2^31 - 1 elements -* [ARROW-663](https://issues.apache.org/jira/browse/ARROW-663) - [Java] Support additional Time metadata + vector value accessors -* [ARROW-669](https://issues.apache.org/jira/browse/ARROW-669) - [Python] Attach proper tzinfo when computing boxed scalars for TimestampArray -* [ARROW-687](https://issues.apache.org/jira/browse/ARROW-687) - [C++] Build and run full test suite in Appveyor -* [ARROW-698](https://issues.apache.org/jira/browse/ARROW-698) - [C++] Add options to StreamWriter/FileWriter to permit large record batches -* [ARROW-701](https://issues.apache.org/jira/browse/ARROW-701) - [Java] Support additional Date metadata -* [ARROW-710](https://issues.apache.org/jira/browse/ARROW-710) - [Python] Enable Feather APIs to read and write using Python file-like objects -* [ARROW-717](https://issues.apache.org/jira/browse/ARROW-717) - [C++] IPC zero-copy round trips for arrow::Tensor -* [ARROW-718](https://issues.apache.org/jira/browse/ARROW-718) - [Python] Expose arrow::Tensor with conversions to/from NumPy arrays -* [ARROW-722](https://issues.apache.org/jira/browse/ARROW-722) - [Python] pandas conversions for new date and time types/metadata -* [ARROW-729](https://issues.apache.org/jira/browse/ARROW-729) - [Java] Add vector type for 32-bit date as days since UNIX epoch -* [ARROW-733](https://issues.apache.org/jira/browse/ARROW-733) - [C++/Format] Change name of Fixed Width Binary to Fixed *Size* Binary for consistency -* [ARROW-734](https://issues.apache.org/jira/browse/ARROW-734) - [Python] Support for pyarrow on Windows / MSVC -* [ARROW-735](https://issues.apache.org/jira/browse/ARROW-735) - [C++] Developer instruction document for MSVC on Windows -* [ARROW-737](https://issues.apache.org/jira/browse/ARROW-737) - [C++] Support obtaining mutable slices of mutable buffers -* [ARROW-768](https://issues.apache.org/jira/browse/ARROW-768) - [Java] Change the "boxed" object representation of date and time types -* [ARROW-771](https://issues.apache.org/jira/browse/ARROW-771) - [Python] Add APIs for reading individual Parquet row groups -* [ARROW-773](https://issues.apache.org/jira/browse/ARROW-773) - [C++] Add function to create arrow::Table with column appended to existing table -* [ARROW-865](https://issues.apache.org/jira/browse/ARROW-865) - [Python] Verify Parquet roundtrips for new date/time types -* [ARROW-880](https://issues.apache.org/jira/browse/ARROW-880) - [GLib] Add garrow_primitive_array_get_buffer() -* [ARROW-890](https://issues.apache.org/jira/browse/ARROW-890) - [GLib] Add GArrowMutableBuffer -* [ARROW-926](https://issues.apache.org/jira/browse/ARROW-926) - Update KEYS to include wesm - -## Task - -* [ARROW-52](https://issues.apache.org/jira/browse/ARROW-52) - Set up project blog -* [ARROW-670](https://issues.apache.org/jira/browse/ARROW-670) - Arrow 0.3 release -* [ARROW-672](https://issues.apache.org/jira/browse/ARROW-672) - [Format] Bump metadata version for 0.3 release -* [ARROW-748](https://issues.apache.org/jira/browse/ARROW-748) - [Python] Pin runtime library versions in conda-forge packages to force upgrades -* [ARROW-798](https://issues.apache.org/jira/browse/ARROW-798) - [Docs] Publish Format Markdown documents somehow on arrow.apache.org -* [ARROW-869](https://issues.apache.org/jira/browse/ARROW-869) - [JS] Rename directory to js/ * [ARROW-95](https://issues.apache.org/jira/browse/ARROW-95) - Scaffold Main Documentation using asciidoc +* [ARROW-965](https://issues.apache.org/jira/browse/ARROW-965) - Website updates for 0.3.0 release * [ARROW-98](https://issues.apache.org/jira/browse/ARROW-98) - Java: API documentation -## Test +## Bug Fixes -* [ARROW-836](https://issues.apache.org/jira/browse/ARROW-836) - Test for timedelta compat with pandas -* [ARROW-927](https://issues.apache.org/jira/browse/ARROW-927) - C++/Python: Add manylinux1 builds to Travis matrix +* [ARROW-109](https://issues.apache.org/jira/browse/ARROW-109) - [C++] Investigate recursive data types limit in flatbuffers +* [ARROW-208](https://issues.apache.org/jira/browse/ARROW-208) - Add checkstyle policy to java project +* [ARROW-347](https://issues.apache.org/jira/browse/ARROW-347) - Add method to pass CallBack when creating a transfer pair +* [ARROW-413](https://issues.apache.org/jira/browse/ARROW-413) - DATE type is not specified clearly +* [ARROW-431](https://issues.apache.org/jira/browse/ARROW-431) - [Python] Review GIL release and acquisition in to_pandas conversion +* [ARROW-443](https://issues.apache.org/jira/browse/ARROW-443) - [Python] Support for converting from strided pandas data in Table.from_pandas +* [ARROW-451](https://issues.apache.org/jira/browse/ARROW-451) - [C++] Override DataType::Equals for other types with additional metadata +* [ARROW-454](https://issues.apache.org/jira/browse/ARROW-454) - pojo.Field doesn't implement hashCode() +* [ARROW-526](https://issues.apache.org/jira/browse/ARROW-526) - [Format] Update IPC.md to account for File format changes and Streaming format +* [ARROW-565](https://issues.apache.org/jira/browse/ARROW-565) - [C++] Examine "Field::dictionary" member +* [ARROW-570](https://issues.apache.org/jira/browse/ARROW-570) - Determine Java tools JAR location from project metadata +* [ARROW-584](https://issues.apache.org/jira/browse/ARROW-584) - [C++] Fix compiler warnings exposed with -Wconversion +* [ARROW-588](https://issues.apache.org/jira/browse/ARROW-588) - [C++] Fix compiler warnings on 32-bit platforms +* [ARROW-595](https://issues.apache.org/jira/browse/ARROW-595) - [Python] StreamReader.schema returns None +* [ARROW-604](https://issues.apache.org/jira/browse/ARROW-604) - Python: boxed Field instances are missing the reference to DataType +* [ARROW-613](https://issues.apache.org/jira/browse/ARROW-613) - [JS] Implement random-access file format +* [ARROW-617](https://issues.apache.org/jira/browse/ARROW-617) - Time type is not specified clearly +* [ARROW-619](https://issues.apache.org/jira/browse/ARROW-619) - Python: Fix typos in setup.py args and LD_LIBRARY_PATH +* [ARROW-623](https://issues.apache.org/jira/browse/ARROW-623) - segfault with __repr__ of empty Field +* [ARROW-624](https://issues.apache.org/jira/browse/ARROW-624) - [C++] Restore MakePrimitiveArray function +* [ARROW-627](https://issues.apache.org/jira/browse/ARROW-627) - [C++] Compatibility macros for exported extern template class declarations +* [ARROW-628](https://issues.apache.org/jira/browse/ARROW-628) - [Python] Install nomkl metapackage when building parquet-cpp for faster Travis builds +* [ARROW-630](https://issues.apache.org/jira/browse/ARROW-630) - [C++] IPC unloading for BooleanArray does not account for offset +* [ARROW-636](https://issues.apache.org/jira/browse/ARROW-636) - [C++] Add Boost / other system requirements to C++ README +* [ARROW-639](https://issues.apache.org/jira/browse/ARROW-639) - [C++] Invalid offset in slices +* [ARROW-642](https://issues.apache.org/jira/browse/ARROW-642) - [Java] Remove temporary file in java/tools +* [ARROW-644](https://issues.apache.org/jira/browse/ARROW-644) - Python: Cython should be a setup-only requirement +* [ARROW-652](https://issues.apache.org/jira/browse/ARROW-652) - Remove trailing f in merge script output +* [ARROW-654](https://issues.apache.org/jira/browse/ARROW-654) - [C++] Support timezone metadata in file/stream formats +* [ARROW-668](https://issues.apache.org/jira/browse/ARROW-668) - [Python] Convert nanosecond timestamps to pandas.Timestamp when converting from TimestampValue +* [ARROW-671](https://issues.apache.org/jira/browse/ARROW-671) - [GLib] License file isn't installed +* [ARROW-673](https://issues.apache.org/jira/browse/ARROW-673) - [Java] Support additional Time metadata +* [ARROW-677](https://issues.apache.org/jira/browse/ARROW-677) - [java] Fix checkstyle jcl-over-slf4j conflict issue +* [ARROW-678](https://issues.apache.org/jira/browse/ARROW-678) - [GLib] Fix dependenciesfff +* [ARROW-680](https://issues.apache.org/jira/browse/ARROW-680) - [C++] Multiarch support impacts user-supplied install prefix +* [ARROW-682](https://issues.apache.org/jira/browse/ARROW-682) - Add self-validation checks in integration tests +* [ARROW-683](https://issues.apache.org/jira/browse/ARROW-683) - [C++] Support date32 (DateUnit::DAY) in IPC metadata, rename date to date64 +* [ARROW-686](https://issues.apache.org/jira/browse/ARROW-686) - [C++] Account for time metadata changes, add time32 and time64 types +* [ARROW-689](https://issues.apache.org/jira/browse/ARROW-689) - [GLib] Install header files and documents to wrong directories +* [ARROW-691](https://issues.apache.org/jira/browse/ARROW-691) - [Java] Encode dictionary Int type in message format +* [ARROW-697](https://issues.apache.org/jira/browse/ARROW-697) - [Java] Raise appropriate exceptions when encountering large (> INT32_MAX) record batches +* [ARROW-699](https://issues.apache.org/jira/browse/ARROW-699) - [C++] Arrow dynamic libraries are missed on run of unit tests on Windows +* [ARROW-702](https://issues.apache.org/jira/browse/ARROW-702) - Fix BitVector.copyFromSafe to reAllocate instead of returning false +* [ARROW-703](https://issues.apache.org/jira/browse/ARROW-703) - Fix issue where setValueCount(0) doesn’t work in the case that we’ve shipped vectors across the wire +* [ARROW-704](https://issues.apache.org/jira/browse/ARROW-704) - Fix bad import caused by conflicting changes +* [ARROW-709](https://issues.apache.org/jira/browse/ARROW-709) - [C++] Restore type comparator for DecimalType +* [ARROW-713](https://issues.apache.org/jira/browse/ARROW-713) - [C++] Fix linking issue with ipc benchmark +* [ARROW-715](https://issues.apache.org/jira/browse/ARROW-715) - Python: Explicit pandas import makes it a hard requirement +* [ARROW-716](https://issues.apache.org/jira/browse/ARROW-716) - error building arrow/python +* [ARROW-720](https://issues.apache.org/jira/browse/ARROW-720) - [java] arrow should not have a dependency on slf4j bridges in compile +* [ARROW-723](https://issues.apache.org/jira/browse/ARROW-723) - Arrow freezes on write if chunk_size=0 +* [ARROW-726](https://issues.apache.org/jira/browse/ARROW-726) - [C++] PyBuffer dtor may segfault if constructor passed an object not exporting buffer protocol +* [ARROW-732](https://issues.apache.org/jira/browse/ARROW-732) - Schema comparison bugs in struct and union types +* [ARROW-736](https://issues.apache.org/jira/browse/ARROW-736) - [Python] Mixed-type object DataFrame columns should not silently coerce to an Arrow type by default +* [ARROW-738](https://issues.apache.org/jira/browse/ARROW-738) - [Python] Fix manylinux1 packaging +* [ARROW-739](https://issues.apache.org/jira/browse/ARROW-739) - Parallel build fails non-deterministically. +* [ARROW-740](https://issues.apache.org/jira/browse/ARROW-740) - FileReader fails for large objects +* [ARROW-747](https://issues.apache.org/jira/browse/ARROW-747) - [C++] Fix spurious warning caused by passing dl to add_dependencies +* [ARROW-749](https://issues.apache.org/jira/browse/ARROW-749) - [Python] Delete incomplete binary files when writing fails +* [ARROW-753](https://issues.apache.org/jira/browse/ARROW-753) - [Python] Unit tests in arrow/python fail to link on some OS X platforms +* [ARROW-756](https://issues.apache.org/jira/browse/ARROW-756) - [C++] Do not pass -fPIC when compiling with MSVC +* [ARROW-757](https://issues.apache.org/jira/browse/ARROW-757) - [C++] MSVC build fails on googletest when using NMake +* [ARROW-762](https://issues.apache.org/jira/browse/ARROW-762) - Kerberos Problem with PyArrow +* [ARROW-776](https://issues.apache.org/jira/browse/ARROW-776) - [GLib] Cast type is wrong +* [ARROW-777](https://issues.apache.org/jira/browse/ARROW-777) - [Java] Resolve getObject behavior per changes / discussion in ARROW-729 +* [ARROW-778](https://issues.apache.org/jira/browse/ARROW-778) - Modify merge tool to work on Windows +* [ARROW-781](https://issues.apache.org/jira/browse/ARROW-781) - [Python/C++] Increase reference count for base object? +* [ARROW-783](https://issues.apache.org/jira/browse/ARROW-783) - Integration tests fail for length-0 record batch +* [ARROW-787](https://issues.apache.org/jira/browse/ARROW-787) - [GLib] Fix compilation errors caused by ARROW-758 +* [ARROW-793](https://issues.apache.org/jira/browse/ARROW-793) - [GLib] Wrong indent +* [ARROW-794](https://issues.apache.org/jira/browse/ARROW-794) - [C++] Check whether data is contiguous in ipc::WriteTensor +* [ARROW-797](https://issues.apache.org/jira/browse/ARROW-797) - [Python] Add updated pyarrow.* public API listing in Sphinx docs +* [ARROW-800](https://issues.apache.org/jira/browse/ARROW-800) - [C++] Boost headers being transitively included in pyarrow +* [ARROW-805](https://issues.apache.org/jira/browse/ARROW-805) - listing empty HDFS directory returns an error instead of returning empty list +* [ARROW-809](https://issues.apache.org/jira/browse/ARROW-809) - C++: Writing sliced record batch to IPC writes the entire array +* [ARROW-812](https://issues.apache.org/jira/browse/ARROW-812) - Pip install pyarrow on mac failed. +* [ARROW-817](https://issues.apache.org/jira/browse/ARROW-817) - [C++] Fix incorrect code comment from ARROW-722 +* [ARROW-821](https://issues.apache.org/jira/browse/ARROW-821) - [Python] Extra file _table_api.h generated during Python build process +* [ARROW-822](https://issues.apache.org/jira/browse/ARROW-822) - [Python] StreamWriter fails to open with socket as sink +* [ARROW-826](https://issues.apache.org/jira/browse/ARROW-826) - Compilation error on Mac with -DARROW_PYTHON=on +* [ARROW-829](https://issues.apache.org/jira/browse/ARROW-829) - Python: Parquet: Dictionary encoding is deactivated if column-wise compression was selected +* [ARROW-830](https://issues.apache.org/jira/browse/ARROW-830) - Python: jemalloc is not anymore publicly exposed +* [ARROW-839](https://issues.apache.org/jira/browse/ARROW-839) - [C++] Portable alternative to PyDate_to_ms function +* [ARROW-847](https://issues.apache.org/jira/browse/ARROW-847) - C++: BUILD_BYPRODUCTS not specified anymore for gtest +* [ARROW-852](https://issues.apache.org/jira/browse/ARROW-852) - Python: Also set Arrow Library PATHS when detection was done through pkg-config +* [ARROW-853](https://issues.apache.org/jira/browse/ARROW-853) - [Python] It is no longer necessary to modify the RPATH of the Cython extensions on many environments +* [ARROW-858](https://issues.apache.org/jira/browse/ARROW-858) - Remove dependency on boost regex +* [ARROW-866](https://issues.apache.org/jira/browse/ARROW-866) - [Python] Error from file object destructor +* [ARROW-867](https://issues.apache.org/jira/browse/ARROW-867) - [Python] Miscellaneous pyarrow MSVC fixes +* [ARROW-875](https://issues.apache.org/jira/browse/ARROW-875) - Nullable variable length vector fillEmpties() fills an extra value +* [ARROW-879](https://issues.apache.org/jira/browse/ARROW-879) - compat with pandas 0.20.0 +* [ARROW-882](https://issues.apache.org/jira/browse/ARROW-882) - [C++] On Windows statically built lib file overwrites lib file of shared build +* [ARROW-886](https://issues.apache.org/jira/browse/ARROW-886) - VariableLengthVectors don't reAlloc offsets +* [ARROW-887](https://issues.apache.org/jira/browse/ARROW-887) - [format] For backward compatibility, new unit fields must have default values matching previous implied unit +* [ARROW-888](https://issues.apache.org/jira/browse/ARROW-888) - BitVector transfer() does not transfer ownership +* [ARROW-895](https://issues.apache.org/jira/browse/ARROW-895) - Nullable variable length vector lastSet not set correctly +* [ARROW-900](https://issues.apache.org/jira/browse/ARROW-900) - [Python] UnboundLocalError in ParquetDatasetPiece +* [ARROW-903](https://issues.apache.org/jira/browse/ARROW-903) - [GLib] Remove a needless "." +* [ARROW-914](https://issues.apache.org/jira/browse/ARROW-914) - [C++/Python] Fix Decimal ToBytes +* [ARROW-922](https://issues.apache.org/jira/browse/ARROW-922) - Allow Flatbuffers and RapidJSON to be used locally on Windows +* [ARROW-928](https://issues.apache.org/jira/browse/ARROW-928) - Update CMAKE script to detect unsupported msvc compilers versions +* [ARROW-933](https://issues.apache.org/jira/browse/ARROW-933) - [Python] arrow_python bindings have debug print statement +* [ARROW-934](https://issues.apache.org/jira/browse/ARROW-934) - [GLib] Glib sources missing from result of 02-source.sh +* [ARROW-936](https://issues.apache.org/jira/browse/ARROW-936) - Fix release README +* [ARROW-938](https://issues.apache.org/jira/browse/ARROW-938) - Fix Apache Rat errors from source release build [2]: https://github.com/apache/arrow/releases/tag/apache-arrow-0.3.0 [3]: https://dist.apache.org/repos/dist/release/arrow/arrow-0.3.0/apache-arrow-0.3.0.tar.gz.md5