Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -132,3 +132,9 @@ repos:
?^cpp/cmake_modules/UseCython\.cmake$|
?^cpp/src/arrow/util/config\.h\.cmake$|
)
- repo: https://github.com/sphinx-contrib/sphinx-lint
rev: v0.9.1
hooks:
- id: sphinx-lint
files: ^docs/
args: ['--disable', 'all', '--enable', 'trailing-whitespace,missing-final-newline', 'docs']
1 change: 1 addition & 0 deletions ci/conda_env_sphinx.txt
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ pydata-sphinx-theme=0.14
sphinx-autobuild
sphinx-design
sphinx-copybutton
sphinx-lint
sphinxcontrib-jquery
sphinx==6.2
# Requirement for doctest-cython
Expand Down
6 changes: 4 additions & 2 deletions dev/archery/archery/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,7 @@ def build(ctx, src, build_dir, force, targets, **kwargs):
"Check all sources files for license texts via Apache RAT."),
LintCheck('r', "Lint R files."),
LintCheck('docker', "Lint Dockerfiles with hadolint."),
LintCheck('docs', "Lint docs with sphinx-lint."),
]


Expand All @@ -285,9 +286,10 @@ def decorate_lint_command(cmd):
help="Run IWYU on all C++ files if enabled")
@click.option("-a", "--all", is_flag=True, default=False,
help="Enable all checks.")
@click.argument("path", required=False)
@decorate_lint_command
@click.pass_context
def lint(ctx, src, fix, iwyu_all, **checks):
def lint(ctx, src, fix, iwyu_all, path, **checks):
if checks.pop('all'):
# "--all" is given => enable all non-selected checks
for k, v in checks.items():
Expand All @@ -297,7 +299,7 @@ def lint(ctx, src, fix, iwyu_all, **checks):
raise click.UsageError(
"Need to enable at least one lint check (try --help)")
try:
linter(src, fix, iwyu_all=iwyu_all, **checks)
linter(src, fix, iwyu_all=iwyu_all, path=path, **checks)
except LintValidationException:
sys.exit(1)

Expand Down
52 changes: 50 additions & 2 deletions dev/archery/archery/utils/lint.py
Original file line number Diff line number Diff line change
Expand Up @@ -436,10 +436,55 @@ def docker_linter(src):
cwd=src.path))


def linter(src, fix=False, *, clang_format=False, cpplint=False,
class SphinxLint(Command):
def __init__(self, src, path=None, sphinx_lint_bin=None, disable=None, enable=None):
self.src = src
self.path = path
self.bin = default_bin(sphinx_lint_bin, "sphinx-lint")
self.disable = disable or "all"
self.enable = enable

def lint(self, *args, check=False):
docs_path = os.path.join(self.src.path, "docs")

args = []

if self.disable:
args.extend(["--disable", self.disable])

if self.enable:
args.extend(["--enable", self.enable])

if self.path is not None:
args.extend([self.path])
else:
args.extend([docs_path])

return self.run(*args, check=check)


def docs_linter(src, path=None):
"""Run sphinx-lint on docs."""
logger.info("Running docs linter (sphinx-lint)")

sphinx_lint = SphinxLint(
src,
path=path,
disable="all",
enable="trailing-whitespace,missing-final-newline"
)

if not sphinx_lint.available:
logger.error("sphinx-lint linter requested but sphinx-lint binary not found")
return

yield LintResult.from_cmd(sphinx_lint.lint())


def linter(src, fix=False, path=None, *, clang_format=False, cpplint=False,
clang_tidy=False, iwyu=False, iwyu_all=False,
python=False, numpydoc=False, cmake_format=False, rat=False,
r=False, docker=False):
r=False, docker=False, docs=False):
"""Run all linters."""
with tmpdir(prefix="arrow-lint-") as root:
build_dir = os.path.join(root, "cpp-build")
Expand Down Expand Up @@ -481,6 +526,9 @@ def linter(src, fix=False, *, clang_format=False, cpplint=False,
if docker:
results.extend(docker_linter(src))

if docs:
results.extend(docs_linter(src, path))

# Raise error if one linter failed, ensuring calling code can exit with
# non-zero.
for result in results:
Expand Down
2 changes: 1 addition & 1 deletion dev/archery/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@
'integration': ['cffi'],
'integration-java': ['jpype1'],
'lint': ['numpydoc==1.1.0', 'autopep8', 'flake8==6.1.0', 'cython-lint',
'cmake_format==0.6.13'],
'cmake_format==0.6.13', 'sphinx-lint==0.9.1'],
'numpydoc': ['numpydoc==1.1.0'],
'release': ['pygithub', jinja_req, 'jira', 'semver', 'gitpython'],
}
Expand Down
1 change: 1 addition & 0 deletions docs/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,5 +10,6 @@ pydata-sphinx-theme~=0.14
sphinx-autobuild
sphinx-design
sphinx-copybutton
sphinx-lint
sphinx==6.2
pandas
6 changes: 3 additions & 3 deletions docs/source/cpp/acero/developer_guide.rst
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,7 @@ Examples
task (described below) as completed which allows the plan to finish.
* The ``fetch`` node, in ``InputReceived``, may decide that it has all the data it needs. It can then call
``StopProducing`` on its input.

Initialization / Construction / Destruction
-------------------------------------------

Expand Down Expand Up @@ -271,7 +271,7 @@ distributed systems. Once that has been done then it should be possible to do a
meaning exchanging between multiple exec plan instances on a single system) if desired.

.. figure:: dist_plan.svg

A distributed plan can provide parallelism even if the plans themselves run serially

Pipeline Parallelism
Expand Down Expand Up @@ -472,7 +472,7 @@ Benchmarking

The most complete macro benchmarking for Acero is provided by https://github.com/voltrondata-labs/arrowbench
These include a set of TPC-H benchmarks, executed from the R-dplyr integration, which are run on every Arrow commit and
reported to Conbench at https://conbench.ursa.dev/
reported to Conbench at https://conbench.ursa.dev/

In addition to these TPC-H benchmarks there are a number of micro-benchmarks for various nodes (hash-join, asof-join,
etc.) Finally, the compute functions themselves should mostly have micro-benchmarks. For more on micro benchmarks you
Expand Down
8 changes: 4 additions & 4 deletions docs/source/cpp/acero/overview.rst
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,7 @@ is very similar to a RecordBatch. It can have zero or more columns and all of t
must have the same length. There are a few key differences from ExecBatch:

.. figure:: rb_vs_eb.svg

Both the record batch and the exec batch have strong ownership of the arrays & buffers

* An `ExecBatch` does not have a schema. This is because an `ExecBatch` is assumed to be
Expand All @@ -217,7 +217,7 @@ must have the same length. There are a few key differences from ExecBatch:
also has a length property which describes how many rows are in a batch. So another way to
view a `Scalar` is a constant array with `length` elements.
* An `ExecBatch` contains additional information used by the exec plan. For example, an
`index` can be used to describe a batch's position in an ordered stream. We expect
`index` can be used to describe a batch's position in an ordered stream. We expect
that `ExecBatch` will also evolve to contain additional fields such as a selection vector.

.. figure:: scalar_vs_array.svg
Expand Down Expand Up @@ -266,5 +266,5 @@ various query representations (e.g. Substrait). The Declaration objects are the
with the DeclarationToXyz methods, are the current public API for Acero.

.. figure:: decl_vs_ep.svg
A declaration is a blueprint that is used to instantiate exec plan instances

A declaration is a blueprint that is used to instantiate exec plan instances
46 changes: 23 additions & 23 deletions docs/source/cpp/acero/substrait.rst
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ Aggregate Relations
* Each measure's arguments must be direct references.
* A measure may not have a filter
* A measure may not have sorts
* A measure's invocation must be AGGREGATION_INVOCATION_ALL or
* A measure's invocation must be AGGREGATION_INVOCATION_ALL or
AGGREGATION_INVOCATION_UNSPECIFIED
* A measure's phase must be AGGREGATION_PHASE_INITIAL_TO_RESULT

Expand Down Expand Up @@ -146,73 +146,73 @@ Types
- Caveat
* - boolean
- boolean
-
-
* - i8
- int8
-
-
* - i16
- int16
-
-
* - i32
- int32
-
-
* - i64
- int64
-
-
* - fp32
- float32
-
-
* - fp64
- float64
-
-
* - string
- string
-
-
* - binary
- binary
-
-
* - timestamp
- timestamp<MICRO,"">
-
-
* - timestamp_tz
- timestamp<MICRO,"UTC">
-
-
* - date
- date32<DAY>
-
-
* - time
- time64<MICRO>
-
-
* - interval_year
-
-
- Not currently supported
* - interval_day
-
-
- Not currently supported
* - uuid
-
-
- Not currently supported
* - FIXEDCHAR<L>
-
-
- Not currently supported
* - VARCHAR<L>
-
-
- Not currently supported
* - FIXEDBINARY<L>
- fixed_size_binary<L>
-
-
* - DECIMAL<P,S>
- decimal128<P,S>
-
-
* - STRUCT<T1...TN>
- struct<T1...TN>
- Arrow struct fields will have no name (empty string)
* - NSTRUCT<N:T1...N:Tn>
-
-
- Not currently supported
* - LIST<T>
- list<T>
-
-
* - MAP<K,V>
- map<K,V>
- K must not be nullable
Expand Down
Loading