diff --git a/geometryops/Project.toml b/geometryops/Project.toml new file mode 100644 index 0000000..35a039e --- /dev/null +++ b/geometryops/Project.toml @@ -0,0 +1,15 @@ +[deps] +CairoMakie = "13f3f980-e62b-5c42-98c6-ff1f3baf88f0" +Chairmarks = "0ca39b1e-fe0b-4e98-acfc-b1656634c4de" +DelimitedFiles = "8bb1440f-4735-579b-a4ab-409b98df4dab" +GMT = "5752ebe1-31b9-557e-87aa-f909b540aa54" +GeoDataFrames = "62cb38b5-d8d2-4862-a48e-6a340996859f" +GeoInterface = "cf35fbd7-0cd7-5166-be24-54bfbe79505f" +GeometryOps = "3251bfac-6a57-4b6d-aa61-ac1fef2975ab" +LibGEOS = "a90b1aa1-3769-5649-ba7e-abc5a9d163eb" +MakieTeX = "6d554a22-29e7-47bd-aee5-0c5f06619414" +Proj = "c94c279d-25a6-4763-9509-64d165bea63e" +SwarmMakie = "0b1c068e-6a84-4e66-8136-5c95cafa83ed" + +[compat] +GeometryOps = "0.1.7" diff --git a/geometryops/buffer.jl b/geometryops/buffer.jl new file mode 100644 index 0000000..47011ab --- /dev/null +++ b/geometryops/buffer.jl @@ -0,0 +1,24 @@ +import GeometryOps as GO, GeoInterface as GI +using CairoMakie +import GeoDataFrames # easiest package to use to load Geopackage files +using Chairmarks + +using LibGEOS # activate buffer capabilities in GeometryOps + +include("utils.jl") + +# Load the data +data_path = joinpath(dirname(@__DIR__), "data") +points_gpkg = GeoDataFrames.read(joinpath(data_path, "points.gpkg")) +polygon_gpkg = GeoDataFrames.read(joinpath(data_path, "polygon.gpkg")) +# Process it into a Julia form +point_set = GO.tuples(points_gpkg.geom) +polygon = GO.tuples(only(polygon_gpkg.geom)) + +# Benchmark the distance function +# This uses the `Chairmarks.jl` package to benchmark the `GO.distance` function. +# The benchmark will run for 15 seconds. +benchmark = @be GO.buffer.((GO.GEOS(),), point_set, 100) seconds=15 + +# Write the results to a CSV file +write_benchmark_as_csv(benchmark; task = "buffer") \ No newline at end of file diff --git a/geometryops/distance.jl b/geometryops/distance.jl new file mode 100644 index 0000000..f7086af --- /dev/null +++ b/geometryops/distance.jl @@ -0,0 +1,24 @@ +import GeometryOps as GO, GeoInterface as GI +using CairoMakie +import GeoDataFrames # easiest package to use to load Geopackage files +using Chairmarks + +include("utils.jl") + +# Load the data +data_path = joinpath(dirname(@__DIR__), "data") +points_gpkg = GeoDataFrames.read(joinpath(data_path, "points.gpkg")) +polygon_gpkg = GeoDataFrames.read(joinpath(data_path, "polygon.gpkg")) +# Process it into a Julia form. +# In this particular case, we only compute +# distance between the first 4000 points - otherwise it would get excessive. +point_set = GO.tuples(points_gpkg.geom[1:4000]) +polygon = GO.tuples(only(polygon_gpkg.geom)) + +# Benchmark the distance function +# This uses the `Chairmarks.jl` package to benchmark the `GO.distance` function. +# The benchmark will run for 15 seconds. +distance_benchmark = @be [GO.distance(point1, point2) for point1 in $point_set, point2 in $point_set] seconds=15 gc=false + +# Write the results to a CSV file +write_benchmark_as_csv(distance_benchmark; task = "distance") diff --git a/geometryops/intersects.jl b/geometryops/intersects.jl new file mode 100644 index 0000000..ad35bc3 --- /dev/null +++ b/geometryops/intersects.jl @@ -0,0 +1,22 @@ +import GeometryOps as GO, GeoInterface as GI +using CairoMakie +import GeoDataFrames # easiest package to use to load Geopackage files +using Chairmarks + +include("utils.jl") + +# Load the data +data_path = joinpath(dirname(@__DIR__), "data") +points_gpkg = GeoDataFrames.read(joinpath(data_path, "points.gpkg")) +polygon_gpkg = GeoDataFrames.read(joinpath(data_path, "polygon.gpkg")) +# Process it into a Julia form +point_set = GO.tuples(points_gpkg.geom) +polygon = GO.tuples(only(polygon_gpkg.geom)) + +# Benchmark the intersects function +# This uses the `Chairmarks.jl` package to benchmark the `GO.intersects` function. +# The benchmark will run for 15 seconds. +benchmark = @be GO.intersects.(point_set, (polygon,)) seconds=15 + +# Write the results to a CSV file +write_benchmark_as_csv(benchmark; task = "intersects") \ No newline at end of file diff --git a/geometryops/load.jl b/geometryops/load.jl new file mode 100644 index 0000000..aa4894e --- /dev/null +++ b/geometryops/load.jl @@ -0,0 +1,22 @@ +import GeometryOps as GO, GeoInterface as GI +using CairoMakie +import GeoDataFrames # easiest package to use to load Geopackage files +using Chairmarks + +include("utils.jl") + +# Load the data +data_path = joinpath(dirname(@__DIR__), "data") +points_gpkg = GeoDataFrames.read(joinpath(data_path, "points.gpkg")) +polygon_gpkg = GeoDataFrames.read(joinpath(data_path, "polygon.gpkg")) +# Process it into a Julia form +point_set = GO.tuples(points_gpkg.geom) +polygon = GO.tuples(only(polygon_gpkg.geom)) + +# Benchmark the reading/loading function +# This uses the `Chairmarks.jl` package to benchmark the `GeoDataFrames.read` function. +# The benchmark will run for 15 seconds. +benchmark = @be GeoDataFrames.read($(joinpath(data_path, "points.gpkg"))) seconds=15 + +# Write the results to a CSV file +write_benchmark_as_csv(benchmark; task = "load") \ No newline at end of file diff --git a/geometryops/plots.jl b/geometryops/plots.jl new file mode 100644 index 0000000..bdc3bad --- /dev/null +++ b/geometryops/plots.jl @@ -0,0 +1,137 @@ +using CairoMakie, DelimitedFiles, Statistics + +results_path = joinpath(dirname(@__DIR__), "results") +results_files = readdir(results_path; join = false) +regex = r"(\w+)-(\w+).csv" +regex_matches = match.(regex, results_files) +results = map(regex_matches) do match + task, package = match.captures + data = DelimitedFiles.readdlm(joinpath(results_path, "$task-$package.csv"), ';', header = true) + (task, package, parse.(Float64, replace.(data[1][:, end], ("," => ".",)))) +end + +m1 = regex_matches[1] +task, package = m1.captures +data = DelimitedFiles.readdlm(joinpath(results_path, "$task-$package.csv"), ';', header = true) +data[1][:, end] + + +f = Figure() + +for (idx, task) in enumerate(unique(first.(results))) + records = filter(r -> r[1] == task, results) + colors = fill(Makie.wong_colors()[1], length(records)) + records_ind = findfirst(==("geometryops"), getindex.(records, 2)) + if !isnothing(records_ind) + println("Found GeometryOps for $task, coloring it") + colors[findfirst(==("geometryops"), getindex.(records, 2))] = Makie.wong_colors()[3] + end + a, p = barplot(f[idx, 1], + 1:length(records), + Statistics.median.(last.(records)); + color = colors, + direction = :x, + axis = (; + title = task, + yticks = (1:length(records), getindex.(records, 2)), + ylabel = "Package", + xlabel = "Median time (s)", + ) + ) +end + +resize!(f, 800, 1500) +f + +# summary plot + + +using MakieTeX # to render SVG +# get language logo SVGs +language_logo_url(lang::String) = "https://cdn.jsdelivr.net/gh/devicons/devicon@latest/icons/$(lowercase(lang))/$(lowercase(lang))-plain.svg" +language_marker_dict = Dict( + [ + key => MakieTeX.SVGDocument(read(download(language_logo_url(key)), String)) + for key in ("C", "Go", "javascript", "julia", "python", "r") + ] +) +language_marker_dict["r"] = MakieTeX.SVGDocument(read(download("https://raw.githubusercontent.com/file-icons/icons/master/svg/R.svg"), String)); +language_marker_dict["python"] = MakieTeX.SVGDocument(read(download("https://raw.githubusercontent.com/file-icons/MFixx/master/svg/python.svg"), String)); + +# create a map from package name to marker +marker_map = Dict( + # R packages + "sf" => language_marker_dict["r"], + "s2" => language_marker_dict["r"], + "terra" => language_marker_dict["r"], + "geos" => language_marker_dict["r"], + # Python package + "geopandas" => language_marker_dict["python"], + # Julia package + "geometryops" => language_marker_dict["julia"], +) +# package name to color +color_map = Dict( + # R packages + "sf" => Makie.wong_colors()[2], + "s2" => Makie.wong_colors()[6], + "terra" => Makie.wong_colors()[1], + "geos" => Makie.wong_colors()[4], + # Python package + "geopandas" => Makie.wong_colors()[5], + # Julia package + "geometryops" => Makie.wong_colors()[3], +) + +result_tasks = getindex.(results, 1) .|> string +result_pkgs = getindex.(results, 2) .|> string +result_times = Statistics.median.(getindex.(results, 3)) + +using SwarmMakie # for beeswarm plots and dodging + +using CategoricalArrays +using Makie: RGBA + +ca = CategoricalArray(result_tasks) + +group_marker = [MarkerElement(; color = color_map[package], marker = marker_map[package], markersize = 12) for package in keys(marker_map)] +names_marker = collect(keys(marker_map)) +lang_markers = ["R" => language_marker_dict["r"], "Python" => language_marker_dict["python"], "Julia" => language_marker_dict["julia"]] +group_package = [MarkerElement(; marker, markersize = 12, color = :black) for (lang, marker) in lang_markers] +names_package = first.(lang_markers) + +f, a, p = beeswarm( + ca.refs, result_times; + marker = getindex.((marker_map,), result_pkgs), + color = getindex.((color_map,), result_pkgs), + markersize = 10, + axis = (; + xticks = (1:length(ca.pool.levels), ca.pool.levels), + xlabel = "Task", + ylabel = "Median time (s)", + yscale = log10, + title = "Benchmark vector operations", + xgridvisible = false, + xminorgridvisible = true, + yminorgridvisible = true, + yminorticks = IntervalsBetween(5), + ygridcolor = RGBA{Float32}(0.0f0,0.0f0,0.0f0,0.05f0), + ), + figure = (; backgroundcolor = RGBAf(1, 1, 1, 0)) +) +leg = Legend( + f[1, 2], + [group_marker, group_package], + [names_marker, names_package], + ["Package", "Language"], + tellheight = false, + tellwidth = true, + gridshalign = :left, +) +resize!(f, 650, 450) +a.backgroundcolor[] = RGBAf(1, 1, 1, 0) +leg.backgroundcolor[] = RGBAf(1, 1, 1, 0) +p.markersize[] = 13 +f + + diff --git a/geometryops/sample.jl b/geometryops/sample.jl new file mode 100644 index 0000000..4f97fae --- /dev/null +++ b/geometryops/sample.jl @@ -0,0 +1,45 @@ +import GeometryOps as GO, GeoInterface as GI +using CairoMakie +import GMT # easiest package to use to load Geopackage files +using Chairmarks +using Proj # activate GeometryOps reprojection +include("utils.jl") # include saving utilities as common code + +# Load the data +data_path = joinpath(dirname(@__DIR__), "data") +points_gpkg = GeoDataFrames.read(joinpath(data_path, "points.gpkg")) +polygon_gpkg = GeoDataFrames.read(joinpath(data_path, "polygon.gpkg")) +# Process it into a Julia form +point_set = GO.tuples(points_gpkg.geom) +polygon = GO.tuples(only(polygon_gpkg.geom)) +# Create a `sample` function in the vein of the Python function + +function _sample(polygon, size) + ext = GI.extent(polygon) + xmin, xmax = ext.X + ymin, ymax = ext.Y + + count = 0 + points = Point2d[] + + while count < size + x = xmin + (xmax - xmin) * rand() + y = ymin + (ymax - ymin) * rand() + point = Point2d(x, y) + if GO.contains(polygon, point) + push!(points, point) + count += 1 + end + end + + return points + +end + +# Benchmark the `_sample` function +# This uses the `Chairmarks.jl` package to benchmark the `GO.distance` function. +# The benchmark will run for 15 seconds. +benchmark = @be _sample($polygon, 100_000) seconds=15 + +# Write the results to a CSV file +write_benchmark_as_csv(benchmark; task = "sample") \ No newline at end of file diff --git a/geometryops/transform.jl b/geometryops/transform.jl new file mode 100644 index 0000000..91e0f4f --- /dev/null +++ b/geometryops/transform.jl @@ -0,0 +1,26 @@ +import GeometryOps as GO, GeoInterface as GI +using CairoMakie +import GMT # easiest package to use to load Geopackage files +using Chairmarks + +using Proj # activate GeometryOps reprojection + +include("utils.jl") + +# Load the data +data_path = joinpath(dirname(@__DIR__), "data") +points_gpkg = GeoDataFrames.read(joinpath(data_path, "points.gpkg")) +polygon_gpkg = GeoDataFrames.read(joinpath(data_path, "polygon.gpkg")) +# Process it into a Julia form +point_set = GO.tuples(points_gpkg.geom) +polygon = GO.tuples(polygon_gpkg.geom) +polygon = GO.apply(p -> Point2d(GI.x(p), GI.y(p)), GI.PointTrait(), only(polygon_gpkg.geom)) +# Benchmark the reproject function +# This uses the `Chairmarks.jl` package to benchmark the +# `GO.reproject` function, which calls the Proj C-API. +# In future it will also support native Julia transformations. +# The benchmark will run for 15 seconds. +benchmark = @be GO.reproject($point_set, $(points_gpkg[1].proj4), $("EPSG:4326"); always_xy = false) seconds=15 + +# Write the results to a CSV file +write_benchmark_as_csv(benchmark; task = "transform") \ No newline at end of file diff --git a/geometryops/utils.jl b/geometryops/utils.jl new file mode 100644 index 0000000..8ffdb2a --- /dev/null +++ b/geometryops/utils.jl @@ -0,0 +1,25 @@ +import DelimitedFiles, Chairmarks + +""" + write_benchmark_as_csv(benchmark::Chairmarks.Benchmark; package = "geometryops", task = "distance") + +Write the results of a Chairmarks benchmark to a CSV file. The CSV file will be written to the +""" +function write_benchmark_as_csv(benchmark::Chairmarks.Benchmark; package = "geometryops", task = "distance") + # Extract the time results from the Chairmarks benchmark. + # These are in the form of `Chairmarks.Sample` objects which have various + # properties like `time`, `memory`, etc. + times = getproperty.(benchmark.samples, :time) + # Now, we generate each row of the CSV file as a vector of strings. + time_rows = map(times) do time + [task, package, replace(string(round(time, digits=4)), "." => ",")] + end + # Write the results to a CSV file + header_vec = ["task", "package", "time"] + + DelimitedFiles.writedlm( + joinpath(dirname(@__DIR__), "results", "$task-$package.csv"), + permutedims(hcat(header_vec, time_rows...)), + ';' + ) +end diff --git a/geometryops/write.jl b/geometryops/write.jl new file mode 100644 index 0000000..a33e0ab --- /dev/null +++ b/geometryops/write.jl @@ -0,0 +1,25 @@ +import GeometryOps as GO, GeoInterface as GI +using CairoMakie +import GeoDataFrames # easiest package to use to load Geopackage files +using Chairmarks + +include("utils.jl") + +# Load the data +data_path = joinpath(dirname(@__DIR__), "data") +points_gpkg = GeoDataFrames.read(joinpath(data_path, "points.gpkg")) +polygon_gpkg = GeoDataFrames.read(joinpath(data_path, "polygon.gpkg")) +# Process it into a Julia form +point_set = GO.tuples(points_gpkg.geom) +polygon = GO.tuples(polygon_gpkg.geom) +function _write(gdf) + GeoDataFrames.write(joinpath(tempdir(), "temp.gpkg"), gdf; crs = GI.crs(first(gdf.geometry))) +end +# Benchmark the writing function +# This uses the `Chairmarks.jl` package to benchmark the `GeoDataFrames.write` function. +# The benchmark will run for 15 seconds. +gdf = GeoDataFrames.DataFrame(:geometry => point_set) +benchmark = @be _write($gdf) seconds=20 + +# Write the results to a CSV file +write_benchmark_as_csv(benchmark; task = "write") \ No newline at end of file diff --git a/run_benchmarks.sh b/run_benchmarks.sh old mode 100644 new mode 100755 index 190ca96..f313cd7 --- a/run_benchmarks.sh +++ b/run_benchmarks.sh @@ -2,6 +2,7 @@ R_packages=(sf terra s2 geos) Python_packages=(geopandas) +Julia_packages=(geometryops) mkdir results @@ -24,3 +25,14 @@ do python3 "$path" done done + +# run Julia benchmarks +julia --project=./geometryops -e 'using Pkg; Pkg.instantiate()' +for i in ${Julia_packages[*]} +do + for path in "${i}"/*.jl + do + echo "$path" + julia --project=./geometryops "$path" + done +done \ No newline at end of file diff --git a/timings.csv b/timings.csv index 1f19176..6ab256d 100644 --- a/timings.csv +++ b/timings.csv @@ -1,29 +1,36 @@ task,package,median -buffer,geopandas,10.46 -distance,geopandas,10.17 -intersects,geopandas,0.09 -load,geopandas,17.4 -sample,geopandas,0.8 -transform,geopandas,0.51 -write,geopandas,42.08 -buffer,geos,9.07 -distance,geos,11.17 -intersects,geos,0.12 -buffer,sf,19.65 -distance,sf,1.31 -intersects,sf,2.33 -load,sf,1.23 -sample,sf,2.28 -write,sf,8.73 -buffer,terra,20.62 -distance,terra,1.32 -intersects,terra,0.3 -load,terra,0.64 -sample,terra,5.64 -transform,terra,0.82 -write,terra,7.61 -distance,s2,24.79 -intersects,s2,0.76 -transform,s2,2.38 -transform,sf-project,0.21 -transform,sf-transform,1.93 +buffer,geometryops,2.38 +distance,geometryops,0.04 +intersects,geometryops,0.01 +load,geometryops,0.2 +sample,geometryops,0.09 +transform,geometryops,0.05 +write,geometryops,3.53 +buffer,geopandas,6.09 +distance,geopandas,9.62 +intersects,geopandas,0.13 +load,geopandas,15.81 +sample,geopandas,0.43 +transform,geopandas,0.31 +write,geopandas,20.24 +buffer,geos,2.82 +distance,geos,5.08 +intersects,geos,0.04 +buffer,sf,5.35 +distance,sf,0.36 +intersects,sf,0.64 +load,sf,0.45 +sample,sf,0.64 +write,sf,2.81 +buffer,terra,6.17 +distance,terra,0.38 +intersects,terra,0.16 +load,terra,0.34 +sample,terra,1.47 +transform,terra,0.37 +write,terra,2.53 +distance,s2,10.29 +intersects,s2,0.37 +transform,s2,0.77 +transform,sf-project,0.05 +transform,sf-transform,0.5