Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions cmd/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ type EnvironmentConfig struct {
Redis RedisConfig `yaml:"redis"`
Utapi UtapiConfig `yaml:"utapi"`
MigrationTools MigrationToolsConfig `yaml:"migration_tools"`
Clickhouse ClickhouseConfig `yaml:"clickhouse"`
}

type GlobalConfig struct {
Expand All @@ -70,6 +71,7 @@ type FeatureConfig struct {
CrossRegionReplication CrossRegionReplicationFeatureConfig `yaml:"cross_region_replication"`
Utapi UtapiFeatureConfig `yaml:"utapi"`
Migration MigrationFeatureConfig `yaml:"migration"`
AccessLogging AccessLoggingFeatureConfig `yaml:"access_logging"`
}

type ScubaFeatureConfig struct {
Expand Down Expand Up @@ -212,6 +214,15 @@ type RedisConfig struct {
LogLevel string `yaml:"log_level"`
}

// ClickhouseConfig holds the settings decoded from the `clickhouse` section
// of the environment configuration.
type ClickhouseConfig struct {
	// Image is decoded from the `image` key. The global defaults.env template
	// exposes it as CLICKHOUSE_IMAGE.
	Image string `yaml:"image"`
	// LogLevel is decoded from the `log_level` key. LoadEnvironmentConfig
	// replaces an empty value with the global log level.
	LogLevel string `yaml:"log_level"`
}

// AccessLoggingFeatureConfig holds the settings decoded from the
// `access_logging` feature section of the environment configuration.
type AccessLoggingFeatureConfig struct {
	// Enabled is decoded from the `enabled` key and defaults to false. When
	// true, getComposeProfiles adds the "feature-access-logging" profile.
	Enabled bool `yaml:"enabled"`
}

func DefaultEnvironmentConfig() EnvironmentConfig {
return EnvironmentConfig{
Global: GlobalConfig{
Expand All @@ -237,6 +248,9 @@ func DefaultEnvironmentConfig() EnvironmentConfig {
CrossRegionReplication: CrossRegionReplicationFeatureConfig{
Enabled: false,
},
AccessLogging: AccessLoggingFeatureConfig{
Enabled: false,
},
},
Cloudserver: CloudserverConfig{},
S3Metadata: MetadataConfig{
Expand Down Expand Up @@ -272,6 +286,7 @@ func DefaultEnvironmentConfig() EnvironmentConfig {
},
Utapi: UtapiConfig{},
MigrationTools: MigrationToolsConfig{},
Clickhouse: ClickhouseConfig{},
}
}

Expand Down Expand Up @@ -348,5 +363,9 @@ func LoadEnvironmentConfig(path string) (EnvironmentConfig, error) {
cfg.MigrationTools.LogLevel = cfg.Global.LogLevel
}

if cfg.Clickhouse.LogLevel == "" {
cfg.Clickhouse.LogLevel = cfg.Global.LogLevel
}

return cfg, nil
}
23 changes: 23 additions & 0 deletions cmd/configure.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ func createLogDirectories(envDir string) error {
filepath.Join(envDir, "logs", "scuba"),
filepath.Join(envDir, "logs", "backbeat"),
filepath.Join(envDir, "logs", "migration-tools"),
filepath.Join(envDir, "logs", "clickhouse-shard-1"),
filepath.Join(envDir, "logs", "clickhouse-shard-2"),
}

for _, dir := range logDirs {
Expand Down Expand Up @@ -71,6 +73,7 @@ func configureEnv(cfg EnvironmentConfig, envDir string) error {
generateKafkaConfig,
generateUtapiConfig,
generateMigrationToolsConfig,
generateClickhouseConfig,
}

configDir := filepath.Join(envDir, "config")
Expand Down Expand Up @@ -199,3 +202,23 @@ func generateMigrationToolsConfig(cfg EnvironmentConfig, path string) error {

return renderTemplates(cfg, "templates/migration-tools", filepath.Join(path, "migration-tools"), templates)
}

// generateClickhouseConfig hands the ClickHouse template list to
// renderTemplates, using "templates/clickhouse" as the template directory and
// the "clickhouse" subdirectory of path as the destination. It returns
// whatever error renderTemplates reports.
func generateClickhouseConfig(cfg EnvironmentConfig, path string) error {
	outputDir := filepath.Join(path, "clickhouse")

	// Image build files, server configuration and the schema bootstrap script.
	serverFiles := []string{
		"Dockerfile.shard",
		"Dockerfile.setup",
		"entrypoint.sh",
		"cluster-config.xml",
		"ports-shard-1.xml",
		"ports-shard-2.xml",
		"init-schema.sh",
	}

	// SQL files, kept in their numbered order after the server files.
	schemaFiles := []string{
		"init.d/01-create-database.sql",
		"init.d/02-create-ingest-table.sql",
		"init.d/03-create-storage-table.sql",
		"init.d/04-create-offsets-table.sql",
		"init.d/05-create-distributed-tables.sql",
		"init.d/06-create-materialized-view.sql",
	}

	allFiles := append(serverFiles, schemaFiles...)

	return renderTemplates(cfg, "templates/clickhouse", outputDir, allFiles)
}
4 changes: 4 additions & 0 deletions cmd/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,10 @@ func getComposeProfiles(cfg EnvironmentConfig) []string {
profiles = append(profiles, "feature-crr")
}

if cfg.Features.AccessLogging.Enabled {
profiles = append(profiles, "feature-access-logging")
}

return profiles
}

Expand Down
9 changes: 9 additions & 0 deletions templates/clickhouse/Dockerfile.setup
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Image for the one-shot schema setup container: it packages init-schema.sh
# together with the SQL files under init.d and runs the script on start.

# Base image is supplied at build time through the BASE_IMAGE build argument.
ARG BASE_IMAGE
FROM $BASE_IMAGE

USER root

# The script is copied with the executable bit set so CMD can run it directly.
COPY --chmod=755 init-schema.sh /opt/
COPY init.d/*.sql /opt/init.d/

CMD ["/opt/init-schema.sh"]
8 changes: 8 additions & 0 deletions templates/clickhouse/Dockerfile.shard
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Image for a ClickHouse shard container: the base image plus a custom
# entrypoint script.

# Base image is supplied at build time through the BASE_IMAGE build argument.
ARG BASE_IMAGE
FROM $BASE_IMAGE

# The entrypoint starts as root; entrypoint.sh changes ownership of the data
# and log directories and then switches to the clickhouse user.
USER root

COPY --chmod=755 entrypoint.sh /usr/local/bin/

ENTRYPOINT ["/usr/local/bin/entrypoint.sh"]
23 changes: 23 additions & 0 deletions templates/clickhouse/cluster-config.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
<?xml version="1.0"?>
<!--
  Declares the "workbench_cluster" cluster referenced by the Distributed
  tables in init.d/05-create-distributed-tables.sql. It has two shards with a
  single replica each, both on 127.0.0.1, reached with the "default" user and
  an empty password.
-->
<clickhouse>
    <remote_servers>
        <workbench_cluster>
            <!-- Shard 1: port matches tcp_port in ports-shard-1.xml. -->
            <shard>
                <replica>
                    <host>127.0.0.1</host>
                    <port>9002</port>
                    <user>default</user>
                    <password></password>
                </replica>
            </shard>
            <!-- Shard 2: port matches tcp_port in ports-shard-2.xml. -->
            <shard>
                <replica>
                    <host>127.0.0.1</host>
                    <port>9003</port>
                    <user>default</user>
                    <password></password>
                </replica>
            </shard>
        </workbench_cluster>
    </remote_servers>
</clickhouse>
9 changes: 9 additions & 0 deletions templates/clickhouse/entrypoint.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#!/bin/sh
# Shard container entrypoint: hand the data and log directories to the
# clickhouse user, then replace this process with the server running as that
# user.
set -e

# Ownership is fixed before the server starts; with `set -e` a failing chown
# aborts the script.
for owned_dir in /var/lib/clickhouse /var/log/clickhouse-server; do
    chown -R clickhouse:clickhouse "$owned_dir"
done

# Both the su call and the inner shell use exec, so the server replaces each
# of them in turn.
exec su clickhouse -s /bin/sh -c 'exec /usr/bin/clickhouse-server --config-file=/etc/clickhouse-server/config.xml'
32 changes: 32 additions & 0 deletions templates/clickhouse/init-schema.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
#!/usr/bin/env sh
# Applies every SQL file in /opt/init.d to both ClickHouse shards once each
# shard answers a trivial query.
#
# Environment:
#   CLICKHOUSE_SETUP_MAX_ATTEMPTS  Readiness probes per shard (2s apart) before
#                                  the script gives up and exits non-zero.
#                                  Defaults to 150 (about five minutes).
set -e
set -x

CLICKHOUSE_HOST="127.0.0.1"
MAX_ATTEMPTS="${CLICKHOUSE_SETUP_MAX_ATTEMPTS:-150}"

# wait_for_shard SHARD_NUMBER PORT
# Polls the shard with "SELECT 1" until it succeeds. Exits the script with
# status 1 once MAX_ATTEMPTS probes have failed, so a shard that never starts
# produces a visible failure instead of an endless wait.
wait_for_shard() {
    shard="$1"
    port="$2"
    attempt=0

    echo "[clickhouse-setup] Waiting for shard $shard..."
    until clickhouse-client --host "$CLICKHOUSE_HOST" --port "$port" --query "SELECT 1" > /dev/null 2>&1; do
        attempt=$((attempt + 1))
        if [ "$attempt" -ge "$MAX_ATTEMPTS" ]; then
            echo "[clickhouse-setup] Shard $shard not ready after $attempt attempts, giving up" >&2
            exit 1
        fi
        echo "[clickhouse-setup] Shard $shard not ready, waiting 2s..."
        sleep 2
    done
    echo "[clickhouse-setup] Shard $shard is ready!"
}

# run_sql_on_shard SHARD_NUMBER PORT SQL_FILE
# Feeds one SQL file to the shard. With `set -e`, a failing statement aborts
# the whole script.
run_sql_on_shard() {
    shard="$1"
    port="$2"
    file="$3"

    echo "[clickhouse-setup] Executing $(basename "$file") on shard $shard..."
    clickhouse-client --host "$CLICKHOUSE_HOST" --port "$port" --multiquery < "$file"
}

echo "[clickhouse-setup] Starting schema initialization..."

# Wait for both shards to be ready
wait_for_shard 1 9002
wait_for_shard 2 9003

# Execute SQL files on both shards, in glob (numeric prefix) order
for sql_file in /opt/init.d/*.sql; do
    run_sql_on_shard 1 9002 "$sql_file"
    run_sql_on_shard 2 9003 "$sql_file"
done

echo "[clickhouse-setup] Schema initialization completed successfully!"
1 change: 1 addition & 0 deletions templates/clickhouse/init.d/01-create-database.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
-- Creates the `logs` database used by all other init.d scripts.
-- IF NOT EXISTS keeps the script safe to re-run.
CREATE DATABASE IF NOT EXISTS logs;
57 changes: 57 additions & 0 deletions templates/clickhouse/init.d/02-create-ingest-table.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
-- Ingest table for access log rows. It uses the Null engine, so it does not
-- store what is inserted; the materialized view defined in
-- 06-create-materialized-view.sql selects from it and forwards rows onward.
CREATE TABLE IF NOT EXISTS logs.access_logs_ingest
(
    -- Common
    timestamp DateTime,
    insertedAt DateTime DEFAULT now(),
    hostname LowCardinality(String),

    -- Analytics
    action LowCardinality(String),
    accountName String,
    accountDisplayName String,
    userName String,
    clientPort UInt32,
    httpMethod LowCardinality(String),
    bytesDeleted UInt64,
    bytesReceived UInt64,
    bodyLength UInt64,
    contentLength UInt64,
    elapsed_ms Float32,

    -- AWS access server logs fields https://docs.aws.amazon.com/AmazonS3/latest/userguide/LogFormat.html
    startTime DateTime64(3), -- AWS "Time" field
    requester String,
    operation String,
    requestURI String,
    errorCode String,
    objectSize UInt64,
    totalTime Float32,
    turnAroundTime Float32,
    referer String,
    userAgent String,
    versionId String,
    signatureVersion LowCardinality(String),
    cipherSuite LowCardinality(String),
    authenticationType LowCardinality(String),
    hostHeader String,
    tlsVersion LowCardinality(String),
    aclRequired LowCardinality(String),

    -- Shared between AWS access server logs and Analytics logs
    bucketOwner String, -- AWS "Bucket Owner" field
    bucketName String, -- AWS "Bucket" field
    req_id String, -- AWS "Request ID" field
    bytesSent UInt64, -- AWS "Bytes Sent" field
    clientIP String, -- AWS "Remote IP" field
    httpCode UInt16, -- AWS "HTTP Status" field
    objectKey String, -- AWS "Key" field

    -- Scality server access logs extra fields.
    logFormatVersion LowCardinality(String),
    loggingEnabled Bool,
    loggingTargetBucket String,
    loggingTargetPrefix String,
    awsAccessKeyID String,
    raftSessionID UInt16
)
Engine = Null();
46 changes: 46 additions & 0 deletions templates/clickhouse/init.d/03-create-storage-table.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
-- Per-shard storage table for access log rows. It has the same columns as
-- logs.access_logs_ingest minus the "Analytics" group.
CREATE TABLE IF NOT EXISTS logs.access_logs
(
    -- Common
    timestamp DateTime,
    insertedAt DateTime DEFAULT now(),
    hostname LowCardinality(String),

    -- AWS access server logs fields https://docs.aws.amazon.com/AmazonS3/latest/userguide/LogFormat.html
    startTime DateTime64(3), -- AWS "Time" field
    requester String,
    operation String,
    requestURI String,
    errorCode String,
    objectSize UInt64,
    totalTime Float32,
    turnAroundTime Float32,
    referer String,
    userAgent String,
    versionId String,
    signatureVersion LowCardinality(String),
    cipherSuite LowCardinality(String),
    authenticationType LowCardinality(String),
    hostHeader String,
    tlsVersion LowCardinality(String),
    aclRequired LowCardinality(String),

    -- Shared between AWS access server logs and Analytics logs
    bucketOwner String, -- AWS "Bucket Owner" field
    bucketName String, -- AWS "Bucket" field
    req_id String, -- AWS "Request ID" field
    bytesSent UInt64, -- AWS "Bytes Sent" field
    clientIP String, -- AWS "Remote IP" field
    httpCode UInt16, -- AWS "HTTP Status" field
    objectKey String, -- AWS "Key" field

    -- Scality server access logs extra fields.
    logFormatVersion LowCardinality(String),
    loggingEnabled Bool,
    loggingTargetBucket String,
    loggingTargetPrefix String,
    awsAccessKeyID String,
    raftSessionID UInt16
)
Engine = MergeTree()
-- One partition per 24-hour interval of the insertion time.
PARTITION BY toStartOfInterval(insertedAt, INTERVAL 24 HOUR)
-- Rows are sorted by Raft session and bucket first, then by insertion time,
-- event time and request ID.
ORDER BY (raftSessionID, bucketName, insertedAt, timestamp, req_id);
10 changes: 10 additions & 0 deletions templates/clickhouse/init.d/04-create-offsets-table.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
-- Per-shard table recording the last processed position for each
-- (bucketName, raftSessionID) pair. The position columns mirror the
-- insertedAt / timestamp / req_id columns of logs.access_logs.
CREATE TABLE IF NOT EXISTS logs.offsets
(
    bucketName String,
    raftSessionID UInt16,
    lastProcessedInsertedAt DateTime,
    lastProcessedTimestamp DateTime64(3),
    lastProcessedReqId String
)
-- ReplacingMergeTree with lastProcessedInsertedAt as the version column: for
-- rows sharing the same ORDER BY key, merges keep the row with the highest
-- version. Deduplication happens at merge time, so duplicates can be visible
-- until then.
ENGINE = ReplacingMergeTree(lastProcessedInsertedAt)
ORDER BY (bucketName, raftSessionID);
5 changes: 5 additions & 0 deletions templates/clickhouse/init.d/05-create-distributed-tables.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
-- Distributed tables over the "workbench_cluster" cluster declared in
-- cluster-config.xml. Each copies its column definitions from the local table
-- it fronts (the AS clause) and uses raftSessionID as the sharding key.

-- Fronts logs.access_logs on every shard.
CREATE TABLE IF NOT EXISTS logs.access_logs_federated AS logs.access_logs
ENGINE = Distributed(workbench_cluster, logs, access_logs, raftSessionID);

-- Fronts logs.offsets on every shard.
CREATE TABLE IF NOT EXISTS logs.offsets_federated AS logs.offsets
ENGINE = Distributed(workbench_cluster, logs, offsets, raftSessionID);
42 changes: 42 additions & 0 deletions templates/clickhouse/init.d/06-create-materialized-view.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
-- Materialized view that forwards rows inserted into logs.access_logs_ingest
-- to logs.access_logs_federated. Only rows with loggingEnabled = true are
-- forwarded, and the "Analytics" columns of the ingest table are not selected.
CREATE MATERIALIZED VIEW IF NOT EXISTS logs.access_logs_ingest_mv
TO logs.access_logs_federated
AS
SELECT
    -- Common
    timestamp,
    insertedAt,
    hostname,

    -- AWS access server logs fields
    startTime,
    requester,
    operation,
    requestURI,
    errorCode,
    objectSize,
    totalTime,
    turnAroundTime,
    referer,
    userAgent,
    versionId,
    signatureVersion,
    cipherSuite,
    authenticationType,
    hostHeader,
    tlsVersion,
    aclRequired,

    -- Shared between AWS access server logs and Analytics logs
    bucketOwner,
    bucketName,
    req_id,
    bytesSent,
    clientIP,
    httpCode,
    objectKey,

    -- Scality server access logs extra fields
    logFormatVersion,
    loggingEnabled,
    loggingTargetBucket,
    loggingTargetPrefix,
    awsAccessKeyID,
    raftSessionID
FROM logs.access_logs_ingest
WHERE loggingEnabled = true;
6 changes: 6 additions & 0 deletions templates/clickhouse/ports-shard-1.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
<?xml version="1.0"?>
<!--
  Listening ports for shard 1. tcp_port matches the first shard entry in
  cluster-config.xml and the port init-schema.sh uses for shard 1. All three
  ports differ from those in ports-shard-2.xml.
-->
<clickhouse>
    <http_port>8123</http_port>
    <tcp_port>9002</tcp_port>
    <interserver_http_port>9009</interserver_http_port>
</clickhouse>
6 changes: 6 additions & 0 deletions templates/clickhouse/ports-shard-2.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
<?xml version="1.0"?>
<!--
  Listening ports for shard 2. tcp_port matches the second shard entry in
  cluster-config.xml and the port init-schema.sh uses for shard 2. All three
  ports differ from those in ports-shard-1.xml.
-->
<clickhouse>
    <http_port>8124</http_port>
    <tcp_port>9003</tcp_port>
    <interserver_http_port>9010</interserver_http_port>
</clickhouse>
1 change: 1 addition & 0 deletions templates/global/defaults.env
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ SCUBA_IMAGE="{{ .Scuba.Image }}"
BACKBEAT_IMAGE="{{ .Backbeat.Image }}"
UTAPI_IMAGE="{{ .Utapi.Image }}"
MIGRATION_TOOLS_IMAGE="{{ .MigrationTools.Image }}"
CLICKHOUSE_IMAGE="{{ .Clickhouse.Image }}"

METADATA_S3_DB_VERSION="{{ .S3Metadata.VFormat }}"
CLOUDSERVER_ENABLE_NULL_VERSION_COMPAT_MODE="{{ .Cloudserver.EnableNullVersionCompatMode }}"
Loading
Loading