Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions docs/development/extensions-core/kubernetes.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,16 +49,15 @@ Additionally, this extension has following configuration.
|`druid.discovery.k8s.clusterIdentifier`|`string that matches [a-z0-9][a-z0-9-]*[a-z0-9]`|Unique identifier for this Druid cluster in Kubernetes e.g. us-west-prod-druid.|None|Yes|
|`druid.discovery.k8s.podNameEnvKey`|`Pod Env Variable`|Pod Env variable whose value is that pod's name.|POD_NAME|No|
|`druid.discovery.k8s.podNamespaceEnvKey`|`Pod Env Variable`|Pod Env variable whose value is that pod's kubernetes namespace.|POD_NAMESPACE|No|
|`druid.discovery.k8s.coordinatorLeaderElectionConfigMapNamespace`|`k8s namespace`|Leader election algorithm requires creating a ConfigMap resource in a namespace. This MUST only be provided if different coordinator pods run in different namespaces, such setup is discouraged however.|coordinator pod's namespace|No|
|`druid.discovery.k8s.overlordLeaderElectionConfigMapNamespace`|`k8s namespace`|Leader election algorithm requires creating a ConfigMap resource in a namespace. This MUST only be provided if different overlord pods run in different namespaces, such setup is discouraged however.|overlord pod's namespace|No|
|`druid.discovery.k8s.leaseDuration`|`Duration`|Lease duration used by Leader Election algorithm. Candidates wait for this time before taking over previous Leader.|PT60S|No|
|`druid.discovery.k8s.renewDeadline`|`Duration`|Lease renewal period used by Leader.|PT17S|No|
|`druid.discovery.k8s.retryPeriod`|`Duration`|Retry wait used by Leader Election algorithm on failed operations.|PT5S|No|

### Gotchas

- Label/Annotation path in each pod spec MUST EXIST, which is easily satisfied if there is at least one label/annotation in the pod spec already. This limitation may be removed in future.
- Druid Pods need permissions to be able to add labels to self-pod, List and Watch other Pods, create ConfigMap for leader election. Assuming, "default" service account is used by Druid pods, you might need to add following or something similar Kubernetes Role and Role Binding.
- All Druid Pods belonging to one Druid cluster must be inside same kubernetes namespace.
- All Druid Pods need permissions to be able to add labels to self-pod, List and Watch other Pods, create and read ConfigMap for leader election. Assuming, "default" service account is used by Druid pods, you might need to add following or something similar Kubernetes Role and Role Binding.

```
apiVersion: rbac.authorization.k8s.io/v1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
package org.apache.druid.k8s.discovery;

import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableMap;
import com.google.inject.Inject;
import org.apache.druid.discovery.DiscoveryDruidNode;
Expand All @@ -42,7 +41,7 @@
*
* Labels -
* druidDiscoveryAnnouncement-<nodeRole.getJsonName()> = true
* druidDiscoveryAnnouncement-id = encodeHostPort(host:port)
* druidDiscoveryAnnouncement-id-hash = hashEncodeStringForLabelValue(host:port)
* druidDiscoveryAnnouncement-cluster-identifier = <clusterIdentifier>
*
* Annotation -
Expand Down Expand Up @@ -87,7 +86,7 @@ public void announce(DiscoveryDruidNode discoveryDruidNode)
LOGGER.info("Announcing DiscoveryDruidNode[%s]", discoveryDruidNode);

String roleAnnouncementLabel = getRoleAnnouncementLabel(discoveryDruidNode.getNodeRole());
String idAnnouncementLabel = getIdAnnouncementLabel();
String idAnnouncementLabel = getIdHashAnnouncementLabel();
String clusterIdentifierAnnouncementLabel = getClusterIdentifierAnnouncementLabel();
String infoAnnotation = getInfoAnnotation(discoveryDruidNode.getNodeRole());

Expand All @@ -100,7 +99,7 @@ public void announce(DiscoveryDruidNode discoveryDruidNode)
// checking if label/annotation path exists and create if not, however that could lead to race conditions
// so assuming the existence for now.
patches.add(createPatchObj(OP_ADD, getPodDefLabelPath(roleAnnouncementLabel), ANNOUNCEMENT_DONE));
patches.add(createPatchObj(OP_ADD, getPodDefLabelPath(idAnnouncementLabel), encodeHostPort(discoveryDruidNode.getDruidNode().getHostAndPortToUse())));
patches.add(createPatchObj(OP_ADD, getPodDefLabelPath(idAnnouncementLabel), hashEncodeStringForLabelValue(discoveryDruidNode.getDruidNode().getHostAndPortToUse())));
patches.add(createPatchObj(OP_ADD, getPodDefLabelPath(clusterIdentifierAnnouncementLabel), discoveryConfig.getClusterIdentifier()));
patches.add(createPatchObj(OP_ADD, getPodDefAnnocationPath(infoAnnotation), jsonMapper.writeValueAsString(discoveryDruidNode)));

Expand Down Expand Up @@ -130,14 +129,14 @@ public void unannounce(DiscoveryDruidNode discoveryDruidNode)
LOGGER.info("Unannouncing DiscoveryDruidNode[%s]", discoveryDruidNode);

String roleAnnouncementLabel = getRoleAnnouncementLabel(discoveryDruidNode.getNodeRole());
String idAnnouncementLabel = getIdAnnouncementLabel();
String idHashAnnouncementLabel = getIdHashAnnouncementLabel();
String clusterIdentifierAnnouncementLabel = getClusterIdentifierAnnouncementLabel();
String infoAnnotation = getInfoAnnotation(discoveryDruidNode.getNodeRole());

try {
List<Map<String, Object>> patches = new ArrayList<>();
patches.add(createPatchObj(OP_REMOVE, getPodDefLabelPath(roleAnnouncementLabel), null));
patches.add(createPatchObj(OP_REMOVE, getPodDefLabelPath(idAnnouncementLabel), null));
patches.add(createPatchObj(OP_REMOVE, getPodDefLabelPath(idHashAnnouncementLabel), null));
patches.add(createPatchObj(OP_REMOVE, getPodDefLabelPath(clusterIdentifierAnnouncementLabel), null));
patches.add(createPatchObj(OP_REMOVE, getPodDefAnnocationPath(infoAnnotation), null));

Expand Down Expand Up @@ -188,9 +187,9 @@ public static String getRoleAnnouncementLabel(NodeRole nodeRole)
return StringUtils.format("druidDiscoveryAnnouncement-%s", nodeRole.getJsonName());
}

private static String getIdAnnouncementLabel()
private static String getIdHashAnnouncementLabel()
{
return "druidDiscoveryAnnouncement-id";
return "druidDiscoveryAnnouncement-id-hash";
}

public static String getClusterIdentifierAnnouncementLabel()
Expand Down Expand Up @@ -222,8 +221,8 @@ public static String getLabelSelectorForNode(K8sDiscoveryConfig discoveryConfig,
discoveryConfig.getClusterIdentifier(),
K8sDruidNodeAnnouncer.getRoleAnnouncementLabel(nodeRole),
K8sDruidNodeAnnouncer.ANNOUNCEMENT_DONE,
K8sDruidNodeAnnouncer.getIdAnnouncementLabel(),
encodeHostPort(node.getHostAndPortToUse())
K8sDruidNodeAnnouncer.getIdHashAnnouncementLabel(),
hashEncodeStringForLabelValue(node.getHostAndPortToUse())
);
}

Expand All @@ -237,30 +236,14 @@ private String getPodDefAnnocationPath(String annotation)
return StringUtils.format("%s/%s", POD_ANNOTATIONS_PATH_PREFIX, annotation);
}

private static String encodeHostPort(String hostPort)
// a valid label must be an empty string or consist of alphanumeric characters, '-', '_' or '.', and
// must start and end with an alphanumeric character
private static String hashEncodeStringForLabelValue(String str)
{
//K8S requires that label values must match regex (([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])?
//So, it is essential to replace ':' with '-'

// it is assumed that hostname does not have ':' in it except for separating host and port
Preconditions.checkState(
hostPort.indexOf(':') == hostPort.lastIndexOf(':'),
"hostname in host:port[%s] has ':' in it", hostPort
);

return hostPort.replace(':', '-');
}

private String replaceLast(String str, char oldChar, char newChar)
{
char[] chars = str.toCharArray();
for (int i = chars.length - 1; i >= 0; i--) {
if (chars[i] == oldChar) {
chars[i] = newChar;
break;
}
int hash = str.hashCode();
if (hash < 0) {
hash = -1 * hash;
}

return String.valueOf(chars);
return String.valueOf(hash);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -94,11 +94,11 @@ public K8sDruidNodeDiscoveryProvider(
@Override
public BooleanSupplier getForNode(DruidNode node, NodeRole nodeRole)
{
return () -> !k8sApiClient.listPods(
return () -> k8sApiClient.listPods(
podInfo.getPodNamespace(),
K8sDruidNodeAnnouncer.getLabelSelectorForNode(discoveryConfig, nodeRole, node),
nodeRole
).getDruidNodes().isEmpty();
).getDruidNodes().containsKey(node.getHostAndPortToUse());
}

@Override
Expand Down Expand Up @@ -219,6 +219,11 @@ private void watch()
String labelSelector = K8sDruidNodeAnnouncer.getLabelSelectorForNodeRole(discoveryConfig, nodeRole);
boolean cacheInitialized = false;

if (!lifecycleLock.awaitStarted()) {
LOGGER.error("Lifecycle not started, Exited Watch for NodeRole [%s].", nodeRole);
return;
}

while (lifecycleLock.awaitStarted(1, TimeUnit.MILLISECONDS)) {
try {
DiscoveryDruidNodeList list = k8sApiClient.listPods(podInfo.getPodNamespace(), labelSelector, nodeRole);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -80,8 +80,8 @@ public void testAnnounce() throws Exception
),
ImmutableMap.of(
"op", "add",
"path", "/metadata/labels/druidDiscoveryAnnouncement-id",
"value", "test-host-80"
"path", "/metadata/labels/druidDiscoveryAnnouncement-id-hash",
"value", "1429561393"
),
ImmutableMap.of(
"op", "add",
Expand Down Expand Up @@ -127,7 +127,7 @@ public void testUnannounce() throws Exception
),
ImmutableMap.of(
"op", "remove",
"path", "/metadata/labels/druidDiscoveryAnnouncement-id"
"path", "/metadata/labels/druidDiscoveryAnnouncement-id-hash"
),
ImmutableMap.of(
"op", "remove",
Expand Down