Skip to content
This repository was archived by the owner on Apr 19, 2022. It is now read-only.
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
95 changes: 86 additions & 9 deletions snakebite/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,16 @@ def get_config_from_env(cls):
core_path = os.path.join(os.environ['HADOOP_HOME'], 'conf', 'core-site.xml')
core_configs = cls.read_core_config(core_path)

maybe_ha_name = None
if len(core_configs.get('namenodes', [])) == 1:
# We may have gotten an HA config from core-site.xml, try
# to use it to resolve HA names
maybe_ha_name = core_configs.get('namenodes')[0].get('namenode')

hdfs_path = os.path.join(os.environ['HADOOP_HOME'], 'conf', 'hdfs-site.xml')
hdfs_configs = cls.read_hdfs_config(hdfs_path)
# May have interpreted defaultFS as a NN, pass this to
# read_hdfs_config to try to resolve an HA configuration
hdfs_configs = cls.read_hdfs_config(hdfs_path, maybe_ha_name)

if (not core_configs) and (not hdfs_configs):
raise Exception("No config found in %s nor in %s" % (core_path, hdfs_path))
Expand Down Expand Up @@ -73,23 +81,32 @@ def read_core_config(cls, core_site_path):
else:
configs['use_sasl'] = False

if namenodes:
if namenodes:
configs['namenodes'] = namenodes

return configs

@classmethod
def read_hdfs_config(cls, hdfs_site_path):
def read_hdfs_config(cls, hdfs_site_path, maybe_ha_name=None):
configs = {}
ha_configs = cls.read_hdfs_ha_configs(hdfs_site_path)

namenodes = []
for property in cls.read_hadoop_config(hdfs_site_path):
if property.findall('name')[0].text.startswith("dfs.namenode.rpc-address"):
if property.findall('name')[0].text.startswith('dfs.namenode.rpc-address'):
prop_name = property.findall('name')[0].text
parse_result = urlparse("//" + property.findall('value')[0].text)
log.debug("Got namenode '%s' from %s" % (parse_result.geturl(), hdfs_site_path))
namenodes.append({"namenode": parse_result.hostname,
"port": parse_result.port if parse_result.port
else Namenode.DEFAULT_PORT})

if (prop_name == 'dfs.namenode.rpc-address' or
cls.valid_ha_namenode(maybe_ha_name,
ha_configs,
parse_result.geturl(),
hdfs_site_path,
prop_name)):
log.debug("Got namenode '%s' from %s" % (parse_result.geturl(), hdfs_site_path))
namenodes.append({"namenode": parse_result.hostname,
"port": parse_result.port if parse_result.port
else Namenode.DEFAULT_PORT})

if property.findall('name')[0].text == 'fs.trash.interval':
configs['use_trash'] = True
Expand Down Expand Up @@ -151,7 +168,11 @@ def get_external_config(cls):

hdfs_configs = {}
for hdfs_conf_path in cls.hdfs_try_paths:
hdfs_configs = cls.read_hdfs_config(hdfs_conf_path)
if len(core_configs.get('namenodes', [])) == 1:
hdfs_configs = cls.read_hdfs_config(hdfs_conf_path,
core_configs.get('namenodes')[0].get('namenode'))
else:
hdfs_configs = cls.read_hdfs_config(hdfs_conf_path)
if hdfs_configs:
break

Expand All @@ -169,3 +190,59 @@ def get_external_config(cls):
}

return configs

@classmethod
def valid_ha_namenode(cls, maybe_ha_name, ha_configs, nn_url, hdfs_site_path, name):
    """Decide whether an HA-style rpc-address property should be used.

    ``name`` is expected to have exactly five dot-separated parts, e.g.
    ``dfs.namenode.rpc-address.<cluster>.<logical namenode>``; the fourth
    part is read as the cluster and the fifth as the logical namenode id.
    The entry is accepted only when all of the following hold:

    * the cluster equals ``maybe_ha_name``;
    * the cluster is listed under ``ha_configs['clusters']``;
    * the logical namenode id is listed under
      ``ha_configs['logical_namenodes'][cluster]``.

    :param maybe_ha_name: cluster name the caller wants to resolve; with
        ``None`` every entry is rejected, because no cluster can match it
    :param ha_configs: dict shaped like the result of
        ``read_hdfs_ha_configs`` (missing keys are treated as empty)
    :param nn_url: namenode address, used only in log messages
    :param hdfs_site_path: path of the config file, used only in log messages
    :param name: full property name being examined
    :return: ``True`` if the entry belongs to the requested cluster and is
        consistently declared, ``False`` otherwise
    """
    name_parts = name.split('.')

    # Arguments are handed to log.debug un-formatted so the message is only
    # rendered when debug logging is enabled (assumes ``log`` is a standard
    # ``logging`` logger -- confirm against the module header).
    if len(name_parts) != 5:
        log.debug("Could not parse cluster name from %s, skipping %s from %s",
                  name, nn_url, hdfs_site_path)
        return False

    cluster, logical_namenode = name_parts[-2:]

    if cluster != maybe_ha_name:
        log.debug("Skipping %s from %s, because it does not belong to our active cluster %s",
                  nn_url, hdfs_site_path, maybe_ha_name)
        return False

    if cluster not in ha_configs.get('clusters', []):
        log.debug("Skipping %s from %s, because it is not in the configured cluster list: %s",
                  nn_url, hdfs_site_path, ha_configs.get('clusters'))
        return False

    cluster_logical_namenodes = ha_configs.get('logical_namenodes', {}).get(cluster, [])
    if logical_namenode not in cluster_logical_namenodes:
        log.debug("Could not find logical mapping for %s in cluster %s from %s, skipping",
                  nn_url, cluster, hdfs_site_path)
        return False

    return True

@classmethod
def read_hdfs_ha_configs(cls, hdfs_site_path):
    """Collect the HA cluster layout declared in an hdfs-site file.

    Reads the properties returned by ``cls.read_hadoop_config`` and builds
    a dict with up to two keys:

    * ``'clusters'`` -- list of names from ``dfs.nameservices``;
    * ``'logical_namenodes'`` -- dict mapping each cluster to the list of
      ids from its ``dfs.ha.namenodes.<cluster>`` property.

    Comma-separated values are trimmed and empty items dropped, so
    ``"nn1, nn2"`` yields ``['nn1', 'nn2']``. A key is absent when the
    corresponding property does not appear in the file.

    :param hdfs_site_path: path of the hdfs-site.xml file to read
    :return: dict as described above (possibly empty)
    """
    def split_csv(text):
        # An empty <value/> element has ``text`` of None; treat it as "".
        pieces = (piece.strip() for piece in (text or '').split(','))
        return [piece for piece in pieces if piece]

    ha_configs = {}
    for prop in cls.read_hadoop_config(hdfs_site_path):
        name = prop.findall('name')[0].text or ''
        value = prop.findall('value')[0].text

        if name == 'dfs.nameservices':
            ha_configs['clusters'] = split_csv(value)
        elif name.startswith('dfs.ha.namenodes'):
            name_parts = name.split('.')
            # Expect exactly dfs.ha.namenodes.<cluster>; anything else
            # cannot be attributed to a cluster.
            if len(name_parts) != 4:
                log.debug("Could not parse cluster name from %s, skipping", name)
                continue
            cluster = name_parts[-1]
            ha_configs.setdefault('logical_namenodes', {})[cluster] = split_csv(value)
    return ha_configs
33 changes: 31 additions & 2 deletions test/config_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ def _verify_hdfs_noport_settings(self, config):

def test_read_hdfs_config_ha(self):
hdfs_site_path = self.get_config_path('ha-port-hdfs-site.xml')
config = HDFSConfig.read_hdfs_config(hdfs_site_path)
config = HDFSConfig.read_hdfs_config(hdfs_site_path, 'testha')
self._verify_hdfs_settings(config)

def test_read_core_config_ha(self):
Expand Down Expand Up @@ -97,7 +97,7 @@ def test_ha_without_ports(self, environ_get):
@patch('os.environ.get')
def test_ha_config_trash_in_core(self, environ_get):
environ_get.return_value = False
HDFSConfig.core_try_paths = (self.get_config_path('core-with-trash.xml'),)
HDFSConfig.core_try_paths = (self.get_config_path('ha-core-with-trash.xml'),)
HDFSConfig.hdfs_try_paths = (self.get_config_path('ha-noport-hdfs-site.xml'),)
config = HDFSConfig.get_external_config()

Expand Down Expand Up @@ -143,3 +143,32 @@ def test_use_datanode_hostname_configs(self):
conf_path = self.get_config_path('use-datanode-hostname-hdfs-site.xml')
config = HDFSConfig.read_hdfs_config(conf_path)
self.assertTrue(config['use_datanode_hostname'])

def test_ha_multi(self):
    """External config built from ha-core-site.xml plus a multi-cluster hdfs-site.

    ha-multi-hdfs-site.xml declares two nameservices (testha and
    testotherha); the result must satisfy ``_verify_hdfs_settings``.
    """
    core_site = self.get_config_path('ha-core-site.xml')
    HDFSConfig.core_try_paths = (core_site,)
    hdfs_site = self.get_config_path('ha-multi-hdfs-site.xml')
    HDFSConfig.hdfs_try_paths = (hdfs_site,)

    resolved = HDFSConfig.get_external_config()
    self._verify_hdfs_settings(resolved)

def test_ha_multi_missing_nameservices(self):
    """HA rpc-addresses are ignored when hdfs-site lacks ``dfs.nameservices``.

    The fixture declares namenodes for two clusters but no nameservice
    list, so only the single ``testha:8020`` entry is expected
    (presumably derived from the core-site default FS -- the core-site
    fixture is not shown here).
    """
    HDFSConfig.core_try_paths = (self.get_config_path('ha-core-site.xml'),)
    HDFSConfig.hdfs_try_paths = (
        self.get_config_path('ha-multi-no-nameservices-hdfs-site.xml'),)
    config = HDFSConfig.get_external_config()

    # assertEqual instead of the deprecated assertEquals alias.
    self.assertEqual(config['namenodes'], [{'namenode': 'testha', 'port': 8020}])

def test_ha_multi_bad_logical_nn_mapping(self):
    """HA rpc-addresses are ignored when their ids are not declared.

    In the fixture ``dfs.ha.namenodes.testha`` lists ``nonexistent1`` and
    ``nonexistent2``, which match none of the testha rpc-address keys, so
    only the single ``testha:8020`` entry is expected (presumably derived
    from the core-site default FS -- the core-site fixture is not shown
    here).
    """
    HDFSConfig.core_try_paths = (self.get_config_path('ha-core-site.xml'),)
    HDFSConfig.hdfs_try_paths = (self.get_config_path('ha-multi-bad-nn-hdfs-site.xml'),)
    config = HDFSConfig.get_external_config()

    # assertEqual instead of the deprecated assertEquals alias.
    self.assertEqual(config['namenodes'], [{'namenode': 'testha', 'port': 8020}])

def test_ha_multi_missing_default_fs(self):
    """No namenodes are resolved when core-site names no default FS.

    ha-no-default-fs-core-site.xml is an empty ``<configuration>``, so
    there is no cluster name to match against the HA entries in
    ha-multi-hdfs-site.xml and the namenode list must be empty.
    """
    HDFSConfig.core_try_paths = (self.get_config_path('ha-no-default-fs-core-site.xml'),)
    HDFSConfig.hdfs_try_paths = (self.get_config_path('ha-multi-hdfs-site.xml'),)
    config = HDFSConfig.get_external_config()

    # The leftover debug print was dropped: a failing assertEqual already
    # reports the actual value. assertEqual replaces the deprecated alias.
    self.assertEqual(config['namenodes'], [])
19 changes: 19 additions & 0 deletions test/testconfig/conf/ha-core-with-trash.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>

<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://testha</value>
</property>

<property>
<name>fs.trash.interval</name>
<value>1</value>
</property>

<property>
<name>fs.trash.checkpoint.interval</name>
<value>1</value>
</property>
</configuration>
60 changes: 60 additions & 0 deletions test/testconfig/conf/ha-multi-bad-nn-hdfs-site.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>

<configuration>
<property>
<name>dfs.nameservices</name>
<value>testha,testotherha</value>
</property>

<property>
<name>dfs.ha.namenodes.testha</name>
<value>nonexistent1,nonexistent2</value>
</property>

<property>
<name>dfs.namenode.rpc-address.testha.namenode1-mydomain</name>
<value>namenode1.mydomain:8888</value>
</property>

<property>
<name>dfs.namenode.rpc-address.testha.namenode2-mydomain</name>
<value>namenode2.mydomain:8888</value>
</property>

<property>
<name>dfs.namenode.http-address.testha.namenode1-mydomain</name>
<value>namenode1.mydomain:50070</value>
</property>

<property>
<name>dfs.namenode.http-address.testha.namenode2-mydomain</name>
<value>namenode2.mydomain:50070</value>
</property>

<property>
<name>dfs.ha.namenodes.testotherha</name>
<value>namenode-other1-mydomain,namenode-other2-mydomain</value>
</property>

<property>
<name>dfs.namenode.rpc-address.testotherha.namenode-other1-mydomain</name>
<value>namenode-other1.mydomain:8888</value>
</property>

<property>
<name>dfs.namenode.rpc-address.testotherha.namenode-other2-mydomain</name>
<value>namenode-other2.mydomain:8888</value>
</property>

<property>
<name>dfs.namenode.http-address.testotherha.namenode-other1-mydomain</name>
<value>namenode-other1.mydomain:50070</value>
</property>

<property>
<name>dfs.namenode.http-address.testotherha.namenode-other2-mydomain</name>
<value>namenode-other2.mydomain:50070</value>
</property>

</configuration>
60 changes: 60 additions & 0 deletions test/testconfig/conf/ha-multi-hdfs-site.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>

<configuration>
<property>
<name>dfs.nameservices</name>
<value>testha,testotherha</value>
</property>

<property>
<name>dfs.ha.namenodes.testha</name>
<value>namenode1-mydomain,namenode2-mydomain</value>
</property>

<property>
<name>dfs.namenode.rpc-address.testha.namenode1-mydomain</name>
<value>namenode1.mydomain:8888</value>
</property>

<property>
<name>dfs.namenode.rpc-address.testha.namenode2-mydomain</name>
<value>namenode2.mydomain:8888</value>
</property>

<property>
<name>dfs.namenode.http-address.testha.namenode1-mydomain</name>
<value>namenode1.mydomain:50070</value>
</property>

<property>
<name>dfs.namenode.http-address.testha.namenode2-mydomain</name>
<value>namenode2.mydomain:50070</value>
</property>

<property>
<name>dfs.ha.namenodes.testotherha</name>
<value>namenode-other1-mydomain,namenode-other2-mydomain</value>
</property>

<property>
<name>dfs.namenode.rpc-address.testotherha.namenode-other1-mydomain</name>
<value>namenode-other1.mydomain:8888</value>
</property>

<property>
<name>dfs.namenode.rpc-address.testotherha.namenode-other2-mydomain</name>
<value>namenode-other2.mydomain:8888</value>
</property>

<property>
<name>dfs.namenode.http-address.testotherha.namenode-other1-mydomain</name>
<value>namenode-other1.mydomain:50070</value>
</property>

<property>
<name>dfs.namenode.http-address.testotherha.namenode-other2-mydomain</name>
<value>namenode-other2.mydomain:50070</value>
</property>

</configuration>
55 changes: 55 additions & 0 deletions test/testconfig/conf/ha-multi-no-nameservices-hdfs-site.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>

<configuration>
<property>
<name>dfs.ha.namenodes.testha</name>
<value>namenode1-mydomain,namenode2-mydomain</value>
</property>

<property>
<name>dfs.namenode.rpc-address.testha.namenode1-mydomain</name>
<value>namenode1.mydomain:8888</value>
</property>

<property>
<name>dfs.namenode.rpc-address.testha.namenode2-mydomain</name>
<value>namenode2.mydomain:8888</value>
</property>

<property>
<name>dfs.namenode.http-address.testha.namenode1-mydomain</name>
<value>namenode1.mydomain:50070</value>
</property>

<property>
<name>dfs.namenode.http-address.testha.namenode2-mydomain</name>
<value>namenode2.mydomain:50070</value>
</property>

<property>
<name>dfs.ha.namenodes.testotherha</name>
<value>namenode-other1-mydomain,namenode-other2-mydomain</value>
</property>

<property>
<name>dfs.namenode.rpc-address.testotherha.namenode-other1-mydomain</name>
<value>namenode-other1.mydomain:8888</value>
</property>

<property>
<name>dfs.namenode.rpc-address.testotherha.namenode-other2-mydomain</name>
<value>namenode-other2.mydomain:8888</value>
</property>

<property>
<name>dfs.namenode.http-address.testotherha.namenode-other1-mydomain</name>
<value>namenode-other1.mydomain:50070</value>
</property>

<property>
<name>dfs.namenode.http-address.testotherha.namenode-other2-mydomain</name>
<value>namenode-other2.mydomain:50070</value>
</property>

</configuration>
5 changes: 5 additions & 0 deletions test/testconfig/conf/ha-no-default-fs-core-site.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>

<configuration>
</configuration>