diff --git a/snakebite/config.py b/snakebite/config.py
index 9582d74..cee0c37 100644
--- a/snakebite/config.py
+++ b/snakebite/config.py
@@ -1,5 +1,4 @@
import os
-import sys
import logging
import xml.etree.ElementTree as ET
@@ -11,74 +10,81 @@
class HDFSConfig(object):
use_trash = False
- @classmethod
- def get_config_from_env(cls):
- '''Gets configuration out of environment.
-
- Returns list of dicts - list of namenode representations
- '''
- core_path = os.path.join(os.environ['HADOOP_HOME'], 'conf', 'core-site.xml')
- configs = cls.read_core_config(core_path)
-
- hdfs_path = os.path.join(os.environ['HADOOP_HOME'], 'conf', 'hdfs-site.xml')
- tmp_config = cls.read_hdfs_config(hdfs_path)
-
- if tmp_config:
- # if config exists in hdfs - it's HA config, update configs
- configs = tmp_config
-
- if not configs:
- raise Exception("No config found in %s nor in %s" % (core_path, hdfs_path))
-
- return configs
-
-
@staticmethod
def read_hadoop_config(hdfs_conf_path):
+ config_entries = []
if os.path.exists(hdfs_conf_path):
try:
tree = ET.parse(hdfs_conf_path)
except:
log.error("Unable to parse %s" % hdfs_conf_path)
- return
+ return config_entries
root = tree.getroot()
- for p in root.findall("./property"):
- yield p
-
+ config_entries.extend(root.findall("./property"))
+ return config_entries
@classmethod
def read_core_config(cls, core_site_path):
- config = []
- for property in cls.read_hadoop_config(core_site_path):
+ conf = cls.read_hadoop_config(core_site_path)
+ configs = []
+ for property in conf:
if property.findall('name')[0].text == 'fs.defaultFS':
parse_result = urlparse(property.findall('value')[0].text)
- log.debug("Got namenode '%s' from %s" % (parse_result.geturl(), core_site_path))
+ configs.append(cls.get_namenode(parse_result))
- config.append({"namenode": parse_result.hostname,
- "port": parse_result.port if parse_result.port
- else Namenode.DEFAULT_PORT})
-
- if property.findall('name')[0].text == 'fs.trash.interval':
- cls.use_trash = True
+ cls.set_trash_mode(property)
- return config
+ return configs
@classmethod
- def read_hdfs_config(cls, hdfs_site_path):
+ def read_hdfs_config(cls, config_path, conf, environment, environment_suffixes):
configs = []
- for property in cls.read_hadoop_config(hdfs_site_path):
- if property.findall('name')[0].text.startswith("dfs.namenode.rpc-address"):
+ for property in conf:
+ if cls.is_namenode_host(property, environment, environment_suffixes):
parse_result = urlparse("//" + property.findall('value')[0].text)
- log.debug("Got namenode '%s' from %s" % (parse_result.geturl(), hdfs_site_path))
- configs.append({"namenode": parse_result.hostname,
- "port": parse_result.port if parse_result.port
- else Namenode.DEFAULT_PORT})
+ log.debug("Got namenode '%s' from %s" % (parse_result.geturl(), config_path))
+ configs.append(cls.get_namenode(parse_result))
- if property.findall('name')[0].text == 'fs.trash.interval':
- cls.use_trash = True
+ cls.set_trash_mode(property)
return configs
+ @classmethod
+ def is_namenode_host(cls, property, environment, environment_suffixes):
+ name = property.findall('name')[0].text
+ if not environment_suffixes:
+ return name.startswith("dfs.namenode.rpc-address")
+ for suffix in environment_suffixes:
+ if name.startswith("dfs.namenode.rpc-address." + environment + "." + suffix):
+ return True
+ return False
+
+ @classmethod
+ def get_namenode(cls, parse_result):
+ return {
+ "namenode": parse_result.hostname,
+ "port": parse_result.port if parse_result.port else Namenode.DEFAULT_PORT
+ }
+
+ @classmethod
+ def set_trash_mode(cls, property):
+ if property.findall('name')[0].text == 'fs.trash.interval':
+ cls.use_trash = True
+
+ @classmethod
+ def get_environment(cls, core_config):
+ for config in core_config:
+ environment_name = config['namenode']
+ if environment_name:
+ return environment_name
+
+ @classmethod
+ def get_environment_suffixes(cls, environment, hadoop_config):
+ for property in hadoop_config:
+ if property.findall('name')[0].text == 'dfs.ha.namenodes.' + environment:
+ return property.findall('value')[0].text.split(',')
+
+
core_try_paths = ('/etc/hadoop/conf/core-site.xml',
'/usr/local/etc/hadoop/conf/core-site.xml',
'/usr/local/hadoop/conf/core-site.xml')
@@ -90,19 +96,30 @@ def read_hdfs_config(cls, hdfs_site_path):
@classmethod
def get_external_config(cls):
if os.environ.get('HADOOP_HOME'):
- configs = cls.get_config_from_env()
- return configs
+ core_paths = [os.path.join(os.environ['HADOOP_HOME'], 'conf', 'core-site.xml')]
+ hdfs_paths = [os.path.join(os.environ['HADOOP_HOME'], 'conf', 'hdfs-site.xml')]
else:
# Try to find other paths
- configs = []
- for core_conf_path in cls.core_try_paths:
- configs = cls.read_core_config(core_conf_path)
- if configs:
- break
-
- for hdfs_conf_path in cls.hdfs_try_paths:
- tmp_config = cls.read_hdfs_config(hdfs_conf_path)
- if tmp_config:
- # if there is hdfs-site data available return it
- return tmp_config
- return configs
+ core_paths = cls.core_try_paths
+ hdfs_paths = cls.hdfs_try_paths
+
+ configs = []
+ for core_conf_path in core_paths:
+ configs = cls.read_core_config(core_conf_path)
+ if configs:
+ break
+
+ environment = cls.get_environment(configs)
+
+ for hdfs_conf_path in hdfs_paths:
+ hadoop_config = cls.read_hadoop_config(hdfs_conf_path)
+ environment_suffixes = cls.get_environment_suffixes(environment, hadoop_config)
+ tmp_config = cls.read_hdfs_config(hdfs_conf_path, hadoop_config, environment, environment_suffixes)
+ if tmp_config:
+ # if there is hdfs-site data available return it
+ return tmp_config
+
+ if not configs:
+ raise Exception("No configs found")
+
+ return configs
diff --git a/test/config_test.py b/test/config_test.py
index e337b9c..3aa4abb 100644
--- a/test/config_test.py
+++ b/test/config_test.py
@@ -31,6 +31,19 @@ def _verify_hdfs_settings(self, config):
self.assertEqual('namenode2.mydomain', config[1]['namenode'])
self.assertEqual(8888, config[1]['port'])
+ # namenodes in ha-port-hdfs-site.xml with no namespace (so all of them expected)
+ def _verify_hdfs_settings_all(self, config):
+ self.assertEquals(len(config), 3)
+ # assert first NN
+ self.assertEqual('namenode1.mydomain', config[0]['namenode'])
+ self.assertEqual(8888, config[0]['port'])
+ # assert second NN
+ self.assertEqual('namenode2.mydomain', config[1]['namenode'])
+ self.assertEqual(8888, config[1]['port'])
+ # assert third NN
+ self.assertEqual('namenode.other-domain', config[2]['namenode'])
+ self.assertEqual(8888, config[2]['port'])
+
def _verify_hdfs_noport_settings(self, config):
self.assertEquals(len(config), 2)
# assert first NN
@@ -40,10 +53,27 @@ def _verify_hdfs_noport_settings(self, config):
self.assertEqual('namenode2.mydomain', config[1]['namenode'])
self.assertEqual(8020, config[1]['port'])
+ # namenodes in ha-port-hdfs-site.xml using namespace in ha-core-site.xml
+ def _verify_hdfs_port_settings(self, config):
+ self.assertEquals(len(config), 2)
+ # assert first NN
+ self.assertEqual('namenode1.mydomain', config[0]['namenode'])
+ self.assertEqual(8888, config[0]['port'])
+ # assert second NN
+ self.assertEqual('namenode2.mydomain', config[1]['namenode'])
+ self.assertEqual(8888, config[1]['port'])
+
def test_read_hdfs_config_ha(self):
- hdfs_site_path = self.get_config_path('ha-port-hdfs-site.xml')
- config = HDFSConfig.read_hdfs_config(hdfs_site_path)
- self._verify_hdfs_settings(config)
+ hdfs_core_path = self.get_config_path('ha-port-hdfs-site.xml')
+ conf = HDFSConfig.read_hadoop_config(hdfs_core_path)
+ config = HDFSConfig.read_hdfs_config('', conf, '', [])
+ self._verify_hdfs_settings_all(config)
+
+ def test_read_hdfs_port_config_ha(self):
+ hdfs_core_path = self.get_config_path('ha-port-hdfs-site.xml')
+ conf = HDFSConfig.read_hadoop_config(hdfs_core_path)
+ config = HDFSConfig.read_hdfs_config('', conf, 'testha', ['namenode1-mydomain', 'namenode2-mydomain'])
+ self._verify_hdfs_port_settings(config)
def test_read_core_config_ha(self):
core_site_path = self.get_config_path('ha-core-site.xml')
diff --git a/test/testconfig/conf/ha-port-hdfs-site.xml b/test/testconfig/conf/ha-port-hdfs-site.xml
index 5aa344b..654ca51 100644
--- a/test/testconfig/conf/ha-port-hdfs-site.xml
+++ b/test/testconfig/conf/ha-port-hdfs-site.xml
@@ -22,6 +22,11 @@
namenode2.mydomain:8888
+
+ dfs.namenode.rpc-address.testha.other-domain
+ namenode.other-domain:8888
+
+
dfs.namenode.http-address.testha.namenode1-mydomain
namenode1.mydomain:50070