Skip to content
This repository was archived by the owner on Apr 19, 2022. It is now read-only.
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
137 changes: 77 additions & 60 deletions snakebite/config.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import os
import sys
import logging
import xml.etree.ElementTree as ET

Expand All @@ -11,74 +10,81 @@
class HDFSConfig(object):
use_trash = False

@classmethod
def get_config_from_env(cls):
    '''Gets configuration out of environment.

    Reads core-site.xml and hdfs-site.xml from $HADOOP_HOME/conf.
    Returns list of dicts - list of namenode representations.
    Raises Exception when neither file yields any namenode.
    '''
    core_path = os.path.join(os.environ['HADOOP_HOME'], 'conf', 'core-site.xml')
    configs = cls.read_core_config(core_path)

    hdfs_path = os.path.join(os.environ['HADOOP_HOME'], 'conf', 'hdfs-site.xml')
    # read_hdfs_config takes the parsed properties plus the HA environment
    # information, so resolve those first (mirrors get_external_config).
    hadoop_config = cls.read_hadoop_config(hdfs_path)
    environment = cls.get_environment(configs)
    environment_suffixes = cls.get_environment_suffixes(environment, hadoop_config)
    tmp_config = cls.read_hdfs_config(hdfs_path, hadoop_config, environment, environment_suffixes)

    if tmp_config:
        # if config exists in hdfs - it's HA config, update configs
        configs = tmp_config

    if not configs:
        raise Exception("No config found in %s nor in %s" % (core_path, hdfs_path))

    return configs


@staticmethod
def read_hadoop_config(hdfs_conf_path):
config_entries = []
if os.path.exists(hdfs_conf_path):
try:
tree = ET.parse(hdfs_conf_path)
except:
log.error("Unable to parse %s" % hdfs_conf_path)
return
return config_entries
root = tree.getroot()
for p in root.findall("./property"):
yield p

config_entries.extend(root.findall("./property"))
return config_entries

@classmethod
def read_core_config(cls, core_site_path):
    """Extract namenode entries from a core-site.xml file.

    Scans the file's <property> elements: 'fs.defaultFS' yields a
    namenode descriptor, and 'fs.trash.interval' (via set_trash_mode)
    enables the class-wide trash flag.
    Returns a list of {"namenode": host, "port": port} dicts.
    """
    conf = cls.read_hadoop_config(core_site_path)
    configs = []
    for property in conf:
        if property.findall('name')[0].text == 'fs.defaultFS':
            parse_result = urlparse(property.findall('value')[0].text)
            log.debug("Got namenode '%s' from %s" % (parse_result.geturl(), core_site_path))
            configs.append(cls.get_namenode(parse_result))

        # set_trash_mode checks the property name itself, so no outer
        # 'fs.trash.interval' test is needed here.
        cls.set_trash_mode(property)

    return configs

@classmethod
def read_hdfs_config(cls, hdfs_site_path):
def read_hdfs_config(cls, config_path, conf, environment, environment_suffixes):
configs = []
for property in cls.read_hadoop_config(hdfs_site_path):
if property.findall('name')[0].text.startswith("dfs.namenode.rpc-address"):
for property in conf:
if cls.is_namenode_host(property, environment, environment_suffixes):
parse_result = urlparse("//" + property.findall('value')[0].text)
log.debug("Got namenode '%s' from %s" % (parse_result.geturl(), hdfs_site_path))
configs.append({"namenode": parse_result.hostname,
"port": parse_result.port if parse_result.port
else Namenode.DEFAULT_PORT})
log.debug("Got namenode '%s' from %s" % (parse_result.geturl(), config_path))
configs.append(cls.get_namenode(parse_result))

if property.findall('name')[0].text == 'fs.trash.interval':
cls.use_trash = True
cls.set_trash_mode(property)

return configs

@classmethod
def is_namenode_host(cls, property, environment, environment_suffixes):
name = property.findall('name')[0].text
if not environment_suffixes:
return name.startswith("dfs.namenode.rpc-address")
for suffix in environment_suffixes:
if name.startswith("dfs.namenode.rpc-address." + environment + "." + suffix):
return True
return False

@classmethod
def get_namenode(cls, parse_result):
return {
"namenode": parse_result.hostname,
"port": parse_result.port if parse_result.port else Namenode.DEFAULT_PORT
}

@classmethod
def set_trash_mode(cls, property):
if property.findall('name')[0].text == 'fs.trash.interval':
cls.use_trash = True

@classmethod
def get_environment(cls, core_config):
for config in core_config:
environment_name = config['namenode']
if environment_name:
return environment_name

@classmethod
def get_environment_suffixes(cls, environment, hadoop_config):
for property in hadoop_config:
if property.findall('name')[0].text == 'dfs.ha.namenodes.' + environment:
return property.findall('value')[0].text.split(',')


core_try_paths = ('/etc/hadoop/conf/core-site.xml',
'/usr/local/etc/hadoop/conf/core-site.xml',
'/usr/local/hadoop/conf/core-site.xml')
Expand All @@ -90,19 +96,30 @@ def read_hdfs_config(cls, hdfs_site_path):
@classmethod
def get_external_config(cls):
    """Locate and read Hadoop configuration from disk.

    Prefers $HADOOP_HOME/conf when set, otherwise probes the well-known
    core_try_paths/hdfs_try_paths locations. Namenodes found in an
    hdfs-site file (HA setup) take precedence over the core-site default.
    Raises Exception when no configuration is found anywhere.
    """
    if os.environ.get('HADOOP_HOME'):
        conf_dir = os.path.join(os.environ['HADOOP_HOME'], 'conf')
        core_paths = [os.path.join(conf_dir, 'core-site.xml')]
        hdfs_paths = [os.path.join(conf_dir, 'hdfs-site.xml')]
    else:
        # Try to find other paths
        core_paths = cls.core_try_paths
        hdfs_paths = cls.hdfs_try_paths

    configs = []
    for core_conf_path in core_paths:
        configs = cls.read_core_config(core_conf_path)
        if configs:
            break

    # The environment (HA nameservice) is derived from the core-site
    # result and scopes the rpc-address lookup below.
    environment = cls.get_environment(configs)

    for hdfs_conf_path in hdfs_paths:
        hadoop_config = cls.read_hadoop_config(hdfs_conf_path)
        environment_suffixes = cls.get_environment_suffixes(environment, hadoop_config)
        tmp_config = cls.read_hdfs_config(hdfs_conf_path, hadoop_config, environment, environment_suffixes)
        if tmp_config:
            # if there is hdfs-site data available return it
            return tmp_config

    if not configs:
        raise Exception("No configs found")

    return configs
36 changes: 33 additions & 3 deletions test/config_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,19 @@ def _verify_hdfs_settings(self, config):
self.assertEqual('namenode2.mydomain', config[1]['namenode'])
self.assertEqual(8888, config[1]['port'])

# namenodes in ha-port-hdfs-site.xml with no namespace (so all of them expected)
def _verify_hdfs_settings_all(self, config):
    """Assert all three namenodes from ha-port-hdfs-site.xml are present.

    assertEquals is a deprecated alias of assertEqual — use the
    canonical name throughout.
    """
    self.assertEqual(len(config), 3)
    # assert first NN
    self.assertEqual('namenode1.mydomain', config[0]['namenode'])
    self.assertEqual(8888, config[0]['port'])
    # assert second NN
    self.assertEqual('namenode2.mydomain', config[1]['namenode'])
    self.assertEqual(8888, config[1]['port'])
    # assert third NN
    self.assertEqual('namenode.other-domain', config[2]['namenode'])
    self.assertEqual(8888, config[2]['port'])

def _verify_hdfs_noport_settings(self, config):
self.assertEquals(len(config), 2)
# assert first NN
Expand All @@ -40,10 +53,27 @@ def _verify_hdfs_noport_settings(self, config):
self.assertEqual('namenode2.mydomain', config[1]['namenode'])
self.assertEqual(8020, config[1]['port'])

# namenodes in ha-port-hdfs-site.xml using namespace in ha-core-site.xml
def _verify_hdfs_port_settings(self, config):
    """Assert the two namespace-scoped namenodes are present.

    assertEquals is a deprecated alias of assertEqual — use the
    canonical name throughout.
    """
    self.assertEqual(len(config), 2)
    # assert first NN
    self.assertEqual('namenode1.mydomain', config[0]['namenode'])
    self.assertEqual(8888, config[0]['port'])
    # assert second NN
    self.assertEqual('namenode2.mydomain', config[1]['namenode'])
    self.assertEqual(8888, config[1]['port'])

def test_read_hdfs_config_ha(self):
    """Without a namespace, every rpc-address namenode is returned."""
    hdfs_core_path = self.get_config_path('ha-port-hdfs-site.xml')
    conf = HDFSConfig.read_hadoop_config(hdfs_core_path)
    config = HDFSConfig.read_hdfs_config('', conf, '', [])
    self._verify_hdfs_settings_all(config)

def test_read_hdfs_port_config_ha(self):
    """With a namespace, only the matching namenode ids are returned."""
    site_path = self.get_config_path('ha-port-hdfs-site.xml')
    properties = HDFSConfig.read_hadoop_config(site_path)
    suffixes = ['namenode1-mydomain', 'namenode2-mydomain']
    config = HDFSConfig.read_hdfs_config('', properties, 'testha', suffixes)
    self._verify_hdfs_port_settings(config)

def test_read_core_config_ha(self):
core_site_path = self.get_config_path('ha-core-site.xml')
Expand Down
5 changes: 5 additions & 0 deletions test/testconfig/conf/ha-port-hdfs-site.xml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,11 @@
<value>namenode2.mydomain:8888</value>
</property>

<property>
<name>dfs.namenode.rpc-address.testha.other-domain</name>
<value>namenode.other-domain:8888</value>
</property>

<property>
<name>dfs.namenode.http-address.testha.namenode1-mydomain</name>
<value>namenode1.mydomain:50070</value>
Expand Down