#
# Cookbook Name:: hadoop
# Recipe:: apache
#
# Copyright 2013, whitestar
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

require 'digest/sha2'

# Mix Commons::PackageUtils and Hadoop::Helper (in that order) into
# Chef::Recipe so that their methods can be called directly from recipes.
[Commons::PackageUtils, Hadoop::Helper].each do |helper_module|
  ::Chef::Recipe.send(:include, helper_module)
end

# List of configuration files to render; it stays nil until one of the
# version-specific lists below is selected for the Hadoop major version.
conf_files = nil

# Configuration files rendered for the 1.x line.
# Currently not rendered: ssl-client.xml, ssl-server.xml
conf_files_v1 = %w[
  capacity-scheduler.xml
  configuration.xsl
  core-site.xml
  fair-scheduler.xml
  hadoop-env.sh
  hadoop-metrics2.properties
  hadoop-policy.xml
  hdfs-site.xml
  hosts.include
  hosts.exclude
  log4j.properties
  mapred-queue-acls.xml
  mapred-site.xml
  masters
  slaves
]

# Files of the 1.x list that are dropped for the 2.x line ...
v1_only_conf_files = %w[
  fair-scheduler.xml
  mapred-queue-acls.xml
  masters
]

# ... and files that are added for the 2.x line.
v2_extra_conf_files = %w[
  httpfs-env.sh
  httpfs-log4j.properties
  httpfs-signature.secret
  httpfs-site.xml
  mapred-env.sh
  yarn-env.sh
  yarn-site.xml
]

# The 2.x list is the 1.x list minus the 1.x-only files plus the 2.x extras.
conf_files_v2 = (conf_files_v1 - v1_only_conf_files) + v2_extra_conf_files

# for ver. 1.0
krb5_conf_files = %w[
  krb5-strong.conf
  krb5-weak.conf
]

# Split the configured Hadoop version into the pieces used throughout this
# recipe: the full version, the major version ('1' / '2') and the middle
# version (compared against values such as '1.2' below).
version, major_version, middle_version =
  parse_version_number(node['hadoop']['version'])

# Set up the daemon accounts that belong to this major version.
users = get_users(major_version)
hadoop_accounts_setup 'hadoop daemon users' do
  users(users)
end

# Select the configuration file list for the major version; any other major
# version leaves conf_files as nil.
conf_files =
  case major_version
  when '1' then conf_files_v1
  when '2' then conf_files_v2
  end

# Version 1.2 has one additional file. conf_files refers to the selected
# list itself (not a copy), so the push extends that list in place.
conf_files.push('task-log4j.properties') if middle_version == '1.2'

# Set up the directories for this major version; the helper's return value
# is logged here and later handed to the templates as :active_vol_nums.
active_vol_nums = setup_directories(major_version)
log "This node active volumes: #{active_vol_nums}"

# Where this Hadoop version is unpacked.
install_root = "#{node['grid']['app_root']}/hadoop-#{version}"

# Name of the release tarball; the 1.x line uses the "-bin" archive.
# Any other major version leaves the name as nil.
tarball =
  case major_version
  when '1' then "hadoop-#{version}-bin.tar.gz"
  when '2' then "hadoop-#{version}.tar.gz"
  end
# Digest file that accompanies the tarball.
tarball_mds = "#{tarball}.mds"

# Local download locations inside Chef's file cache.
file_cache_path = Chef::Config[:file_cache_path]
downloaded_tarball = "#{file_cache_path}/#{tarball}"
downloaded_tarball_mds = "#{file_cache_path}/#{tarball_mds}"

archive_url = node['hadoop']['archive_url']
# Download, verify and unpack the distribution only when the install root
# does not exist yet. This is a plain Ruby condition, so it is evaluated
# while the recipe itself is being evaluated.
unless FileTest.directory?(install_root)
  # Digest (.mds) file for the tarball.
  remote_file downloaded_tarball_mds do
    source "#{archive_url}/hadoop-#{version}/#{tarball_mds}"
    action :create_if_missing
  end

  # The release tarball itself.
  remote_file downloaded_tarball do
    source "#{archive_url}/hadoop-#{version}/#{tarball}"
    action :create_if_missing
  end

  # Verify the downloaded tarball against the SHA256 entry of the .mds file
  # and abort the run when the entry is missing or does not match.
  ruby_block "sha256 checksum #{downloaded_tarball}" do
    block do
      # e.g. mds file format:
      #   hadoop-1.1.2-bin.tar.gz:    MD5 = 4B 59 F4 81 A7 52 D2 A9  20 3D D7 D0 A9 50 5C
      #                                     18
      #   hadoop-1.1.2-bin.tar.gz:   SHA1 = DCCC 01A0 4C42 587D 9DF1  83CA 7DC8 83F7 A6A4
      #                                     8D80
      #   ...
      # All whitespace is removed first so that a digest wrapped over several
      # lines becomes one contiguous hex string; scan then yields
      # [algorithm, digest] pairs and assoc picks the SHA256 one.
      # The tarball name is escaped because it contains regex
      # metacharacters ('.').
      digest_entry = File.read(downloaded_tarball_mds).
        gsub(/\s+/, '').
        scan(/#{Regexp.escape(tarball)}:(.+?)=([0-9A-Z]+)/).
        assoc('SHA256')
      if digest_entry.nil?
        Chef::Application.fatal!("SHA256 checksum of #{tarball} not found in #{downloaded_tarball_mds}")
      end
      checksum = digest_entry[1]
      Chef::Log.info "#{tarball}: SHA256 = #{checksum}"
      actual_checksum = Digest::SHA256.file(downloaded_tarball).to_s
      Chef::Log.info "#{tarball}: actual SHA256 = #{actual_checksum}"
      # String#casecmp returns 0 for a case-insensitive match and -1/1
      # otherwise. All of these are truthy in Ruby, so the result has to be
      # compared with zero explicitly; negating it with `!` never fires.
      unless checksum.casecmp(actual_checksum).zero?
        Chef::Application.fatal!("Invalid SHA256 checksum of #{downloaded_tarball}, expected: #{checksum}")
      end
    end
    action :create
  end

  # Declare the tar package only when no such package resource exists yet
  # in the resource collection.
  pkg = 'tar'
  resources(:package => pkg) rescue package pkg do
    action :install
  end

  bash "install_hadoop-#{version}" do
    code <<-EOC
      tar xvzf #{downloaded_tarball} -C #{node['grid']['app_root']}
      # for 2.0.x
      chown -R root:root #{install_root}
    EOC
    creates install_root
  end
end

# The with_hadoop_lzo attribute is read, but nothing is done for it in this
# recipe.
if node['hadoop']['with_hadoop_lzo']
  # N/A in the Apache Hadoop
  # do nothing.
end

# Install the libbz2 package when the bzip2 codec is configured to use the
# system-native library and the middle version is at least '2.1'.
# NOTE(review): if middle_version is a String (as the comparisons with '1.2'
# elsewhere suggest), `>=` compares lexicographically rather than
# numerically - confirm this is acceptable for all supported versions.
use_system_bzip2 =
  node['hadoop']['io.compression.codec.bzip2.library'] == 'system-native'
if use_system_bzip2 && middle_version >= '2.1'
  package get_libbz2_pkg_name do
    action :install
  end
end

# Point the HADOOP_PREFIX symlink at this version's install root. The link
# resource is given both the :delete and the :create action, in that order.
link node['hadoop']['HADOOP_PREFIX'] do
  to install_root
  action [:delete, :create]
end

# Render every limits file for this major version into
# /etc/security/limits.d, using the template of the same relative path.
get_limits_files(major_version).each do |limits_name|
  limits_path = "etc/security/limits.d/#{limits_name}"
  template "/#{limits_path}" do
    source limits_path
    owner 'root'
    group 'root'
    mode '0644'
  end
end

# Configuration directory inside the install tree: conf/ for the 1.x line,
# etc/hadoop/ for the 2.x line, nil for any other major version.
conf_dir =
  case major_version
  when '1' then "#{node['grid']['app_root']}/hadoop-#{version}/conf"
  when '2' then "#{node['grid']['app_root']}/hadoop-#{version}/etc/hadoop"
  end

# Render the selected configuration files, passing the active volume
# numbers to the templates.
tpl_vars = { :active_vol_nums => active_vol_nums }
conf_template(conf_dir, middle_version, conf_files, tpl_vars)

# with security
# Everything below applies only when the with_security attribute is set:
# the keytab directory, jsvc, the setuid/setgid launcher binary with its
# configuration file, and (for 1.0) the krb5 configuration files.
if node['hadoop']['with_security']
  directory node['hadoop']['this.keytab.dir'] do
    recursive true
    owner 'root'
    group 'root'
    mode '0755'
    action :create
  end

  case major_version
  when '1'
    # On machines other than x86_64, install the jsvc package and link
    # libexec/jsvc.i386 to /usr/bin/jsvc.
    unless node[:kernel][:machine] == 'x86_64'
      package get_jsvc_pkg_name do
        action :install
      end

      link "#{install_root}/libexec/jsvc.i386" do
        to '/usr/bin/jsvc'
      end
    end

    # task-controller: owned by root, group of the mapred user, mode 6050.
    file "#{node['grid']['app_root']}/hadoop-#{version}/bin/task-controller" do
      mode '6050'
      owner 'root'
      group users[:mapred][:name]
    end

    template "#{node['grid']['app_root']}/hadoop-#{version}/conf/taskcontroller.cfg" do
      source "conf-#{middle_version}/taskcontroller.cfg"
      mode '0400'
      owner 'root'
      group 'root'
      variables(:active_vol_nums => active_vol_nums)
    end
  when '2'
    package get_jsvc_pkg_name do
      action :install
    end

    # container-executor: owned by root, group of the yarn user, mode 6050.
    file "#{node['grid']['app_root']}/hadoop-#{version}/bin/container-executor" do
      mode '6050'
      owner 'root'
      group users[:yarn][:name]
    end

    template "#{node['grid']['app_root']}/hadoop-#{version}/etc/hadoop/container-executor.cfg" do
      source "etc-#{middle_version}/hadoop/container-executor.cfg"
      mode '0400'
      owner 'root'
      group 'root'
      variables(:active_vol_nums => active_vol_nums)
    end

    setup_cgroup(middle_version)
  end

  # Version 1.0 additionally gets the krb5 configuration files, rendered
  # without template variables.
  if middle_version == '1.0'
    tpl_vars = nil
    conf_template(conf_dir, middle_version, krb5_conf_files, tpl_vars)
  end
end

# sudo user option shown for starting the datanode: blank padding (the same
# width as '-u hdfs') when security is enabled, '-u hdfs' otherwise.
datanode_sudo_user_opt =
  if node['hadoop']['with_security']
    '       '
  else
    '-u hdfs'
  end

# Note: keep the hadoop_{pseudo,full}_distributed_init.sh templates in sync
#       whenever this section is modified.
# For each supported major version, remember the examples jar used by the
# example job note and log the first-time HDFS initialization steps.
case major_version
when '1'
  examples_jar = "hadoop-examples-#{version}.jar"

  log <<-EOM
Note:
You must initialize HDFS in the first installation:
  $ cd #{node['grid']['app_root']}/hadoop
  $ sudo -u hdfs ./bin/hadoop namenode -format
  $ sudo -u hdfs ./bin/hadoop-daemon.sh start namenode
  $ sudo #{datanode_sudo_user_opt} ./bin/hadoop-daemon.sh start datanode
  $ sudo -u hdfs ./bin/hadoop dfsadmin -safemode wait
  $ sudo -u hdfs ./bin/hadoop fs -chown hdfs:hdfs /
  $ sudo -u hdfs ./bin/hadoop fs -chmod 755 /
  $ sudo -u hdfs ./bin/hadoop fs -mkdir /user
  $ sudo -u hdfs ./bin/hadoop fs -mkdir #{node['grid']['vol_root']}/0/var/lib/mapred
  $ sudo -u hdfs ./bin/hadoop fs -chown mapred:mapred #{node['grid']['vol_root']}/0/var/lib/mapred
  EOM
when '2'
  examples_jar = "share/hadoop/mapreduce/hadoop-mapreduce-examples-#{version}.jar"

  log <<-EOM
Note:
You must initialize HDFS in the first installation:
  $ cd #{node['grid']['app_root']}/hadoop
  $ sudo -u hdfs ./bin/hdfs namenode -format
  $ sudo -u hdfs ./sbin/hadoop-daemon.sh start namenode
  $ sudo #{datanode_sudo_user_opt} ./sbin/hadoop-daemon.sh start datanode
  $ sudo -u hdfs ./bin/hdfs dfsadmin -safemode wait
  $ sudo -u hdfs ./bin/hadoop fs -chown hdfs:hdfs /
  $ sudo -u hdfs ./bin/hadoop fs -chmod 755 /
  $ sudo -u hdfs ./bin/hadoop fs -mkdir /user
  $ sudo -u hdfs ./bin/hadoop fs -mkdir -p #{node['grid']['vol_root']}/0/var/log/yarn/nm
  $ sudo -u hdfs ./bin/hadoop fs -chown yarn:hadoop #{node['grid']['vol_root']}/0/var/log/yarn/nm
  $ sudo -u hdfs ./bin/hadoop fs -chmod 1777        #{node['grid']['vol_root']}/0/var/log/yarn/nm
  $ sudo -u hdfs ./bin/hadoop fs -mkdir -p #{node['grid']['vol_root']}/0/var/lib/mapred/history
  $ sudo -u hdfs ./bin/hadoop fs -chown -R mapred:hadoop #{node['grid']['vol_root']}/0/var/lib/mapred
  $ sudo -u hdfs ./bin/hadoop fs -chmod -R 755           #{node['grid']['vol_root']}/0/var/lib/mapred
  EOM
end

# Render the pseudo- and full-distributed init scripts into
# <app_root>/sbin, handing each template the major version and the datanode
# sudo user option.
%w[
  hadoop_pseudo_distributed_init.sh
  hadoop_full_distributed_init.sh
].each do |script_name|
  template "#{node['grid']['app_root']}/sbin/#{script_name}" do
    source "grid/usr/sbin/#{script_name}"
    mode '0755'
    owner 'root'
    group 'root'
    variables(
      :major_version => major_version,
      :datanode_sudo_user_opt => datanode_sudo_user_opt
    )
  end
end

# Log an example MapReduce job invocation; the text depends on whether
# security is enabled (kinit-based) or not (plain local user).
example_job_note =
  if node['hadoop']['with_security']
    <<-EOM
Note:
Example MapReduce job execution:
  $ sudo -u alice kinit
  Password for alice@LOCALDOMAIN: 
  $ sudo -u alice bin/hadoop jar #{examples_jar} pi \\
  > -D mapreduce.job.acl-view-job=* -D mapreduce.job.acl-modify-job=alice 5 10
  EOM
  else
    <<-EOM
Note:
Example MapReduce job execution:
  $ sudo adduser alice
  $ sudo -u hdfs ./bin/hadoop fs -mkdir /user/alice
  $ sudo -u hdfs ./bin/hadoop fs -chown alice:alice /user/alice
  $ sudo -u alice ./bin/hadoop jar #{examples_jar} pi 5 10
  EOM
  end
log example_job_note

