#!/bin/bash
#### RPi configuration
#Configure /etc/hosts
cat <<EOF>/etc/hosts
172.16.20.11    hadoop-rpi1.labka.cz hadoop-rpi1
172.16.20.12    hadoop-rpi2.labka.cz hadoop-rpi2 
172.16.20.13    hadoop-rpi3.labka.cz hadoop-rpi3
172.16.20.14    hadoop-rpi4.labka.cz hadoop-rpi4
127.0.0.1       localhost
::1             localhost ip6-localhost ip6-loopback
ff02::1         ip6-allnodes
ff02::2         ip6-allrouters
#NOTE: do not map 127.0.1.1 to this node's hostname (Raspbian adds it by
#default); Hadoop daemons would bind and advertise the loopback address
#instead of the 172.16.20.x address above.
EOF
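
#Quick resolution check for every node (getent reads /etc/hosts directly)
for h in hadoop-rpi1 hadoop-rpi2 hadoop-rpi3 hadoop-rpi4; do getent hosts "$h"; done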

#Configure /etc/network/interfaces
cp /etc/network/interfaces{,.bak}
rm -f /etc/network/interfaces
cat <<EOF>/etc/network/interfaces
source-directory /etc/network/interfaces.d
EOF

#Configure eth0
cat <<EOF >/etc/network/interfaces.d/eth0
auto eth0
iface eth0 inet dhcp
EOF
ifup eth0
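
#Confirm eth0 got a lease (a sketch; the address should match the
#/etc/hosts entries above if the DHCP reservations line up)
ip addr show eth0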

#update and install tools (-y keeps the script non-interactive)
apt-get update
apt-get -y upgrade
apt-get -y install zip unzip ntp lsof sysstat wget ansible dnsutils rsync

#create the admin account
adduser aiadmin
adduser aiadmin sudo
cat <<EOF> /etc/sudoers.d/010_aiadmin-nopasswd
aiadmin ALL = (ALL) NOPASSWD: ALL
EOF
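
#Validate the drop-in (a broken sudoers file can lock you out) and
#tighten the mode to match the stock Raspbian drop-ins
visudo -cf /etc/sudoers.d/010_aiadmin-nopasswd
chmod 440 /etc/sudoers.d/010_aiadmin-nopasswd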

#create ssh keys for the admin
su - aiadmin
mkdir .ssh
cd .ssh
touch authorized_keys
chmod 700 ~/.ssh/
chmod 600 ~/.ssh/authorized_keys
ssh-keygen -b 2048 -t rsa -P ''  -f ~/.ssh/id_rsa
ssh-agent $SHELL
ssh-add
ssh-add -l  #list the loaded key to verify the agent took it
exit  #leaves the ssh-agent subshell; we are still aiadmin here
ssh-copy-id -i ~/.ssh/id_rsa.pub hadoop-rpi3

#create the hadoop account
addgroup hadoop
adduser --ingroup hadoop hduser
adduser hduser sudo
cat <<EOF> /etc/sudoers.d/020_hduser-nopasswd
hduser ALL=(ALL) NOPASSWD: ALL
EOF
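
#validate this drop-in too and tighten its mode
visudo -cf /etc/sudoers.d/020_hduser-nopasswd
chmod 440 /etc/sudoers.d/020_hduser-nopasswd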

#create ssh keys for hduser
su - hduser
mkdir .ssh
cd .ssh
touch authorized_keys
chmod 700 ~/.ssh/
chmod 600 ~/.ssh/authorized_keys
ssh-keygen -b 2048 -t rsa -P ''  -f ~/.ssh/id_rsa
ssh-agent $SHELL
ssh-add
ssh-add -l  #list the loaded key to verify the agent took it
exit  #leaves the ssh-agent subshell; we are still hduser here

#copy the key to the remote hosts
ssh-copy-id -i ~/.ssh/id_rsa.pub hadoop-rpi1
ssh-copy-id -i ~/.ssh/id_rsa.pub hadoop-rpi2
ssh-copy-id -i ~/.ssh/id_rsa.pub hadoop-rpi3
ssh-copy-id -i ~/.ssh/id_rsa.pub hadoop-rpi4

#verify the ssh connections (log out of each remote shell to continue)
ssh hadoop-rpi1.labka.cz
ssh hadoop-rpi2.labka.cz
ssh hadoop-rpi3.labka.cz
ssh hadoop-rpi4.labka.cz
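
#Optional non-interactive check (a sketch; assumes the keys were copied
#above). BatchMode makes ssh fail instead of prompting, so a failure
#means key auth is broken.
for h in hadoop-rpi1 hadoop-rpi2 hadoop-rpi3 hadoop-rpi4; do
  ssh -o BatchMode=yes "$h.labka.cz" true && echo "$h: key auth OK" || echo "$h: key auth FAILED"
done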

#delete the default pi user
userdel pi
rm -f /etc/sudoers.d/010_pi-nopasswd
rm -rf /home/pi
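
#verify the removal; expect "no such user"
id pi || true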

#system and tools update (-y keeps the script non-interactive)
apt-get update
apt-get -y upgrade
apt-get -y install zip unzip ntp lsof tcpdump sysstat wget ansible dnsutils --fix-missing

#verify the configuration
cat /etc/hosts
cat /etc/resolv.conf
ls -la /etc/sudoers.d/

#Configure /opt/hadoop-2.7.4/etc/hadoop/masters
cat <<EOF>/opt/hadoop-2.7.4/etc/hadoop/masters
hadoop-rpi1.labka.cz
EOF

#Configure /opt/hadoop-2.7.4/etc/hadoop/slaves
cp /opt/hadoop-2.7.4/etc/hadoop/slaves{,.bak}
cat <<EOF>/opt/hadoop-2.7.4/etc/hadoop/slaves
hadoop-rpi1.labka.cz
hadoop-rpi2.labka.cz
hadoop-rpi3.labka.cz
hadoop-rpi4.labka.cz
EOF

#Single-node alternative: /opt/hadoop-2.7.4/etc/hadoop/slaves with the master only
cp /opt/hadoop-2.7.4/etc/hadoop/slaves{,.bak}
cat <<EOF>/opt/hadoop-2.7.4/etc/hadoop/slaves
hadoop-rpi1.labka.cz
EOF

#verify the configuration
ls -la /opt/hadoop-2.7.4/etc/hadoop
cat /opt/hadoop-2.7.4/etc/hadoop/masters
cat /opt/hadoop-2.7.4/etc/hadoop/slaves

#Configure /opt/hadoop-2.7.4/etc/hadoop/mapred-site.xml
cat <<EOF>/opt/hadoop-2.7.4/etc/hadoop/mapred-site.xml
<configuration>
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
  </property>
  <property>
    <name>mapreduce.map.memory.mb</name>
    <value>256</value>
  </property>
  <property>
    <name>mapreduce.map.java.opts</name>
    <value>-Xmx204m</value>
  </property>
  <property>
    <name>mapreduce.reduce.memory.mb</name>
    <value>102</value>
  </property>
  <property>
    <name>mapreduce.reduce.java.opts</name>
    <value>-Xmx102m</value>
  </property>
  <property>
    <name>yarn.app.mapreduce.am.resource.mb</name>
    <value>128</value>
  </property>
  <property>
    <name>yarn.app.mapreduce.am.command-opts</name>
    <value>-Xmx102m</value>
  </property>
</configuration>
EOF

#Configure /opt/hadoop-2.7.4/etc/hadoop/hdfs-site.xml
cat <<EOF>/opt/hadoop-2.7.4/etc/hadoop/hdfs-site.xml
<configuration>
   <property> 
      <name>dfs.replication</name> 
      <value>1</value> 
   </property> 
   <property>
      <name>dfs.namenode.name.dir</name>
      <value>file:///hdfs/namenode</value>
   </property>
   <property>
      <name>dfs.datanode.data.dir</name>
      <value>file:///hdfs/datanode</value>
   </property>
</configuration>
EOF

#Configure /opt/hadoop-2.7.4/etc/hadoop/core-site.xml
cat <<EOF>/opt/hadoop-2.7.4/etc/hadoop/core-site.xml
<configuration>
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://hadoop-rpi1.labka.cz:9000</value>
  </property>
  <property>
    <name>hadoop.tmp.dir</name>
    <value>/hdfs/tmp</value>
  </property>
</configuration>
EOF

#Configure /opt/hadoop-2.7.4/etc/hadoop/yarn-site.xml
cat <<EOF>/opt/hadoop-2.7.4/etc/hadoop/yarn-site.xml
<configuration>
  <property>
    <name>yarn.resourcemanager.hostname</name>
    <value>hadoop-rpi1.labka.cz</value>
  </property>
  <property>
    <name>yarn.resourcemanager.address</name>
    <value>hadoop-rpi1.labka.cz:8050</value>
  </property>
  <property>
    <name>yarn.resourcemanager.scheduler.address</name>
    <value>hadoop-rpi1.labka.cz:8030</value>
  </property>
  <property>
    <name>yarn.resourcemanager.resource-tracker.address</name>
     <value>hadoop-rpi1.labka.cz:8031</value>
  </property>
  <property>
    <name>yarn.resourcemanager.webapp.address</name>
     <value>hadoop-rpi1.labka.cz:8088</value>
  </property>
  <property>
    <name>yarn.resourcemanager.admin.address</name>
    <value>hadoop-rpi1.labka.cz:8033</value>
  </property>
  <!-- NOTE: yarn.nodemanager.hostname must be each node's OWN hostname;
       adjust it per node (shown here for hadoop-rpi1) -->
  <property>
    <name>yarn.nodemanager.hostname</name>
    <value>hadoop-rpi1.labka.cz</value>
  </property>
  <property>
    <name>yarn.nodemanager.address</name>
    <value>hadoop-rpi1.labka.cz:8060</value>
  </property>
  <property>
    <name>yarn.nodemanager.localizer.address</name>
    <value>hadoop-rpi1.labka.cz:8040</value>
  </property>
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
  </property>
  <property>
    <name>yarn.nodemanager.resource.cpu-vcores</name>
    <value>4</value>
  </property>
  <property>
    <name>yarn.nodemanager.resource.memory-mb</name>
    <value>1024</value>
  </property>
  <property>
    <name>yarn.scheduler.minimum-allocation-mb</name>
    <value>128</value>
  </property>
  <property>
    <name>yarn.scheduler.maximum-allocation-mb</name>
    <value>1024</value>
  </property>
  <property>
    <name>yarn.scheduler.minimum-allocation-vcores</name>
    <value>1</value>
  </property>
  <property>
    <name>yarn.scheduler.maximum-allocation-vcores</name>
    <value>4</value>
  </property>
  <property>
    <name>yarn.nodemanager.vmem-check-enabled</name>
    <value>false</value>
  </property>
  <property>
     <name>yarn.nodemanager.pmem-check-enabled</name>
     <value>true</value>
  </property>
  <property>
    <name>yarn.nodemanager.vmem-pmem-ratio</name>
    <value>4</value>
  </property>
  <property>
    <name>yarn.nodemanager.disk-health-checker.max-disk-utilization-per-disk-percentage</name>
    <value>98.5</value>
  </property>
</configuration>
EOF
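
#Optional well-formedness check for the XML configs (a sketch; assumes
#libxml2-utils is installed, e.g. via: apt-get -y install libxml2-utils)
for f in core-site.xml hdfs-site.xml mapred-site.xml yarn-site.xml; do
  xmllint --noout "/opt/hadoop-2.7.4/etc/hadoop/$f" && echo "$f OK"
done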

#Configuration based on $SPARK_HOME/conf/spark-env.sh.template
cp /opt/spark-2.1.0-bin-hadoop2.7/conf/spark-env.sh{,.bak}
cat <<EOF>/opt/spark-2.1.0-bin-hadoop2.7/conf/spark-env.sh
#!/usr/bin/env bash
SPARK_MASTER_HOST=hadoop-rpi1.labka.cz
SPARK_WORKER_MEMORY=512m
EOF

#Configure /opt/hadoop-2.7.4/etc/hadoop/hadoop-env.sh
#(quoted delimiter so the $VARS below are written literally, not expanded now)
cat <<'EOF' >/opt/hadoop-2.7.4/etc/hadoop/hadoop-env.sh
#!/usr/bin/env bash
# Set Hadoop-specific environment variables here.
#export JAVA_HOME=${JAVA_HOME}
export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-armhf/jre
export HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-"/opt/hadoop-2.7.4/etc/hadoop"}

# Extra Java CLASSPATH elements.  Automatically insert capacity-scheduler.
for f in $HADOOP_HOME/contrib/capacity-scheduler/*.jar; do
  if [ "$HADOOP_CLASSPATH" ]; then
    export HADOOP_CLASSPATH=$HADOOP_CLASSPATH:$f
  else
    export HADOOP_CLASSPATH=$f
  fi
done

# The maximum amount of heap to use, in MB. Default is 1000.
#export HADOOP_HEAPSIZE=
#export HADOOP_NAMENODE_INIT_HEAPSIZE=""

# Extra Java runtime options.  Empty by default.
export HADOOP_OPTS="$HADOOP_OPTS -Djava.net.preferIPv4Stack=true"

# Command specific options appended to HADOOP_OPTS when specified
export HADOOP_NAMENODE_OPTS="-Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-INFO,RFAS} -Dhdfs.audit.logger=${HDFS_AUDIT_LOGGER:-INFO,NullAppender} $HADOOP_NAMENODE_OPTS"
export HADOOP_DATANODE_OPTS="-Dhadoop.security.logger=ERROR,RFAS $HADOOP_DATANODE_OPTS"

export HADOOP_SECONDARYNAMENODE_OPTS="-Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-INFO,RFAS} -Dhdfs.audit.logger=${HDFS_AUDIT_LOGGER:-INFO,NullAppender} $HADOOP_SECONDARYNAMENODE_OPTS"

export HADOOP_NFS3_OPTS="$HADOOP_NFS3_OPTS"
export HADOOP_PORTMAP_OPTS="-Xmx512m $HADOOP_PORTMAP_OPTS"

# The following applies to multiple commands (fs, dfs, fsck, distcp etc)
export HADOOP_CLIENT_OPTS="-Xmx512m $HADOOP_CLIENT_OPTS"
#HADOOP_JAVA_PLATFORM_OPTS="-XX:-UsePerfData $HADOOP_JAVA_PLATFORM_OPTS"

# On secure datanodes, user to run the datanode as after dropping privileges.
# This **MUST** be uncommented to enable secure HDFS if using privileged ports
# to provide authentication of data transfer protocol.  This **MUST NOT** be
# defined if SASL is configured for authentication of data transfer protocol
# using non-privileged ports.
export HADOOP_SECURE_DN_USER=${HADOOP_SECURE_DN_USER}

# Where log files are stored.  $HADOOP_HOME/logs by default.
#export HADOOP_LOG_DIR=${HADOOP_LOG_DIR}/$USER

# Where log files are stored in the secure data environment.
export HADOOP_SECURE_DN_LOG_DIR=${HADOOP_LOG_DIR}/${HADOOP_HDFS_USER}

# HDFS Mover specific parameters
# Specify the JVM options to be used when starting the HDFS Mover.
# These options will be appended to the options specified as HADOOP_OPTS
# and therefore may override any similar flags set in HADOOP_OPTS
# export HADOOP_MOVER_OPTS=""
export HADOOP_PID_DIR=${HADOOP_PID_DIR}
export HADOOP_SECURE_DN_PID_DIR=${HADOOP_PID_DIR}

export HADOOP_IDENT_STRING=$USER
EOF
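
#Distribute the finished configuration to the other nodes (a sketch;
#assumes Hadoop is already unpacked at the same path on every node and
#that the hduser key auth set up above works)
for h in hadoop-rpi2 hadoop-rpi3 hadoop-rpi4; do
  rsync -a /opt/hadoop-2.7.4/etc/hadoop/ "hduser@$h.labka.cz:/opt/hadoop-2.7.4/etc/hadoop/"
done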

#Append to /home/hduser/.bashrc (quoted delimiter keeps $HADOOP_HOME etc.
#literal so they expand at login, not while writing the file)
cat <<'EOF'>>/home/hduser/.bashrc
export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-armhf/jre
export HADOOP_HOME=/opt/hadoop-2.7.4
export HADOOP_PREFIX=$HADOOP_HOME
export HADOOP_MAPRED_HOME=$HADOOP_HOME
export HADOOP_COMMON_HOME=$HADOOP_HOME
export HADOOP_HDFS_HOME=$HADOOP_HOME
export HADOOP_YARN_HOME=$HADOOP_HOME
export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_HOME/lib/native
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
export HADOOP_YARN_CONF_DIR=$HADOOP_HOME/etc/hadoop
export YARN_HOME=$HADOOP_HOME
export YARN_CONF_DIR=$HADOOP_HOME/etc/hadoop
export HADOOP_SPARK_HOME=/opt/spark-2.1.0-bin-hadoop2.7
export SPARK_HOME=/opt/spark-2.1.0-bin-hadoop2.7
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
export PATH=$PATH:$SPARK_HOME/bin
EOF

source /home/hduser/.bashrc  #load the variables into the current shell
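
#Quick check that Hadoop starts under hduser (a sketch; hadoop-env.sh
#written above supplies JAVA_HOME)
su - hduser -c '/opt/hadoop-2.7.4/bin/hadoop version'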

#fix ownership
chown -R hduser:hadoop /home/hduser/
chown -R hduser:hadoop /opt/hadoop-2.7.4/
chown -R hduser:hadoop /opt/spark-2.1.0-bin-hadoop2.7

#Configure /hdfs
#repeat on every node
mkdir -p /hdfs/tmp
mkdir -p /hdfs/namenode
mkdir -p /hdfs/datanode
chown -R hduser:hadoop /hdfs/
chmod -R 750 /hdfs/
#format the namenode as hduser (master node only), so the metadata it
#creates stays owned by the user that runs the daemons
sudo -u hduser /opt/hadoop-2.7.4/bin/hdfs namenode -format
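
#a successful format creates the metadata directory (a sketch of the check;
#expect VERSION, seen_txid and an fsimage_* file)
ls -la /hdfs/namenode/current/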

#start hdfs as hduser (the start scripts ssh to the slaves, and only
#hduser has keys on them)
su - hduser
/opt/hadoop-2.7.4/sbin/start-dfs.sh
curl  http://hadoop-rpi1.labka.cz:50070/
/opt/hadoop-2.7.4/sbin/start-yarn.sh
curl http://hadoop-rpi1.labka.cz:8088/
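
#the JDK's jps tool (assuming it is on the PATH) lists the running JVMs;
#on the master expect NameNode, SecondaryNameNode, DataNode,
#ResourceManager and NodeManager
jps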

#create hdfs directories (do NOT pre-create output: mapreduce refuses to
#run if its output directory already exists)
/opt/hadoop-2.7.4/bin/hdfs dfs -mkdir /user
/opt/hadoop-2.7.4/bin/hdfs dfs -mkdir /user/hduser
/opt/hadoop-2.7.4/bin/hdfs dfs -mkdir /user/hduser/input
/opt/hadoop-2.7.4/bin/hdfs dfs -mkdir /user/hduser/pcaps
/opt/hadoop-2.7.4/bin/hdfs dfs -put /opt/hadoop-2.7.4/etc/hadoop /user/hduser/input
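
#verify the upload (the put above copies the whole hadoop conf dir)
/opt/hadoop-2.7.4/bin/hdfs dfs -ls -R /user/hduser/input | head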

#run the mapreduce grep example
/opt/hadoop-2.7.4/bin/hadoop jar /opt/hadoop-2.7.4/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.4.jar grep input output 'dfs[a-z.]+'
/opt/hadoop-2.7.4/bin/hdfs dfs -get output output
cat output/*
/opt/hadoop-2.7.4/bin/hdfs dfs -cat output/*
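
#to re-run the example, remove both output locations first (a sketch)
/opt/hadoop-2.7.4/bin/hdfs dfs -rm -r output
rm -rf output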