Installing Apache Spark via Puppet
Install Java Module into Puppet
/etc/puppetlabs/code/environments/production$ sudo /opt/puppetlabs/bin/puppet module install puppetlabs/java
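The hadoop and spark classes used below are not built in; they come from a Forge module. This walkthrough assumes the CESNET modules (cesnet-hadoop and cesnet-spark), which expose the parameters shown in the manifest, installed the same way:
<pre>
# Assumption: the CESNET Forge modules provide the hadoop/spark classes used in spark.pp
/etc/puppetlabs/code/environments/production$ sudo /opt/puppetlabs/bin/puppet module install cesnet-hadoop
/etc/puppetlabs/code/environments/production$ sudo /opt/puppetlabs/bin/puppet module install cesnet-spark
</pre>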
Install the Spark manifest to /etc/puppetlabs/code/environments/production/manifests/spark.pp. Note that this hard-codes server names; not ideal, but it's a starting point.
$master_hostname = 'spark-master.bpopp.net'

class { 'hadoop':
  realm         => '',
  hdfs_hostname => $master_hostname,
  slaves        => ['spark1.bpopp.net', 'spark2.bpopp.net'],
}

class { 'spark':
  master_hostname        => $master_hostname,
  hdfs_hostname          => $master_hostname,
  historyserver_hostname => $master_hostname,
  yarn_enable            => false,
}

node 'spark-master.bpopp.net' {
  include spark::master
  include spark::historyserver
  include hadoop::namenode
  include spark::hdfs
}

node /spark[12]\.bpopp\.net/ {
  include spark::worker
  include hadoop::datanode
}

node 'client.bpopp.net' {
  include hadoop::frontend
  include spark::frontend
}
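Before pointing agents at this, it's worth checking that the manifest parses and then triggering a run by hand; a minimal sketch using the standard Puppet CLI:
<pre>
# On the Puppet server: syntax-check the manifest
sudo /opt/puppetlabs/bin/puppet parser validate /etc/puppetlabs/code/environments/production/manifests/spark.pp

# On each node: fetch and apply the catalog immediately
sudo /opt/puppetlabs/bin/puppet agent --test
</pre>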
Spark Config
/usr/local/spark/conf/slaves
# A Spark Worker will be started on each of the machines listed below.
spark1
spark2
spark3
#spark4
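This slaves file is what Spark's standalone launch scripts read. Assuming Spark is installed under /usr/local/spark as above, the whole cluster can be brought up from the master in one step:
<pre>
# Starts a master locally, then SSHes to each host listed in conf/slaves
# and starts a worker there (requires passwordless SSH to the workers)
/usr/local/spark/sbin/start-all.sh
</pre>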
/usr/local/spark/conf/spark-env.sh
export SPARK_DIST_CLASSPATH=$(/usr/local/hadoop/bin/hadoop classpath)
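A quick sanity check that the substitution expands to something usable (assuming the Hadoop install path above):
<pre>
# Should print a long colon-separated list of Hadoop config dirs and jars;
# an error here means the classpath export above will be empty
/usr/local/hadoop/bin/hadoop classpath
</pre>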
/usr/local/spark/conf/spark-defaults.conf
# Example:
spark.master                      spark://spark1.lab.bpopp.net:7077
#spark.driver.memory              2g
spark.executor.memory             2g
# spark.eventLog.enabled          true
# spark.eventLog.dir              hdfs://namenode:8021/directory
# spark.serializer                org.apache.spark.serializer.KryoSerializer
# spark.executor.extraJavaOptions -XX:+PrintGCDetails -Dkey=value -Dnumbers="one two three"
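To verify the cluster end to end, one option is to submit the bundled SparkPi example against the standalone master. A sketch, assuming the stock examples jar under /usr/local/spark/examples/jars (the exact jar name varies by Spark and Scala version):
<pre>
# Computes an approximation of pi across 10 partitions on the cluster
/usr/local/spark/bin/spark-submit \
  --class org.apache.spark.examples.SparkPi \
  --master spark://spark1.lab.bpopp.net:7077 \
  /usr/local/spark/examples/jars/spark-examples_*.jar 10
</pre>
If the workers registered correctly, the driver output should end with a line reporting the estimated value of pi.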