Timestamp:
2016-10-25T10:02:58+13:00 (8 years ago)
Author:
davidb
Message:

Renaming to better represent what the cluster is designed for

Location:
other-projects/hathitrust/vagrant-spark-hdfs-cluster
Files:
3 edited
1 moved

  • other-projects/hathitrust/vagrant-spark-hdfs-cluster/trunk/README.txt

    r30905 r30913  
    11
    2 Vargrant provisioning files to spin up a modest (4 node) Hadoop
    3 cluster for experiments processing HTRC Extracted Feature JSON files
    4 suitable for ingesting into Solr.
     2Vagrant provisioning files to spin up a modest Spark cluster (master
     3+ 3 slaves + backup) for experiments processing HTRC Extracted Feature
     4JSON files suitable for ingesting into Solr.
     5
     6To aid parallelism, the code is designed to read JSON files from HDFS, so
     7provisioning the cluster includes Hadoop core in addition to Spark.
    58
    69
    7 Top-level code Apache Spark, processing HDFS stored JSON files, hence
    8 the need for an underlying Hadoop cluster.
     10Provisioning uses Puppet scripting, based on the following online
     11resources, but updated to use newer versions of Ubuntu, Java,
     12and Hadoop.  Spark is then added on top of that.
    913
    10 Provisioning based on the following online resources, but updated to
    11 use newer versions of Ubuntu, Java, and Hadoop.
    1214
    1315  http://cscarioni.blogspot.co.nz/2012/09/setting-up-hadoop-virtual-cluster-with.html
    1416
    1517  https://github.com/calo81/vagrant-hadoop-cluster
     18
     19To get everything set up, type:
     20
     21  vagrant up
     22
     23Then log in to the master node, and switch to the 'ubuntu' user:
     24
     25  vagrant ssh master
     26  sudo su - ubuntu
     27
     28If this is the first time, you need to format an HDFS area to use:
     29  hdfs namenode -format
     30
     31Otherwise, start up the HDFS and Spark daemon processes:
     32
     33  start-dfs.sh
     34  spark-start-all.sh
     35
     36You can visit the Spark cluster monitoring page at:
     37
     38  http://10.10.0.52:8080/
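
Once the daemons are up, it is worth sanity-checking HDFS before running
any Spark jobs.  The directory and file names below are only illustrative,
not something this provisioning creates:

  # confirm the datanodes have registered with the namenode
  hdfs dfsadmin -report

  # copy some Extracted Feature JSON files into HDFS for processing
  hdfs dfs -mkdir -p /user/ubuntu/json          # illustrative path
  hdfs dfs -put volume.json /user/ubuntu/json/  # illustrative file name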
     39
    1640
    1741
     
    6084http://kvz.io/blog/2013/01/16/vagrant-tip-keep-virtualbox-guest-additions-in-sync/
    6185
     86----
     87SecondaryNode
     88----
     89
     90http://stackoverflow.com/questions/23581425/hadoop-how-to-start-secondary-namenode-on-other-node
     91
     92<property>
     93  <name>dfs.namenode.secondary.http-address</name>
     94  <value>ec2-54-187-222-213.us-west-2.compute.amazonaws.com:50090</value>
     95</property>
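
For this cluster the value would presumably point at the backup node
defined in base-hadoop.pp (10.10.0.51) rather than the EC2 host shown in
the example above.  An untested sketch of starting the daemon by hand on
that node:

  # run on the backup node, once hdfs-site.xml carries the property above
  $HADOOP_HOME/sbin/hadoop-daemon.sh start secondarynamenode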
     96
     97----
     98Spark Cluster
     99----
     100
     101http://spark.apache.org/docs/latest/spark-standalone.html
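
A quick way to check that the standalone cluster is accepting applications
is to point a Spark shell at the master (assuming the default standalone
port, 7077):

  # 7077 is Spark's default standalone master port
  spark-shell --master spark://10.10.0.52:7077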
  • other-projects/hathitrust/vagrant-spark-hdfs-cluster/trunk/manifests/base-hadoop.pp

    r30903 r30913  
    11$install_dir = "/opt"
    22$hadoop_home = "${install_dir}/hadoop"
     3$spark_home  = "${install_dir}/spark"
     4
    35$user = "ubuntu"
    46$group = "ubuntu"
     7
    58$hadoop_master = '10.10.0.52'
    69$hadoop_backup = '10.10.0.51'
     
    1013
    1114include hadoop
     15include spark
    1216
    1317file {
     
    4650}
    4751
    48  
     52#package { "subversion" :
     53#   ensure => present,
     54#  require => [ Exec['apt-get update'], File["/home/${user}"] ]
     55#}
     56
     57#package { "maven" :
     58#   ensure => present,
     59#  require => [ Exec['apt-get update'], File["/home/${user}"] ]
     60#}
     61
    4962file {
    5063    "/home/${user}/.ssh":
    5164    ensure => "directory",
    5265    owner  => "${user}",
    53     group  => "${user}",
     66    group  => "${group}",
    5467    mode   => 750,
    5568    require => [ Exec['apt-get update'], File["/home/${user}"] ]
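
The new $spark_home variable and 'include spark' pull in a spark module that
is not shown in this changeset.  Going by the hadoop module below, it
presumably follows the same download/unpack/symlink pattern; roughly, in
shell terms (the version and mirror URL are placeholders, not taken from the
changeset):

  # placeholder version and mirror, for illustration only
  wget -O /tmp/spark-x.y.z.tgz http://SOME-MIRROR/spark/spark-x.y.z/spark-x.y.z-bin-hadoop2.7.tgz
  tar -zxf /tmp/spark-x.y.z.tgz -C /opt
  ln -s /opt/spark-x.y.z-bin-hadoop2.7 /opt/spark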
  • other-projects/hathitrust/vagrant-spark-hdfs-cluster/trunk/modules/hadoop/manifests/init.pp

    r30903 r30913  
    22
    33exec { "download_hadoop":
    4 # Download from nearby mirror, otherwise task can time-out
    5 command => "wget -O /tmp/hadoop.tar.gz http://apache.mirror.amaze.com.au/hadoop/common/hadoop-2.7.3/hadoop-2.7.3.tar.gz",
    6 # command => "wget -O /tmp/hadoop.tar.gz http://www-eu.apache.org/dist/hadoop/common/hadoop-2.7.3/hadoop-2.7.3.tar.gz",
    7 path => $path,
    8 user => $user,
    9 group => $user,
    10 unless => "ls ${install_dir} | grep hadoop-2.7.3",
    11 require => Package["openjdk-7-jdk"]
    12 }
     4  # Download from nearby mirror, otherwise task can time-out
     5  command => "wget -O /tmp/hadoop-2.7.3.tar.gz http://apache.mirror.amaze.com.au/hadoop/common/hadoop-2.7.3/hadoop-2.7.3.tar.gz",
     6  # command => "wget -O /tmp/hadoop-2.7.3.tar.gz http://www-eu.apache.org/dist/hadoop/common/hadoop-2.7.3/hadoop-2.7.3.tar.gz",
     7  path => $path,
     8  user => $user,
     9  group => $user,
     10  unless => "ls ${install_dir} | grep hadoop-2.7.3",
     11  require => Package["openjdk-7-jdk"]
     12 }
    1313
    1414exec { "unpack_hadoop" :
    15   command => "tar -zxf /tmp/hadoop.tar.gz -C ${install_dir}",
     15  command => "tar -zxf /tmp/hadoop-2.7.3.tar.gz -C ${install_dir}",
    1616  path => $path,
    1717  creates => "${hadoop_home}-2.7.3",
    1818  require => Exec["download_hadoop"]
    19 }
     19 }
    2020
    21 exec { 'chown':
    22   command => "/bin/chown -R ${user}:${group} /opt/hadoop-2.7.3",
     21exec { "rename_hadoop" :
     22  command => "ln -s ${install_dir}/hadoop-2.7.3 ${install_dir}/hadoop",
     23  path => $path,
     24  creates => "${hadoop_home}",
     25  require => Exec["unpack_hadoop"]
     26 }
     27
     28exec { 'chown_hadoop':
     29  command => "/bin/chown -R ${user}:${group} ${hadoop_home}-2.7.3",
    2330  path => '/bin',
    2431  user => 'root',
    25   require => Exec["unpack_hadoop"]
    26 }
     32  require => Exec["rename_hadoop"]
     33 }
    2734
    28 file {
    29   "${hadoop_home}-2.7.3/etc/hadoop/slaves":
     35file { 
     36  "${hadoop_home}/etc/hadoop/slaves":
    3037  content => template('hadoop/slaves'),
    3138  mode => 644,
    3239  owner => $user,
    3340  group => $group,
    34   require => Exec["unpack_hadoop"]
     41  require => Exec["chown_hadoop"]
    3542 }
    3643 
    37 file {
    38   "${hadoop_home}-2.7.3/etc/hadoop/masters":
     44file { 
     45  "${hadoop_home}/etc/hadoop/masters":
    3946  content => template('hadoop/masters'),
    4047  mode => 644,
    4148  owner => $user,
    4249  group => $group,
    43   require => Exec["unpack_hadoop"]
     50  require => Exec["chown_hadoop"]
    4451 }
    4552
    4653file {
    47   "${hadoop_home}-2.7.3/etc/hadoop/core-site.xml":
     54  "${hadoop_home}/etc/hadoop/core-site.xml":
    4855  content => template('hadoop/core-site.xml'),
    4956  mode => 644,
    5057  owner => $user,
    5158  group => $group,
    52   require => Exec["unpack_hadoop"]
     59  require => Exec["chown_hadoop"]
    5360 }
    5461 
    5562file {
    56   "${hadoop_home}-2.7.3/etc/hadoop/mapred-site.xml":
     63  "${hadoop_home}/etc/hadoop/mapred-site.xml":
    5764  content => template('hadoop/mapred-site.xml'),
    5865  mode => 644,
    5966  owner => $user,
    6067  group => $group,
    61   require => Exec["unpack_hadoop"]
     68  require => Exec["chown_hadoop"]
    6269 }
    6370 
    6471 file {
    65   "${hadoop_home}-2.7.3/etc/hadoop/hdfs-site.xml":
     72  "${hadoop_home}/etc/hadoop/hdfs-site.xml":
    6673  content => template('hadoop/hdfs-site.xml'),
    6774  mode => 644,
    6875  owner => $user,
    6976  group => $group,
    70   require => Exec["unpack_hadoop"]
     77  require => Exec["chown_hadoop"]
    7178 }
    7279 
    7380file {
    74   "${hadoop_home}-2.7.3/etc/hadoop/hadoop-env.sh":
     81  "${hadoop_home}/etc/hadoop/hadoop-env.sh":
    7582  content => template('hadoop/hadoop-env.sh'),
    7683  mode => 644,
    7784  owner => $user,
    7885  group => $group,
    79   require => Exec["unpack_hadoop"]
     86  require => Exec["chown_hadoop"]
    8087 }
    8188
    82 file{
    83    [ "${hadoop_home}-2.7.3/hadoop_store",
    84      "${hadoop_home}-2.7.3/hadoop_store/hdfs",
    85      "${hadoop_home}-2.7.3/hadoop_store/hdfs/namenode",
    86      "${hadoop_home}-2.7.3/hadoop_store/hdfs/datanode"]:
     89file {
     90   [ "${hadoop_home}/hadoop_store",
     91     "${hadoop_home}/hadoop_store/hdfs",
     92     "${hadoop_home}/hadoop_store/hdfs/namenode",
     93     "${hadoop_home}/hadoop_store/hdfs/datanode"]:
    8794   ensure => 'directory',
    8895   owner => "${user}",
    8996   group => "${group}",
    9097   mode => 755,
    91    require => Exec["unpack_hadoop"]
     98   require => Exec["chown_hadoop"]
    9299  }
    93100
    94 file_line { "add_hadoop_home":
     101file {
     102  "/home/${user}/.bashrc-setup-hadoop":
     103  content => template('hadoop/setup-hadoop.bash'),
     104  mode => 644,
     105  owner => $user,
     106  group => $group,
     107  require => [  Exec["unpack_hadoop"], File["/home/${user}"] ]
     108 }
     109
     110file_line { "setup_hadoop_home":
    95111    ensure => present,
    96112    path => "/home/${user}/.bashrc",
    97     line => "export HADOOP_HOME=\"${hadoop_home}-2.7.3\"",
     113    line => ". .bashrc-setup-hadoop",
    98114    require => [  Exec["unpack_hadoop"], File["/home/${user}"] ]   
    99 }
     115 }
    100116
    101 file_line { "add_hadoop_confdir":
    102     ensure => present,
    103     path => "/home/${user}/.bashrc",
    104     line => 'export HADOOP_CONF_DIR="$HADOOP_HOME/etc/hadoop"',
    105     require => [  Exec["unpack_hadoop"], File["/home/${user}"] ]   
    106 }
    107117
    108 file_line { "add_hadoop_setup":
    109     ensure => present,
    110     path => "/home/${user}/.bashrc",
    111     line => 'source "$HADOOP_HOME/etc/hadoop/hadoop-env.sh"',
    112     require => [  Exec["unpack_hadoop"], File["/home/${user}"] ]
    113 }
    114 
    115 file_line { "add_hadoop_path":
    116     ensure => present,
    117     path => "/home/${user}/.bashrc",
    118     line => 'export PATH="$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin"',
    119     require => [  Exec["unpack_hadoop"], File["/home/${user}"] ]   
    120 }
    121118
    122119}
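
The hadoop/setup-hadoop.bash template referenced above is not included in
this changeset; judging from the .bashrc file_line entries it replaces
(visible in the removed lines of this hunk), it presumably amounts to
something like:

  # reconstructed from the removed exports; the actual template may differ
  export HADOOP_HOME="/opt/hadoop"
  export HADOOP_CONF_DIR="$HADOOP_HOME/etc/hadoop"
  source "$HADOOP_HOME/etc/hadoop/hadoop-env.sh"
  export PATH="$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin"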