-
Notifications
You must be signed in to change notification settings - Fork 27
/
myhadoop-1.2.1.patch
127 lines (121 loc) · 4.31 KB
/
myhadoop-1.2.1.patch
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
diff -rup a/core-site.xml b/core-site.xml
--- a/core-site.xml 2013-07-22 15:26:38.000000000 -0700
+++ b/core-site.xml 2014-02-09 11:50:08.682450643 -0800
@@ -5,4 +5,20 @@
<configuration>
+<property>
+ <name>hadoop.tmp.dir</name>
+ <value>HADOOP_TMP_DIR</value>
+ <description>A base for other temporary directories.</description>
+</property>
+
+<property>
+ <name>fs.default.name</name>
+ <value>hdfs://MASTER_NODE:54310</value>
+ <description>The name of the default file system. A URI whose
+ scheme and authority determine the FileSystem implementation. The
+ uri's scheme determines the config property (fs.SCHEME.impl) naming
+ the FileSystem implementation class. The uri's authority is used to
+ determine the host, port, etc. for a filesystem.</description>
+</property>
+
</configuration>
diff -rup a/hdfs-site.xml b/hdfs-site.xml
--- a/hdfs-site.xml 2013-07-22 15:26:38.000000000 -0700
+++ b/hdfs-site.xml 2014-02-09 11:50:08.689450937 -0800
@@ -5,4 +5,45 @@
<configuration>
+ <property>
+ <name>dfs.name.dir</name>
+ <value>DFS_NAME_DIR</value>
+ <description>Determines where on the local filesystem the DFS name node
+ should store the name table. If this is a comma-delimited list
+ of directories then the name table is replicated in all of the
+ directories, for redundancy. </description>
+ <final>true</final>
+ </property>
+
+ <property>
+ <name>dfs.data.dir</name>
+ <value>DFS_DATA_DIR</value>
+ <description>Determines where on the local filesystem an DFS data node
+ should store its blocks. If this is a comma-delimited
+ list of directories, then data will be stored in all named
+ directories, typically on different devices.
+ Directories that do not exist are ignored.
+ </description>
+ <final>true</final>
+ </property>
+
+ <property>
+ <name>dfs.replication</name>
+ <value>DFS_REPLICATION</value>
+ <description>HDFS is partly designed to allow storage failures and uses
+ replication for this. Since either your data on myhadoop jobs is only
+ supposed to live through a single run or you can use persistent data
+ that will most likely run on solid hardware it is quite save to keep
+ replication at 1 and reduce the IO overhead.
+ </description>
+ </property>
+
+ <property>
+ <name>dfs.block.size</name>
+ <value>DFS_BLOCK_SIZE</value>
+ <description>The HDFS block size defines the size of the parts in which
+ the HDFS files will be divided and distributed over the data nodes.
+ </description>
+ </property>
+
</configuration>
diff -rup a/mapred-site.xml b/mapred-site.xml
--- a/mapred-site.xml 2013-07-22 15:26:38.000000000 -0700
+++ b/mapred-site.xml 2014-02-09 11:50:08.695451185 -0800
@@ -5,4 +5,49 @@
<configuration>
+<property>
+ <name>mapred.job.tracker</name>
+ <value>MASTER_NODE:54311</value>
+ <description>The host and port that the MapReduce job tracker runs
+ at. If "local", then jobs are run in-process as a single map
+ and reduce task.
+ </description>
+</property>
+
+<property>
+ <name>mapred.local.dir</name>
+ <value>MAPRED_LOCAL_DIR</value>
+ <final>true</final>
+</property>
+
+<property>
+ <name>mapred.tasktracker.map.tasks.maximum</name>
+ <value>MAPRED_TASKTRACKER_MAP_TASKS_MAXIMUM</value>
+ <description>The MH_MAP_TASKS_MAXIMUM will set the maximum amount of
+ MAP tasks to be started on a single TASK node, this is either CPU
+ or memory bound.</description>
+</property>
+
+<property>
+ <name>mapred.tasktracker.reduce.tasks.maximum</name>
+ <value>MAPRED_TASKTRACKER_REDUCE_TASKS_MAXIMUM</value>
+ <description>The MH_REDUCE_TASKS_MAXIMUM will set the maximum amount
+ of REDUCE tasks to be started on a single TASK node, this is most
+ likely memory bound.</description>
+</property>
+
+<property>
+ <name>mapred.map.tasks</name>
+ <value>MAPRED_MAP_TASKS</value>
+ <description>The MH_MAP_TASKS is used to hint the application of the
+ total amount of MAP tasks that can be run on the cluster.</description>
+</property>
+
+<property>
+ <name>mapred.reduce.tasks</name>
+ <value>MAPRED_REDUCE_TASKS</value>
+ <description>The MH_REDUCE_TASKS is used to hint the application of
+ the total amount of REDUCE tasks that can be run on the cluster.</description>
+</property>
+
</configuration>
Only in a: myhadoop-1.2.1.patch