-
Notifications
You must be signed in to change notification settings - Fork 1
/
metadatabench.properties
99 lines (96 loc) · 6.23 KB
/
metadatabench.properties
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
# Number of slave nodes in the cluster. Used to know when all nodes have joined, so that the generation
# can be started.
# Required. No default value. Can also be given as a command line parameter.
master.numberofslaves = 2
# Number of directories to create during the initial namespace generation phase.
# Required. No default value. Can also be given as a command line parameter.
master.numberofdirs = 100000
# Number of files to create during the initial namespace generation phase.
# Required. No default value. Can also be given as a command line parameter.
master.numberoffiles = 100000
# Number of operations to execute during the workload phase. The mix of operations is specified with
# the operation probability parameters (master.workload.operation.*).
# Required. No default value. Can also be given as a command line parameter.
master.numberofops = 500000
# Address of the file system server. For HDFS, for example, this is the value of the fs.default.name or
# fs.defaultFS parameter as defined in core-site.xml, in the format hdfs://host.name:port/.
# Required. No default value. Can also be given as a command line parameter.
slave.filesystemaddress = hdfs://node-1.eth1:8020/
# The path separator to be used for the file system. Default: /
misc.pathseparator = /
# The directory in which the benchmark will operate (that is, the root directory of the benchmark namespace).
# Default: /workDir
master.namespace.workdir = /workDir
# The prefix to use for directory names. The id is appended to this prefix, for example dir69.
# Default: dir
master.namespace.dirnameprefix = dir
# The prefix to use for file names. The id is appended to this prefix, for example file20.
# Default: file
master.namespace.filenameprefix = file
# If true, the generated namespace will be deleted after the benchmark has finished executing all operations.
# NOTE(review): no default is documented for this parameter - confirm it against the benchmark source.
master.namespace.delete = false
# When an element is renamed, this suffix followed by a rename count is appended to its name; if the name
# already ends with the suffix and a count, the count is incremented instead.
# For example file20 -> file20.r1 or file20.r69 -> file20.r70. Default: .r
master.workload.renamesuffix = .r
# The maximum size of the accessed element cache.
# During the workload execution, accessed elements are cached for a given time in order to prevent further
# accesses to them that could cause conflicts. The reason is that the benchmark is distributed and operates
# asynchronously, so the order in which operations are generated is not necessarily the order in which they
# are executed. For example, an "open file20" operation can be generated, followed by a "delete file20".
# Both are dispatched, and the delete could get executed before the open, leading to an error.
# If throttling is needed for your workload, setting the maximum size of the cache to a value slightly
# higher than master.workload.throttle.aftergeneratedops and adjusting the
# master.workload.accessedelementcache.ttl parameter can minimize or prevent file system access conflicts.
# Note that this cache is local to the workload generator (in the master).
# Default: 110000
master.workload.accessedelementcache.maxsize = 110000
# The time that has to pass between generating two operations that access the same directory or file.
# NOTE(review): the unit is not stated; presumably milliseconds, like slave.progressreportfrequency - confirm.
# Default: 5000
master.workload.accessedelementcache.ttl = 5000
# Specifies after how many generated operations the benchmark should check whether the slaves have been
# able to process a sufficient amount of the generated operations.
# If the generator is faster than the slaves or the file system under test, unexecuted operations can
# queue up and slow down the benchmark. This option is therefore provided to throttle the generation
# and prevent such an overload of the distributed system.
# For example, if you set this parameter to 100000, then after every 100000th generated operation the master
# will check how many operations have been executed by the slaves and, if needed, it will sleep until a given
# threshold has been reached (master.workload.throttle.continuethreshold).
# If you generate a smaller amount of operations (less than 1-2 million), you can switch throttling off by
# setting this parameter to a number larger than the number of operations (master.numberofops).
# Default: 100000
master.workload.throttle.aftergeneratedops = 100000
# Specifies the maximum difference between generated and executed operations that has to be reached
# before continuing with the generation. For example, if this parameter is set to 2000 and
# master.workload.throttle.aftergeneratedops is 100000, then the master will wait until at least 98000
# operations have been executed before generating new operations.
# Default: 2000
master.workload.throttle.continuethreshold = 2000
# The time to sleep between two checks of whether the needed amount of operations has been executed by the
# slaves.
# NOTE(review): the unit is not stated; presumably milliseconds, like slave.progressreportfrequency - confirm.
# Default: 900
master.workload.throttle.duration = 900
# Parameters for operation types and their probability.
# An operation probability is the fraction of operations of the given type in the workload
# (master.numberofops). Each value has to be between 0 (exclusive) and 1 (inclusive), and all values together
# have to add up to 1. If an operation type is not needed in the workload, exclude it (comment it out or
# remove it) instead of setting it to 0.
master.workload.operation.create = 0.05
# mkdir is excluded from this workload. The active values in this group already add up to 1, so enabling
# the line below requires lowering other probabilities by the same amount.
#master.workload.operation.mkdir = 0.05
master.workload.operation.delete = 0.05
master.workload.operation.lsfile = 0.4
master.workload.operation.lsdir = 0.05
master.workload.operation.openfile = 0.35
master.workload.operation.renamefile = 0.05
master.workload.operation.movefile = 0.05
# The warm-up time of the system, that is, the time to wait between starting the operation execution and
# starting the measurements.
# NOTE(review): the unit is not stated; presumably milliseconds, like slave.progressreportfrequency - confirm.
# Default: 0
measurement.warmup = 0
# The number of buckets for the measurement histogram. Mutually exclusive with
# measurement.timeseriesgranularity.
# Default: 100
measurement.histogrambuckets = 100
# The time step for the time series measurement. Mutually exclusive with measurement.histogrambuckets, which
# is why it is commented out here; to use it, comment out measurement.histogrambuckets and enable this line.
# NOTE(review): the unit is not stated; presumably milliseconds, like slave.progressreportfrequency - confirm.
# Default: 2000
#measurement.timeseriesgranularity = 2000
# The size of the thread pool used at each slave for executing file system operations.
# Default: 100
slave.threadpoolsize = 100
# The interval (in milliseconds) at which each slave reports its progress (number of executed operations)
# to the master. Default: 2500
slave.progressreportfrequency = 2500