# metadatabench.properties
# Configuration for the metadata benchmark: settings for the master (namespace and workload
# generation), the slaves (operation execution) and the measurements.

# Number of slave nodes in the cluster. Used to know when all nodes have joined and the generation can be started.
# Required. No default value. Can also be given as a command line parameter.
master.numberofslaves = 2
# Number of directories to create during the initial namespace generation phase.
# Required. No default value. Can also be given as a command line parameter.
master.numberofdirs = 100000
# Number of files to create during the initial namespace generation phase.
# Required. No default value. Can also be given as a command line parameter.
master.numberoffiles = 100000
# Number of operations to execute during the workload phase. The mix of operations can be specified with
# the operation probability parameters (master.workload.operation.*).
# Required. No default value. Can also be given as a command line parameter.
master.numberofops = 500000
# Address of the file system server. For example, for HDFS this is the value of the fs.default.name or
# fs.defaultFS parameter as defined in core-site.xml, using the format hdfs://host.name:port/.
# Required. No default value. Can also be given as a command line parameter.
slave.filesystemaddress = hdfs://node-1.eth1:8020/

# The path separator to be used for the file system. Default: /
misc.pathseparator = /
# The directory where the benchmark will operate (basically the root directory for the benchmark namespace).
# Default: /workDir
master.namespace.workdir = /workDir
# The prefix to use for directory names. The element id is appended to this prefix, for example dir69.
# Default: dir
master.namespace.dirnameprefix = dir
# The prefix to use for file names. The element id is appended to this prefix, for example file20.
# Default: file
master.namespace.filenameprefix = file
# If true, the generated namespace will be deleted after the benchmark has finished executing all operations.
# Note: do not leave whitespace after the value. Some loaders (e.g. java.util.Properties) keep trailing
# whitespace as part of the value, so "true " would not be recognised as true.
master.namespace.delete = false
# When an element is renamed, this suffix and a rename count is appended to or incremented at the end of its name.
# For example file20 -> file20.r1 or file20.r69 -> file20.r70. Default: .r
master.workload.renamesuffix = .r
# This parameter specifies the maximum size of the accessed element cache.
# During the workload execution, accessed elements are cached for a given time, in order to prevent further
# access to them that could cause conflicts. The reason is that the benchmark is distributed and operates
# asynchronously, and thus the order in which operations are generated is not necessarily the order in which
# they are executed. For example, an open file20 operation can be generated, followed by a delete file20.
# Both are dispatched and the delete operation could get executed before the open, leading to an error.
# If throttling is needed for your workload, setting the maximum size of the cache to a value slightly
# higher than master.workload.throttle.aftergeneratedops and adjusting the master.workload.accessedelementcache.ttl
# parameter can minimize or prevent file system access conflicts. Note that this cache is local to the workload
# generator (in the master).
# Default: 110000
master.workload.accessedelementcache.maxsize = 110000
# This parameter specifies the time that has to pass between generating two operations that access the same
# directory or file. Default: 5000
# NOTE(review): the unit is not stated here; presumably milliseconds, like slave.progressreportfrequency - confirm.
master.workload.accessedelementcache.ttl = 5000
# This parameter specifies after how many generated operations the benchmark should check whether the slaves
# could process a sufficient amount of the generated operations.
# If the generator is faster than the slaves or the file system under test, then unexecuted operations
# could queue up and slow down the benchmark. Thus, this option is provided to throttle the generation
# and prevent such an overload of the distributed system.
# For example, if you set this parameter to 100000, then after every 100000th generated operation, the master
# will check how many operations have been executed by the slaves and, if needed, it will sleep until a given
# threshold has been reached (master.workload.throttle.continuethreshold).
# If you generate a smaller amount of operations (less than 1-2 million), you can switch throttling off by
# setting it to a number larger than the number of operations.
# Default: 100000
master.workload.throttle.aftergeneratedops = 100000
# Specifies the maximum difference between generated and executed operations that has to be reached
# before continuing with the generation. For example, if this parameter is set to 2000 and
# master.workload.throttle.aftergeneratedops is 100000, then the master will wait until at least 98000
# operations have been executed before generating new operations.
# Default: 2000
master.workload.throttle.continuethreshold = 2000
# The time to sleep between two checks whether the needed amount of operations have been executed by the slaves.
# Default: 900
# NOTE(review): the unit is not stated here; presumably milliseconds, like slave.progressreportfrequency - confirm.
master.workload.throttle.duration = 900

# Parameters for operation types and their probability.
# An operation probability is the fraction of operations of the given type in the workload (master.numberofops).
# The values have to be between 0 (exclusive) and 1 (inclusive) and have to add up to 1. If an operation type is
# not needed in the workload, exclude it (e.g. comment it out), instead of setting it to 0.
# The active values below add up to 1.0; mkdir is excluded by being commented out. When enabling it,
# lower other probabilities accordingly so that the total stays 1.
master.workload.operation.create = 0.05
#master.workload.operation.mkdir = 0.05
master.workload.operation.delete = 0.05
master.workload.operation.lsfile = 0.4
master.workload.operation.lsdir = 0.05
master.workload.operation.openfile = 0.35
master.workload.operation.renamefile = 0.05
master.workload.operation.movefile = 0.05

# The warm-up time of the system, that is, the time to wait between starting the operation execution and starting
# the measurements. Default: 0
# NOTE(review): the unit is not stated here; presumably milliseconds - confirm.
measurement.warmup = 0
# The number of buckets for the measurement histogram. Mutually exclusive with measurement.timeseriesgranularity.
# Default: 100
measurement.histogrambuckets = 100
# The time step for the time series measurement. Mutually exclusive with measurement.histogrambuckets,
# which is why it is commented out here; enable only one of the two.
# Default: 2000
# NOTE(review): the unit is not stated here; presumably milliseconds - confirm.
#measurement.timeseriesgranularity = 2000
# The size of the thread pool used for file system operation execution at each slave.
# Default: 100
slave.threadpoolsize = 100
# The frequency (in milliseconds) with which each slave should report its progress (number of executed operations)
# to the master. Default: 2500
slave.progressreportfrequency = 2500