diff --git a/engine.json.minimum b/engine.json.minimum index 99341f0..24b2d8a 100644 --- a/engine.json.minimum +++ b/engine.json.minimum @@ -16,7 +16,6 @@ "spark.kryo.referenceTracking": "false", "spark.kryoserializer.buffer.mb": "300", "spark.kryoserializer.buffer": "300m", - "spark.executor.memory": "4g", "es.index.auto.create": "true" }, "algorithms": [ diff --git a/engine.json.spark-tuning b/engine.json.spark-tuning new file mode 100644 index 0000000..b5bde13 --- /dev/null +++ b/engine.json.spark-tuning @@ -0,0 +1,38 @@ +{ + "comment":" This config file uses default settings for all but the required values; see README.md for docs", + "id": "default", + "description": "Default settings", + "engineFactory": "org.template.RecommendationEngine", + "datasource": { + "params" : { + "name": "sample-handmade-data.txt", + "appName": "handmade", + "eventNames": ["purchase", "view"] + } + }, + "sparkConf": { + "spark.serializer": "org.apache.spark.serializer.KryoSerializer", + "spark.kryo.registrator": "org.apache.mahout.sparkbindings.io.MahoutKryoRegistrator", + "spark.kryo.referenceTracking": "false", + "spark.kryoserializer.buffer": "300m", + "spark.executor.memory": "4g", + "spark.executor.cores": "2", + "spark.task.cpus": "2", + "spark.default.parallelism": "16", + "es.index.auto.create": "true" + }, + "algorithms": [ + { + "comment": "simplest setup where all values are default, popularity-based backfill, must add eventNames", + "name": "ur", + "params": { + "appName": "handmade", + "indexName": "urindex", + "typeName": "items", + "comment": "must have data for the first event or the model will not build; other events are optional", + "eventNames": ["purchase", "view"] + } + } + ] +} + diff --git a/src/main/scala/DataSource.scala b/src/main/scala/DataSource.scala index ced7ccd..4d70af8 100644 --- a/src/main/scala/DataSource.scala +++ b/src/main/scala/DataSource.scala @@ -65,7 +65,7 @@ class DataSource(val dsp: DataSourceParams) appName = dsp.appName, entityType = 
Some("user"), eventNames = Some(eventNames), - targetEntityType = Some(Some("item")))(sc) + targetEntityType = Some(Some("item")))(sc).repartition(sc.defaultParallelism) // now separate the events by event name val actionRDDs = eventNames.map { eventName =>