From adf74b1213f1bbc31df76e62cdff90f70aa850d0 Mon Sep 17 00:00:00 2001 From: "m.orazow" Date: Tue, 17 Jun 2014 16:57:06 +0200 Subject: [PATCH 1/3] Enable compression for Tsv, Csv, etc. Enable compression for delimited scheme outputs. > Method setSinkCompression sets the sinkCompression of this TextLine object. If null, compression will remain disabled. Cascading automatically disables compression when sinkCompression is set to null. With sinkCompression set to 'TextLine.Compress.DEFAULT', one can specify the compression in the job configuration. --- .../src/main/scala/com/twitter/scalding/FileSource.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scalding-core/src/main/scala/com/twitter/scalding/FileSource.scala b/scalding-core/src/main/scala/com/twitter/scalding/FileSource.scala index de760ade8b..df2418b93c 100644 --- a/scalding-core/src/main/scala/com/twitter/scalding/FileSource.scala +++ b/scalding-core/src/main/scala/com/twitter/scalding/FileSource.scala @@ -279,7 +279,7 @@ trait DelimitedScheme extends SchemedSource { override def localScheme = new CLTextDelimited(fields, skipHeader, writeHeader, separator, strict, quote, types, safe) override def hdfsScheme = { - HadoopSchemeInstance(new CHTextDelimited(fields, null, skipHeader, writeHeader, separator, strict, quote, types, safe)) + HadoopSchemeInstance(new CHTextDelimited(fields, CHTextLine.Compress.DEFAULT, skipHeader, writeHeader, separator, strict, quote, types, safe)) } } From 26c6fd3fc1ea94294c73574210886b3638b9e08a Mon Sep 17 00:00:00 2001 From: Muhammet Orazov Date: Wed, 13 Aug 2014 17:03:43 +0200 Subject: [PATCH 2/3] Update Cascading JDBC Version to 2.5.4. 
--- project/Build.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/project/Build.scala b/project/Build.scala index c2fe0f96b0..b3bba80314 100644 --- a/project/Build.scala +++ b/project/Build.scala @@ -199,7 +199,7 @@ object ScaldingBuild extends Build { System.getenv.asScala.getOrElse("SCALDING_CASCADING_VERSION", "2.5.5") lazy val cascadingJDBCVersion = - System.getenv.asScala.getOrElse("SCALDING_CASCADING_JDBC_VERSION", "2.5.3") + System.getenv.asScala.getOrElse("SCALDING_CASCADING_JDBC_VERSION", "2.5.4") val hadoopVersion = "1.2.1" val algebirdVersion = "0.7.0" From 80b5bd699baebcb552539a0d63919f03a918d438 Mon Sep 17 00:00:00 2001 From: Muhammet Orazov Date: Wed, 13 Aug 2014 17:19:07 +0200 Subject: [PATCH 3/3] Merge FileSource. --- .../src/main/scala/com/twitter/scalding/FileSource.scala | 6 ------ 1 file changed, 6 deletions(-) diff --git a/scalding-core/src/main/scala/com/twitter/scalding/FileSource.scala b/scalding-core/src/main/scala/com/twitter/scalding/FileSource.scala index e79b82c6cd..8eb55e1b28 100644 --- a/scalding-core/src/main/scala/com/twitter/scalding/FileSource.scala +++ b/scalding-core/src/main/scala/com/twitter/scalding/FileSource.scala @@ -273,14 +273,8 @@ trait DelimitedScheme extends SchemedSource { //These should not be changed: override def localScheme = new CLTextDelimited(fields, skipHeader, writeHeader, separator, strict, quote, types, safe) -<<<<<<< HEAD - override def hdfsScheme = { - HadoopSchemeInstance(new CHTextDelimited(fields, CHTextLine.Compress.DEFAULT, skipHeader, writeHeader, separator, strict, quote, types, safe)) - } -======= override def hdfsScheme = HadoopSchemeInstance(new CHTextDelimited(fields, null, skipHeader, writeHeader, separator, strict, quote, types, safe)) ->>>>>>> 166e390072f02759ff8e84176709a242614346f3 } trait SequenceFileScheme extends SchemedSource {