From 89681f0f15449399c1e9c104c74cc9fe2efac998 Mon Sep 17 00:00:00 2001 From: Oleksandr Zaitsev Date: Tue, 23 Apr 2024 16:35:02 +0200 Subject: [PATCH] Fixed #285. Renamed removeRowsOfColumnElementsSatisfying: onColumn: and removeRowsOfColumnElementsSatisfing: onColumnNamed: --- .../DataFrameInternalTest.class.st | 29 +- src/DataFrame-Tests/DataFrameTest.class.st | 66 +-- src/DataFrame/Array.extension.st | 8 +- src/DataFrame/Behavior.extension.st | 4 +- src/DataFrame/Collection.extension.st | 14 +- src/DataFrame/DataCorrelationMethod.class.st | 10 +- src/DataFrame/DataFrame.class.st | 464 +++++++++--------- src/DataFrame/DataFrameInternal.class.st | 78 ++- .../DataPearsonCorrelationMethod.class.st | 10 +- src/DataFrame/DataSeries.class.st | 256 +++++----- src/DataFrame/String.extension.st | 6 +- src/DataFrame/package.st | 2 +- 12 files changed, 470 insertions(+), 477 deletions(-) diff --git a/src/DataFrame-Tests/DataFrameInternalTest.class.st b/src/DataFrame-Tests/DataFrameInternalTest.class.st index a4131a96..2a3e7da2 100644 --- a/src/DataFrame-Tests/DataFrameInternalTest.class.st +++ b/src/DataFrame-Tests/DataFrameInternalTest.class.st @@ -4,7 +4,7 @@ Class { #instVars : [ 'df' ], - #category : 'DataFrame-Tests-Core' + #category : #'DataFrame-Tests-Core' } { #category : #running } @@ -322,22 +322,23 @@ DataFrameInternalTest >> testRemoveRowAt [ ] { #category : #tests } -DataFrameInternalTest >> testRemoveRowsOfColumnElementsSatisfyingOnColumn [ +DataFrameInternalTest >> testRemoveRowsWhereElementsInColumnAtSatisfy [ | expected | + df := DataFrameInternal withRows: + #( #( 'Barcelona' 1.609 nil ) #( 'Dubai' 2.789 true ) + #( 'London' 8.788 nil ) #( 'Washington' nil true ) + #( 'Delhi' 10.422 nil ) ). - df := DataFrameInternal withRows: #( - ('Barcelona' 1.609 nil) - ('Dubai' 2.789 true) - ('London' 8.788 nil) - ('Washington' nil true) - ('Delhi' 10.422 nil)). - - expected := DataFrameInternal withRows: #( - ('Dubai' 2.789 true) - ('Washington' nil true)). - - self assert: (df removeRowsOfColumnElementsSatisfying: [ :ele | ele isNil ] onColumn: 3) equals: expected + expected := DataFrameInternal withRows: + #( #( 'Dubai' 2.789 true ) #( 'Washington' nil true ) ). + + self + assert: + (df + removeRowsWhereElementsInColumnAt: 3 + satisfy: [ :ele | ele isNil ]) + equals: expected ] { #category : #tests } diff --git a/src/DataFrame-Tests/DataFrameTest.class.st b/src/DataFrame-Tests/DataFrameTest.class.st index 6895675c..756115f2 100644 --- a/src/DataFrame-Tests/DataFrameTest.class.st +++ b/src/DataFrame-Tests/DataFrameTest.class.st @@ -4016,60 +4016,60 @@ DataFrameTest >> testRemoveRowsAt [ ] { #category : #removing } -DataFrameTest >> testRemoveRowsOfColumnElementsSatisfingOnColumnAllTrue [ +DataFrameTest >> testRemoveRowsWhereElementsInColumnAtSatisfy [ | expected aBlock | + df := DataFrame withRows: + #( #( 1 2 3 ) #( Dubai 4 5.0 ) #( nil 8.788 false ) ). - expected := DataFrame withColumns: #(). - aBlock := [ :rowElement | true ]. - - self assert: (df removeRowsOfColumnElementsSatisfying: aBlock onColumn: 2) equals: expected -] + df rowNames: #( A B C ). + df columnNames: #( X Y Z ). -{ #category : #removing } -DataFrameTest >> testRemoveRowsOfColumnElementsSatisfingOnColumnNamed [ + expected := DataFrame withRows: #( #( Dubai 4 5.0 ) ). - | expected aBlock | - df := DataFrame withRows: #( - (1 2 3) - (Dubai 4 5.0) - (nil 8.788 false)). + expected rowNames: #( B ). + expected columnNames: #( X Y Z ). - df rowNames: #(A B C). - df columnNames: #(X Y Z). + aBlock := [ :rowElement | rowElement ~= 4 ]. - expected := DataFrame withRows: #( - (Dubai 4 5.0)). + self + assert: (df removeRowsWhereElementsInColumnAt: 2 satisfy: aBlock) + equals: expected +] - expected rowNames: #(B). - expected columnNames: #(X Y Z). +{ #category : #removing } +DataFrameTest >> testRemoveRowsWhereElementsInColumnAtSatisfyAllTrue [ - aBlock := [ :rowElement | rowElement ~= 4 ]. + | expected aBlock | + expected := DataFrame withColumns: #( ). + aBlock := [ :rowElement | true ]. - self assert: (df removeRowsOfColumnElementsSatisfing: aBlock onColumnNamed: #Y) equals: expected + self + assert: (df removeRowsWhereElementsInColumnAt: 2 satisfy: aBlock) + equals: expected ] { #category : #removing } -DataFrameTest >> testRemoveRowsOfColumnElementsSatisfyingOnColumn [ +DataFrameTest >> testRemoveRowsWhereElementsInColumnNamedSatisfy [ | expected aBlock | - df := DataFrame withRows: #( - (1 2 3) - (Dubai 4 5.0) - (nil 8.788 false)). + df := DataFrame withRows: + #( #( 1 2 3 ) #( Dubai 4 5.0 ) #( nil 8.788 false ) ). - df rowNames: #(A B C). - df columnNames: #(X Y Z). + df rowNames: #( A B C ). + df columnNames: #( X Y Z ). - expected := DataFrame withRows: #( - (Dubai 4 5.0)). + expected := DataFrame withRows: #( #( Dubai 4 5.0 ) ). - expected rowNames: #(B). - expected columnNames: #(X Y Z). + expected rowNames: #( B ). + expected columnNames: #( X Y Z ). aBlock := [ :rowElement | rowElement ~= 4 ]. - self assert: (df removeRowsOfColumnElementsSatisfying: aBlock onColumn: 2) equals: expected + self + assert: + (df removeRowsWhereElementsInColumnNamed: #Y satisfy: aBlock) + equals: expected ] { #category : #removing } diff --git a/src/DataFrame/Array.extension.st b/src/DataFrame/Array.extension.st index 861f8bb1..3193db8d 100644 --- a/src/DataFrame/Array.extension.st +++ b/src/DataFrame/Array.extension.st @@ -1,6 +1,6 @@ -Extension { #name : 'Array' } +Extension { #name : #Array } -{ #category : '*DataFrame' } +{ #category : #'*DataFrame' } Array >> calculateDataType [ | types | @@ -18,7 +18,7 @@ Array >> calculateDataType [ ^ UndefinedObject ] -{ #category : '*DataFrame' } +{ #category : #'*DataFrame' } Array >> leastCommonSuperclassOf: firstClass and: secondClass [ "Determines the closest element of class hierarchy which is the common ancestor of two given classes" @@ -40,7 +40,7 @@ Array >> leastCommonSuperclassOf: firstClass and: secondClass [ ^ Object ] -{ #category : '*DataFrame' } +{ #category : #'*DataFrame' } Array >> sortIfPossible [ "Sort if possible" diff --git a/src/DataFrame/Behavior.extension.st b/src/DataFrame/Behavior.extension.st index bb7cec6e..96924460 100644 --- a/src/DataFrame/Behavior.extension.st +++ b/src/DataFrame/Behavior.extension.st @@ -1,6 +1,6 @@ -Extension { #name : 'Behavior' } +Extension { #name : #Behavior } -{ #category : '*DataFrame-Core-Base' } +{ #category : #'*DataFrame-Core-Base' } Behavior >> inheritsFromOrEqualTo: aClass [ "Answer whether the argument, aClass, is equal to the receiver or belongs to its superclass chain." diff --git a/src/DataFrame/Collection.extension.st b/src/DataFrame/Collection.extension.st index 96bc6818..30df4bf0 100644 --- a/src/DataFrame/Collection.extension.st +++ b/src/DataFrame/Collection.extension.st @@ -1,12 +1,12 @@ -Extension { #name : 'Collection' } +Extension { #name : #Collection } -{ #category : '*DataFrame-Core-Base' } +{ #category : #'*DataFrame-Core-Base' } Collection >> ** arg [ ^ self raisedTo: arg ] -{ #category : '*DataFrame' } +{ #category : #'*DataFrame' } Collection >> asDataFrame [ | numberOfRows numberOfColumns dataFrame | @@ -31,26 +31,26 @@ Collection >> asDataFrame [ ^ dataFrame ] -{ #category : '*DataFrame-Core-Base' } +{ #category : #'*DataFrame-Core-Base' } Collection >> asDataSeries [ ^ DataSeries newFrom: self ] -{ #category : '*DataFrame-Core-Base' } +{ #category : #'*DataFrame-Core-Base' } Collection >> closeTo: aCollection [ ^ (self - aCollection) inject: true into: [ :accum :each | accum and: (each closeTo: 0) ] ] -{ #category : '*DataFrame-Core-Base' } +{ #category : #'*DataFrame-Core-Base' } Collection >> variance [ ^ self stdev squared ] -{ #category : '*DataFrame' } +{ #category : #'*DataFrame' } Collection >> withSeries: aDataSeries collect: twoArgBlock [ "Collect and return the result of evaluating twoArgBlock with corresponding elements from this collection and aDataSeries." | result | diff --git a/src/DataFrame/DataCorrelationMethod.class.st b/src/DataFrame/DataCorrelationMethod.class.st index 2046a563..846c3a0e 100644 --- a/src/DataFrame/DataCorrelationMethod.class.st +++ b/src/DataFrame/DataCorrelationMethod.class.st @@ -1,12 +1,10 @@ Class { - #name : 'DataCorrelationMethod', - #superclass : 'Object', - #category : 'DataFrame-Math', - #package : 'DataFrame', - #tag : 'Math' + #name : #DataCorrelationMethod, + #superclass : #Object, + #category : 'DataFrame-Math' } -{ #category : 'comparing' } +{ #category : #comparing } DataCorrelationMethod class >> between: x and: y [ "Calcualte the correlation coefficient between two data series" self subclassResponsibility diff --git a/src/DataFrame/DataFrame.class.st b/src/DataFrame/DataFrame.class.st index 047bdd1a..422b67a3 100644 --- a/src/DataFrame/DataFrame.class.st +++ b/src/DataFrame/DataFrame.class.st @@ -148,26 +148,24 @@ Internal Representation and Key Implementation Points. DataFrameInternal defines how data is stored inside me. " Class { - #name : 'DataFrame', - #superclass : 'Collection', + #name : #DataFrame, + #superclass : #Collection, #instVars : [ 'contents', 'rowNames', 'columnNames', 'dataTypes' ], - #category : 'DataFrame-Core', - #package : 'DataFrame', - #tag : 'Core' + #category : #'DataFrame-Core' } -{ #category : 'instance creation' } +{ #category : #'instance creation' } DataFrame class >> new: aPoint [ ^ super new initialize: aPoint ] -{ #category : 'instance creation' } +{ #category : #'instance creation' } DataFrame class >> withColumnNames: anArrayOfColumnNames [ "Create an empty data frame with given column names" | numberOfColumns df | @@ -179,7 +177,7 @@ DataFrame class >> withColumnNames: anArrayOfColumnNames [ ^ df ] -{ #category : 'instance creation' } +{ #category : #'instance creation' } DataFrame class >> withColumnNames: anArrayOfColumnNames withRowNames: anArrayOfRowNames [ "Create an empty data frame with given column and row names" @@ -194,13 +192,13 @@ DataFrame class >> withColumnNames: anArrayOfColumnNames withRowNames: anArrayOf ^ df ] -{ #category : 'instance creation' } +{ #category : #'instance creation' } DataFrame class >> withColumns: anArrayOfArrays [ ^ self new initializeColumns: anArrayOfArrays ] -{ #category : 'instance creation' } +{ #category : #'instance creation' } DataFrame class >> withColumns: anArrayOfArrays columnNames: anArrayOfColumnNames [ | df | @@ -209,7 +207,7 @@ DataFrame class >> withColumns: anArrayOfArrays columnNames: anArrayOfColumnName ^ df ] -{ #category : 'instance creation' } +{ #category : #'instance creation' } DataFrame class >> withColumns: anArrayOfArrays rowNames: anArrayOfRowNames [ ^ anArrayOfArrays ifNotEmpty: [ (self withColumns: anArrayOfArrays) @@ -218,7 +216,7 @@ DataFrame class >> withColumns: anArrayOfArrays rowNames: anArrayOfRowNames [ ifEmpty: [ self withRowNames: anArrayOfRowNames ] ] -{ #category : 'instance creation' } +{ #category : #'instance creation' } DataFrame class >> withColumns: anArrayOfArrays rowNames: anArrayOfRowNames columnNames: anArrayOfColumnNames [ ^ anArrayOfArrays ifNotEmpty: [ (self withColumns: anArrayOfArrays) @@ -228,7 +226,7 @@ DataFrame class >> withColumns: anArrayOfArrays rowNames: anArrayOfRowNames colu ifEmpty: [ self withRowNames: anArrayOfRowNames ] ] -{ #category : 'instance creation' } +{ #category : #'instance creation' } DataFrame class >> withDataFrameInternal: aDataFrameIndernal rowNames: rows columnNames: columns [ ^ self new @@ -237,7 +235,7 @@ DataFrame class >> withDataFrameInternal: aDataFrameIndernal rowNames: rows colu columnNames: columns ] -{ #category : 'instance creation' } +{ #category : #'instance creation' } DataFrame class >> withRowNames: anArrayOfRowNames [ "Create an empty data frame with given row names" | numberOfRows df | @@ -249,7 +247,7 @@ DataFrame class >> withRowNames: anArrayOfRowNames [ ^ df ] -{ #category : 'instance creation' } +{ #category : #'instance creation' } DataFrame class >> withRowNames: anArrayOfRowNames columnNames: anArrayOfColumnNames [ "Create an empty data frame with given row and column names" | numberOfRows numberOfColumns df | @@ -265,13 +263,13 @@ DataFrame class >> withRowNames: anArrayOfRowNames columnNames: anArrayOfColumnN ^ df ] -{ #category : 'instance creation' } +{ #category : #'instance creation' } DataFrame class >> withRows: anArrayOfArrays [ ^ self new initializeRows: anArrayOfArrays ] -{ #category : 'instance creation' } +{ #category : #'instance creation' } DataFrame class >> withRows: anArrayOfArrays columnNames: anArrayOfColumnNames [ ^ anArrayOfArrays ifNotEmpty: [ (self withRows: anArrayOfArrays) @@ -280,7 +278,7 @@ DataFrame class >> withRows: anArrayOfArrays columnNames: anArrayOfColumnNames [ ifEmpty: [ self withColumnNames: anArrayOfColumnNames ] ] -{ #category : 'instance creation' } +{ #category : #'instance creation' } DataFrame class >> withRows: anArrayOfArrays rowNames: anArrayOfRowNames [ | df | @@ -289,7 +287,7 @@ DataFrame class >> withRows: anArrayOfArrays rowNames: anArrayOfRowNames [ ^ df ] -{ #category : 'instance creation' } +{ #category : #'instance creation' } DataFrame class >> withRows: anArrayOfArrays rowNames: anArrayOfRowNames columnNames: anArrayOfColumnNames [ ^ anArrayOfArrays ifNotEmpty: [ (self withRows: anArrayOfArrays) @@ -299,7 +297,7 @@ DataFrame class >> withRows: anArrayOfArrays rowNames: anArrayOfRowNames columnN ifEmpty: [ self withColumnNames: anArrayOfColumnNames ] ] -{ #category : 'comparing' } +{ #category : #comparing } DataFrame >> , aDataFrame [ | dataFrame rows | @@ -313,7 +311,7 @@ DataFrame >> , aDataFrame [ ^ dataFrame ] -{ #category : 'comparing' } +{ #category : #comparing } DataFrame >> = aDataFrame [ "Most objects will fail here" @@ -332,7 +330,7 @@ DataFrame >> = aDataFrame [ ^ aDataFrame contents = self contents ] -{ #category : 'adding' } +{ #category : #adding } DataFrame >> add: aDataSeries [ "Add DataSeries as a new row at the end" @@ -342,7 +340,7 @@ DataFrame >> add: aDataSeries [ self addRow: aDataSeries ] -{ #category : 'adding' } +{ #category : #adding } DataFrame >> addColumn: aDataSeries [ "Add DataSeries as a new column at the end" @@ -356,7 +354,7 @@ DataFrame >> addColumn: aDataSeries [ put: aDataSeries calculateDataType ] -{ #category : 'adding' } +{ #category : #adding } DataFrame >> addColumn: aDataSeries atPosition: aNumber [ "Add DataSeries as a new column at the given position" @@ -370,13 +368,13 @@ DataFrame >> addColumn: aDataSeries atPosition: aNumber [ atPosition: aNumber ] -{ #category : 'adding' } +{ #category : #adding } DataFrame >> addColumn: anArray named: aString [ "Add a new column at the end" self addColumn: anArray named: aString atPosition: self numberOfColumns + 1 ] -{ #category : 'adding' } +{ #category : #adding } DataFrame >> addColumn: anArray named: aString atPosition: aNumber [ "Add a new column at the given position" (self columnNames includes: aString) @@ -387,31 +385,31 @@ DataFrame >> addColumn: anArray named: aString atPosition: aNumber [ dataTypes at: aString put: (anArray asDataSeries calculateDataType) ] -{ #category : 'adding' } +{ #category : #adding } DataFrame >> addEmptyColumnNamed: aString [ "Add an empty column at the end" self addEmptyColumnNamed: aString atPosition: self numberOfColumns + 1 ] -{ #category : 'adding' } +{ #category : #adding } DataFrame >> addEmptyColumnNamed: aString atPosition: aNumber [ "Add an empty column at the given position" self addColumn: (Array new: self numberOfRows) named: aString atPosition: aNumber ] -{ #category : 'adding' } +{ #category : #adding } DataFrame >> addEmptyRowNamed: aString [ "Add an empty row at the end" self addEmptyRowNamed: aString atPosition: self numberOfRows + 1 ] -{ #category : 'adding' } +{ #category : #adding } DataFrame >> addEmptyRowNamed: aString atPosition: aNumber [ "Add an empty row at the given position" self addRow: (Array new: self numberOfColumns) named: aString atPosition: aNumber ] -{ #category : 'adding' } +{ #category : #adding } DataFrame >> addRow: aDataSeries [ "Add DataSeries as a new row at the end" @@ -422,7 +420,7 @@ DataFrame >> addRow: aDataSeries [ self addRow: aDataSeries atPosition: self numberOfRows + 1 ] -{ #category : 'adding' } +{ #category : #adding } DataFrame >> addRow: aDataSeries atPosition: aNumber [ "Add DataSeries as a new row at the given position" @@ -441,13 +439,13 @@ DataFrame >> addRow: aDataSeries atPosition: aNumber [ self addRow: row named: aDataSeries name atPosition: aNumber ] -{ #category : 'adding' } +{ #category : #adding } DataFrame >> addRow: anArray named: aString [ "Add a new row at the end" self addRow: anArray named: aString atPosition: self numberOfRows + 1 ] -{ #category : 'adding' } +{ #category : #adding } DataFrame >> addRow: anArray named: aString atPosition: aNumber [ "Add a new row at the given position" (self rowNames includes: aString) @@ -457,7 +455,7 @@ DataFrame >> addRow: anArray named: aString atPosition: aNumber [ rowNames add: aString afterIndex: aNumber - 1 ] -{ #category : 'applying' } +{ #category : #applying } DataFrame >> applyElementwise: aBlock [ "Applies a given block to all columns of a data frame" @@ -466,7 +464,7 @@ DataFrame >> applyElementwise: aBlock [ self toColumns: self columnNames applyElementwise: aBlock ] -{ #category : 'enumerating' } +{ #category : #enumerating } DataFrame >> applySize [ "Answer a new instance of the receiver with the size of each element at each element position" @@ -476,7 +474,7 @@ DataFrame >> applySize [ name: i ] ] -{ #category : 'private' } +{ #category : #private } DataFrame >> applyToAllColumns: aSymbol [ "Sends the unary selector, aSymbol, to all columns of DataFrame and collects the result into a DataSeries object. Used by statistical functions of DataFrame" @@ -493,7 +491,7 @@ DataFrame >> applyToAllColumns: aSymbol [ ^ series ] -{ #category : 'converting' } +{ #category : #converting } DataFrame >> asArray [ "Converts DataFrame to the array of rows" @@ -504,7 +502,7 @@ DataFrame >> asArray [ ^ self asArrayOfRows ] -{ #category : 'converting' } +{ #category : #converting } DataFrame >> asArrayOfColumns [ "Converts DataFrame to the array of columns" @@ -515,7 +513,7 @@ DataFrame >> asArrayOfColumns [ ^ contents asArrayOfColumns ] -{ #category : 'converting' } +{ #category : #converting } DataFrame >> asArrayOfRows [ "Converts DataFrame to the array of rows" @@ -526,7 +524,7 @@ DataFrame >> asArrayOfRows [ ^ contents asArrayOfRows ] -{ #category : 'converting' } +{ #category : #converting } DataFrame >> asArrayOfRowsWithName [ "Answer an OrderedCollection where each item is an Array with: - the name of that row, in first place, @@ -539,7 +537,7 @@ DataFrame >> asArrayOfRowsWithName [ nextPutAll: (self at: index) ] ] ] -{ #category : 'accessing' } +{ #category : #accessing } DataFrame >> at: aNumber [ "Returns the row of a DataFrame at row index aNumber" @@ -550,7 +548,7 @@ DataFrame >> at: aNumber [ ^ self rowAt: aNumber ] -{ #category : 'accessing' } +{ #category : #accessing } DataFrame >> at: rowNumber at: columnNumber [ "Returns the value whose row index is rowNumber and column index is columnNumber" @@ -561,7 +559,7 @@ DataFrame >> at: rowNumber at: columnNumber [ ^ contents at: rowNumber at: columnNumber ] -{ #category : 'accessing' } +{ #category : #accessing } DataFrame >> at: rowNumber at: columnNumber put: value [ "Replaces the original value of a DataFrame at row index rowNumber and column index columnNumber with a given value" @@ -572,7 +570,7 @@ DataFrame >> at: rowNumber at: columnNumber put: value [ contents at: rowNumber at: columnNumber put: value ] -{ #category : 'accessing' } +{ #category : #accessing } DataFrame >> at: rowIndex at: columnIndex transform: aBlock [ "Evaluate aBlock on the value at the intersection of rowIndex and columnIndex and replace that value with the result" @@ -583,7 +581,7 @@ DataFrame >> at: rowIndex at: columnIndex transform: aBlock [ self at: rowIndex at: columnIndex put: (aBlock value: value) ] -{ #category : 'accessing' } +{ #category : #accessing } DataFrame >> at: aNumber transform: aBlock [ "Evaluate aBlock on the row at aNumber and replace that row with the result" @@ -592,7 +590,7 @@ DataFrame >> at: aNumber transform: aBlock [ ^ self rowAt: aNumber transform: aBlock ] -{ #category : 'accessing' } +{ #category : #accessing } DataFrame >> atAll: indexes [ "For polymorphisme with other collections." @@ -603,7 +601,7 @@ DataFrame >> atAll: indexes [ ^ self rowsAt: indexes ] -{ #category : 'statistics' } +{ #category : #statistics } DataFrame >> average [ "Average is the ratio of sum of values in a set to the number of values in the set" @@ -612,7 +610,7 @@ DataFrame >> average [ ^ self applyToAllColumns: #average ] -{ #category : 'data-types' } +{ #category : #'data-types' } DataFrame >> calculateDataTypes [ self asArrayOfColumns doWithIndex: [ :column :i | @@ -621,7 +619,7 @@ DataFrame >> calculateDataTypes [ put: column calculateDataType ] ] -{ #category : 'comparing' } +{ #category : #comparing } DataFrame >> closeTo: aDataFrame [ "(#(#(1 2) #(3 4)) asDataFrame closeTo: #(#(1.0001 1.9999) #(3 4.0001)) asDataFrame ) >>> true" @@ -646,7 +644,7 @@ DataFrame >> closeTo: aDataFrame [ ^ true ] -{ #category : 'comparing' } +{ #category : #comparing } DataFrame >> closeTo: aDataFrame precision: epsilon [ "(#(#(1 2) #(3 4)) asDataFrame closeTo: #(#(1.2 2.19) #(3 4)) asDataFrame precision: 0.2 ) >>> true" @@ -670,7 +668,7 @@ DataFrame >> closeTo: aDataFrame precision: epsilon [ ^ true ] -{ #category : 'enumerating' } +{ #category : #enumerating } DataFrame >> collect: aBlock [ "Overrides the Collection>>collect to create DataFrame with the same number of columns as values in the first row" | firstRow newDataFrame | @@ -683,7 +681,7 @@ DataFrame >> collect: aBlock [ ^ newDataFrame ] -{ #category : 'enumerating' } +{ #category : #enumerating } DataFrame >> collectWithIndex: aBlock [ "Overrides the Collection>>collect to create DataFrame with the same number of columns as values in the first row" | firstRow newDataFrame | @@ -696,7 +694,7 @@ DataFrame >> collectWithIndex: aBlock [ ^ newDataFrame ] -{ #category : 'accessing' } +{ #category : #accessing } DataFrame >> column: columnName [ "Answer the column with columnName as a DataSeries or signal an exception if a column with that name was not found" | index | @@ -704,7 +702,7 @@ DataFrame >> column: columnName [ ^ self columnAt: index ] -{ #category : 'accessing' } +{ #category : #accessing } DataFrame >> column: columnName ifAbsent: exceptionBlock [ "Answer the column with columnName as a DataSeries or evaluate exception block if a column with that name was not found" | index | @@ -715,7 +713,7 @@ DataFrame >> column: columnName ifAbsent: exceptionBlock [ ^ self columnAt: index ] -{ #category : 'accessing' } +{ #category : #accessing } DataFrame >> column: columnName put: anArray [ "Replace the current values of column with columnName with anArray or signal an exception if a column with that name was not found" | index | @@ -723,7 +721,7 @@ DataFrame >> column: columnName put: anArray [ ^ self columnAt: index put: anArray ] -{ #category : 'accessing' } +{ #category : #accessing } DataFrame >> column: columnName put: anArray ifAbsent: exceptionBlock [ "Replace the current values of column with columnName with anArray or evaluate exception block if a column with that name was not found" | index | @@ -734,7 +732,7 @@ DataFrame >> column: columnName put: anArray ifAbsent: exceptionBlock [ ^ self columnAt: index put: anArray ] -{ #category : 'accessing' } +{ #category : #accessing } DataFrame >> column: columnName transform: aBlock [ "Evaluate aBlock on the column with columnName and replace column with the result. Signal an exception if columnName was not found" | column | @@ -742,7 +740,7 @@ DataFrame >> column: columnName transform: aBlock [ self column: columnName put: (aBlock value: column) asArray ] -{ #category : 'accessing' } +{ #category : #accessing } DataFrame >> column: columnName transform: aBlock ifAbsent: exceptionBlock [ "Evaluate aBlock on the column with columnName and replace column with the result. Evaluate exceptionBlock if columnName was not found" | column | @@ -750,7 +748,7 @@ DataFrame >> column: columnName transform: aBlock ifAbsent: exceptionBlock [ self column: columnName put: (aBlock value: column) ] -{ #category : 'accessing' } +{ #category : #accessing } DataFrame >> columnAt: aNumber [ "Returns the column of a DataFrame at column index aNumber" @@ -765,7 +763,7 @@ DataFrame >> columnAt: aNumber [ yourself ] -{ #category : 'accessing' } +{ #category : #accessing } DataFrame >> columnAt: aNumber put: anArray [ "Replaces the column at column index aNumber with contents of the array anArray" @@ -778,7 +776,7 @@ DataFrame >> columnAt: aNumber put: anArray [ contents columnAt: aNumber put: anArray ] -{ #category : 'accessing' } +{ #category : #accessing } DataFrame >> columnAt: aNumber transform: aBlock [ "Evaluate aBlock on the column at aNumber and replace that column with the result" @@ -789,14 +787,14 @@ DataFrame >> columnAt: aNumber transform: aBlock [ self columnAt: aNumber put: (aBlock value: column) asArray ] -{ #category : 'accessing' } +{ #category : #accessing } DataFrame >> columnNames [ "Returns the column names of a DataFrame" ^ columnNames ] -{ #category : 'accessing' } +{ #category : #accessing } DataFrame >> columnNames: aCollection [ "Sets the column names of a DataFrame with contents of the collection aCollection" @@ -816,7 +814,7 @@ DataFrame >> columnNames: aCollection [ columnNames := aCollection asOrderedCollection ] -{ #category : 'accessing' } +{ #category : #accessing } DataFrame >> columns [ "Returns a collection of all columns" @@ -827,7 +825,7 @@ DataFrame >> columns [ ^ (1 to: self numberOfColumns) collect: [ :j | self columnAt: j ] ] -{ #category : 'accessing' } +{ #category : #accessing } DataFrame >> columns: anArrayOfNames [ "Returns a collection of columns whose column names are present in the array anArrayOfNames" @@ -840,7 +838,7 @@ DataFrame >> columns: anArrayOfNames [ ^ self columnsAt: anArrayOfNumbers ] -{ #category : 'accessing' } +{ #category : #accessing } DataFrame >> columns: anArrayOfColumnNames put: anArrayOfArrays [ "Replaces the columns whose column names are present in the array anArrayOfColumnNames with the contents of the array of arrays anArrayOfArrays" @@ -851,14 +849,14 @@ DataFrame >> columns: anArrayOfColumnNames put: anArrayOfArrays [ self column: name put: array ] ] -{ #category : 'accessing' } +{ #category : #accessing } DataFrame >> columnsAllBut: aCollectionOfColumnNames [ "Returns a of except those present in aCollectionOfColumnNames" ^ self columns: (self columnNames copyWithoutAll: aCollectionOfColumnNames) ] -{ #category : 'accessing' } +{ #category : #accessing } DataFrame >> columnsAt: anArrayOfNumbers [ "Returns a collection of columns whose column indices are present in the array anArrayOfNumbers" @@ -876,7 +874,7 @@ DataFrame >> columnsAt: anArrayOfNumbers [ columnNames: newColumnNames ] -{ #category : 'accessing' } +{ #category : #accessing } DataFrame >> columnsAt: anArrayOfNumbers put: anArrayOfArrays [ "Replaces the columns whose column indices are present in the array anArrayOfNumbers with the contents of the array of arrays anArrayOfArrays" @@ -890,7 +888,7 @@ DataFrame >> columnsAt: anArrayOfNumbers put: anArrayOfArrays [ do: [ :index :array | self columnAt: index put: array ] ] -{ #category : 'accessing' } +{ #category : #accessing } DataFrame >> columnsFrom: begin to: end [ "Returns a collection of columns whose column indices are present between begin and end" @@ -906,7 +904,7 @@ DataFrame >> columnsFrom: begin to: end [ ^ self columnsAt: array ] -{ #category : 'accessing' } +{ #category : #accessing } DataFrame >> columnsFrom: firstNumber to: secondNumber put: anArrayOfArrays [ "Replaces the columns whose column indices are present between firstNumber and secondNumber with the contents of the array of arrays anArrayOfArrays" @@ -924,14 +922,14 @@ DataFrame >> columnsFrom: firstNumber to: secondNumber put: anArrayOfArrays [ self columnAt: columnIndex put: (anArrayOfArrays at: i) ] ] -{ #category : 'accessing' } +{ #category : #accessing } DataFrame >> contents [ "Returns all the values of the DataFrame" ^ contents ] -{ #category : 'copying' } +{ #category : #copying } DataFrame >> copyReplace: missingValue in2DCollectionBy: arrayOfReplacementValues [ "I am a 2D collection and the goal is to return a copy replace the missing values by the values of my second parameter. The good value is the index of the missing value in the sub collection. @@ -946,13 +944,13 @@ DataFrame >> copyReplace: missingValue in2DCollectionBy: arrayOfReplacementValue ^ copy ] -{ #category : 'statistics' } +{ #category : #statistics } DataFrame >> correlationMatrix [ "Calculate a correlation matrix (correlation of every column with every column) using Pearson's correlation coefficient" ^ self correlationMatrixUsing: DataPearsonCorrelationMethod ] -{ #category : 'statistics' } +{ #category : #statistics } DataFrame >> correlationMatrixUsing: aCorrelationCoefficient [ "Calculate a correlation matrix (correlation of every column with every column) using the given correlation coefficient" @@ -983,7 +981,7 @@ DataFrame >> correlationMatrixUsing: aCorrelationCoefficient [ ^ correlationMatrix ] -{ #category : 'accessing' } +{ #category : #accessing } DataFrame >> crossTabulate: colName1 with: colName2 [ "Returns the cross tabulation of a column named colName1 with the column named colName2 of the DataFrame" @@ -995,7 +993,7 @@ DataFrame >> crossTabulate: colName1 with: colName2 [ ^ col1 crossTabulateWith: col2 ] -{ #category : 'copying' } +{ #category : #copying } DataFrame >> dataPreProcessingEncodeWith: anEncoder [ "This method is here to speed up pharo-ai/data-preprocessing algos without coupling both projects." @@ -1011,54 +1009,54 @@ DataFrame >> dataPreProcessingEncodeWith: anEncoder [ ^ copy ] -{ #category : 'data-types' } +{ #category : #'data-types' } DataFrame >> dataTypeOfColumn: aColumnName [ "Given a column name of the DataFrame, it returns the data type of that column" ^ dataTypes at: aColumnName ] -{ #category : 'data-types' } +{ #category : #'data-types' } DataFrame >> dataTypeOfColumn: aColumnName put: aDataType [ "Given a column name and a data type, it replaces the original data type of that column with the data type that was given as a parameter" dataTypes at: aColumnName put: aDataType ] -{ #category : 'data-types' } +{ #category : #'data-types' } DataFrame >> dataTypeOfColumnAt: aNumber [ "Given a column index of the DataFrame, it returns the data type of that column" ^ self dataTypeOfColumn: (columnNames at: aNumber) ] -{ #category : 'data-types' } +{ #category : #'data-types' } DataFrame >> dataTypeOfColumnAt: aNumber put: aDataType [ "Given a column index and a data type, it replaces the original data type of that column with the data type that was given as a parameter" ^ self dataTypeOfColumn: (columnNames at: aNumber) put: aDataType ] -{ #category : 'accessing' } +{ #category : #accessing } DataFrame >> dataTypes [ "Returns the data types of each column" ^ dataTypes ] -{ #category : 'accessing' } +{ #category : #accessing } DataFrame >> dataTypes: anObject [ dataTypes := anObject ] -{ #category : 'accessing' } +{ #category : #accessing } DataFrame >> defaultHeadTailSize [ ^ 5 ] -{ #category : 'statistics' } +{ #category : #statistics } DataFrame >> describe [ "Answer another data frame with statistics describing the columns of this data frame" @@ -1081,7 +1079,7 @@ DataFrame >> describe [ columnNames: #( count mean std min '25%' '50%' '75%' max dtype ) ] -{ #category : 'accessing' } +{ #category : #accessing } DataFrame >> dimensions [ "Returns the number of rows and number of columns in a DataFrame" @@ -1094,7 +1092,7 @@ DataFrame >> dimensions [ ^ self numberOfRows @ self numberOfColumns ] -{ #category : 'enumerating' } +{ #category : #enumerating } DataFrame >> do: aBlock [ "We enumerate through the data enrties - through rows of a data frame" | row | @@ -1106,14 +1104,14 @@ DataFrame >> do: aBlock [ self rowAt: i put: row asArray ] ] -{ #category : 'find-select' } +{ #category : #'find-select' } DataFrame >> findAll: anObject atColumn: columnName [ "Returns rowNames of rows having anObject at columnName" ^ self rowNames select: [ :row | ((self column: columnName) at: row) = anObject ] ] -{ #category : 'find-select' } +{ #category : #'find-select' } DataFrame >> findAllIndicesOf: anObject atColumn: columnName [ "Returns indices of rows having anObject at columnName" | output | @@ -1122,7 +1120,7 @@ DataFrame >> findAllIndicesOf: anObject atColumn: columnName [ ^ output ] -{ #category : 'accessing' } +{ #category : #accessing } DataFrame >> first [ "Returns the first row of the DataFrame" @@ -1133,7 +1131,7 @@ DataFrame >> first [ ^ self at: 1 ] -{ #category : 'statistics' } +{ #category : #statistics } DataFrame >> firstQuartile [ "25% of the values in a set are smaller than or equal to the first Quartile of that set" @@ -1142,7 +1140,7 @@ DataFrame >> firstQuartile [ ^ self applyToAllColumns: #firstQuartile ] -{ #category : 'private' } +{ #category : #private } DataFrame >> getJointColumnsWith: aDataFrame [ "comment stating purpose of message" @@ -1163,14 +1161,14 @@ DataFrame >> getJointColumnsWith: aDataFrame [ ^ outputColumns ] -{ #category : 'grouping' } +{ #category : #grouping } DataFrame >> group: anAggregateColumnName by: aGroupColumnName aggregateUsing: aBlock [ "Group the values of the cloumn named anAggregateColumnName by the unique values of the column named aGroupColumnName, aggregate them using aBlock. With the same name as anAggregateColumnName" ^ self group: anAggregateColumnName by: aGroupColumnName aggregateUsing: aBlock as: anAggregateColumnName ] -{ #category : 'grouping' } +{ #category : #grouping } DataFrame >> group: anAggregateColumnName by: aGroupColumnName aggregateUsing: aBlock as: aNewColumnName [ "Group the values of the cloumn named anAggregateColumnName by the unique values of the column named aGroupColumnName, aggregate them using aBlock. With a new column name aNewColumnName" @@ -1185,7 +1183,7 @@ DataFrame >> group: anAggregateColumnName by: aGroupColumnName aggregateUsing: a ^ aggregateColumn groupBy: groupColumn aggregateUsing: aBlock as: aNewColumnName ] -{ #category : 'grouping' } +{ #category : #grouping } DataFrame >> groupBy: columnName aggregate: anArrayOfUsingAsStatements [ | aggregatedColumns | @@ -1199,7 +1197,7 @@ DataFrame >> groupBy: columnName aggregate: anArrayOfUsingAsStatements [ columnNames: (aggregatedColumns collect: #name) ] -{ #category : 'replacing' } +{ #category : #replacing } DataFrame >> hasNils [ "Returns true if there is atleast one nil value in the data frame. Returns false if there are no nil values in the dataframe" @@ -1217,7 +1215,7 @@ DataFrame >> hasNils [ ^ false ] -{ #category : 'replacing' } +{ #category : #replacing } DataFrame >> hasNilsByColumn [ "Returns a dictionary which indicates the presence of any nil values column wise" @@ -1234,14 +1232,14 @@ DataFrame >> hasNilsByColumn [ ^ dictionary ] -{ #category : 'accessing' } +{ #category : #accessing } DataFrame >> head [ "Returns the first 5 rows of the DataFrame" ^ self head: self defaultHeadTailSize ] -{ #category : 'accessing' } +{ #category : #accessing } DataFrame >> head: aNumber [ "Returns the first aNumber rows of a DataFrame" @@ -1252,7 +1250,7 @@ DataFrame >> head: aNumber [ ^ self rowsAt: (1 to: (self numberOfRows min: aNumber)) ] -{ #category : 'accessing' } +{ #category : #accessing } DataFrame >> indexOfColumnNamed: columnName [ "Answer the index of a column with a given name or signal an exception if the column with that name was not found" ^ self @@ -1260,7 +1258,7 @@ DataFrame >> indexOfColumnNamed: columnName [ ifAbsent: [ self error: ('Column ', columnName, ' was not found') ] ] -{ #category : 'accessing' } +{ #category : #accessing } DataFrame >> indexOfColumnNamed: columnName ifAbsent: exceptionBlock [ "Answer the index of a column with a given name or evaluate the exceptionBlock if the column with that name was not found" ^ self columnNames @@ -1268,7 +1266,7 @@ DataFrame >> indexOfColumnNamed: columnName ifAbsent: exceptionBlock [ ifAbsent: exceptionBlock ] -{ #category : 'accessing' } +{ #category : #accessing } DataFrame >> indexOfRowNamed: rowName [ "Answer the index of a row with a given name or signal an exception if the row with that name was not found" ^ self @@ -1276,7 +1274,7 @@ DataFrame >> indexOfRowNamed: rowName [ ifAbsent: [ self error: ('Row ', rowName, ' was not found') ] ] -{ #category : 'accessing' } +{ #category : #accessing } DataFrame >> indexOfRowNamed: rowName ifAbsent: exceptionBlock [ "Answer the index of a row with a given name or evaluate the exceptionBlock if the row with that name was not found" ^ self rowNames @@ -1284,7 +1282,7 @@ DataFrame >> indexOfRowNamed: rowName ifAbsent: exceptionBlock [ ifAbsent: exceptionBlock ] -{ #category : 'printing' } +{ #category : #printing } DataFrame >> info [ "Prints the number of entries and number of columns of a data frame. For each column of the data frame, it prints the column index, column name, number of non-nil values in the column and the data type of the contents of the column" @@ -1317,7 +1315,7 @@ DataFrame >> info [ cr ] ] ] -{ #category : 'initialization' } +{ #category : #initialization } DataFrame >> initialize [ super initialize. @@ -1328,7 +1326,7 @@ DataFrame >> initialize [ self calculateDataTypes ] -{ #category : 'initialization' } +{ #category : #initialization } DataFrame >> initialize: aPoint [ super initialize. @@ -1338,7 +1336,7 @@ DataFrame >> initialize: aPoint [ self calculateDataTypes ] -{ #category : 'initialization' } +{ #category : #initialization } DataFrame >> initializeColumns: anArrayOfArrays [ contents := DataFrameInternal withColumns: anArrayOfArrays. @@ -1346,7 +1344,7 @@ DataFrame >> initializeColumns: anArrayOfArrays [ self calculateDataTypes ] -{ #category : 'initialization' } +{ #category : #initialization } DataFrame >> initializeContents: aDataFrameInternal rowNames: rows columnNames: columns [ super initialize. @@ -1357,7 +1355,7 @@ DataFrame >> initializeContents: aDataFrameInternal rowNames: rows columnNames: self calculateDataTypes ] -{ #category : 'initialization' } +{ #category : #initialization } DataFrame >> initializeRows: anArrayOfArrays [ contents := DataFrameInternal withRows: anArrayOfArrays. @@ -1365,7 +1363,7 @@ DataFrame >> initializeRows: anArrayOfArrays [ self calculateDataTypes ] -{ #category : 'enumerating' } +{ #category : #enumerating } DataFrame >> inject: thisValue into: binaryBlock [ | series | series := super inject: thisValue into: binaryBlock. @@ -1373,7 +1371,7 @@ DataFrame >> inject: thisValue into: binaryBlock [ ^ series ] -{ #category : 'splitjoin' } +{ #category : #splitjoin } DataFrame >> innerJoin: aDataFrame [ "Performs inner join on aDataFrame with rowNames as keys" @@ -1392,13 +1390,13 @@ DataFrame >> innerJoin: aDataFrame [ ^ outputDf ] -{ #category : 'splitjoin' } +{ #category : #splitjoin } DataFrame >> innerJoin: aDataFrame on: aColumnName [ "Inner join of self with aDataFrame on a column that has a name aColumnName in both data frames" ^ self innerJoin: aDataFrame onLeft: aColumnName onRight: aColumnName ] -{ #category : 'splitjoin' } +{ #category : #splitjoin } DataFrame >> innerJoin: aDataFrame onLeft: leftColumn onRight: rightColumn [ "Performs inner join on aDataFrame with rowNames as keys. rowNames are not preserved. @@ -1433,7 +1431,7 @@ DataFrame >> innerJoin: aDataFrame onLeft: leftColumn onRight: rightColumn [ ^ outputDf ] -{ #category : 'newtools' } +{ #category : #newtools } DataFrame >> inspectionItems: aBuilder [ @@ -1461,7 +1459,7 @@ DataFrame >> inspectionItems: aBuilder [ ^ table ] -{ #category : 'statistics' } +{ #category : #statistics } DataFrame >> interquartileRange [ "The Inter Quartile Range is the difference between the third Quartile and the first Quartile" @@ -1470,7 +1468,7 @@ DataFrame >> interquartileRange [ ^ self applyToAllColumns: #interquartileRange ] -{ #category : 'splitjoin' } +{ #category : #splitjoin } DataFrame >> leftJoin: aDataFrame [ "Performs left join on aDataFrame with rowNames as keys" @@ -1488,13 +1486,13 @@ DataFrame >> leftJoin: aDataFrame [ ^ outputDf ] -{ #category : 'splitjoin' } +{ #category : #splitjoin } DataFrame >> leftJoin: aDataFrame on: aColumnName [ "Left join of self with aDataFrame on a column that has a name aColumnName in both data frames" ^ self leftJoin: aDataFrame onLeft: aColumnName onRight: aColumnName ] -{ #category : 'splitjoin' } +{ #category : #splitjoin } DataFrame >> leftJoin: aDataFrame onLeft: leftColumn onRight: rightColumn [ "Performs left join on aDataFrame with rowNames as keys. rowNames are not preserved. @@ -1533,7 +1531,7 @@ DataFrame >> leftJoin: aDataFrame onLeft: leftColumn onRight: rightColumn [ ^ outputDf ] -{ #category : 'statistics' } +{ #category : #statistics } DataFrame >> max [ "Max is the largest value present in a set of values" @@ -1542,7 +1540,7 @@ DataFrame >> max [ ^ self applyToAllColumns: #max ] -{ #category : 'statistics' } +{ #category : #statistics } DataFrame >> median [ "50% of data points have a value smaller or equal to the median . The median of a set of values is the middle value of the set when the set is arranged in increasing order." @@ -1551,7 +1549,7 @@ DataFrame >> median [ ^ self applyToAllColumns: #median ] -{ #category : 'statistics' } +{ #category : #statistics } DataFrame >> min [ "Min is the smallest value present in a set of values" @@ -1560,7 +1558,7 @@ DataFrame >> min [ ^ self applyToAllColumns: #min ] -{ #category : 'statistics' } +{ #category : #statistics } DataFrame >> mode [ "The mode of a set of values is the value that appears most often. " @@ -1569,7 +1567,7 @@ DataFrame >> mode [ ^ self applyToAllColumns: #mode ] -{ #category : 'converting' } +{ #category : #converting } DataFrame >> normalized [ "This methods returns a new DataFrame, without altering this one, that has all the columns normalized." @@ -1583,7 +1581,7 @@ DataFrame >> normalized [ ^ self class withColumns: normalizedColumns columnNames: self columnNames ] -{ #category : 'accessing' } +{ #category : #accessing } DataFrame >> numberOfColumns [ "Returns the number of columns of a DataFrame" @@ -1594,7 +1592,7 @@ DataFrame >> numberOfColumns [ ^ contents numberOfColumns ] -{ #category : 'replacing' } +{ #category : #replacing } DataFrame >> numberOfNils [ "Returns a dictionary which indicates the number of nil values column wise" @@ -1612,7 +1610,7 @@ DataFrame >> numberOfNils [ ^ dictionary ] -{ #category : 'accessing' } +{ #category : #accessing } DataFrame >> numberOfRows [ "Returns the number of rows of a DataFrame" @@ -1623,7 +1621,7 @@ DataFrame >> numberOfRows [ ^ contents numberOfRows ] -{ #category : 'accessing' } +{ #category : #accessing } DataFrame >> numericalColumnNames [ "Returns the names of all numerical columns of the dataframe" @@ -1631,7 +1629,7 @@ DataFrame >> numericalColumnNames [ (self dataTypes at: columnName) includesBehavior: Number ] ] -{ #category : 'accessing' } +{ #category : #accessing } DataFrame >> numericalColumns [ "Returns all numerical columns of the dataframe" @@ -1639,7 +1637,7 @@ DataFrame >> numericalColumns [ (self dataTypes at: column name) includesBehavior: Number ] ] -{ #category : 'splitjoin' } +{ #category : #splitjoin } DataFrame >> outerJoin: aDataFrame [ "Performs outer join on aDataFrame with rowNames as keys" @@ -1663,13 +1661,13 @@ DataFrame >> outerJoin: aDataFrame [ ^ outputDf ] -{ #category : 'splitjoin' } +{ #category : #splitjoin } DataFrame >> outerJoin: aDataFrame on: aColumnName [ "Outer join of self with aDataFrame on a column that has a name aColumnName in both data frames" ^ self outerJoin: aDataFrame onLeft: aColumnName onRight: aColumnName ] -{ #category : 'splitjoin' } +{ #category : #splitjoin } DataFrame >> outerJoin: aDataFrame onLeft: leftColumn onRight: rightColumn [ "Performs outer join on aDataFrame with rowNames as keys. rowNames are not preserved. @@ -1722,7 +1720,7 @@ DataFrame >> outerJoin: aDataFrame onLeft: leftColumn onRight: rightColumn [ ^ outputDf ] -{ #category : 'copying' } +{ #category : #copying } DataFrame >> postCopy [ super postCopy. @@ -1732,7 +1730,7 @@ DataFrame >> postCopy [ dataTypes := dataTypes copy ] -{ #category : 'printing' } +{ #category : #printing } DataFrame >> printOn: aStream [ | title | @@ -1744,14 +1742,14 @@ DataFrame >> printOn: aStream [ nextPutAll: self dimensions asString ] -{ #category : 'private' } +{ #category : #private } DataFrame >> privateRowNames: anArray [ "I am a private method skipping the assertions when my internal mecanisms know they can skip them." rowNames := anArray asOrderedCollection ] -{ #category : 'statistics' } +{ #category : #statistics } DataFrame >> range [ "Range is the difference between the highest value and the lowest value in a set" @@ -1760,7 +1758,7 @@ DataFrame >> range [ ^ self applyToAllColumns: #range ] -{ #category : 'removing' } +{ #category : #removing } DataFrame >> removeColumn: columnName [ "Removes the column named columnName from a data frame" @@ -1769,7 +1767,7 @@ DataFrame >> removeColumn: columnName [ self removeColumnAt: index ] -{ #category : 'removing' } +{ #category : #removing } DataFrame >> removeColumnAt: columnNumber [ "Removes the column at column index columnNumber from a data frame" @@ -1786,7 +1784,7 @@ DataFrame >> removeColumnAt: columnNumber [ columnNames := columnNames copyWithoutIndex: columnNumber ] -{ #category : 'removing' } +{ #category : #removing } DataFrame >> removeColumns: aCollectionOfColumnNames [ "Removes all columns from a data frame whose names are present in the collection aCollectionOfColumnNames" @@ -1795,7 +1793,7 @@ DataFrame >> removeColumns: aCollectionOfColumnNames [ ] ] -{ #category : 'removing' } +{ #category : #removing } DataFrame >> removeColumnsAt: aCollectionOfColumnIndices [ "Removes all columns from a data frame whose column indices are present in the collection aCollectionOfColumnIndices" @@ -1809,7 +1807,7 @@ DataFrame >> removeColumnsAt: aCollectionOfColumnIndices [ self removeColumns: columnNamesToRemove ] -{ #category : 'removing' } +{ #category : #removing } DataFrame >> removeColumnsOfRowElementsSatisfing: aBlock onRowNamed: rowName [ "Removes columns from a data frame whose row elements at the row named rowName satisfy a given block" @@ -1818,7 +1816,7 @@ DataFrame >> removeColumnsOfRowElementsSatisfing: aBlock onRowNamed: rowName [ self removeColumnsOfRowElementsSatisfying: aBlock onRow: index ] -{ #category : 'removing' } +{ #category : #removing } DataFrame >> removeColumnsOfRowElementsSatisfying: aBlock onRow: rowNumber [ "Removes columns from a data frame whose row elements at the row index rowNumber satisfy a given block" @@ -1840,7 +1838,7 @@ DataFrame >> removeColumnsOfRowElementsSatisfying: aBlock onRow: rowNumber [ self numberOfColumns = 0 ifTrue: [ rowNames removeAll ] ] -{ #category : 'handling nils' } +{ #category : #'handling nils' } DataFrame >> removeColumnsWithNilsAtRow: rowNumber [ "Removes all columns with nil values at row number rowNumber from the data frame" @@ -1853,14 +1851,14 @@ DataFrame >> removeColumnsWithNilsAtRow: rowNumber [ onRow: rowNumber ] -{ #category : 'handling nils' } +{ #category : #'handling nils' } DataFrame >> removeColumnsWithNilsAtRowNamed: rowName [ "Removes all columns with nil values at a row named rowName from the data frame" self removeColumnsOfRowElementsSatisfing: [ :ele | ele isNil ] onRowNamed: rowName ] -{ #category : 'removing' } +{ #category : #removing } DataFrame >> removeDuplicatedRows [ "Removes duplicate rows of a dataframe except the first unique row" @@ -1880,7 +1878,7 @@ DataFrame >> removeDuplicatedRows [ ^ self removeRowsAt: aSet ] -{ #category : 'removing' } +{ #category : #removing } DataFrame >> removeRow: rowName [ "Removes the row named rowName from a data frame" @@ -1889,7 +1887,7 @@ DataFrame >> removeRow: rowName [ self removeRowAt: index ] -{ #category : 'removing' } +{ #category : #removing } DataFrame >> removeRowAt: rowNumber [ "Removes the row at row index rowNumber from a data frame" @@ -1904,7 +1902,7 @@ DataFrame >> removeRowAt: rowNumber [ rowNames := rowNames copyWithoutIndex: rowNumber ] -{ #category : 'removing' } +{ #category : #removing } DataFrame >> removeRows: aCollectionOfRowNames [ "Removes all rows from a data frame whose names are present in the collection aCollectionOfRowNames" @@ -1912,7 +1910,7 @@ DataFrame >> removeRows: aCollectionOfRowNames [ self removeRow: each ] ] -{ #category : 'removing' } +{ #category : #removing } DataFrame >> removeRowsAt: aCollectionOfRowIndices [ "Removes all rows from a data frame whose row indices are present in the collection aCollectionOfRowIndices" @@ -1926,17 +1924,8 @@ DataFrame >> removeRowsAt: aCollectionOfRowIndices [ self removeRows: rowNamesToRemove ] -{ #category : 'removing' } -DataFrame >> removeRowsOfColumnElementsSatisfing: aBlock onColumnNamed: columnName [ - "Removes rows from a data frame whose column elements at the column named columnName satisfy a given block" - - | index | - index := self indexOfColumnNamed: columnName. - self removeRowsOfColumnElementsSatisfying: aBlock onColumn: index -] - -{ #category : 'removing' } -DataFrame >> removeRowsOfColumnElementsSatisfying: aBlock onColumn: columnNumber [ +{ #category : #removing } +DataFrame >> removeRowsWhereElementsInColumnAt: columnNumber satisfy: aBlock [ "Removes rows from a data frame whose column elements at the column index columnNumber satisfy a given block" "(#(#(1 2) #(3 4) #(5 6)) asDataFrame removeRowsOfColumnElementsSatisfying: [ :x | x >= 4 ] onColumn: 2) >>> (#(#(1 2)) asDataFrame)" @@ -1951,13 +1940,22 @@ DataFrame >> removeRowsOfColumnElementsSatisfying: aBlock onColumn: columnNumber (aBlock value: (contents at: i at: columnNumber)) ifFalse: [ rowNames add: rowName ] ]. contents - removeRowsOfColumnElementsSatisfying: aBlock - onColumn: columnNumber. + removeRowsWhereElementsInColumnAt: columnNumber + satisfy: aBlock. self numberOfRows = 0 ifTrue: [ columnNames removeAll ] ] -{ #category : 'removing' } +{ #category : #removing } +DataFrame >> removeRowsWhereElementsInColumnNamed: columnName satisfy: aBlock [ + "Removes rows from a data frame whose column elements at the column named columnName satisfy a given block" + + | index | + index := self indexOfColumnNamed: columnName. + self removeRowsWhereElementsInColumnAt: index satisfy: aBlock +] + +{ #category : #removing } DataFrame >> removeRowsWithNils [ "Removes all rows from a data frame which have atleast one nil value" @@ -1967,11 +1965,11 @@ DataFrame >> removeRowsWithNils [ 1 to: self numberOfColumns do: [ :i | self - removeRowsOfColumnElementsSatisfying: [ :ele | ele isNil ] - onColumn: i ] + removeRowsWhereElementsInColumnAt: i + satisfy: [ :ele | ele isNil ] ] ] -{ #category : 'handling nils' } +{ #category : #'handling nils' } DataFrame >> removeRowsWithNilsAtColumn: columnNumber [ "Removes all rows with nil values at column number columnNumber from the data frame" @@ -1980,18 +1978,20 @@ DataFrame >> removeRowsWithNilsAtColumn: columnNumber [ "(#(#(nil r1c2) #(r2c1 nil)) asDataFrame removeRowsWithNilsAtColumn: 2) >>> (#(#(nil r1c2)) asDataFrame)" self - removeRowsOfColumnElementsSatisfying: [ :ele | ele isNil ] - onColumn: columnNumber + removeRowsWhereElementsInColumnAt: columnNumber + satisfy: [ :ele | ele isNil ] ] -{ #category : 'handling nils' } +{ #category : #'handling nils' } DataFrame >> removeRowsWithNilsAtColumnNamed: columnName [ "Removes all rows with nil values at a column named columnName from the data frame" - self removeRowsOfColumnElementsSatisfing: [ :ele | ele isNil ] onColumnNamed: columnName + self + removeRowsWhereElementsInColumnNamed: columnName + satisfy: [ :ele | ele isNil ] ] -{ #category : 'renaming' } +{ #category : #renaming } DataFrame >> renameColumn: oldName to: newName [ "Find a column with oldName and rename it to newName" | index | @@ -2002,7 +2002,7 @@ DataFrame >> renameColumn: oldName to: newName [ self dataTypes removeKey: oldName ] -{ #category : 'renaming' } +{ #category : #renaming } DataFrame >> renameRow: oldName to: newName [ "Find a row with oldName and rename it to newName" | index | @@ -2010,7 +2010,7 @@ DataFrame >> renameRow: oldName to: newName [ self rowNames at: index put: newName ] -{ #category : 'handling nils' } +{ #category : #'handling nils' } DataFrame >> replaceAllNilsWithZeros [ self deprecated: 'Use #replaceNilsWithZero instead.' transformWith: '`@receiver replaceAllNilsWithZeros' -> '`@receiver replaceNilsWithZero'. @@ -2018,7 +2018,7 @@ DataFrame >> replaceAllNilsWithZeros [ self replaceNilsWithZero ] -{ #category : 'replacing' } +{ #category : #replacing } DataFrame >> replaceNilsWith: anObject [ "Replaces all nil values of a data frame with the object anObject" @@ -2034,7 +2034,7 @@ DataFrame >> replaceNilsWith: anObject [ self at: rowIndex at: columnIndex put: anObject ] ] ] ] -{ #category : 'replacing' } +{ #category : #replacing } DataFrame >> replaceNilsWithAverage [ "Replaces all nil values of a data frame with the average value of the column in which it is present" @@ -2050,7 +2050,7 @@ DataFrame >> replaceNilsWithAverage [ (self at: j at: i) ifNil: [ self at: j at: i put: averageOfColumn ] ] ] ] -{ #category : 'replacing' } +{ #category : #replacing } DataFrame >> replaceNilsWithMedian [ "Replaces all nil values of a data frame with the median of the column in which it is present" @@ -2066,7 +2066,7 @@ DataFrame >> replaceNilsWithMedian [ (self at: j at: i) ifNil: [ self at: j at: i put: medianOfColumn ] ] ] ] -{ #category : 'replacing' } +{ #category : #replacing } DataFrame >> replaceNilsWithMode [ "Replaces all nil values of a data frame with the mode of the column in which it is present" @@ -2084,7 +2084,7 @@ DataFrame >> replaceNilsWithMode [ modeOfColumn := nil ] ] -{ #category : 'replacing' } +{ #category : #replacing } DataFrame >> replaceNilsWithNextRowValue [ "Replaces all nil values of a data frame with the next non-nil value of the column in which it is present. If there is no non-nil value after it, it is not replaced" @@ -2101,7 +2101,7 @@ DataFrame >> replaceNilsWithNextRowValue [ value := self at: j at: i ] ] ] -{ #category : 'replacing' } +{ #category : #replacing } DataFrame >> replaceNilsWithPreviousRowValue [ "Replaces all nil values of a data frame with the previous non-nil value of the column in which it is present. If there is no non-nil value before it, it is not replaced" @@ -2117,7 +2117,7 @@ DataFrame >> replaceNilsWithPreviousRowValue [ value := self at: j at: i ] ] ] -{ #category : 'replacing' } +{ #category : #replacing } DataFrame >> replaceNilsWithZero [ "Replaces all nil values of a data frame with zero" @@ -2128,7 +2128,7 @@ DataFrame >> replaceNilsWithZero [ self replaceNilsWith: 0 ] -{ #category : 'splitjoin' } +{ #category : #splitjoin } DataFrame >> rightJoin: aDataFrame [ "Performs right join on aDataFrame with rowNames as keys" @@ -2147,13 +2147,13 @@ DataFrame >> rightJoin: aDataFrame [ ^ outputDf ] -{ #category : 'splitjoin' } +{ #category : #splitjoin } DataFrame >> rightJoin: aDataFrame on: aColumnName [ "Right join of self with aDataFrame on a column that has a name aColumnName in both data frames" ^ self rightJoin: aDataFrame onLeft: aColumnName onRight: aColumnName ] -{ #category : 'splitjoin' } +{ #category : #splitjoin } DataFrame >> rightJoin: aDataFrame onLeft: leftColumn onRight: rightColumn [ "Performs right join on aDataFrame with rowNames as keys. rowNames are not preserved. @@ -2196,7 +2196,7 @@ DataFrame >> rightJoin: aDataFrame onLeft: leftColumn onRight: rightColumn [ ^ outputDf ] -{ #category : 'accessing' } +{ #category : #accessing } DataFrame >> row: rowName [ "Answer the row with rowName as a DataSeries or signal an exception if a row with that name was not found" | index | @@ -2204,7 +2204,7 @@ DataFrame >> row: rowName [ ^ self rowAt: index ] -{ #category : 'accessing' } +{ #category : #accessing } DataFrame >> row: rowName ifAbsent: exceptionBlock [ "Answer the row with rowName as a DataSeries or evaluate exception block if a row with that name was not found" | index | @@ -2215,7 +2215,7 @@ DataFrame >> row: rowName ifAbsent: exceptionBlock [ ^ self rowAt: index ] -{ #category : 'accessing' } +{ #category : #accessing } DataFrame >> row: rowName put: anArray [ "Replace the current values of row with rowName with anArray or signal an exception if a row with that name was not found" | index | @@ -2223,7 +2223,7 @@ DataFrame >> row: rowName put: anArray [ ^ self rowAt: index put: anArray ] -{ #category : 'accessing' } +{ #category : #accessing } DataFrame >> row: rowName put: anArray ifAbsent: exceptionBlock [ "Replace the current values of row with rowName with anArray or evaluate exception block if a row with that name was not found" | index | @@ -2234,7 +2234,7 @@ DataFrame >> row: rowName put: anArray ifAbsent: exceptionBlock [ ^ self rowAt: index put: anArray ] -{ #category : 'accessing' } +{ #category : #accessing } DataFrame >> row: rowName transform: aBlock [ "Evaluate aBlock on the row with rowName and replace row with the result. Signal an exception if rowName was not found" | row | @@ -2242,7 +2242,7 @@ DataFrame >> row: rowName transform: aBlock [ self row: rowName put: (aBlock value: row) asArray ] -{ #category : 'accessing' } +{ #category : #accessing } DataFrame >> row: rowName transform: aBlock ifAbsent: exceptionBlock [ "Evaluate aBlock on the row with rowName and replace row with the result. Evaluate exceptionBlock if rowName was not found" | row | @@ -2250,7 +2250,7 @@ DataFrame >> row: rowName transform: aBlock ifAbsent: exceptionBlock [ self row: rowName put: (aBlock value: row) ] -{ #category : 'accessing' } +{ #category : #accessing } DataFrame >> rowAt: aNumber [ "Returns the row of a DataFrame at row index aNumber" @@ -2265,7 +2265,7 @@ DataFrame >> rowAt: aNumber [ ^ series ] -{ #category : 'accessing' } +{ #category : #accessing } DataFrame >> rowAt: aNumber put: anArray [ "Replaces the row at row index aNumber with contents of the array anArray" @@ -2278,7 +2278,7 @@ DataFrame >> rowAt: aNumber put: anArray [ contents rowAt: aNumber put: anArray ] -{ #category : 'accessing' } +{ #category : #accessing } DataFrame >> rowAt: aNumber transform: aBlock [ "Evaluate aBlock on the row at aNumber and replace that row with the result" @@ -2289,14 +2289,14 @@ DataFrame >> rowAt: aNumber transform: aBlock [ self rowAt: aNumber put: (aBlock value: row) asArray ] -{ #category : 'accessing' } +{ #category : #accessing } DataFrame >> rowNames [ "Returns the row names of a DataFrame" ^ rowNames ] -{ #category : 'accessing' } +{ #category : #accessing } DataFrame >> rowNames: anArray [ "Sets the row names of a DataFrame with contents of the collection aCollection" @@ -2307,7 +2307,7 @@ DataFrame >> rowNames: anArray [ self privateRowNames: anArray ] -{ #category : 'accessing' } +{ #category : #accessing } DataFrame >> rows [ "Returns a collection of all rows" @@ -2318,7 +2318,7 @@ DataFrame >> rows [ ^ (1 to: self numberOfRows) collect: [ :j | self rowAt: j ] ] -{ #category : 'accessing' } +{ #category : #accessing } DataFrame >> rows: anArrayOfNames [ "Returns a collection of rows whose row names are present in the array anArrayOfNames" @@ -2331,7 +2331,7 @@ DataFrame >> rows: anArrayOfNames [ ^ self rowsAt: anArrayOfNumbers ] -{ #category : 'accessing' } +{ #category : #accessing } DataFrame >> rows: anArrayOfRowNames put: anArrayOfArrays [ "Replaces the rows whose row names are present in the array anArrayOfRowNames with the contents of the array of arrays anArrayOfArrays" @@ -2342,7 +2342,7 @@ DataFrame >> rows: anArrayOfRowNames put: anArrayOfArrays [ self row: name put: array ] ] -{ #category : 'accessing' } +{ #category : #accessing } DataFrame >> rowsAt: anArrayOfNumbers [ "Returns a collection of rows whose row indices are present in the array anArrayOfNumbers" @@ -2359,7 +2359,7 @@ DataFrame >> rowsAt: anArrayOfNumbers [ columnNames: self columnNames ] -{ #category : 'accessing' } +{ #category : #accessing } DataFrame >> rowsAt: anArrayOfNumbers put: anArrayOfArrays [ "Replaces the rows whose row indices are present in the array anArrayOfNumbers with the contents of the array of arrays anArrayOfArrays" @@ -2373,7 +2373,7 @@ DataFrame >> rowsAt: anArrayOfNumbers put: anArrayOfArrays [ do: [ :index :array | self rowAt: index put: array ] ] -{ #category : 'accessing' } +{ #category : #accessing } DataFrame >> rowsFrom: begin to: end [ "Returns a collection of rows whose row indices are present between begin and end" @@ -2384,7 +2384,7 @@ DataFrame >> rowsFrom: begin to: end [ ^ self rowsAt: (begin to: end) ] -{ #category : 'accessing' } +{ #category : #accessing } DataFrame >> rowsFrom: firstNumber to: secondNumber put: anArrayOfArrays [ "Replaces the rows whose row indices are present between firstNumber and secondNumber with the contents of the array of arrays anArrayOfArrays" @@ -2402,7 +2402,7 @@ DataFrame >> rowsFrom: firstNumber to: secondNumber put: anArrayOfArrays [ self rowAt: rowIndex put: (anArrayOfArrays at: i) ] ] -{ #category : 'enumerating' } +{ #category : #enumerating } DataFrame >> select: aBlock [ "Evaluate aBlock with each of the receiver's elements as the argument. Collect into a new collection like the receiver, only those elements for @@ -2416,14 +2416,14 @@ DataFrame >> select: aBlock [ ^ self rowsAt: selectedIndexes ] -{ #category : 'private' } +{ #category : #private } DataFrame >> setDefaultRowColumnNames [ self privateRowNames: (1 to: self numberOfRows). self columnNames: (1 to: self numberOfColumns) ] -{ #category : 'accessing' } +{ #category : #accessing } DataFrame >> shuffleBy: aNumber [ "Modify the receiver but with its elements in random positions. This method use Random class as random generator" @@ -2441,20 +2441,20 @@ DataFrame >> shuffleBy: aNumber [ rowNames := newRowNames. ] -{ #category : 'accessing' } +{ #category : #accessing } DataFrame >> shuffleWithSeed: aNumber [ " Fix the random seed with aNumber to ensure reproducibility " ^ self shuffleBy: (Random new seed: aNumber) ] -{ #category : 'accessing' } +{ #category : #accessing } DataFrame >> shuffled [ self shuffleBy: Random new ] -{ #category : 'accessing' } +{ #category : #accessing } DataFrame >> size [ "Returns the number of rows of a DataFrame" @@ -2467,7 +2467,7 @@ DataFrame >> size [ ^ self numberOfRows ] -{ #category : 'sorting' } +{ #category : #sorting } DataFrame >> sortBy: columnName [ "Rearranges the rows of the data frame in ascending order of the values in the column named columnName" @@ -2478,7 +2478,7 @@ DataFrame >> sortBy: columnName [ self sortBy: columnName using: [ :a :b | a <= b ] ] -{ #category : 'sorting' } +{ #category : #sorting } DataFrame >> sortBy: columnName using: aBlock [ "Rearranges the rows of the data frame by applying the given block on the column named columnName" @@ -2501,7 +2501,7 @@ DataFrame >> sortBy: columnName using: aBlock [ self rowNames: sortedKeys ] -{ #category : 'sorting' } +{ #category : #sorting } DataFrame >> sortByAll: arrayOfColumnNames [ " Chain sorts the data frame in ascending order. The data frame is sorted based on the first column in the array of column names, if there are same values, then it sorts these same values based on the values of the second column and so on.." @@ -2514,14 +2514,14 @@ DataFrame >> sortByAll: arrayOfColumnNames [ ^ self ] -{ #category : 'sorting' } +{ #category : #sorting } DataFrame >> sortByRowNames [ "Sorts the rows of the data frame based on the row names in ascending order" self sortByRowNamesUsing: [ :a :b | a <= b ] ] -{ #category : 'sorting' } +{ #category : #sorting } DataFrame >> sortByRowNamesUsing: aBlock [ "Sorts the rows of the data frame based on the row names using the given comparison block" @@ -2537,7 +2537,7 @@ DataFrame >> sortByRowNamesUsing: aBlock [ self rowNames: sortedKeys ] -{ #category : 'sorting' } +{ #category : #sorting } DataFrame >> sortDescendingBy: columnName [ "Rearranges the rows of the data frame in descending order of the values in the column named columnName" @@ -2548,7 +2548,7 @@ DataFrame >> sortDescendingBy: columnName [ self sortBy: columnName using: [ :a :b | a >= b ] ] -{ #category : 'sorting' } +{ #category : #sorting } DataFrame >> sortDescendingByAll: arrayOfColumnNames [ " Chain sorts the data frame in descending order. The data frame is sorted based on the first column in the array of column names, if there are same values, then it sorts these same values based on the values of the second column and so on.." @@ -2561,14 +2561,14 @@ DataFrame >> sortDescendingByAll: arrayOfColumnNames [ ^ self ] -{ #category : 'sorting' } +{ #category : #sorting } DataFrame >> sortDescendingByRowNames [ "Sorts the rows of the data frame based on the row names in descending order" self sortByRowNamesUsing: [ :a :b | a >= b ] ] -{ #category : 'statistics' } +{ #category : #statistics } DataFrame >> stdev [ "Standard deviation is a measure of how dispersed the data is in relation to the average" @@ -2597,7 +2597,7 @@ DataFrame >> tail [ ^ self tail: self defaultHeadTailSize ] -{ #category : 'accessing' } +{ #category : #accessing } DataFrame >> tail: aNumber [ "Returns the last aNumber rows of aDataFrame" | rows | @@ -2606,7 +2606,7 @@ DataFrame >> tail: aNumber [ ^ self rowsAt: (rows - (rows min: aNumber) + 1 to: rows) ] -{ #category : 'statistics' } +{ #category : #statistics } DataFrame >> thirdQuartile [ "75% of the values in a set are smaller than or equal to the third Quartile of that set" @@ -2615,7 +2615,7 @@ DataFrame >> thirdQuartile [ ^ self applyToAllColumns: #thirdQuartile ] -{ #category : 'applying' } +{ #category : #applying } DataFrame >> toColumn: columnName applyElementwise: aBlock [ "Applies a given block to a column named columnName of a data frame" @@ -2625,7 +2625,7 @@ DataFrame >> toColumn: columnName applyElementwise: aBlock [ self column: columnName put: column asArray ] -{ #category : 'applying' } +{ #category : #applying } DataFrame >> toColumnAt: columnNumber applyElementwise: aBlock [ "Applies a given block to a column whose column index is columnNumber of a data frame" @@ -2636,7 +2636,7 @@ DataFrame >> toColumnAt: columnNumber applyElementwise: aBlock [ ^ self toColumn: columnName applyElementwise: aBlock ] -{ #category : 'applying' } +{ #category : #applying } DataFrame >> toColumns: arrayOfColumnNames applyElementwise: aBlock [ "Applies a given block to columns whose names are present in the array arrayOfColumnNames of a data frame" @@ -2644,7 +2644,7 @@ DataFrame >> toColumns: arrayOfColumnNames applyElementwise: aBlock [ self toColumn: each applyElementwise: aBlock ] ] -{ #category : 'applying' } +{ #category : #applying } DataFrame >> toColumnsAt: arrayOfColumnNumbers applyElementwise: aBlock [ "Applies a given block to columns whose indices are present in the array arrayOfColumnNumbers of a data frame" @@ -2654,7 +2654,7 @@ DataFrame >> toColumnsAt: arrayOfColumnNumbers applyElementwise: aBlock [ self toColumnAt: each applyElementwise: aBlock ] ] -{ #category : 'converting' } +{ #category : #converting } DataFrame >> toHtml [ "Prints the DataFrame as an HTML formatted table" @@ -2728,7 +2728,7 @@ DataFrame >> toHtml [ ^ html contents ] -{ #category : 'converting' } +{ #category : #converting } DataFrame >> toLatex [ " Prints the DataFrame as a Latex formatted table" @@ -2781,7 +2781,7 @@ DataFrame >> toLatex [ ^ markdown contents ] -{ #category : 'converting' } +{ #category : #converting } DataFrame >> toMarkdown [ " Prints the DataFrame as a Markdown formatted table" @@ -2827,7 +2827,7 @@ DataFrame >> toMarkdown [ ^ markdown contents ] -{ #category : 'converting' } +{ #category : #converting } DataFrame >> toString [ " Prints the DataFrame as a String formatted table" @@ -2864,7 +2864,7 @@ DataFrame >> toString [ ^ stringTable contents ] -{ #category : 'geometry' } +{ #category : #geometry } DataFrame >> transposed [ "Returns a transposed DataFrame. Columns become rows and rows become columns." @@ -2881,7 +2881,7 @@ DataFrame >> transposed [ ^ transposedDf ] -{ #category : 'statistics' } +{ #category : #statistics } DataFrame >> variance [ "variance measures how far each number in the set is from the average value of the set. It is the square of standard deviation." @@ -2890,7 +2890,7 @@ DataFrame >> variance [ ^ self applyToAllColumns: #variance ] -{ #category : 'enumerating' } +{ #category : #enumerating } DataFrame >> withIndexCollect: elementAndIndexBlock [ "Overrides withIndexCollect: to create DataFrame with the same number of columns as values in the first row" | firstRow newDataFrame | @@ -2903,7 +2903,7 @@ DataFrame >> withIndexCollect: elementAndIndexBlock [ ^ newDataFrame ] -{ #category : 'enumerating' } +{ #category : #enumerating } DataFrame >> withIndexDo: elementAndIndexBlock [ 1 to: self size do: [ :i | @@ -2915,7 +2915,7 @@ DataFrame >> withIndexDo: elementAndIndexBlock [ self rowAt: i put: row asArray ] ] -{ #category : 'enumerating' } +{ #category : #enumerating } DataFrame >> withIndexReject: elementAndIndexBlock [ "Evaluate aBlock with each of the receiver's elements and index as the arguments. Collect into a new collection like the receiver, only those elements for @@ -2923,7 +2923,7 @@ DataFrame >> withIndexReject: elementAndIndexBlock [ ^ self withIndexSelect: [ :row :index | (elementAndIndexBlock value: row value: index) not ] ] -{ #category : 'enumerating' } +{ #category : #enumerating } DataFrame >> withIndexSelect: aBlock [ "Evaluate aBlock with each of the receiver's elements and index as the arguments. Collect into a new collection like the receiver, only those elements for diff --git a/src/DataFrame/DataFrameInternal.class.st b/src/DataFrame/DataFrameInternal.class.st index 0d651814..67a9de05 100644 --- a/src/DataFrame/DataFrameInternal.class.st +++ b/src/DataFrame/DataFrameInternal.class.st @@ -2,17 +2,15 @@ I am the internal representation of a DataFrame. I store the data very efficiently and allow you to access it very quickly. " Class { - #name : 'DataFrameInternal', - #superclass : 'SequenceableCollection', + #name : #DataFrameInternal, + #superclass : #SequenceableCollection, #instVars : [ 'contents' ], - #category : 'DataFrame-Core', - #package : 'DataFrame', - #tag : 'Core' + #category : #'DataFrame-Core' } -{ #category : 'instance creation' } +{ #category : #'instance creation' } DataFrameInternal class >> fromArray2D: anArray2D [ | dfInternal | @@ -27,13 +25,13 @@ DataFrameInternal class >> fromArray2D: anArray2D [ ^ dfInternal ] -{ #category : 'instance creation' } +{ #category : #'instance creation' } DataFrameInternal class >> new: aPoint [ ^ self new initialize: aPoint ] -{ #category : 'instance creation' } +{ #category : #'instance creation' } DataFrameInternal class >> withColumns: anArrayOfArrays [ | numberOfRows numberOfColumns dfInternal | @@ -53,7 +51,7 @@ DataFrameInternal class >> withColumns: anArrayOfArrays [ ^ dfInternal ] -{ #category : 'instance creation' } +{ #category : #'instance creation' } DataFrameInternal class >> withRows: anArrayOfArrays [ | numberOfRows numberOfColumns dfInternal | @@ -73,7 +71,7 @@ DataFrameInternal class >> withRows: anArrayOfArrays [ ^ dfInternal ] -{ #category : 'comparing' } +{ #category : #comparing } DataFrameInternal >> = other [ ((other class == self class) @@ -88,7 +86,7 @@ DataFrameInternal >> = other [ ^ true ] -{ #category : 'adding' } +{ #category : #adding } DataFrameInternal >> addColumn: anArray atPosition: aNumber [ | rows cols newContents | @@ -120,7 +118,7 @@ DataFrameInternal >> addColumn: anArray atPosition: aNumber [ contents := newContents ] -{ #category : 'adding' } +{ #category : #adding } DataFrameInternal >> addRow: anArray atPosition: aNumber [ | rows cols newContents | @@ -152,12 +150,12 @@ DataFrameInternal >> addRow: anArray atPosition: aNumber [ contents := newContents ] -{ #category : 'converting' } +{ #category : #converting } DataFrameInternal >> asArray2D [ ^ contents ] -{ #category : 'converting' } +{ #category : #converting } DataFrameInternal >> asArrayOfColumns [ "Converts DataFrameInternal to the array of columns" @@ -166,7 +164,7 @@ DataFrameInternal >> asArrayOfColumns [ self at: i at: j ] ] ] -{ #category : 'converting' } +{ #category : #converting } DataFrameInternal >> asArrayOfRows [ "Converts DataFrameInternal to the array of rows" @@ -175,33 +173,33 @@ DataFrameInternal >> asArrayOfRows [ self at: i at: j ] ] ] -{ #category : 'accessing' } +{ #category : #accessing } DataFrameInternal >> at: rowIndex at: columnIndex [ ^ contents at: rowIndex at: columnIndex ] -{ #category : 'accessing' } +{ #category : #accessing } DataFrameInternal >> at: rowIndex at: columnIndex put: value [ ^ contents at: rowIndex at: columnIndex put: value ] -{ #category : 'enumerating' } +{ #category : #enumerating } DataFrameInternal >> collect: aBlock [ ^ self class fromArray2D: (contents collect: aBlock) ] -{ #category : 'accessing' } +{ #category : #accessing } DataFrameInternal >> columnAt: aNumber [ ^ (1 to: self numberOfRows) collect: [ :i | self at: i at: aNumber ] ] -{ #category : 'accessing' } +{ #category : #accessing } DataFrameInternal >> columnAt: aNumber put: anArray [ "Sets all the values of a given column equal to the values in array. It is assumed that array is of the same size as every column (number of rows). This should be tested in DataFrame" @@ -209,7 +207,7 @@ DataFrameInternal >> columnAt: aNumber put: anArray [ self at: i at: aNumber put: (anArray at: i) ] ] -{ #category : 'accessing' } +{ #category : #accessing } DataFrameInternal >> columnsAt: anArrayOfNumbers [ | dfInternal numberOfRows numberOfColumns | @@ -225,50 +223,50 @@ DataFrameInternal >> columnsAt: anArrayOfNumbers [ ^ dfInternal ] -{ #category : 'copying' } +{ #category : #copying } DataFrameInternal >> deepCopy [ ^ self class fromArray2D: contents ] -{ #category : 'enumerating' } +{ #category : #enumerating } DataFrameInternal >> do: aBlock [ ^ contents do: aBlock ] -{ #category : 'initialization' } +{ #category : #initialization } DataFrameInternal >> initialize [ super initialize. contents := Array2D new ] -{ #category : 'initialization' } +{ #category : #initialization } DataFrameInternal >> initialize: aPoint [ contents := Array2D rows: aPoint x columns: aPoint y ] -{ #category : 'accessing' } +{ #category : #accessing } DataFrameInternal >> numberOfColumns [ ^ contents numberOfColumns ifNil: [ 0 ] ] -{ #category : 'accessing' } +{ #category : #accessing } DataFrameInternal >> numberOfRows [ ^ contents numberOfRows ifNil: [ 0 ] ] -{ #category : 'printing' } +{ #category : #printing } DataFrameInternal >> printOn: aStream [ contents printOn: aStream ] -{ #category : 'removing' } +{ #category : #removing } DataFrameInternal >> removeColumnAt: columnNumber [ | newContents | @@ -288,7 +286,7 @@ DataFrameInternal >> removeColumnAt: columnNumber [ contents := newContents ] -{ #category : 'removing' } +{ #category : #removing } DataFrameInternal >> removeColumnsOfRowElementsSatisfying: aBlock onRow: rowNumber [ "Executes aBlock for all elements in specified rowNumber and deletes the column which satisfied condition given in aBlock." @@ -313,7 +311,7 @@ DataFrameInternal >> removeColumnsOfRowElementsSatisfying: aBlock onRow: rowNumb contents := newContents ] -{ #category : 'removing' } +{ #category : #removing } DataFrameInternal >> removeRowAt: rowNumber [ | newContents | @@ -333,8 +331,8 @@ DataFrameInternal >> removeRowAt: rowNumber [ contents := newContents ] -{ #category : 'removing' } -DataFrameInternal >> removeRowsOfColumnElementsSatisfying: aBlock onColumn: columnNumber [ +{ #category : #removing } +DataFrameInternal >> removeRowsWhereElementsInColumnAt: columnNumber satisfy: aBlock [ "Removes all rows having a nil value at the column columnNumber" | newContents rowsToDrop k | @@ -356,7 +354,7 @@ DataFrameInternal >> removeRowsOfColumnElementsSatisfying: aBlock onColumn: colu contents := newContents ] -{ #category : 'filling' } +{ #category : #filling } DataFrameInternal >> replaceMissingValuesStrings: aSet [ "Replaces dataframeinternal values with nil if they are a part of aSet" @@ -367,14 +365,14 @@ DataFrameInternal >> replaceMissingValuesStrings: aSet [ ] ] ] -{ #category : 'accessing' } +{ #category : #accessing } DataFrameInternal >> rowAt: aNumber [ ^ (1 to: self numberOfColumns) collect: [ :j | self at: aNumber at: j ] ] -{ #category : 'accessing' } +{ #category : #accessing } DataFrameInternal >> rowAt: aNumber put: anArray [ "Sets all the values of a given row equal to the values in array. It is assumed that array is of the same size as every row (number of columns). This should be tested in DataFrame" @@ -382,7 +380,7 @@ DataFrameInternal >> rowAt: aNumber put: anArray [ self at: aNumber at: j put: (anArray at: j) ] ] -{ #category : 'accessing' } +{ #category : #accessing } DataFrameInternal >> rowsAt: anArrayOfNumbers [ | dfInternal numberOfRows numberOfColumns | @@ -398,21 +396,21 @@ DataFrameInternal >> rowsAt: anArrayOfNumbers [ ^ dfInternal ] -{ #category : 'accessing' } +{ #category : #accessing } DataFrameInternal >> size [ "Answer how many elements the receiver contains." ^ contents size ] -{ #category : 'enumerating' } +{ #category : #enumerating } DataFrameInternal >> withIndicesCollect: aBlock [ ^ self class fromArray2D: (contents withIndicesCollect: aBlock) ] -{ #category : 'enumerating' } +{ #category : #enumerating } DataFrameInternal >> withIndicesDo: aBlock [ ^ contents withIndicesDo: aBlock diff --git a/src/DataFrame/DataPearsonCorrelationMethod.class.st b/src/DataFrame/DataPearsonCorrelationMethod.class.st index 4aeb5062..fa5dfce9 100644 --- a/src/DataFrame/DataPearsonCorrelationMethod.class.st +++ b/src/DataFrame/DataPearsonCorrelationMethod.class.st @@ -1,12 +1,10 @@ Class { - #name : 'DataPearsonCorrelationMethod', - #superclass : 'DataCorrelationMethod', - #category : 'DataFrame-Math', - #package : 'DataFrame', - #tag : 'Math' + #name : #DataPearsonCorrelationMethod, + #superclass : #DataCorrelationMethod, + #category : 'DataFrame-Math' } -{ #category : 'comparing' } +{ #category : #comparing } DataPearsonCorrelationMethod class >> between: x and: y [ "Calcualte the Pearson correlation coefficient between two data series" diff --git a/src/DataFrame/DataSeries.class.st b/src/DataFrame/DataSeries.class.st index e7795e2f..19c9eddc 100644 --- a/src/DataFrame/DataSeries.class.st +++ b/src/DataFrame/DataSeries.class.st @@ -1,16 +1,14 @@ Class { - #name : 'DataSeries', - #superclass : 'OrderedDictionary', + #name : #DataSeries, + #superclass : #OrderedDictionary, #instVars : [ 'name', 'forcedIsNumerical' ], - #category : 'DataFrame-Core', - #package : 'DataFrame', - #tag : 'Core' + #category : 'DataFrame-Core' } -{ #category : 'instance creation' } +{ #category : #'instance creation' } DataSeries class >> newFrom: aCollection [ aCollection ifEmpty: [ ^ self new ]. @@ -23,7 +21,7 @@ DataSeries class >> newFrom: aCollection [ ifFalse: [ aCollection withIndexCollect: [ :each :i | i -> each ] ]) ] -{ #category : 'instance creation' } +{ #category : #'instance creation' } DataSeries class >> newFromKeys: keys andValues: values [ | dict | @@ -33,31 +31,31 @@ DataSeries class >> newFromKeys: keys andValues: values [ ^ dict ] -{ #category : 'instance creation' } +{ #category : #'instance creation' } DataSeries class >> withKeys: keys values: values [ ^ self newFromKeys: keys andValues: values ] -{ #category : 'instance creation' } +{ #category : #'instance creation' } DataSeries class >> withKeys: keys values: values name: aName [ ^ (self withKeys: keys values: values) name: aName; yourself ] -{ #category : 'instance creation' } +{ #category : #'instance creation' } DataSeries class >> withValues: values [ | keys | keys := (1 to: values size) asArray. ^ self withKeys: keys values: values ] -{ #category : 'instance creation' } +{ #category : #'instance creation' } DataSeries class >> withValues: values name: aName [ | keys | keys := (1 to: values size) asArray. ^ (self withKeys: keys values: values) name: aName; yourself ] -{ #category : 'comparing' } +{ #category : #comparing } DataSeries >> < arg [ "Element-wise comparision between two DataSeries. Does not consider keys for comparision." @@ -65,7 +63,7 @@ DataSeries >> < arg [ ^ arg adaptToCollection: self andSend: #< ] -{ #category : 'comparing' } +{ #category : #comparing } DataSeries >> <= arg [ "Element-wise comparision between two DataSeries. Does not consider keys for comparision." @@ -73,7 +71,7 @@ DataSeries >> <= arg [ ^ arg adaptToCollection: self andSend: #<= ] -{ #category : 'comparing' } +{ #category : #comparing } DataSeries >> = anObject [ (super = anObject) ifFalse: [ ^ false ]. @@ -83,7 +81,7 @@ DataSeries >> = anObject [ and: [ anObject keys = self keys ] ] -{ #category : 'comparing' } +{ #category : #comparing } DataSeries >> > arg [ "Element-wise comparision between two DataSeries. Does not consider keys for comparision." @@ -91,7 +89,7 @@ DataSeries >> > arg [ ^ arg adaptToCollection: self andSend: #> ] -{ #category : 'comparing' } +{ #category : #comparing } DataSeries >> >= arg [ "Element-wise comparision between two DataSeries. Does not consider keys for comparision." @@ -99,7 +97,7 @@ DataSeries >> >= arg [ ^ arg adaptToCollection: self andSend: #>= ] -{ #category : 'adapting' } +{ #category : #adapting } DataSeries >> adaptToCollection: rcvr andSend: selector [ "If I am involved in arithmetic with another Collection, return a Collection of the results of each element combined with the scalar in that expression." @@ -113,7 +111,7 @@ DataSeries >> adaptToCollection: rcvr andSend: selector [ ifFalse: [ rcvrElement perform: selector with: myElement ] ] ] -{ #category : 'statistics' } +{ #category : #statistics } DataSeries >> argmax [ "Returns the key which corresponds to the maximum value of the dataseries" @@ -124,7 +122,7 @@ DataSeries >> argmax [ ^ self keyAtValue: self max ] -{ #category : 'statistics' } +{ #category : #statistics } DataSeries >> argmin [ "Returns the key which corresponds to the minimum value of the dataseries" @@ -135,7 +133,7 @@ DataSeries >> argmin [ ^ self keyAtValue: self min ] -{ #category : 'converting' } +{ #category : #converting } DataSeries >> asDataFrame [ "Converts a data series to a data frame with 1 column. The values in the column of the data frame are the values of the data series. The row names of this data frame are the keys of the data series. The column name of the data frame is same as the name of the data series" @@ -145,7 +143,7 @@ DataSeries >> asDataFrame [ columnNames: { self name } ] -{ #category : 'accessing' } +{ #category : #accessing } DataSeries >> at: aKey transform: aBlock [ "Evaluate aBlock on the value at aKey and replace that value with the result. Signal an exception if aKey was not found" @@ -157,7 +155,7 @@ DataSeries >> at: aKey transform: aBlock [ ifAbsent: [ self errorKeyNotFound: aKey ] ] -{ #category : 'accessing' } +{ #category : #accessing } DataSeries >> at: aKey transform: aBlock ifAbsent: exceptionBlock [ "Evaluate aBlock on the value at aKey and replace that value with the result. Evaluate exceptionBlock if aKey was not found" | oldValue | @@ -168,14 +166,14 @@ DataSeries >> at: aKey transform: aBlock ifAbsent: exceptionBlock [ self at: aKey put: (aBlock value: oldValue) ] -{ #category : 'accessing' } +{ #category : #accessing } DataSeries >> atAll: aCollectionOfIndexes [ "Returns a data series of only those elements of the receiver whose indices are present in the collection aCollectionOfIndexes" ^ self withIndexSelect: [ :each :index | aCollectionOfIndexes includes: index ] ] -{ #category : 'accessing' } +{ #category : #accessing } DataSeries >> atIndex: aNumber [ "Answer the element of the receiver at index aNumber" @@ -186,14 +184,14 @@ DataSeries >> atIndex: aNumber [ ^ self at: (self keys at: aNumber) ] -{ #category : 'accessing' } +{ #category : #accessing } DataSeries >> atIndex: aNumber put: aValue [ "Replace the element of the receiver at index aNumber with the value aValue" ^ self at: (self keys at: aNumber) put: aValue ] -{ #category : 'accessing' } +{ #category : #accessing } DataSeries >> atIndex: aNumber transform: aBlock [ "Evaluate aBlock on the value at aNumber and replace that value with the result" @@ -210,7 +208,7 @@ DataSeries >> atIndex: aNumber transform: aBlock [ self at: key transform: aBlock ] -{ #category : 'statistics' } +{ #category : #statistics } DataSeries >> average [ "Returns the average without including nils" @@ -219,7 +217,7 @@ DataSeries >> average [ ^ self removeNils values average ] -{ #category : 'data-types' } +{ #category : #'data-types' } DataSeries >> calculateDataType [ "Returns the data type of the data series" @@ -234,12 +232,12 @@ DataSeries >> calculateDataType [ ^ self values calculateDataType ] -{ #category : 'comparing' } +{ #category : #comparing } DataSeries >> closeTo: anObject [ ^ self closeTo: anObject precision: self defaultPrecision ] -{ #category : 'comparing' } +{ #category : #comparing } DataSeries >> closeTo: anObject precision: aPrecision [ self == anObject ifTrue: [^ true]. @@ -260,7 +258,7 @@ DataSeries >> closeTo: anObject precision: aPrecision [ ifNone: [ true ] ] -{ #category : 'enumerating' } +{ #category : #enumerating } DataSeries >> collect: aBlock [ "Applies aBlock to every element" @@ -270,14 +268,14 @@ DataSeries >> collect: aBlock [ ^ result ] -{ #category : 'enumerating' } +{ #category : #enumerating } DataSeries >> collectWithNotNils: aBlock [ "Applies aBlock to every non-nil element" ^ self collect: [ :each | each ifNotNil: [ aBlock value: each ] ] ] -{ #category : 'math functions' } +{ #category : #'math functions' } DataSeries >> correlationWith: otherSeries [ "Calculate the Pearson correlation coefficient between self and the other series" @@ -290,7 +288,7 @@ DataSeries >> correlationWith: otherSeries [ using: DataPearsonCorrelationMethod ] -{ #category : 'math functions' } +{ #category : #'math functions' } DataSeries >> correlationWith: otherSeries using: aCorrelationCoefficient [ "Calculate the correlation coefficient between self and the other series using the given method" @@ -301,7 +299,7 @@ DataSeries >> correlationWith: otherSeries using: aCorrelationCoefficient [ ^ aCorrelationCoefficient between: self and: otherSeries ] -{ #category : 'statistics' } +{ #category : #statistics } DataSeries >> countNils [ "Returns the number of nil values in the data series" @@ -312,7 +310,7 @@ DataSeries >> countNils [ ^ self count: [ :each | each isNil ] ] -{ #category : 'statistics' } +{ #category : #statistics } DataSeries >> countNonNils [ "Returns the number of non-nil values in the data series" @@ -323,7 +321,7 @@ DataSeries >> countNonNils [ ^ self count: [ :each | each isNotNil ] ] -{ #category : 'statistics' } +{ #category : #statistics } DataSeries >> crossTabulateWith: aSeries [ "A DataFrame is returned which is useful in quantitatively analyzing the relationship of values in one data series with the values in another data series" @@ -344,7 +342,7 @@ DataSeries >> crossTabulateWith: aSeries [ ^ df ] -{ #category : 'statistics' } +{ #category : #statistics } DataSeries >> cumulativeSum [ "Calculate the cumulative sum of a data series and return a new data series with keys as self keys and values as cumulative sum" @@ -360,22 +358,22 @@ DataSeries >> cumulativeSum [ sum ] ] -{ #category : 'defaults' } +{ #category : #defaults } DataSeries >> defaultHeadTailSize [ ^ 5 ] -{ #category : 'defaults' } +{ #category : #defaults } DataSeries >> defaultName [ ^ '(no name)' ] -{ #category : 'defaults' } +{ #category : #defaults } DataSeries >> defaultPrecision [ ^ 0.0001 ] -{ #category : 'accessing' } +{ #category : #accessing } DataSeries >> eighth [ "Answer the eighth element of the receiver. Raise an error if there are not enough elements." @@ -385,7 +383,7 @@ DataSeries >> eighth [ ^ self atIndex: 8 ] -{ #category : 'converting' } +{ #category : #converting } DataSeries >> encodeOneHot [ "Encode the values of the DataSeries into one-hot vectors." @@ -410,18 +408,18 @@ DataSeries >> encodeOneHot [ ^ DataSeries withKeys: self keys values: oneHotValues name: self name ] -{ #category : 'private' } +{ #category : #private } DataSeries >> errorKeyNotFound: aKey [ KeyNotFound signalFor: aKey ] -{ #category : 'errors' } +{ #category : #errors } DataSeries >> errorKeysMismatch [ Error signal: 'Keys of two series do not match' ] -{ #category : 'accessing' } +{ #category : #accessing } DataSeries >> fifth [ "Answer the fifth element of the receiver. Raise an error if there are not enough elements." @@ -431,7 +429,7 @@ DataSeries >> fifth [ ^ self atIndex: 5 ] -{ #category : 'accessing' } +{ #category : #accessing } DataSeries >> first [ "Answer the first element of the receiver. Raise an error if there are not enough elements." @@ -441,7 +439,7 @@ DataSeries >> first [ ^ self atIndex: 1 ] -{ #category : 'statistics' } +{ #category : #statistics } DataSeries >> firstQuartile [ "25% of the values in a set are smaller than or equal to the first Quartile of that set" @@ -450,7 +448,7 @@ DataSeries >> firstQuartile [ ^ self quartile: 1 ] -{ #category : 'accessing' } +{ #category : #accessing } DataSeries >> fourth [ "Answer the fourth element of the receiver. Raise an error if there are not enough elements." @@ -460,7 +458,7 @@ DataSeries >> fourth [ ^ self atIndex: 4 ] -{ #category : 'statistics' } +{ #category : #statistics } DataSeries >> fourthQuartile [ "Fourth Quartile is the maximum value in a set of values" @@ -469,13 +467,13 @@ DataSeries >> fourthQuartile [ ^ self quartile: 4 ] -{ #category : 'grouping' } +{ #category : #grouping } DataSeries >> groupBy: otherSeries aggregateUsing: aBlock [ "Group my values by the unique values of otherSeries, aggregate them using aBlock. Use my name by default" ^ self groupBy: otherSeries aggregateUsing: aBlock as: self name ] -{ #category : 'grouping' } +{ #category : #grouping } DataSeries >> groupBy: otherSeries aggregateUsing: aBlock as: aNewName [ "Group my values by the unique values of otherSeries, aggregate them using aBlock, and answer a new DataSeries with unique values of otherSeries as keys, aggregated values of myself as values, and aNewName as name" @@ -495,13 +493,13 @@ DataSeries >> groupBy: otherSeries aggregateUsing: aBlock as: aNewName [ name: aNewName ] -{ #category : 'grouping' } +{ #category : #grouping } DataSeries >> groupByBins: bins [ ^ self groupByBins: bins labelled: (1 to: bins size - 1) ] -{ #category : 'grouping' } +{ #category : #grouping } DataSeries >> groupByBins: bins labelled: aCollection [ "I receive two parameters: - A collection of bins that will determine intervals to group the values @@ -518,13 +516,13 @@ DataSeries >> groupByBins: bins labelled: aCollection [ ^ self collect: [ :each | labelledIntervals keyAtValue: (labelledIntervals values detect: [ :asso | each between: asso key and: asso value ]) ] ] -{ #category : 'grouping' } +{ #category : #grouping } DataSeries >> groupByUniqueValuesAndAggregateUsing: aBlock [ "Group my values by their unique values and aggregate them using aBlock. Use my name by default" ^ self groupByUniqueValuesAndAggregateUsing: aBlock as: self name ] -{ #category : 'grouping' } +{ #category : #grouping } DataSeries >> groupByUniqueValuesAndAggregateUsing: aBlock as: aNewName [ "Group my values by unique values, aggregate them using aBlock, and answer a new DataSeries with theunique values as keys, aggregated values of myself as values, and aNewName as name" @@ -540,7 +538,7 @@ DataSeries >> groupByUniqueValuesAndAggregateUsing: aBlock as: aNewName [ name: aNewName ] -{ #category : 'testing' } +{ #category : #testing } DataSeries >> hasNil [ "return true if data series has at least one nil value" @@ -555,7 +553,7 @@ DataSeries >> hasNil [ ^ self includes: nil ] -{ #category : 'slicing' } +{ #category : #slicing } DataSeries >> head [ "Returns a data series with first 5 elements of the receiver" @@ -566,7 +564,7 @@ DataSeries >> head [ ^ self head: self defaultHeadTailSize ] -{ #category : 'slicing' } +{ #category : #slicing } DataSeries >> head: aNumber [ "Returns a data series with first aNumber elements of the receiver" @@ -580,20 +578,20 @@ DataSeries >> head: aNumber [ name: self name ] -{ #category : 'initialization' } +{ #category : #initialization } DataSeries >> initialize [ super initialize. name := self defaultName ] -{ #category : 'initialization' } +{ #category : #initialization } DataSeries >> initialize: aCapacity [ "Make sure that initialize is called and the default name is set" self initialize. ^ super initialize: aCapacity ] -{ #category : 'statistics' } +{ #category : #statistics } DataSeries >> interquartileRange [ "The Inter Quartile Range is the difference between the third Quartile and the first Quartile" @@ -602,7 +600,7 @@ DataSeries >> interquartileRange [ ^ self thirdQuartile - self firstQuartile ] -{ #category : 'categorical-numerical' } +{ #category : #'categorical-numerical' } DataSeries >> isCategorical [ "Returns true if atleast one value of the data series is non numerical and returns false otherwise" @@ -615,7 +613,7 @@ DataSeries >> isCategorical [ ^ self isNumerical not ] -{ #category : 'categorical-numerical' } +{ #category : #'categorical-numerical' } DataSeries >> isNumerical [ "Returns true if all values of the data series are numerical values and returns false otherwise" @@ -630,12 +628,12 @@ DataSeries >> isNumerical [ each isNumber ] ] ] -{ #category : 'testing' } +{ #category : #testing } DataSeries >> isSequenceable [ ^ true ] -{ #category : 'private' } +{ #category : #private } DataSeries >> keys: anArrayOfKeys [ | keys | keys := anArrayOfKeys asArray deepCopy. @@ -643,7 +641,7 @@ DataSeries >> keys: anArrayOfKeys [ orderedKeys := keys ] -{ #category : 'accessing' } +{ #category : #accessing } DataSeries >> last [ "Answer the last element of the receiver. Raise an error if there are not enough elements." @@ -653,7 +651,7 @@ DataSeries >> last [ ^ self atIndex: self size ] -{ #category : 'math functions' } +{ #category : #'math functions' } DataSeries >> log: base [ "Returns a data series containing the logarithm of each value in the receiver using the specified base." @@ -664,21 +662,21 @@ DataSeries >> log: base [ ^ self collect: [ :each | each log: base ] ] -{ #category : 'categorical-numerical' } +{ #category : #'categorical-numerical' } DataSeries >> makeCategorical [ "Converts a data series to a categorical data series" forcedIsNumerical := false ] -{ #category : 'categorical-numerical' } +{ #category : #'categorical-numerical' } DataSeries >> makeNumerical [ "Converts a data series to a numerical data series" forcedIsNumerical := true ] -{ #category : 'statistics' } +{ #category : #statistics } DataSeries >> max [ "Returns the maximum value of the dataseries without including nils" @@ -687,7 +685,7 @@ DataSeries >> max [ ^ self removeNils values max ] -{ #category : 'statistics' } +{ #category : #statistics } DataSeries >> median [ "Returns the median without including nils" @@ -696,7 +694,7 @@ DataSeries >> median [ ^ self removeNils values median ] -{ #category : 'statistics' } +{ #category : #statistics } DataSeries >> min [ "Returns the minimum value of the dataseries without including nils" @@ -705,7 +703,7 @@ DataSeries >> min [ ^ self removeNils values min ] -{ #category : 'accessing' } +{ #category : #accessing } DataSeries >> mode [ "The mode of a set of values is the value that appears most often. " @@ -718,21 +716,21 @@ DataSeries >> mode [ ^ valueCounts keyAtValue: valueCounts max ] -{ #category : 'accessing' } +{ #category : #accessing } DataSeries >> name [ "Answer the name of the receiver" ^ name ] -{ #category : 'accessing' } +{ #category : #accessing } DataSeries >> name: anObject [ "Set the name of the receiver to anObject" name := anObject ] -{ #category : 'accessing' } +{ #category : #accessing } DataSeries >> ninth [ "Answer the ninth element of the receiver. Raise an error if there are not enough elements." @@ -742,7 +740,7 @@ DataSeries >> ninth [ ^ self atIndex: 9 ] -{ #category : 'statistics' } +{ #category : #statistics } DataSeries >> quantile: aNumber [ "A quantile determines how many values in a distribution are above or below a certain limit. Eg: if the parameter aNumber is 85, a value from the data series is returned which is greater than or equal to 85% of the values in the data series" @@ -758,7 +756,7 @@ Eg: if the parameter aNumber is 85, a value from the data series is returned whi ^ sortedSeries atIndex: index ] -{ #category : 'statistics' } +{ #category : #statistics } DataSeries >> quartile: aNumber [ "Quartiles are three values that split sorted data into four parts, each with an equal number of observations. Eg: if the parameter aNumber is 3, the Third Quartile of the data series is returned" @@ -768,7 +766,7 @@ Eg: if the parameter aNumber is 3, the Third Quartile of the data series is retu ^ self quantile: 25 * aNumber ] -{ #category : 'enumerating' } +{ #category : #enumerating } DataSeries >> reject: aBlock [ | result | result := super reject: aBlock. @@ -776,21 +774,21 @@ DataSeries >> reject: aBlock [ ^ result ] -{ #category : 'removing' } +{ #category : #removing } DataSeries >> removeAt: aKey [ "Removes element from the data series with key aKey" ^ self removeKey: aKey ] -{ #category : 'removing' } +{ #category : #removing } DataSeries >> removeAtIndex: aNumber [ "Removes element from the data series with index aNumber" ^ self removeAt: (self keys at: aNumber) ] -{ #category : 'removing' } +{ #category : #removing } DataSeries >> removeDuplicates [ "Answer the unique values of the receiver by removing duplicates" @@ -801,7 +799,7 @@ DataSeries >> removeDuplicates [ ^ self asSet asArray ] -{ #category : 'removing' } +{ #category : #removing } DataSeries >> removeNils [ "Removes elements with nil values from the data series" @@ -816,7 +814,7 @@ DataSeries >> removeNils [ self removeKeys: keysWithNilValues ] -{ #category : 'replacing' } +{ #category : #replacing } DataSeries >> replaceNilsWith: anObject [ "Replaces nils inplace with anObject" @@ -828,7 +826,7 @@ DataSeries >> replaceNilsWith: anObject [ ele ifNil: [ self atIndex: index put: anObject ] ] ] -{ #category : 'replacing' } +{ #category : #replacing } DataSeries >> replaceNilsWithAverage [ "Replaces nils inplace with average" @@ -841,7 +839,7 @@ DataSeries >> replaceNilsWithAverage [ self replaceNilsWith: mean ] -{ #category : 'replacing' } +{ #category : #replacing } DataSeries >> replaceNilsWithMedian [ "Replaces nils inplace with median" @@ -854,7 +852,7 @@ DataSeries >> replaceNilsWithMedian [ self replaceNilsWith: median ] -{ #category : 'replacing' } +{ #category : #replacing } DataSeries >> replaceNilsWithMode [ "Replaces nils inplace with mode" @@ -867,7 +865,7 @@ DataSeries >> replaceNilsWithMode [ self replaceNilsWith: mode ] -{ #category : 'replacing' } +{ #category : #replacing } DataSeries >> replaceNilsWithPreviousValue [ "Replaces nils inplace with previous non-nil value" @@ -881,7 +879,7 @@ DataSeries >> replaceNilsWithPreviousValue [ value := self atIndex: index ] ] -{ #category : 'replacing' } +{ #category : #replacing } DataSeries >> replaceNilsWithZeros [ "Replaces nils inplace with zero" @@ -892,7 +890,7 @@ DataSeries >> replaceNilsWithZeros [ self replaceNilsWith: 0 ] -{ #category : 'accessing' } +{ #category : #accessing } DataSeries >> second [ "Answer the second element of the receiver. Raise an error if there are not enough elements." @@ -902,7 +900,7 @@ DataSeries >> second [ ^ self atIndex: 2 ] -{ #category : 'statistics' } +{ #category : #statistics } DataSeries >> secondQuartile [ "50% of the values in a set are smaller than or equal to the second Quartile of that set. It is also known as the median" @@ -911,7 +909,7 @@ DataSeries >> secondQuartile [ ^ self quartile: 2 ] -{ #category : 'enumerating' } +{ #category : #enumerating } DataSeries >> select: aBlock [ | result | result := super select: aBlock. @@ -919,7 +917,7 @@ DataSeries >> select: aBlock [ ^ result ] -{ #category : 'accessing' } +{ #category : #accessing } DataSeries >> seventh [ "Answer the seventh element of the receiver. Raise an error if there are not enough elements." @@ -929,7 +927,7 @@ DataSeries >> seventh [ ^ self atIndex: 7 ] -{ #category : 'accessing' } +{ #category : #accessing } DataSeries >> sixth [ "Answer the sixth element of the receiver. Raise an error if there are not enough elements." @@ -939,7 +937,7 @@ DataSeries >> sixth [ ^ self atIndex: 6 ] -{ #category : 'sorting' } +{ #category : #sorting } DataSeries >> sort [ "Arranges a data series in ascending order of its values" @@ -950,7 +948,7 @@ DataSeries >> sort [ self sort: [ :a :b | a <= b ] ] -{ #category : 'sorting' } +{ #category : #sorting } DataSeries >> sort: aBlock [ "Arranges a data series by applying aBlock on its values" @@ -966,7 +964,7 @@ DataSeries >> sort: aBlock [ self sortAssociations: associationBlock ] -{ #category : 'sorting' } +{ #category : #sorting } DataSeries >> sortAssociations: aBlock [ | sortedAssociations | sortedAssociations := self associations sort: aBlock. @@ -974,7 +972,7 @@ DataSeries >> sortAssociations: aBlock [ self addAll: sortedAssociations ] -{ #category : 'sorting' } +{ #category : #sorting } DataSeries >> sortDescending [ "Arranges a data series in descending order of its values" @@ -985,7 +983,7 @@ DataSeries >> sortDescending [ self sort: [ :a :b | a > b ] ] -{ #category : 'sorting' } +{ #category : #sorting } DataSeries >> sorted [ "Returns a sorted copy of the data series without rearranging the original data series" @@ -996,7 +994,7 @@ DataSeries >> sorted [ ^ self sorted: [ :a :b | a <= b ] ] -{ #category : 'sorting' } +{ #category : #sorting } DataSeries >> sorted: aBlock [ "Returns a copy of the data series after applying aBlock without rearranging the original data series" @@ -1012,14 +1010,14 @@ DataSeries >> sorted: aBlock [ ^ self sortedAssociations: associationBlock ] -{ #category : 'sorting' } +{ #category : #sorting } DataSeries >> sortedAssociations: aBlock [ | sortedAssociations | sortedAssociations := self associations sort: aBlock. ^ sortedAssociations asDataSeries name: self name; yourself ] -{ #category : 'sorting' } +{ #category : #sorting } DataSeries >> sortedDescending [ "Returns a sorted copy of the data series in descending order without rearranging the original data series" @@ -1030,7 +1028,7 @@ DataSeries >> sortedDescending [ ^ self sorted: [ :a :b | a > b ] ] -{ #category : 'statistics' } +{ #category : #statistics } DataSeries >> stdev [ "Returns the standard deviation of the dataseries without including nils" @@ -1039,7 +1037,7 @@ DataSeries >> stdev [ ^ self removeNils values stdev ] -{ #category : 'transformation' } +{ #category : #transformation } DataSeries >> sum [ "Return the sum of the values over the requested axis. Nil values are excluded." @@ -1055,7 +1053,7 @@ DataSeries >> sum [ ^ result ] -{ #category : 'statistics' } +{ #category : #statistics } DataSeries >> summary [ "A data series is returned which is a statistical summary of the data series. With keys as different statistical measures and values as the values returned @@ -1078,7 +1076,7 @@ DataSeries >> summary [ ^ summary ] -{ #category : 'slicing' } +{ #category : #slicing } DataSeries >> tail [ "Returns a data series with last 5 elements of the receiver" @@ -1089,7 +1087,7 @@ DataSeries >> tail [ ^ self tail: self defaultHeadTailSize ] -{ #category : 'slicing' } +{ #category : #slicing } DataSeries >> tail: aNumber [ "Returns a data series with last aNumber elements of the receiver" @@ -1105,7 +1103,7 @@ DataSeries >> tail: aNumber [ name: self name ] -{ #category : 'accessing' } +{ #category : #accessing } DataSeries >> third [ "Answer the third element of the receiver. Raise an error if there are not enough elements." @@ -1115,7 +1113,7 @@ DataSeries >> third [ ^ self atIndex: 3 ] -{ #category : 'statistics' } +{ #category : #statistics } DataSeries >> thirdQuartile [ "75% of the values in a set are smaller than or equal to the third Quartile of that set" @@ -1124,7 +1122,7 @@ DataSeries >> thirdQuartile [ ^ self quartile: 3 ] -{ #category : 'accessing' } +{ #category : #accessing } DataSeries >> uniqueValues [ self @@ -1135,7 +1133,7 @@ DataSeries >> uniqueValues [ ^ self removeDuplicates ] -{ #category : 'statistics' } +{ #category : #statistics } DataSeries >> valueCounts [ "Calculates the frequency of each value in the data series and returns a data series in descending order of frequencies" @@ -1144,7 +1142,7 @@ DataSeries >> valueCounts [ ^ (self groupByUniqueValuesAndAggregateUsing: #size) sortDescending ] -{ #category : 'statistics' } +{ #category : #statistics } DataSeries >> valueFrequencies [ "Calculates the relative frequency of values in the data series. Relative frequency is the ratio of the number of times a value occurs in a set to the total number of values in the set" @@ -1157,7 +1155,7 @@ DataSeries >> valueFrequencies [ ^ freq ] -{ #category : 'enumerating' } +{ #category : #enumerating } DataSeries >> with: aCollection collect: twoArgBlock [ "Collect and return the result of evaluating twoArgBlock with corresponding elements from this series and aCollection." | result | @@ -1174,7 +1172,7 @@ DataSeries >> with: aCollection collect: twoArgBlock [ ^ result ] -{ #category : 'enumerating' } +{ #category : #enumerating } DataSeries >> withIndexCollect: aBlock [ | result | result := self species newFrom: @@ -1184,13 +1182,13 @@ DataSeries >> withIndexCollect: aBlock [ ^ result ] -{ #category : 'enumerating' } +{ #category : #enumerating } DataSeries >> withIndexDetect: aBlock [ ^ self withIndexDetect: aBlock ifNone: [ NotFound signal ] ] -{ #category : 'enumerating' } +{ #category : #enumerating } DataSeries >> withIndexDetect: aBlock ifNone: exceptionBlock [ | selectedIndex | @@ -1202,17 +1200,17 @@ DataSeries >> withIndexDetect: aBlock ifNone: exceptionBlock [ ^ self atIndex: selectedIndex ] -{ #category : 'enumerating' } +{ #category : #enumerating } DataSeries >> withIndexDo: aBlock [ self keys withIndexDo: [ :each :i | aBlock value: (self at: each) value: i ] ] -{ #category : 'enumerating' } +{ #category : #enumerating } DataSeries >> withIndexReject: aBlock [ ^ self withIndexSelect: [ :each :i | (aBlock value: each value: i) not ] ] -{ #category : 'enumerating' } +{ #category : #enumerating } DataSeries >> withIndexSelect: aBlock [ | selectedIndices | @@ -1225,7 +1223,7 @@ DataSeries >> withIndexSelect: aBlock [ name: self name ] -{ #category : 'enumerating' } +{ #category : #enumerating } DataSeries >> withKeyCollect: aBlock [ | result | result := self species newFrom: @@ -1235,13 +1233,13 @@ DataSeries >> withKeyCollect: aBlock [ ^ result ] -{ #category : 'enumerating' } +{ #category : #enumerating } DataSeries >> withKeyDetect: aBlock [ ^ self withKeyDetect: aBlock ifNone: [ NotFound signal ] ] -{ #category : 'enumerating' } +{ #category : #enumerating } DataSeries >> withKeyDetect: aBlock ifNone: exceptionBlock [ | selectedKey | @@ -1253,17 +1251,17 @@ DataSeries >> withKeyDetect: aBlock ifNone: exceptionBlock [ ^ self at: selectedKey ] -{ #category : 'enumerating' } +{ #category : #enumerating } DataSeries >> withKeyDo: aBlock [ self keysDo: [ :each | aBlock value: (self at: each) value: each ] ] -{ #category : 'enumerating' } +{ #category : #enumerating } DataSeries >> withKeyReject: aBlock [ ^ self withKeySelect: [ :each :key | (aBlock value: each value: key) not ] ] -{ #category : 'enumerating' } +{ #category : #enumerating } DataSeries >> withKeySelect: aBlock [ | selectedKeys | @@ -1276,7 +1274,7 @@ DataSeries >> withKeySelect: aBlock [ name: self name ] -{ #category : 'enumerating' } +{ #category : #enumerating } DataSeries >> withSeries: otherDataSeries collect: twoArgBlock [ "Collect and return the result of evaluating twoArgBlock with corresponding elements from this series and otherDataSeries." | result | @@ -1294,7 +1292,7 @@ DataSeries >> withSeries: otherDataSeries collect: twoArgBlock [ ^ result ] -{ #category : 'private' } +{ #category : #private } DataSeries >> withoutNils [ "Returns a copy of the data series without the nil values" @@ -1305,7 +1303,7 @@ DataSeries >> withoutNils [ ^ self reject: #isNil ] -{ #category : 'statistics' } +{ #category : #statistics } DataSeries >> zerothQuartile [ "Zeroth Quartile is the minimum value in a set of values" diff --git a/src/DataFrame/String.extension.st b/src/DataFrame/String.extension.st index ba771fcb..47217cda 100644 --- a/src/DataFrame/String.extension.st +++ b/src/DataFrame/String.extension.st @@ -1,6 +1,6 @@ -Extension { #name : 'String' } +Extension { #name : #String } -{ #category : '*DataFrame' } +{ #category : #'*DataFrame' } String >> using: aBlock [ "Answer a block which takes two arguments: a data frame and a column name. This block groups a column of data frame which has name equal to myself by the values of another column, aggregates them using aBlock, and returns a new data series" ^ [ :dataFrame :groupColumnName | @@ -10,7 +10,7 @@ String >> using: aBlock [ aggregateUsing: aBlock ] ] -{ #category : '*DataFrame' } +{ #category : #'*DataFrame' } String >> using: aBlock as: aNewName [ "Answer a block which takes two arguments: a data frame and a column name. This block groups a column of data frame which has name equal to myself by the values of another column, aggregates them using aBlock, and returns a new data series with aNewName" ^ [ :dataFrame :groupColumnName | diff --git a/src/DataFrame/package.st b/src/DataFrame/package.st index cbf8c4f5..b853b550 100644 --- a/src/DataFrame/package.st +++ b/src/DataFrame/package.st @@ -1 +1 @@ -Package { #name : 'DataFrame' } +Package { #name : #DataFrame }