From 9861e7921eb4cccdf132b3910845f7795d9a97c3 Mon Sep 17 00:00:00 2001
From: Shad Storhaug
Date: Mon, 28 Oct 2019 23:59:55 +0700
Subject: [PATCH] Added SCG.ISet to TreeSet (fixes #53)

---
 C5.Tests/Trees/RedBlackTreeSetTests.cs | 203 +++++++++++
 C5/Trees/TreeSet.cs                    | 481 ++++++++++++++++++++++++-
 2 files changed, 683 insertions(+), 1 deletion(-)

diff --git a/C5.Tests/Trees/RedBlackTreeSetTests.cs b/C5.Tests/Trees/RedBlackTreeSetTests.cs
index 59196798..e589a5c1 100644
--- a/C5.Tests/Trees/RedBlackTreeSetTests.cs
+++ b/C5.Tests/Trees/RedBlackTreeSetTests.cs
@@ -3040,4 +3040,207 @@ public void Dispose()
         }
     }
 
+
+    [TestFixture]
+    public class SCGISet
+    {
+        private SCG.ISet<string> tree;
+
+        [SetUp]
+        public void Init()
+        {
+            tree = new TreeSet<string>(new SC())
+            {
+                "A", "C", "E"
+            };
+        }
+
+        [TearDown]
+        public void Dispose()
+        {
+            tree = null;
+        }
+
+        [Test]
+        public void Add()
+        {
+            Assert.IsTrue(tree.Add("Z"));
+            Assert.AreEqual(4, tree.Count);
+            Assert.IsTrue(tree.Contains("Z"));
+            Assert.IsFalse(tree.Add("A"));
+        }
+
+        [Test]
+        public void ExceptWith()
+        {
+            tree.ExceptWith(new SCG.List<string> { "C", "E", "Z" });
+            Assert.AreEqual(1, tree.Count);
+            Assert.IsTrue(tree.Contains("A"));
+        }
+
+        [Test]
+        public void IntersectWith()
+        {
+            tree.IntersectWith(new SCG.List<string> { "C", "E", "Z" });
+            Assert.AreEqual(2, tree.Count);
+            Assert.IsTrue(tree.Contains("C"));
+            Assert.IsTrue(tree.Contains("E"));
+        }
+
+        [Test]
+        public void IsProperSubsetOf()
+        {
+            Assert.IsFalse(tree.IsProperSubsetOf(new SCG.List<string>()));
+            Assert.IsFalse(tree.IsProperSubsetOf(new SCG.List<string> { "C", "E", "A" }));
+            Assert.IsTrue(tree.IsProperSubsetOf(new SCG.List<string> { "C", "E", "A", "X" }));
+            Assert.IsFalse(tree.IsProperSubsetOf(new SCG.List<string> { "C", "Z" }));
+            tree.Clear();
+            Assert.IsTrue(tree.IsProperSubsetOf(new SCG.List<string> { "C", "A" }));
+        }
+
+        [Test]
+        public void IsProperSupersetOf()
+        {
+            Assert.IsTrue(tree.IsProperSupersetOf(new SCG.List<string>()));
+            Assert.IsFalse(tree.IsProperSupersetOf(new SCG.List<string> { "C", "E", "A" }));
+            Assert.IsTrue(tree.IsProperSupersetOf(new SCG.List<string> { "C", "A" }));
+            Assert.IsFalse(tree.IsProperSupersetOf(new SCG.List<string> { "C", "Z" }));
+            tree.Clear();
+            Assert.IsFalse(tree.IsProperSupersetOf(new SCG.List<string> { "C", "A" }));
+        }
+
+        [Test]
+        public void IsSubsetOf()
+        {
+            Assert.IsFalse(tree.IsSubsetOf(new SCG.List<string>()));
+            Assert.IsTrue(tree.IsSubsetOf(new SCG.List<string> { "C", "E", "A" }));
+            Assert.IsTrue(tree.IsSubsetOf(new SCG.List<string> { "C", "E", "A", "X" }));
+            Assert.IsFalse(tree.IsSubsetOf(new SCG.List<string> { "C", "Z" }));
+            Assert.IsFalse(tree.IsSubsetOf(new SCG.List<string> { "C", "A", "Z" }));
+            tree.Clear();
+            Assert.IsTrue(tree.IsSubsetOf(new SCG.List<string> { "C", "A" }));
+        }
+
+        [Test]
+        public void IsSupersetOf()
+        {
+            Assert.IsTrue(tree.IsSupersetOf(new SCG.List<string>()));
+            Assert.IsTrue(tree.IsSupersetOf(new SCG.List<string> { "C", "E", "A" }));
+            Assert.IsFalse(tree.IsSupersetOf(new SCG.List<string> { "C", "E", "A", "X" }));
+            Assert.IsFalse(tree.IsSupersetOf(new SCG.List<string> { "C", "Z" }));
+            Assert.IsTrue(tree.IsSupersetOf(new SCG.List<string> { "C", "A" }));
+            tree.Clear();
+            Assert.IsFalse(tree.IsSupersetOf(new SCG.List<string> { "C", "A" }));
+        }
+
+        [Test]
+        public void Overlaps()
+        {
+            Assert.IsFalse(tree.Overlaps(new SCG.List<string>()));
+            Assert.IsTrue(tree.Overlaps(new SCG.List<string> { "C", "E", "A" }));
+            Assert.IsTrue(tree.Overlaps(new SCG.List<string> { "C", "E", "A", "X" }));
+            Assert.IsFalse(tree.Overlaps(new SCG.List<string> { "X", "Z" }));
+            Assert.IsTrue(tree.Overlaps(new SCG.List<string> { "C", "A" }));
+            tree.Clear();
+            Assert.IsFalse(tree.Overlaps(new SCG.List<string> { "C", "A" }));
+        }
+
+        [Test]
+        public void SetEquals()
+        {
+            Assert.IsFalse(tree.SetEquals(new SCG.List<string>()));
+            Assert.IsTrue(tree.SetEquals(new SCG.List<string> { "C", "E", "A" }));
+            Assert.IsFalse(tree.SetEquals(new SCG.List<string> { "C", "E", "A", "X" }));
+            Assert.IsFalse(tree.SetEquals(new SCG.List<string> { "X", "Z" }));
+            Assert.IsFalse(tree.SetEquals(new SCG.List<string> { "C", "A" }));
+            tree.Clear();
+            Assert.IsFalse(tree.SetEquals(new SCG.List<string> { "C", "A" }));
+            Assert.IsTrue(tree.SetEquals(new SCG.List<string>()));
+        }
+
+        [Test]
+        public void SymmetricExceptWith()
+        {
+            tree.SymmetricExceptWith(new SCG.List<string>());
+            Assert.AreEqual(3, tree.Count);
+            tree.SymmetricExceptWith(new SCG.List<string> { "C", "E", "R", "X" });
+            Assert.AreEqual(3, tree.Count);
+            Assert.IsTrue(tree.SetEquals(new SCG.List<string> { "A", "R", "X" }));
+            tree.SymmetricExceptWith(new SCG.List<string>(new SCG.List<string> { "A", "R", "X" }));
+            Assert.AreEqual(0, tree.Count);
+
+            tree.Clear();
+            tree.SymmetricExceptWith(new SCG.List<string> { "C", "E", "A" });
+            Assert.IsTrue(tree.SetEquals(new SCG.List<string> { "C", "E", "A" }));
+        }
+
+        [Test]
+        public void UnionWith()
+        {
+            tree.UnionWith(new SCG.List<string>());
+            Assert.AreEqual(3, tree.Count);
+            tree.UnionWith(new SCG.List<string> { "C", "E", "R", "X" });
+            Assert.AreEqual(5, tree.Count);
+            Assert.IsTrue(tree.SetEquals(new SCG.List<string> { "A", "C", "E", "R", "X" }));
+            tree.UnionWith(new SCG.List<string>(new SCG.List<string> { "A", "R", "X" }));
+            Assert.AreEqual(5, tree.Count);
+            Assert.IsTrue(tree.SetEquals(new SCG.List<string> { "A", "C", "E", "R", "X" }));
+
+            tree.Clear();
+            tree.UnionWith(new SCG.List<string> { "C", "E", "A" });
+            Assert.IsTrue(tree.SetEquals(new SCG.List<string> { "C", "E", "A" }));
+        }
+
+        // ICollection members
+        [Test]
+        public void Clear()
+        {
+            Assert.AreEqual(3, tree.Count);
+            tree.Clear();
+            Assert.AreEqual(0, tree.Count);
+        }
+
+        [Test]
+        public void Contains()
+        {
+            Assert.IsTrue(tree.Contains("A"));
+            Assert.IsFalse(tree.Contains("Z"));
+        }
+
+        [Test]
+        public void CopyTo()
+        {
+            var values = new string[tree.Count + 2];
+            tree.CopyTo(values, 1);
+            Assert.IsNull(values[0]);
+            Assert.AreEqual("A", values[1]);
+            Assert.AreEqual("C", values[2]);
+            Assert.AreEqual("E", values[3]);
+            Assert.IsNull(values[4]);
+        }
+
+        [Test]
+        public void Remove()
+        {
+            Assert.AreEqual(3, tree.Count);
+            Assert.IsTrue(tree.Remove("A"));
+            Assert.AreEqual(2, tree.Count);
+            Assert.IsFalse(tree.Remove("A"));
+            Assert.AreEqual(2, tree.Count);
+        }
+
+        [Test]
+        public void Count()
+        {
+            Assert.AreEqual(3, tree.Count);
+            tree.Add("Foo");
+            Assert.AreEqual(4, tree.Count);
+        }
+
+        [Test]
+        public void IsReadOnly()
+        {
+            Assert.IsFalse(tree.IsReadOnly);
+        }
+    }
 }
\ No newline at end of file
diff --git a/C5/Trees/TreeSet.cs b/C5/Trees/TreeSet.cs
index 0a2b2803..93739408 100644
--- a/C5/Trees/TreeSet.cs
+++ b/C5/Trees/TreeSet.cs
@@ -21,7 +21,7 @@ namespace C5
     /// leak possible with other usage modes.
     /// </summary>
     [Serializable]
-    public class TreeSet<T> : SequencedBase<T>, IIndexedSorted<T>, IPersistentSorted<T>
+    public class TreeSet<T> : SequencedBase<T>, IIndexedSorted<T>, IPersistentSorted<T>, SCG.ISet<T>
     {
         #region Fields
 
@@ -520,6 +520,485 @@ void System.Collections.IEnumerator.Reset()
             #endregion
         }
 
+        #endregion
+
+        #region ISet Members
+
+        /// <summary>
+        /// Modifies the current <see cref="TreeSet{T}"/> object to contain all elements that are present in itself, the specified collection, or both.
+        /// </summary>
+        /// <param name="other">The collection to compare to the current set.</param>
+        public virtual void UnionWith(SCG.IEnumerable<T> other)
+        {
+            if (other == null)
+                throw new ArgumentNullException(nameof(other));
+
+            AddAll(other);
+        }
+
+        /// <summary>
+        /// Modifies the current <see cref="TreeSet{T}"/> object so that it contains only elements that are also in a specified collection.
+        /// </summary>
+        /// <param name="other">The collection to compare to the current set.</param>
+        public virtual void IntersectWith(SCG.IEnumerable<T> other)
+        {
+            if (other == null)
+                throw new ArgumentNullException(nameof(other));
+
+            // intersection of anything with empty set is empty set, so return if count is 0
+            if (this.size == 0)
+            {
+                return;
+            }
+
+            // if other is empty, intersection is empty set; remove all elements and we're done
+            // can only figure this out if other implements ICollection<T>. (IEnumerable<T> has no count)
+            var otherAsCollection = other as SCG.ICollection<T>;
+            if (otherAsCollection != null)
+            {
+                if (otherAsCollection.Count == 0)
+                {
+                    Clear();
+                    return;
+                }
+
+                var otherAsSet = other as TreeSet<T>;
+                // faster if other is a TreeSet<T> using the same comparer, because then
+                // other's Contains agrees with this set's notion of equality.
+                if (otherAsSet != null && AreEqualityComparersEqual(this, otherAsSet))
+                {
+                    IntersectWithHashSetWithSameEC(otherAsSet);
+                    return;
+                }
+            }
+
+            IntersectWithEnumerable(other);
+        }
+
+        /// <summary>
+        /// Removes all elements in the specified collection from the current <see cref="TreeSet{T}"/> object.
+        /// </summary>
+        /// <param name="other">The collection of items to remove from the set.</param>
+        public virtual void ExceptWith(SCG.IEnumerable<T> other)
+        {
+            if (other == null)
+                throw new ArgumentNullException(nameof(other));
+
+            // this is already the empty set; return
+            if (this.size == 0)
+                return;
+
+            // special case if other is this; a set minus itself is the empty set
+            if (other == this)
+            {
+                Clear();
+                return;
+            }
+
+            // remove every element in other from this
+            foreach (T element in other)
+            {
+                Remove(element);
+            }
+        }
+
+        /// <summary>
+        /// Modifies the current set so that it contains only elements that are present either in the current
+        /// <see cref="TreeSet{T}"/> object or in the specified collection, but not both.
+        /// </summary>
+        /// <param name="other">The collection to compare to the current <see cref="TreeSet{T}"/> object.</param>
+        public virtual void SymmetricExceptWith(SCG.IEnumerable<T> other)
+        {
+            if (other == null)
+                throw new ArgumentNullException(nameof(other));
+
+            // if set is empty, then symmetric difference is other
+            if (this.size == 0)
+            {
+                UnionWith(other);
+                return;
+            }
+
+            // special case this; the symmetric difference of a set with itself is the empty set
+            if (other == this)
+            {
+                Clear();
+                return;
+            }
+
+            var otherAsSet = other as TreeSet<T>;
+            // If other is a TreeSet<T>, it has unique elements according to its own comparer,
+            // but if the two sets use different comparers, then the assumption of uniqueness
+            // will fail. So first check if other is a TreeSet<T> using the same comparer;
+            // symmetric except is then a lot faster and avoids a temporary set because we can
+            // assume uniqueness
+            if (otherAsSet != null && AreEqualityComparersEqual(this, otherAsSet))
+            {
+                SymmetricExceptWithUniqueTreeSet(otherAsSet);
+            }
+            else
+            {
+                var temp = new SCG.SortedSet<T>(other, comparer); // other is enumerated once; temp is unique under this set's comparer
+                foreach (T item in temp)
+                    if (!Remove(item))
+                        Add(item);
+            }
+        }
+
+        /// <summary>
+        /// Determines whether a <see cref="TreeSet{T}"/> object is a subset of the specified collection.
+        /// </summary>
+        /// <param name="other">The collection to compare to the current <see cref="TreeSet{T}"/> object.</param>
+        /// <returns>true if the <see cref="TreeSet{T}"/> object is a subset of other; otherwise, false.</returns>
+        public virtual bool IsSubsetOf(SCG.IEnumerable<T> other)
+        {
+            if (other == null)
+                throw new ArgumentNullException(nameof(other));
+
+            if (this.size == 0)
+            {
+                return true;
+            }
+
+            var otherAsSet = other as TreeSet<T>;
+            // faster if other has unique elements according to this set's comparer; so check
+            // that other is a TreeSet<T> using the same comparer.
+            if (otherAsSet != null && AreEqualityComparersEqual(this, otherAsSet))
+            {
+                // if this has more elements then it can't be a subset
+                if (this.size > otherAsSet.Count)
+                {
+                    return false;
+                }
+
+                // already checked that we're using the same comparer. simply check that
+                // each element in this is contained in other.
+                return IsSubsetOfTreeSetWithSameEC(otherAsSet);
+            }
+            else
+            {
+                // we just need to return true if the other collection
+                // contains all of the elements of this set,
+                // but we need to use the comparison rules of the current set.
+                this.CheckUniqueAndUnfoundElements(other, false, out int uniqueCount, out _);
+                return uniqueCount == this.size;
+            }
+        }
+
+        /// <summary>
+        /// Determines whether a <see cref="TreeSet{T}"/> object is a superset of the specified collection.
+        /// </summary>
+        /// <param name="other">The collection to compare to the current <see cref="TreeSet{T}"/> object.</param>
+        /// <returns>true if the <see cref="TreeSet{T}"/> object is a superset of other; otherwise, false.</returns>
+        public virtual bool IsSupersetOf(SCG.IEnumerable<T> other)
+        {
+            if (other == null)
+                throw new ArgumentNullException(nameof(other));
+
+            // try to fall out early based on counts
+            var otherAsCollection = other as SCG.ICollection<T>;
+            if (otherAsCollection != null)
+            {
+                // if other is the empty set then this is a superset
+                if (otherAsCollection.Count == 0)
+                    return true;
+
+                var otherAsSet = other as TreeSet<T>;
+                // try to compare based on counts alone if other is a TreeSet<T> with the
+                // same comparer (its elements are then unique under this set's comparer too)
+                if (otherAsSet != null && AreEqualityComparersEqual(this, otherAsSet))
+                {
+                    if (otherAsSet.Count > this.size)
+                    {
+                        return false;
+                    }
+                }
+            }
+
+            return this.ContainsAll(other);
+        }
+
+        /// <summary>
+        /// Determines whether a <see cref="TreeSet{T}"/> object is a proper superset of the specified collection.
+        /// </summary>
+        /// <param name="other">The collection to compare to the current <see cref="TreeSet{T}"/> object.</param>
+        /// <returns>true if the <see cref="TreeSet{T}"/> object is a proper superset of other; otherwise, false.</returns>
+        public virtual bool IsProperSupersetOf(SCG.IEnumerable<T> other)
+        {
+            if (other == null)
+                throw new ArgumentNullException(nameof(other));
+
+            // the empty set isn't a proper superset of any set.
+            if (this.size == 0)
+            {
+                return false;
+            }
+
+            var otherAsCollection = other as SCG.ICollection<T>;
+            if (otherAsCollection != null)
+            {
+                // if other is the empty set then this is a superset
+                if (otherAsCollection.Count == 0)
+                    return true; // note that this has at least one element, based on above check
+
+                var otherAsSet = other as TreeSet<T>;
+                // faster if other is a TreeSet<T> with the same comparer
+                if (otherAsSet != null && AreEqualityComparersEqual(this, otherAsSet))
+                {
+                    if (otherAsSet.Count >= this.size)
+                    {
+                        return false;
+                    }
+                    // now perform element check
+                    return ContainsAll(otherAsSet);
+                }
+            }
+
+            // couldn't fall out in the above cases; do it the long way
+            this.CheckUniqueAndUnfoundElements(other, true, out int uniqueCount, out int unfoundCount);
+            return uniqueCount < this.size && unfoundCount == 0;
+        }
+
+        /// <summary>
+        /// Determines whether a <see cref="TreeSet{T}"/> object is a proper subset of the specified collection.
+        /// </summary>
+        /// <param name="other">The collection to compare to the current <see cref="TreeSet{T}"/> object.</param>
+        /// <returns>true if the <see cref="TreeSet{T}"/> object is a proper subset of other; otherwise, false.</returns>
+        public virtual bool IsProperSubsetOf(SCG.IEnumerable<T> other)
+        {
+            if (other == null)
+                throw new ArgumentNullException(nameof(other));
+
+            // counts can only be compared up front when other exposes one (ICollection<T>)
+            var otherAsCollection = other as SCG.ICollection<T>;
+            if (otherAsCollection != null)
+            {
+                // the empty set is a proper subset of anything but the empty set
+                if (this.size == 0)
+                    return otherAsCollection.Count > 0;
+
+                var otherAsSet = other as TreeSet<T>;
+                // faster if other is a TreeSet<T> (and we're using the same comparer)
+                if (otherAsSet != null && AreEqualityComparersEqual(this, otherAsSet))
+                {
+                    if (this.size >= otherAsSet.Count)
+                    {
+                        return false;
+                    }
+                    // this has strictly fewer items than other, so the following
+                    // check suffices for proper subset.
+                    return IsSubsetOfTreeSetWithSameEC(otherAsSet);
+                }
+            }
+
+            this.CheckUniqueAndUnfoundElements(other, false, out int uniqueCount, out int unfoundCount);
+            return uniqueCount == this.size && unfoundCount > 0;
+        }
+
+        /// <summary>
+        /// Determines whether the current <see cref="TreeSet{T}"/> object and a specified collection share common elements.
+        /// </summary>
+        /// <param name="other">The collection to compare to the current <see cref="TreeSet{T}"/> object.</param>
+        /// <returns>true if the <see cref="TreeSet{T}"/> object and other share at least one common element; otherwise, false.</returns>
+        public virtual bool Overlaps(SCG.IEnumerable<T> other)
+        {
+            if (other == null)
+                throw new ArgumentNullException(nameof(other));
+
+            if (this.size != 0)
+            {
+                foreach (var local in other)
+                {
+                    if (this.Contains(local))
+                    {
+                        return true;
+                    }
+                }
+            }
+            return false;
+        }
+
+        /// <summary>
+        /// Determines whether the current <see cref="TreeSet{T}"/> and the specified collection contain the same elements.
+        /// </summary>
+        /// <param name="other">The collection to compare to the current <see cref="TreeSet{T}"/>.</param>
+        /// <returns>true if the current <see cref="TreeSet{T}"/> is equal to other; otherwise, false.</returns>
+        public virtual bool SetEquals(SCG.IEnumerable<T> other)
+        {
+            if (other == null)
+                throw new ArgumentNullException(nameof(other));
+
+            var otherAsSet = other as TreeSet<T>;
+            // faster if other is a TreeSet<T> and we're using the same comparer
+            if (otherAsSet != null && AreEqualityComparersEqual(this, otherAsSet))
+            {
+                // attempt to return early: since both contain unique elements, if they have
+                // different counts, then they can't be equal
+                if (this.size != otherAsSet.Count)
+                    return false;
+
+                // already confirmed that the sets have the same number of distinct elements, so if
+                // one is a superset of the other then they must be equal
+                return ContainsAll(otherAsSet);
+            }
+            else
+            {
+                var otherAsCollection = other as SCG.ICollection<T>;
+                if (otherAsCollection != null)
+                {
+                    // if this count is 0 but other contains at least one element, they can't be equal
+                    if (this.size == 0 && otherAsCollection.Count > 0)
+                        return false;
+                }
+
+                this.CheckUniqueAndUnfoundElements(other, true, out int uniqueCount, out int unfoundCount);
+                return uniqueCount == this.size && unfoundCount == 0;
+            }
+        }
+
+        /// <summary>
+        /// Scans other once and reports how many distinct elements of this set occur in it (uniqueCount)
+        /// and how many of its elements are not in this set (unfoundCount; repeats are counted each time).
+        /// When this set is empty, or returnIfUnfound is true, the scan stops at the first unfound element,
+        /// so unfoundCount is then at most 1.
+        /// </summary>
+        private void CheckUniqueAndUnfoundElements(SCG.IEnumerable<T> other, bool returnIfUnfound, out int uniqueCount, out int unfoundCount)
+        {
+            uniqueCount = 0;
+            unfoundCount = 0;
+
+            // special case when this has no elements: all we want to know is whether other has any
+            if (this.size == 0)
+            {
+                foreach (T item in other)
+                {
+                    unfoundCount = 1;
+                    break;
+                }
+                return;
+            }
+
+            // one flag per index in this set, so repeated elements of other are counted only once
+            var bitArray = new System.Collections.BitArray(this.size, false);
+
+            foreach (var item in other)
+            {
+                var index = IndexOf(item);
+                if (index >= 0)
+                {
+                    if (!bitArray.Get(index))
+                    {
+                        // item hasn't been seen yet
+                        bitArray.Set(index, true);
+                        uniqueCount++;
+                    }
+                }
+                else
+                {
+                    unfoundCount++;
+                    if (returnIfUnfound)
+                        break;
+                }
+            }
+        }
+
+        /// <summary>
+        /// Checks whether two sets order their elements with equal comparers. This is used by algorithms that
+        /// can take a faster path when the other set is known to hold elements that are unique under this
+        /// set's comparer; if the sets use different comparers, that uniqueness assumption breaks.
+        /// </summary>
+        /// <param name="set1">The first set.</param>
+        /// <param name="set2">The second set.</param>
+        /// <returns>true if set1.Comparer.Equals(set2.Comparer); otherwise, false.</returns>
+        private static bool AreEqualityComparersEqual(TreeSet<T> set1, TreeSet<T> set2)
+        {
+            return set1.Comparer.Equals(set2.Comparer);
+        }
+
+        /// <summary>
+        /// If other is a TreeSet that uses the same comparer, intersect is much faster because we can use
+        /// other's Contains. Items are collected first so this set is never modified while it is enumerated.
+        /// </summary>
+        /// <param name="other">A set using the same comparer as this set.</param>
+        private void IntersectWithHashSetWithSameEC(TreeSet<T> other)
+        {
+            var toRemove = new SCG.List<T>();
+            foreach (var item in this)
+                if (!other.Contains(item))
+                    toRemove.Add(item);
+
+            foreach (var item in toRemove)
+                Remove(item);
+        }
+
+        /// <summary>Keeps only the elements of this set that also occur in other; other is enumerated once.</summary>
+        private void IntersectWithEnumerable(SCG.IEnumerable<T> other)
+        {
+            // mark the index of every element of this set that is found in other
+            int originalLastIndex = this.size;
+            var bitArray = new System.Collections.BitArray(originalLastIndex, false);
+
+            foreach (var item in other)
+            {
+                int index = IndexOf(item);
+                if (index >= 0)
+                    bitArray.Set(index, true);
+            }
+
+            // remove unmarked elements from the end backwards so the lower indexes stay valid
+            for (int i = originalLastIndex - 1; i >= 0; i--)
+            {
+                if (!bitArray.Get(i))
+                    RemoveAt(i);
+            }
+        }
+
+        /// <summary>
+        /// If other is a set, we can assume it doesn't have duplicate elements, so use this
+        /// technique: if can't remove, then it wasn't present in this set, so add.
+        ///
+        /// As with other methods, callers take care of ensuring that other is a TreeSet using the
+        /// same comparer.
+        /// </summary>
+        /// <param name="other">A set using the same comparer as this set; the caller handles other == this beforehand.</param>
+        private void SymmetricExceptWithUniqueTreeSet(TreeSet<T> other)
+        {
+            foreach (T item in other)
+            {
+                if (!Remove(item))
+                {
+                    Add(item);
+                }
+            }
+        }
+
+        /// <summary>
+        /// Implementation Notes:
+        /// If other is a TreeSet and is using the same comparer, then checking subset is
+        /// faster. Simply check that each element in this is in other.
+        ///
+        /// Note: if other doesn't use the same comparer, then the Contains check is invalid,
+        /// which is why callers must take care of this.
+        ///
+        /// If callers are concerned about whether this is a proper subset, they take care of that.
+        ///
+        /// </summary>
+        /// <param name="other">A set using the same comparer as this set.</param>
+        /// <returns>true if every element of this set is contained in other; otherwise, false.</returns>
+        private bool IsSubsetOfTreeSetWithSameEC(TreeSet<T> other)
+        {
+            // a single element missing from other is enough to answer false
+            foreach (T item in this)
+            {
+                if (!other.Contains(item))
+                {
+                    return false;
+                }
+            }
+            return true;
+        }
+
         #endregion
 
         #region IEnumerable Members