STAR-1342: Changes from DSEDB - DO NOT MERGE #60

Draft
wants to merge 29 commits into base: trunk
29 commits
16f64c7
STAR-564 Check only MODIFY on base when updating table with MV (#17)
jtgrabowski Jun 1, 2021
12d4292
Add pytest cache and vscode folders to gitignore (#21)
k-rus Jun 8, 2021
2ce0618
STAR-582 fix repair error on one node cluster (#20)
k-rus Jun 11, 2021
626db14
STAR-247: Allow to easily run tests against big or bti sstable format
jacek-lewandowski Apr 2, 2021
2225baf
STAR-247: remove format specific parts of assertions
jtgrabowski Apr 6, 2021
8c34af1
STAR-247: Standalone scrubber should use the same default sstable for…
jacek-lewandowski Jun 23, 2021
200a8da
STAR-247: Add allowed warning for running scrub test
jacek-lewandowski Jun 23, 2021
d048221
STAR-14: Fix expectations to include memtable table parameter
blambov Apr 12, 2021
b45414e
STAR-254: add DateRange and Geo tests (#9)
jtgrabowski Apr 20, 2021
71ed787
STAR-452: add EverywhereStrategy smoke test (#10)
jtgrabowski Apr 21, 2021
b65026e
STAR-431: Add option to prevent any file-I/O from cqlsh
mfleming May 24, 2021
eccee2d
STAR-431: Add more tests to make sure commands work without --no-file…
mfleming May 25, 2021
571f3da
STAR-432: Add tests for consistency level options (#18)
tlasica Jun 1, 2021
ece3c5c
STAR-543: Port guardrail tests and changes (#19)
djatnieks Jun 8, 2021
0b99fa9
STAR-765: Add tests for cloud connection. (#23)
tlasica Jun 16, 2021
0c48297
STAR-386: Add logging around failure to get timestamp info (#28)
djatnieks Jun 24, 2021
2cdb98e
STAR-517. Skip as it is not possible to test it reliably. (#34)
tlasica Jul 2, 2021
848bdaf
STAR-386 Increase client timeout for test_simultaneous_bootstrap (#37)
djatnieks Jul 2, 2021
b98d21d
STAR-385 Retry cluster stop after exception stopping 'gently' (#36)
djatnieks Jul 2, 2021
8cbe83f
STAR-13 Run tests for UnifiedCompactionStrategy (#22)
Gerrrr Jul 15, 2021
440e9cc
STAR-826 Add missing import (#40)
Gerrrr Jul 16, 2021
fcbc4d4
STAR-836 Fix TestCompaction_with_UnifiedCompactionStrategy.bloomfilte…
Gerrrr Aug 5, 2021
f707b3e
STAR-822: Update test_consistent_repair dtest
jacek-lewandowski Aug 16, 2021
3d83ece
STAR-843: Update dtests for ULID based generation ID
jacek-lewandowski Aug 20, 2021
e5672df
STAR-899: Fix merge_schema_failure_4x.btm script to reflect our chang…
jacek-lewandowski Mar 21, 2022
76028a2
STAR-1200: Better logging for dtests (#55)
jacek-lewandowski Mar 21, 2022
f6c9534
STAR-394: Fix various timeouts (#57)
jacek-lewandowski Mar 21, 2022
0e78a6e
STAR-386: Increase client timeout (#56)
jacek-lewandowski Mar 22, 2022
b7a284a
STAR-899 Fix class name used from SchemaManager to Schema (#58)
djatnieks May 23, 2022
3 changes: 3 additions & 0 deletions .gitignore
@@ -11,3 +11,6 @@ upgrade
html/
doxygen/doxypy-0.4.2/
.pytest_cache/
.vscode/

pytest.ini
42 changes: 36 additions & 6 deletions auth_test.py
@@ -552,10 +552,16 @@ def test_materialized_views_auth(self):
* Create a new user, 'cathy', with no permissions
* Create a ks, table
* Connect as cathy
*
* Try CREATE MV without ALTER permission on base table, assert throws Unauthorized
* Grant cathy ALTER permissions, then CREATE MV successfully
*
* Try to MODIFY base without WRITE permission on base, assert throws Unauthorized
* Grant cathy WRITE permissions on base, and modify base successfully
*
* Try to SELECT from the mv, assert throws Unauthorized
* Grant cathy SELECT permissions, and read from the MV successfully
* Grant cathy SELECT permissions on base, and read from the MV successfully
*
* Revoke cathy's ALTER permissions, assert DROP MV throws Unauthorized
* Restore cathy's ALTER permissions, DROP MV successfully
"""
@@ -576,12 +582,36 @@ def test_materialized_views_auth(self):
cassandra.execute("GRANT ALTER ON ks.cf TO cathy")
cathy.execute(create_mv)

# TRY SELECT MV without SELECT permission on base table
assert_unauthorized(cathy, "SELECT * FROM ks.mv1", "User cathy has no SELECT permission on <table ks.cf> or any of its parents")
# Try MODIFY base without WRITE permission on base
assert_unauthorized(cathy, "INSERT INTO ks.cf(id, value) VALUES(1, '1')", "User cathy has no MODIFY permission on <table ks.cf> or any of its parents")

# Grant SELECT permission and CREATE MV
cassandra.execute("GRANT SELECT ON ks.cf TO cathy")
cathy.execute("SELECT * FROM ks.mv1")
if self.cluster.version() >= LooseVersion('4.0'):
# From 4.0 onward, only base MODIFY permission is required to update base with MV
# Grant WRITE permission on Base
cassandra.execute("GRANT MODIFY ON ks.cf TO cathy")
cathy.execute("INSERT INTO ks.cf(id, value) VALUES(1, '1')")

# TRY SELECT MV without SELECT permission on base table
assert_unauthorized(cathy, "SELECT * FROM ks.cf", "User cathy has no SELECT permission on <table ks.cf> or any of its parents")
assert_unauthorized(cathy, "SELECT * FROM ks.mv1", "User cathy has no SELECT permission on <table ks.cf> or any of its parents")

# Grant SELECT permission
cassandra.execute("GRANT SELECT ON ks.cf TO cathy")
assert_one(cathy, "SELECT * FROM ks.cf", [1, '1'])
assert_one(cathy, "SELECT * FROM ks.mv1", ['1', 1])
else:
# Before 4.0, MODIFY on MV is required to insert to base
# Grant WRITE permission on Base
cassandra.execute("GRANT MODIFY ON ks.cf TO cathy")
assert_unauthorized(cathy, "INSERT INTO ks.cf(id, value) VALUES(1, '1')", "User cathy has no SELECT permission on <table ks.cf> or any of its parents")
cassandra.execute("GRANT SELECT ON ks.cf TO cathy")
assert_unauthorized(cathy, "INSERT INTO ks.cf(id, value) VALUES(1, '1')", "User cathy has no MODIFY permission on <table ks.mv1> or any of its parents")

# Grant WRITE permission on MV
cassandra.execute("GRANT MODIFY ON ks.mv1 TO cathy")
cathy.execute("INSERT INTO ks.cf(id, value) VALUES(1, '1')")
assert_one(cathy, "SELECT * FROM ks.cf", [1, '1'])
assert_one(cathy, "SELECT * FROM ks.mv1", ['1', 1])

# Revoke ALTER permission and try DROP MV
cassandra.execute("REVOKE ALTER ON ks.cf FROM cathy")
61 changes: 60 additions & 1 deletion bootstrap_test.py
@@ -849,6 +849,12 @@ def test_simultaneous_bootstrap(self):
" cannot bootstrap while cassandra.consistent.rangemovement is true"

cluster = self.cluster
configuration_options = {
'request_timeout_in_ms': 120000,
'read_request_timeout_in_ms': 120000,
'range_request_timeout_in_ms': 120000
}
cluster.set_configuration_options(configuration_options)
cluster.set_environment_variable('CASSANDRA_TOKEN_PREGENERATION_DISABLED', 'True')
cluster.populate(1)
cluster.start()
@@ -884,7 +890,10 @@ def test_simultaneous_bootstrap(self):
# bugs like 9484, where count(*) fails at higher
# data loads.
for _ in range(5):
assert_one(session, "SELECT count(*) from keyspace1.standard1", [500000], cl=ConsistencyLevel.ONE)
logger.info("Querying: SELECT count(*) from keyspace1.standard1")
# Improve reliability on slower or loaded test systems by using a larger client timeout
assert_one(session, "SELECT count(*) from keyspace1.standard1", [500000], cl=ConsistencyLevel.ONE, timeout=180)
logger.info("Querying completed")

def test_cleanup(self):
"""
@@ -1019,6 +1028,56 @@ def test_bootstrap_binary_disabled(self):
assert_bootstrap_state(self, node3, 'COMPLETED', user='cassandra', password='cassandra')
node3.wait_for_binary_interface()

@since('4.0')
@pytest.mark.no_vnodes
def test_simple_bootstrap_with_everywhere_strategy(self):
cluster = self.cluster
tokens = cluster.balanced_tokens(2)
cluster.set_configuration_options(values={'num_tokens': 1})

logger.debug("[node1, node2] tokens: %r" % (tokens,))

keys = 10000

# Create a single node cluster
cluster.populate(1)
node1 = cluster.nodelist()[0]
node1.set_configuration_options(values={'initial_token': tokens[0]})
cluster.start()

session = self.patient_cql_connection(node1)
create_ks(session, 'ks', 'EverywhereStrategy')
create_cf(session, 'cf', columns={'c1': 'text', 'c2': 'text'})

insert_statement = session.prepare("INSERT INTO ks.cf (key, c1, c2) VALUES (?, 'value1', 'value2')")
execute_concurrent_with_args(session, insert_statement, [['k%d' % k] for k in range(keys)])

node1.flush()
node1.compact()

# Read the inserted data during the bootstrap process. We shouldn't
# get any errors
query_c1c2(session, random.randint(0, keys - 1), ConsistencyLevel.ONE)
session.shutdown()

# Bootstrapping a new node in the current version
node2 = new_node(cluster)
node2.set_configuration_options(values={'initial_token': tokens[1]})
node2.start(wait_for_binary_proto=True)
node2.compact()

node1.cleanup()
logger.debug("node1 size for ks.cf after cleanup: %s" % float(data_size(node1,'ks','cf')))
node1.compact()
logger.debug("node1 size for ks.cf after compacting: %s" % float(data_size(node1,'ks','cf')))

logger.debug("node2 size for ks.cf after compacting: %s" % float(data_size(node2,'ks','cf')))

size1 = float(data_size(node1,'ks','cf'))
size2 = float(data_size(node2,'ks','cf'))
assert_almost_equal(size1, size2, error=0.3)

assert_bootstrap_state(self, node2, 'COMPLETED')

class TestBootstrap(BootstrapTester):
"""
8 changes: 8 additions & 0 deletions byteman/guardrails/disk_usage_full.btm
@@ -0,0 +1,8 @@
RULE return FULL disk usage
CLASS org.apache.cassandra.service.disk.usage.DiskUsageMonitor
METHOD getState
AT EXIT
IF TRUE
DO
return org.apache.cassandra.service.disk.usage.DiskUsageState.FULL;
ENDRULE
8 changes: 8 additions & 0 deletions byteman/guardrails/disk_usage_stuffed.btm
@@ -0,0 +1,8 @@
RULE return STUFFED disk usage
CLASS org.apache.cassandra.service.disk.usage.DiskUsageMonitor
METHOD getState
AT EXIT
IF TRUE
DO
return org.apache.cassandra.service.disk.usage.DiskUsageState.STUFFED;
ENDRULE
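These two rules force DiskUsageMonitor.getState() to report FULL or STUFFED so the guardrail code paths can be exercised without actually filling a disk. As a rough sketch only (not part of this diff), a guardrail dtest could install such a rule through ccm's byteman_submit helper; the keyspace, table, and exact client-side error below are assumptions, and the cluster has to be populated with install_byteman=True.

from cassandra import InvalidRequest

# Inside a dtest method, with cluster.populate(1, install_byteman=True) already done:
node = self.cluster.nodelist()[0]
# Make DiskUsageMonitor.getState() report FULL on this node
node.byteman_submit(['./byteman/guardrails/disk_usage_full.btm'])

session = self.patient_cql_connection(node)
try:
    session.execute("INSERT INTO ks.t (k, v) VALUES (0, 0)")  # ks.t is a hypothetical table
except InvalidRequest:
    # Expected here: the disk usage guardrail rejects writes while the node reports FULL
    # (the precise exception type surfaced to the client is an assumption).
    pass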
2 changes: 1 addition & 1 deletion byteman/merge_schema_failure_4x.btm
@@ -3,7 +3,7 @@
#
RULE inject node failure on merge schema exit
CLASS org.apache.cassandra.schema.Schema
METHOD merge
METHOD mergeAndUpdateVersion
AT EXIT
# set flag to only run this rule once.
IF TRUE
3 changes: 2 additions & 1 deletion client_request_metrics_test.py
@@ -42,14 +42,15 @@ def fixture_add_additional_log_patterns(self, fixture_dtest_setup):
fixture_dtest_setup.ignore_log_patterns = (
'Testing write failures', # The error to simulate a write failure
'ERROR WRITE_FAILURE', # Logged in DEBUG mode for write failures
f"Scanned over {TOMBSTONE_FAILURE_THRESHOLD + 1} tombstones during query" # Caused by the read failure tests
f"Scanned over {TOMBSTONE_FAILURE_THRESHOLD + 1} (tombstones|tombstone rows) during query" # Caused by the read failure tests
)

def setup_once(self):
cluster = self.cluster
cluster.set_configuration_options({'read_request_timeout_in_ms': 3000,
'write_request_timeout_in_ms': 3000,
'phi_convict_threshold': 12,
'tombstone_warn_threshold': -1,
'tombstone_failure_threshold': TOMBSTONE_FAILURE_THRESHOLD,
'enable_materialized_views': 'true'})
cluster.populate(2, debug=True)
29 changes: 23 additions & 6 deletions compaction_test.py
@@ -15,7 +15,7 @@
since = pytest.mark.since
logger = logging.getLogger(__name__)

strategies = ['LeveledCompactionStrategy', 'SizeTieredCompactionStrategy', 'DateTieredCompactionStrategy']
strategies = ['LeveledCompactionStrategy', 'SizeTieredCompactionStrategy', 'DateTieredCompactionStrategy', 'UnifiedCompactionStrategy']


class TestCompaction(Tester):
@@ -114,25 +114,30 @@ def test_bloomfilter_size(self, strategy):
else:
if strategy == "DateTieredCompactionStrategy":
strategy_string = "strategy=DateTieredCompactionStrategy,base_time_seconds=86400" # we want a single sstable, so make sure we don't have a tiny first window
elif strategy == "UnifiedCompactionStrategy":
strategy_string = "strategy=UnifiedCompactionStrategy,max_sstables_to_compact=4" # disable layout-preserving compaction which can leave more than one sstable
else:
strategy_string = "strategy={}".format(strategy)
min_bf_size = 100000
max_bf_size = 150000
cluster = self.cluster
cluster.populate(1).start()
[node1] = cluster.nodelist()
logger.debug("Compaction: " + strategy_string)

for x in range(0, 5):
node1.stress(['write', 'n=100K', "no-warmup", "cl=ONE", "-rate",
"threads=300", "-schema", "replication(factor=1)",
"compaction({},enabled=false)".format(strategy_string)])
node1.flush()
logger.debug(node1.nodetool('cfstats keyspace1.standard1').stdout)

node1.nodetool('enableautocompaction')
node1.wait_for_compactions()

table_name = 'standard1'
output = node1.nodetool('cfstats').stdout
output = node1.nodetool('cfstats keyspace1.standard1').stdout
logger.debug(output)
output = output[output.find(table_name):]
output = output[output.find("Bloom filter space used"):]
bfSize = int(output[output.find(":") + 1:output.find("\n")].strip())
@@ -153,7 +158,12 @@

logger.debug("bloom filter size is: {}".format(bfSize))
logger.debug("size factor = {}".format(size_factor))
assert bfSize >= size_factor * min_bf_size
# When there are more sstables than data directories, this can happen either with unique keys
# (where the bf size stays close to the unadjusted limit) or with repeated keys (where the bf size
# is a multiple of the expected value). Permit both by applying the size factor only to the
# maximum size. Note that the test is designed to end up with size_factor == 1 and most runs do,
# so this does not loosen the test in the common case; it only avoids flaky failures.
assert bfSize >= min_bf_size
assert bfSize <= size_factor * max_bf_size

@pytest.mark.parametrize("strategy", strategies)
@@ -298,7 +308,7 @@ def test_compaction_strategy_switching(self, strategy):
Ensure that switching strategies does not result in problems.
Insert data, switch strategies, then check against data loss.
"""
strategies = ['LeveledCompactionStrategy', 'SizeTieredCompactionStrategy', 'DateTieredCompactionStrategy']
strategies = ['LeveledCompactionStrategy', 'SizeTieredCompactionStrategy', 'DateTieredCompactionStrategy', 'UnifiedCompactionStrategy']

if strategy in strategies:
strategies.remove(strategy)
@@ -307,6 +317,7 @@
[node1] = cluster.nodelist()

for strat in strategies:
logger.debug("Switching to {}".format(strat))
session = self.patient_cql_connection(node1)
create_ks(session, 'ks', 1)

@@ -339,7 +350,10 @@ def test_large_compaction_warning(self):
Check that we log a warning when the partition size is bigger than compaction_large_partition_warning_threshold_mb
"""
cluster = self.cluster
cluster.set_configuration_options({'compaction_large_partition_warning_threshold_mb': 1})
if self.supports_guardrails:
cluster.set_configuration_options({'guardrails': {'partition_size_warn_threshold_in_mb': 1}})
else:
cluster.set_configuration_options({'compaction_large_partition_warning_threshold_mb': 1})
cluster.populate(1).start()
[node] = cluster.nodelist()

Expand All @@ -361,7 +375,10 @@ def test_large_compaction_warning(self):
node.nodetool('compact ks large')
verb = 'Writing' if self.cluster.version() > '2.2' else 'Compacting'
sizematcher = '\d+ bytes' if self.cluster.version() < LooseVersion('3.6') else '\d+\.\d{3}(K|M|G)iB'
node.watch_log_for('{} large partition ks/large:user \({}'.format(verb, sizematcher), from_mark=mark, timeout=180)
log_message = '{} large partition ks/large:user \({}'.format(verb, sizematcher)
if self.supports_guardrails:
log_message = "Detected partition 'user' in ks.large of size 2MB is greater than the maximum recommended size \(1MB\)"
node.watch_log_for(log_message, from_mark=mark, timeout=180)

ret = list(session.execute("SELECT properties from ks.large where userid = 'user'"))
assert_length_equal(ret, 1)
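The branches above key off self.supports_guardrails, which is defined outside this diff. A plausible shape for that helper is sketched below; the version gate is an assumption, not something stated in this change.

from distutils.version import LooseVersion

class GuardrailsSupportMixin:
    @property
    def supports_guardrails(self):
        # Hypothetical: treat clusters that accept the 'guardrails' config block as guardrail-capable.
        return self.cluster.version() >= LooseVersion('4.0')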
10 changes: 10 additions & 0 deletions conftest.py
@@ -42,6 +42,9 @@ def check_required_loopback_interfaces_available():


def pytest_addoption(parser):
parser.addoption("--sstable-format", action="store", default="bti",
help="SSTable format to be used by default for all newly created SSTables: "
"big or bti (default: bti)")
parser.addoption("--use-vnodes", action="store_true", default=False,
help="Determines wither or not to setup clusters using vnodes for tests")
parser.addoption("--use-off-heap-memtables", action="store_true", default=False,
@@ -175,6 +178,13 @@ class level seems to work, and I guess it's not that much extra overhead to setu
if pytest.config.inicfg.get("log_format") is not None:
logging_format = pytest.config.inicfg.get("log_format")

# ccm logger is configured to spit everything to console
# we want it to use logging setup configured for tests
# unless we do that, we get duplicated log records from ccm module
ccmLogger = logging.getLogger("ccm")
for handler in list(ccmLogger.handlers):
ccmLogger.removeHandler(handler)

logging.basicConfig(level=log_level,
format=logging_format)

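Tests and fixtures can then read the new --sstable-format option through pytest's standard config API; a minimal sketch follows (the fixture name is illustrative, not part of this change).

import pytest

@pytest.fixture
def sstable_format(request):
    # 'big' or 'bti'; defaults to 'bti' as registered in pytest_addoption above
    return request.config.getoption("--sstable-format")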
17 changes: 17 additions & 0 deletions cqlsh_tests/cqlshrc.sample.cloud
@@ -0,0 +1,17 @@
; Copyright DataStax, Inc.
;
; Licensed under the Apache License, Version 2.0 (the "License");
; you may not use this file except in compliance with the License.
; You may obtain a copy of the License at
;
; http://www.apache.org/licenses/LICENSE-2.0
;
; Unless required by applicable law or agreed to in writing, software
; distributed under the License is distributed on an "AS IS" BASIS,
; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
; See the License for the specific language governing permissions and
; limitations under the License.
;
; Sample ~/.cqlshrc file with cloud configuration.
[connection]
secure_connect_bundle = /path/to/creds.zip
Binary file added cqlsh_tests/secure-connect-test.zip
Binary file not shown.