Skip to content

Commit

Permalink
Merge pull request #11572 from rouault/GDALTranspose2D
Browse files Browse the repository at this point in the history
Add GDALTranspose2D() for fast 2D matrix transposition
  • Loading branch information
rouault authored Jan 6, 2025
2 parents ffdf582 + 8043ece commit 615fe1e
Show file tree
Hide file tree
Showing 13 changed files with 1,825 additions and 120 deletions.
173 changes: 173 additions & 0 deletions autotest/cpp/test_gdal.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4951,4 +4951,177 @@ TEST_F(test_gdal, GDALComputeRasterMinMaxLocation_with_mask)
EXPECT_EQ(nMaxY, 0);
}

TEST_F(test_gdal, GDALTranspose2D)
{
constexpr int COUNT = 6;
const GByte abyData[] = {1, 2, 3, 4, 5, 6};
GByte abySrcData[COUNT * 2 * sizeof(double)];
GByte abyDstData[COUNT * 2 * sizeof(double)];
GByte abyDstAsByteData[COUNT * 2 * sizeof(double)];
for (int eSrcDTInt = GDT_Byte; eSrcDTInt < GDT_TypeCount; ++eSrcDTInt)
{
const auto eSrcDT = static_cast<GDALDataType>(eSrcDTInt);
GDALCopyWords(abyData, GDT_Byte, 1, abySrcData, eSrcDT,
GDALGetDataTypeSizeBytes(eSrcDT), COUNT);
for (int eDstDTInt = GDT_Byte; eDstDTInt < GDT_TypeCount; ++eDstDTInt)
{
const auto eDstDT = static_cast<GDALDataType>(eDstDTInt);
memset(abyDstData, 0, sizeof(abyDstData));
GDALTranspose2D(abySrcData, eSrcDT, abyDstData, eDstDT, 3, 2);

memset(abyDstAsByteData, 0, sizeof(abyDstAsByteData));
GDALCopyWords(abyDstData, eDstDT, GDALGetDataTypeSizeBytes(eDstDT),
abyDstAsByteData, GDT_Byte, 1, COUNT);

EXPECT_EQ(abyDstAsByteData[0], 1)
<< "eSrcDT=" << eSrcDT << ", eDstDT=" << eDstDT;
EXPECT_EQ(abyDstAsByteData[1], 4)
<< "eSrcDT=" << eSrcDT << ", eDstDT=" << eDstDT;
EXPECT_EQ(abyDstAsByteData[2], 2)
<< "eSrcDT=" << eSrcDT << ", eDstDT=" << eDstDT;
EXPECT_EQ(abyDstAsByteData[3], 5)
<< "eSrcDT=" << eSrcDT << ", eDstDT=" << eDstDT;
EXPECT_EQ(abyDstAsByteData[4], 3)
<< "eSrcDT=" << eSrcDT << ", eDstDT=" << eDstDT;
EXPECT_EQ(abyDstAsByteData[5], 6)
<< "eSrcDT=" << eSrcDT << ", eDstDT=" << eDstDT;
}
}
}

TEST_F(test_gdal, GDALTranspose2D_Byte_optims)
{
std::vector<GByte> in;
for (int i = 0; i < 19 * 17; ++i)
in.push_back(static_cast<GByte>(i % 256));

std::vector<GByte> out(in.size());

// SSSE3 optim (16x16) blocks
{
constexpr int W = 19;
constexpr int H = 17;
GDALTranspose2D(in.data(), GDT_Byte, out.data(), GDT_Byte, W, H);
for (int y = 0; y < H; ++y)
{
for (int x = 0; x < W; ++x)
{
EXPECT_EQ(out[x * H + y], in[y * W + x]);
}
}
}

// Optim H = 2 with W < 16
{
constexpr int W = 15;
constexpr int H = 2;
GDALTranspose2D(in.data(), GDT_Byte, out.data(), GDT_Byte, W, H);
for (int y = 0; y < H; ++y)
{
for (int x = 0; x < W; ++x)
{
EXPECT_EQ(out[x * H + y], in[y * W + x]);
}
}
}

// Optim H = 2 with W >= 16
{
constexpr int W = 19;
constexpr int H = 2;
GDALTranspose2D(in.data(), GDT_Byte, out.data(), GDT_Byte, W, H);
for (int y = 0; y < H; ++y)
{
for (int x = 0; x < W; ++x)
{
EXPECT_EQ(out[x * H + y], in[y * W + x]);
}
}
}

// SSSE3 optim H = 3 with W < 16
{
constexpr int W = 15;
constexpr int H = 3;
GDALTranspose2D(in.data(), GDT_Byte, out.data(), GDT_Byte, W, H);
for (int y = 0; y < H; ++y)
{
for (int x = 0; x < W; ++x)
{
EXPECT_EQ(out[x * H + y], in[y * W + x]);
}
}
}

// SSSE3 optim H = 3 with W >= 16
{
constexpr int W = 19;
constexpr int H = 3;
GDALTranspose2D(in.data(), GDT_Byte, out.data(), GDT_Byte, W, H);
for (int y = 0; y < H; ++y)
{
for (int x = 0; x < W; ++x)
{
EXPECT_EQ(out[x * H + y], in[y * W + x]);
}
}
}

// Optim H = 4 with H < 16
{
constexpr int W = 15;
constexpr int H = 4;
GDALTranspose2D(in.data(), GDT_Byte, out.data(), GDT_Byte, W, H);
for (int y = 0; y < H; ++y)
{
for (int x = 0; x < W; ++x)
{
EXPECT_EQ(out[x * H + y], in[y * W + x]);
}
}
}

// Optim H = 4 with H >= 16
{
constexpr int W = 19;
constexpr int H = 4;
GDALTranspose2D(in.data(), GDT_Byte, out.data(), GDT_Byte, W, H);
for (int y = 0; y < H; ++y)
{
for (int x = 0; x < W; ++x)
{
EXPECT_EQ(out[x * H + y], in[y * W + x]);
}
}
}

// SSSE3 optim H = 5 with W < 16
{
constexpr int W = 15;
constexpr int H = 5;
GDALTranspose2D(in.data(), GDT_Byte, out.data(), GDT_Byte, W, H);
for (int y = 0; y < H; ++y)
{
for (int x = 0; x < W; ++x)
{
EXPECT_EQ(out[x * H + y], in[y * W + x]);
}
}
}

// SSSE3 optim H = 5 with W >= 16
{
constexpr int W = 19;
constexpr int H = 5;
GDALTranspose2D(in.data(), GDT_Byte, out.data(), GDT_Byte, W, H);
for (int y = 0; y < H; ++y)
{
for (int x = 0; x < W; ++x)
{
EXPECT_EQ(out[x * H + y], in[y * W + x]);
}
}
}
}

} // namespace
197 changes: 195 additions & 2 deletions autotest/gdrivers/vrtprocesseddataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,10 +123,13 @@ def test_vrtprocesseddataset_errors(tmp_vsimem):
# Test nominal cases of BandAffineCombination algorithm


def test_vrtprocesseddataset_affine_combination_nominal(tmp_vsimem):
@pytest.mark.parametrize("INTERLEAVE", ["PIXEL", "BAND"])
def test_vrtprocesseddataset_affine_combination_nominal(tmp_vsimem, INTERLEAVE):

src_filename = str(tmp_vsimem / "src.tif")
src_ds = gdal.GetDriverByName("GTiff").Create(src_filename, 2, 1, 3)
src_ds = gdal.GetDriverByName("GTiff").Create(
src_filename, 2, 1, 3, options=["INTERLEAVE=" + INTERLEAVE]
)
src_ds.GetRasterBand(1).WriteArray(np.array([[1, 3]]))
src_ds.GetRasterBand(2).WriteArray(np.array([[2, 6]]))
src_ds.GetRasterBand(3).WriteArray(np.array([[3, 3]]))
Expand Down Expand Up @@ -1323,3 +1326,193 @@ def test_vrtprocesseddataset_OutputBands():
match="Invalid value for OutputBands.dataType",
):
gdal.Open("data/vrt/processed_OutputBands_USER_PROVIDED_invalid_type.vrt")


###############################################################################
# Test VRTProcessedDataset::RasterIO()


def test_vrtprocesseddataset_RasterIO(tmp_vsimem):

src_filename = str(tmp_vsimem / "src.tif")
src_ds = gdal.GetDriverByName("GTiff").Create(src_filename, 2, 3, 4)
src_ds.GetRasterBand(1).WriteArray(np.array([[1, 2], [3, 4], [5, 6]]))
src_ds.GetRasterBand(2).WriteArray(np.array([[7, 8], [9, 10], [11, 12]]))
src_ds.GetRasterBand(3).WriteArray(np.array([[13, 14], [15, 16], [17, 18]]))
src_ds.GetRasterBand(4).WriteArray(np.array([[19, 20], [21, 22], [23, 24]]))
src_ds.BuildOverviews("NEAR", [2])
src_ds = None

vrt_content = f"""<VRTDataset subclass='VRTProcessedDataset'>
<Input>
<SourceFilename>{src_filename}</SourceFilename>
</Input>
<ProcessingSteps>
<Step name="Affine combination of band values">
<Algorithm>BandAffineCombination</Algorithm>
<Argument name="coefficients_1">0,0,1,0,0</Argument>
<Argument name="coefficients_2">0,0,0,1,0</Argument>
<Argument name="coefficients_3">0,0,0,0,1</Argument>
<Argument name="coefficients_4">0,1,0,0,0</Argument>
</Step>
</ProcessingSteps>
</VRTDataset>
"""

ds = gdal.Open(vrt_content)
assert ds.RasterXSize == 2
assert ds.RasterYSize == 3
assert ds.RasterCount == 4

# Optimized code path with INTERLEAVE=BAND
np.testing.assert_equal(
ds.ReadAsArray(),
np.array(
[
[[7, 8], [9, 10], [11, 12]],
[[13, 14], [15, 16], [17, 18]],
[[19, 20], [21, 22], [23, 24]],
[[1, 2], [3, 4], [5, 6]],
]
),
)

# Optimized code path with INTERLEAVE=BAND but buf_type != native type
np.testing.assert_equal(
ds.ReadAsArray(buf_type=gdal.GDT_Int16),
np.array(
[
[[7, 8], [9, 10], [11, 12]],
[[13, 14], [15, 16], [17, 18]],
[[19, 20], [21, 22], [23, 24]],
[[1, 2], [3, 4], [5, 6]],
]
),
)

# Optimized code path with INTERLEAVE=BAND
np.testing.assert_equal(
ds.ReadAsArray(1, 2, 1, 1),
np.array([[[12]], [[18]], [[24]], [[6]]]),
)

# Optimized code path with INTERLEAVE=PIXEL
np.testing.assert_equal(
ds.ReadAsArray(interleave="PIXEL"),
np.array(
[
[[7, 13, 19, 1], [8, 14, 20, 2]],
[[9, 15, 21, 3], [10, 16, 22, 4]],
[[11, 17, 23, 5], [12, 18, 24, 6]],
]
),
)

# Optimized code path with INTERLEAVE=PIXEL but buf_type != native type
np.testing.assert_equal(
ds.ReadAsArray(interleave="PIXEL", buf_type=gdal.GDT_Int16),
np.array(
[
[[7, 13, 19, 1], [8, 14, 20, 2]],
[[9, 15, 21, 3], [10, 16, 22, 4]],
[[11, 17, 23, 5], [12, 18, 24, 6]],
]
),
)

# Optimized code path with INTERLEAVE=PIXEL
np.testing.assert_equal(
ds.ReadAsArray(1, 2, 1, 1, interleave="PIXEL"),
np.array([[[12, 18, 24, 6]]]),
)

# Not optimized INTERLEAVE=BAND because not enough bands
np.testing.assert_equal(
ds.ReadAsArray(band_list=[1, 2, 3]),
np.array(
[
[[7, 8], [9, 10], [11, 12]],
[[13, 14], [15, 16], [17, 18]],
[[19, 20], [21, 22], [23, 24]],
]
),
)

# Not optimized INTERLEAVE=BAND because of out-of-order band list
np.testing.assert_equal(
ds.ReadAsArray(band_list=[4, 1, 2, 3]),
np.array(
[
[[1, 2], [3, 4], [5, 6]],
[[7, 8], [9, 10], [11, 12]],
[[13, 14], [15, 16], [17, 18]],
[[19, 20], [21, 22], [23, 24]],
]
),
)

# Not optimized INTERLEAVE=PIXEL because of out-of-order band list
np.testing.assert_equal(
ds.ReadAsArray(interleave="PIXEL", band_list=[4, 1, 2, 3]),
np.array(
[
[[1, 7, 13, 19], [2, 8, 14, 20]],
[[3, 9, 15, 21], [4, 10, 16, 22]],
[[5, 11, 17, 23], [6, 12, 18, 24]],
]
),
)

# Optimized code path with overviews
assert ds.GetRasterBand(1).GetOverview(0).XSize == 1
assert ds.GetRasterBand(1).GetOverview(0).YSize == 2
np.testing.assert_equal(
ds.ReadAsArray(buf_xsize=1, buf_ysize=2),
np.array([[[7], [11]], [[13], [17]], [[19], [23]], [[1], [5]]]),
)

# Non-optimized code path with overviews
np.testing.assert_equal(
ds.ReadAsArray(buf_xsize=1, buf_ysize=1),
np.array([[[11]], [[17]], [[23]], [[5]]]),
)

# Test buffer splitting
with gdal.config_option("VRT_PROCESSED_DATASET_ALLOWED_RAM_USAGE", "96"):
ds = gdal.Open(vrt_content)

# Optimized code path with INTERLEAVE=BAND
np.testing.assert_equal(
ds.ReadAsArray(),
np.array(
[
[[7, 8], [9, 10], [11, 12]],
[[13, 14], [15, 16], [17, 18]],
[[19, 20], [21, 22], [23, 24]],
[[1, 2], [3, 4], [5, 6]],
]
),
)

# I/O error
gdal.GetDriverByName("GTiff").Create(
src_filename, 1024, 1024, 4, options=["TILED=YES"]
)
f = gdal.VSIFOpenL(src_filename, "rb+")
gdal.VSIFTruncateL(f, 4096)
gdal.VSIFCloseL(f)

ds = gdal.Open(vrt_content)

# Error in INTERLEAVE=BAND optimized code path
with pytest.raises(Exception):
ds.ReadAsArray()

# Error in INTERLEAVE=PIXEL optimized code path
with pytest.raises(Exception):
ds.ReadAsArray(interleave="PIXEL")

with gdal.config_option("VRT_PROCESSED_DATASET_ALLOWED_RAM_USAGE", "96"):
ds = gdal.Open(vrt_content)
with pytest.raises(Exception):
ds.ReadAsArray()
Loading

0 comments on commit 615fe1e

Please sign in to comment.