-
Notifications
You must be signed in to change notification settings - Fork 0
/
Task_3.py
155 lines (113 loc) · 6.89 KB
/
Task_3.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
from Task_2 import *
def encode_D_frame(image_array, I_frame, quantization_matrix, num_blocks_height, num_blocks_width, block_size=8, decimals=0):
    """Encode a frame as a difference ("D") frame relative to an I-frame.

    The frame is first run through the lossy block pipeline (transform
    coding, quantization, then both inverses). That reconstructed frame is
    subtracted from the I-frame blocks, and the resulting difference is
    transform coded, quantized, zigzag scanned and entropy coded.

    Args:
        image_array: Frame to encode.
        I_frame: Reference frame the difference is taken against.
        quantization_matrix: Matrix passed to ``quantization`` and
            ``inverse_quantization`` for both the frame and the difference.
        num_blocks_height: Block count passed to ``split_image_into_blocks``.
        num_blocks_width: Block count passed to ``split_image_into_blocks``.
        block_size: Block size passed to ``split_image_into_blocks``.
        decimals: Forwarded as ``decimals=`` to the transform and
            quantization helpers.

    Returns:
        The ``(bitstream, codec)`` pair produced by ``entropy_coding``.
    """
    # Lossy round trip of the frame itself.
    frame_blocks = split_image_into_blocks(image_array, num_blocks_height, num_blocks_width, block_size)
    frame_quantized = quantization(
        transform_coding(frame_blocks, decimals=decimals),
        quantization_matrix,
        decimals=decimals,
    )
    reconstructed_blocks = inverse_transform_coding(
        inverse_quantization(frame_quantized, quantization_matrix, decimals=decimals),
        decimals=decimals,
    )

    # Difference is taken as (I-frame - reconstructed frame); the decoder
    # undoes it by subtracting the difference from the I-frame again.
    reference_blocks = split_image_into_blocks(I_frame, num_blocks_height, num_blocks_width, block_size)
    residual_blocks = reference_blocks - reconstructed_blocks

    # Code the difference: transform, quantize, scan, entropy code.
    residual_quantized = quantization(
        transform_coding(residual_blocks, decimals=decimals),
        quantization_matrix,
        decimals=decimals,
    )
    scanned_blocks = zigzag_scan(residual_quantized)
    bitstream, codec = entropy_coding(scanned_blocks)
    return bitstream, codec
def decode_D_frame(bitstream, I_frame, codec, quantization_matrix, num_blocks_height, num_blocks_width, block_size=8, decimals=0):
    """Reconstruct a difference ("D") frame from its bitstream and an I-frame.

    The bitstream is entropy decoded, inverse zigzag scanned, inverse
    quantized and inverse transform coded to recover the difference blocks.
    These are subtracted from the I-frame blocks and the result is merged
    back into a single image.

    Args:
        bitstream: Coded difference, as returned by ``encode_D_frame``.
        I_frame: Reference frame the difference was taken against.
        codec: Codec object returned alongside ``bitstream``; passed to
            ``inverse_entropy_coding``.
        quantization_matrix: Matrix passed to ``inverse_quantization``.
        num_blocks_height: Block count used for the inverse zigzag scan and
            for splitting the I-frame.
        num_blocks_width: Block count used for the inverse zigzag scan and
            for splitting the I-frame.
        block_size: Block size used for the same two steps.
        decimals: Forwarded as ``decimals=`` to the inverse quantization and
            inverse transform helpers.

    Returns:
        The image produced by ``merge_blocks_into_image``.
    """
    # Undo entropy coding and the zigzag scan to get quantized blocks back.
    scanned_blocks = inverse_entropy_coding(bitstream, codec)
    residual_quantized = inverse_zigzag_scan(scanned_blocks, num_blocks_height, num_blocks_width, block_size)

    # Undo quantization and the transform to recover the difference blocks.
    residual_blocks = inverse_transform_coding(
        inverse_quantization(residual_quantized, quantization_matrix, decimals=decimals),
        decimals=decimals,
    )

    # The encoder stored (I-frame - frame), so frame = I-frame - difference.
    reference_blocks = split_image_into_blocks(I_frame, num_blocks_height, num_blocks_width, block_size)
    return merge_blocks_into_image(reference_blocks - residual_blocks)
def rate_distortion_curve_task3(frames, GOP, quantization_matrix, fps, num_blocks_height, num_blocks_width, block_size, decimals):
    """Measure average PSNR and bitrate of the I/D-frame codec per quantization scale.

    For each scale factor in ``[1, 2, 4, 8, 16, 32]`` the quantization matrix
    is multiplied by the factor and every frame is encoded and decoded.
    Frames with ``i % GOP == 0`` are coded as I-frames via ``encode`` /
    ``decode``; all other frames are coded as D-frames against the most
    recently reconstructed I-frame.

    Args:
        frames: Sequence of frames to code; must be non-empty.
        GOP: I-frame spacing; frame ``i`` is an I-frame when ``i % GOP == 0``.
        quantization_matrix: Base quantization matrix, scaled with ``*``.
        fps: Frame rate used to convert the total size into a per-second rate.
        num_blocks_height: Forwarded to the encode/decode helpers.
        num_blocks_width: Forwarded to the encode/decode helpers.
        block_size: Forwarded to the encode/decode helpers.
        decimals: Forwarded to the encode/decode helpers.

    Returns:
        A tuple ``(psnr_values, bps_values, quantization_levels)``:
        the per-level mean PSNR rounded to 2 decimals, the per-level rate
        divided by 1000 and rounded to 2 decimals, and the scale factors.

    Raises:
        ZeroDivisionError: If ``frames`` is empty, or if ``fps`` is zero.
    """
    psnr_values = []
    bps_values = []
    quantization_levels = [2 ** i for i in range(6)]
    for quantization_level in quantization_levels:
        print(f"Quantization level: {quantization_level}")
        # Loop invariant: scale the matrix once per level, not once per call.
        scaled_matrix = quantization_matrix * quantization_level
        psnr_sum, size_sum = 0, 0
        for i, frame in enumerate(frames):
            if i % GOP == 0:
                bitstream, codec = encode(frame, scaled_matrix, num_blocks_height, num_blocks_width, block_size, decimals)
                I_frame = decode(bitstream, codec, scaled_matrix, num_blocks_height, num_blocks_width, block_size, decimals)
                # The reconstructed I-frame is also this frame's decoded
                # output; reuse it instead of decoding the same bitstream again.
                decoded_frame = I_frame
            else:
                bitstream, codec = encode_D_frame(frame, I_frame, scaled_matrix, num_blocks_height, num_blocks_width, block_size, decimals)
                decoded_frame = decode_D_frame(bitstream, I_frame, codec, scaled_matrix, num_blocks_height, num_blocks_width, block_size, decimals)
            # NOTE(review): assumes len(bitstream) counts bytes, hence * 8 for
            # bits -- confirm against entropy_coding's return type.
            size_sum += len(bitstream) * 8
            psnr_sum += PSNR(frame, decoded_frame)
        # Mean PSNR over the frames, and total size over the clip duration.
        psnr = psnr_sum / len(frames)
        bps = size_sum / (len(frames) / fps)
        psnr_values.append(round(psnr, 2))
        bps_values.append(round(bps / 1000, 2))
    return psnr_values, bps_values, quantization_levels
if __name__ == "__main__":
    # Load the input sequence and its Y4M header fields.
    filename = "media/input/foreman_qcif_mono.y4m"
    frames, metadata = read_y4m_video(filename)

    # Frame geometry expressed in whole blocks.
    frame_height = int(metadata["H"])
    frame_width = int(metadata["W"])
    block_size = 8
    num_blocks_height = frame_height // block_size
    num_blocks_width = frame_width // block_size

    decimals = 0
    GOP = 5  # every 5th frame is coded as an I-frame

    # 8x8 base quantization matrix; the values match the example luminance
    # table given in Annex K of the JPEG standard.
    quantization_matrix = np.array([
        [16, 11, 10, 16, 24, 40, 51, 61],
        [12, 12, 14, 19, 26, 58, 60, 55],
        [14, 13, 16, 24, 40, 57, 69, 56],
        [14, 17, 22, 29, 51, 87, 80, 62],
        [18, 22, 37, 56, 68, 109, 103, 77],
        [24, 35, 55, 64, 81, 104, 113, 92],
        [49, 64, 78, 87, 103, 121, 120, 101],
        [72, 92, 95, 98, 112, 100, 103, 99],
    ])

    # Disabled alternative: encode and decode the whole clip, then write it
    # out as a Y4M file.
    # compressed_frames = []
    # t = time.time()
    # for i, frame in enumerate(frames):
    #     print(f"Encoding frame {i}")
    #     if i % GOP == 0:
    #         bitstream, codec = encode(frame, quantization_matrix, num_blocks_height, num_blocks_width, block_size, decimals)
    #         I_frame = decode(bitstream, codec, quantization_matrix, num_blocks_height, num_blocks_width, block_size, decimals)
    #     else:
    #         bitstream, codec = encode_D_frame(frame, I_frame, quantization_matrix, num_blocks_height, num_blocks_width, block_size, decimals)
    #     compressed_frames.append((bitstream, codec))
    # decompressed_frames = []
    # for i, compressed_frame in enumerate(compressed_frames):
    #     print(f"Decoding frame {i}")
    #     bitstream, codec = compressed_frame
    #     if i % GOP == 0:
    #         I_frame = decode(bitstream, codec, quantization_matrix, num_blocks_height, num_blocks_width, block_size, decimals)
    #         decompressed_frames.append(I_frame)
    #     else:
    #         decoded_frame = decode_D_frame(bitstream, I_frame, codec, quantization_matrix, num_blocks_height, num_blocks_width, block_size, decimals)
    #         decompressed_frames.append(decoded_frame)
    # print(f"Time: {round(time.time() - t, 2)}")
    # create_y4m_video("media/output/foreman_qcif_mono_task3.y4m", decompressed_frames, metadata)

    # Rate-distortion sweep over the first 30 frames at 30 frames per second.
    fps = 30
    sample_frames = frames[:30]
    psnr_values, bps_values, quantization_levels = rate_distortion_curve_task3(
        sample_frames,
        GOP,
        quantization_matrix,
        fps,
        num_blocks_height,
        num_blocks_width,
        block_size,
        decimals,
    )
    plot_rate_distortion_curve(psnr_values, bps_values, quantization_levels)