-
-
Notifications
You must be signed in to change notification settings - Fork 402
Expand file tree
/
Copy pathcrc32c_.py
More file actions
71 lines (56 loc) · 2.22 KB
/
crc32c_.py
File metadata and controls
71 lines (56 loc) · 2.22 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
from __future__ import annotations
from dataclasses import dataclass
from typing import TYPE_CHECKING, cast
import google_crc32c
import numpy as np
import typing_extensions
from zarr.abc.codec import BytesBytesCodec
from zarr.core.buffer import Buffer
from zarr.core.common import JSON, parse_named_configuration
if TYPE_CHECKING:
from typing import Self
from zarr.core.array_spec import ArraySpec
@dataclass(frozen=True)
class Crc32cCodec(BytesBytesCodec):
    """Bytes-to-bytes codec that guards a chunk with a CRC32C checksum.

    Encoding appends the 4-byte CRC32C checksum of the input to the end of
    the byte stream; decoding verifies that trailing checksum and strips it.
    The checksum is always serialized as a little-endian unsigned 32-bit
    integer, independent of the host byte order.
    """

    codec_input = Buffer
    codec_output = Buffer
    # The encoded size is always the input size plus the 4 checksum bytes.
    is_fixed_size = True

    @classmethod
    def from_dict(cls, data: dict[str, JSON]) -> Self:
        """Build the codec from its dictionary (metadata) representation.

        ``data`` is checked with ``parse_named_configuration`` against the
        codec name ``"crc32c"``; a configuration entry is not required. The
        codec has no parameters, so the parsed result is discarded.
        """
        parse_named_configuration(data, "crc32c", require_configuration=False)
        return cls()

    def to_dict(self) -> dict[str, JSON]:
        """Return the dictionary (metadata) representation of this codec."""
        return {"name": "crc32c"}

    async def _decode_single(
        self,
        chunk_bytes: Buffer,
        chunk_spec: ArraySpec,
    ) -> Buffer:
        """Verify and strip the trailing CRC32C checksum of one chunk.

        Parameters
        ----------
        chunk_bytes
            Encoded chunk: payload bytes followed by a 4-byte checksum.
        chunk_spec
            Supplies the buffer prototype used to build the returned buffer.

        Returns
        -------
        Buffer
            The payload bytes without the trailing checksum.

        Raises
        ------
        ValueError
            If the input is shorter than the 4-byte checksum, or if the
            stored checksum does not match the one computed from the payload.
        """
        data = chunk_bytes.as_numpy_array()
        if len(data) < 4:
            # Without this guard a truncated chunk would only fail indirectly,
            # reported as a checksum mismatch against an empty payload.
            raise ValueError(
                f"Encoded chunk is too short to contain a crc32c checksum: "
                f"expected at least 4 bytes, got {len(data)}."
            )
        crc32_bytes = data[-4:]
        inner_bytes = data[:-4]

        # Need to do a manual cast until https://github.com/numpy/numpy/issues/26783 is resolved
        # "<u4" pins the serialized form to little-endian so the comparison
        # does not depend on the byte order of the machine doing the decoding.
        computed_checksum = np.array(
            google_crc32c.value(cast("typing_extensions.Buffer", inner_bytes)), dtype="<u4"
        ).tobytes()
        stored_checksum = bytes(crc32_bytes)
        if computed_checksum != stored_checksum:
            raise ValueError(
                f"Stored and computed checksum do not match. Stored: {stored_checksum!r}. Computed: {computed_checksum!r}."
            )
        return chunk_spec.prototype.buffer.from_array_like(inner_bytes)

    async def _encode_single(
        self,
        chunk_bytes: Buffer,
        chunk_spec: ArraySpec,
    ) -> Buffer | None:
        """Append the CRC32C checksum of one chunk to its bytes.

        Parameters
        ----------
        chunk_bytes
            Payload bytes to protect.
        chunk_spec
            Supplies the buffer prototype used to build the returned buffer.

        Returns
        -------
        Buffer
            The payload followed by its 4-byte little-endian checksum.
        """
        data = chunk_bytes.as_numpy_array()
        # Calculate the checksum and "cast" it to a numpy array.
        # "<u4" (rather than native uint32) keeps the stored bytes
        # little-endian on every platform.
        checksum = np.array(
            [google_crc32c.value(cast("typing_extensions.Buffer", data))], dtype="<u4"
        )
        # Append the checksum (as bytes) to the data
        return chunk_spec.prototype.buffer.from_array_like(np.append(data, checksum.view("B")))

    def compute_encoded_size(self, input_byte_length: int, _chunk_spec: ArraySpec) -> int:
        """Return the encoded size: the input length plus 4 checksum bytes."""
        return input_byte_length + 4