Skip to content

Commit

Permalink
Merge branch 'main' into dev
Browse files Browse the repository at this point in the history
  • Loading branch information
cccs-jh committed Dec 10, 2024
2 parents 24a81b6 + b954911 commit 8e669dd
Show file tree
Hide file tree
Showing 2 changed files with 57 additions and 0 deletions.
44 changes: 44 additions & 0 deletions assemblyline_service_utilities/common/malformed_zip.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
"""
Checks for malformed zip files.
"""

from __future__ import annotations

import zipfile

from typing import TYPE_CHECKING

if TYPE_CHECKING:
from typing import IO

def zip_span(f: IO[bytes]) -> tuple[int, int] | None:
"""Find the start and end offset of a .zip file with prepended or appended data.
If there are several concatenated zip files the span of the last zip file is given.
If no zip file is found None is returned.
"""
try:
position = f.tell()
end_record = zipfile._EndRecData(f)
except OSError:
return None
if end_record is None:
return None
# If the Central Directory isn't at the offset the End Record gives,
# we know there is additional data prepended to the file.
# The Central Directory should be immediately before the End Record,
central_dir_offset = end_record[zipfile._ECD_LOCATION] - end_record[zipfile._ECD_SIZE]
# But if the file is ZIP64 there's two additional ZIP64 structures in between.
if end_record[zipfile._ECD_SIGNATURE] == zipfile.stringEndArchive64:
central_dir_offset -= (zipfile.sizeEndCentDir64 + zipfile.sizeEndCentDir64Locator)
# The difference between the real offset and the offset the zip thinks it should be at gives the start of the file.
start = central_dir_offset - end_record[zipfile._ECD_OFFSET]

# Only thing after the End record is the zip file comment.
# Using the length of the comment instead of the comment size field in case the file is truncated.
end = end_record[zipfile._ECD_LOCATION] + zipfile.sizeEndCentDir + len(end_record[zipfile._ECD_COMMENT])
# Reset position in file
f.seek(position)
return start, end


13 changes: 13 additions & 0 deletions test/test_malformed_zip.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
"""Malformed Zip Tests"""

from __future__ import annotations

import io

from assemblyline_service_utilities.common.malformed_zip import zip_span

def test_zip_span():
    """zip_span locates an empty, commented zip surrounded by unrelated bytes."""
    # Layout: 17 bytes of leading junk, an End of Central Directory record
    # declaring an 11-byte comment, the comment itself, then trailing junk.
    stream = io.BytesIO(b"prepended contentPK\5\6\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x0b\0ZIP Commentappended content")
    expected_span = (17, 50)

    actual_span = zip_span(stream)

    assert actual_span == expected_span

0 comments on commit 8e669dd

Please sign in to comment.