Skip to content

[3.14] gh-136170: Revert adding ZipFile.data_offset (GH-136950) #136955

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jul 22, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 0 additions & 8 deletions Doc/library/zipfile.rst
Original file line number Diff line number Diff line change
Expand Up @@ -558,14 +558,6 @@ The following data attributes are also available:
it should be no longer than 65535 bytes. Comments longer than this will be
truncated.

.. attribute:: ZipFile.data_offset

The offset to the start of ZIP data from the beginning of the file. When the
:class:`ZipFile` is opened in either mode ``'w'`` or ``'x'`` and the
underlying file does not support ``tell()``, the value will be ``None``
instead.

.. versionadded:: 3.14

.. _path-objects:

Expand Down
54 changes: 0 additions & 54 deletions Lib/test/test_zipfile/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -3470,60 +3470,6 @@ def test_execute_zip64(self):
self.assertIn(b'number in executable: 5', output)


class TestDataOffsetPrependedZip(unittest.TestCase):
    """Verify ``ZipFile.data_offset`` when an executable precedes the archive."""

    def setUp(self):
        # Both fixtures are requested with the same ``subdir`` argument.
        subdir = 'archivetestdata'
        self.exe_zip = findfile('exe_with_zip', subdir=subdir)
        self.exe_zip64 = findfile('exe_with_z64', subdir=subdir)

    def _test_data_offset(self, name):
        """Open *name* for reading and assert that its data offset is 713."""
        archive = zipfile.ZipFile(name)
        with archive:
            self.assertEqual(archive.data_offset, 713)

    def test_data_offset_with_exe_prepended(self):
        """The regular fixture reports the expected offset."""
        self._test_data_offset(self.exe_zip)

    def test_data_offset_with_exe_prepended_zip64(self):
        """The zip64 fixture reports the same expected offset."""
        self._test_data_offset(self.exe_zip64)

class TestDataOffsetZipWrite(unittest.TestCase):
    """Verify ``ZipFile.data_offset`` for archives opened in write/append mode."""

    def setUp(self):
        # Create the scratch directory and register its removal as cleanup.
        os.mkdir(TESTFNDIR)
        self.addCleanup(rmtree, TESTFNDIR)
        # NOTE(review): none of the tests in this class read ``test_path``.
        self.test_path = os.path.join(TESTFNDIR, 'testoffset.zip')

    def test_data_offset_write_no_prefix(self):
        """An empty buffer opened with mode 'w' reports offset 0."""
        with io.BytesIO() as raw, zipfile.ZipFile(raw, "w") as zf:
            self.assertEqual(zf.data_offset, 0)

    def test_data_offset_write_with_prefix(self):
        """Mode 'w' reports the position reached after a 16-byte prefix."""
        with io.BytesIO() as raw:
            raw.write(b"this is a prefix")
            with zipfile.ZipFile(raw, "w") as zf:
                self.assertEqual(zf.data_offset, 16)

    def test_data_offset_append_with_bad_zip(self):
        """Mode 'a' on a buffer holding only the 16-byte prefix reports 16."""
        with io.BytesIO() as raw:
            raw.write(b"this is a prefix")
            with zipfile.ZipFile(raw, "a") as zf:
                self.assertEqual(zf.data_offset, 16)

    def test_data_offset_write_no_tell(self):
        """A file object whose ``tell()`` raises OSError yields ``None``."""
        # While working out where the zip data begins for a file created in
        # write mode, the ZipFile initializer checks whether tell() raises
        # AttributeError or OSError; this subclass triggers the OSError case.
        class NoTellBytesIO(io.BytesIO):
            def tell(self):
                raise OSError("Unimplemented!")

        with NoTellBytesIO() as raw, zipfile.ZipFile(raw, "w") as zf:
            self.assertIsNone(zf.data_offset)


class EncodedMetadataTests(unittest.TestCase):
file_names = ['\u4e00', '\u4e8c', '\u4e09'] # Han 'one', 'two', 'three'
file_content = [
Expand Down
13 changes: 0 additions & 13 deletions Lib/zipfile/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1452,7 +1452,6 @@ def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True,
self._lock = threading.RLock()
self._seekable = True
self._writing = False
self._data_offset = None

try:
if mode == 'r':
Expand All @@ -1463,7 +1462,6 @@ def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True,
self._didModify = True
try:
self.start_dir = self.fp.tell()
self._data_offset = self.start_dir
except (AttributeError, OSError):
self.fp = _Tellable(self.fp)
self.start_dir = 0
Expand All @@ -1488,7 +1486,6 @@ def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True,
# even if no files are added to the archive
self._didModify = True
self.start_dir = self.fp.tell()
self._data_offset = self.start_dir
else:
raise ValueError("Mode must be 'r', 'w', 'x', or 'a'")
except:
Expand Down Expand Up @@ -1535,10 +1532,6 @@ def _RealGetContents(self):
# self.start_dir: Position of start of central directory
self.start_dir = offset_cd + concat

# store the offset to the beginning of data for the
# .data_offset property
self._data_offset = concat

if self.start_dir < 0:
raise BadZipFile("Bad offset for central directory")
fp.seek(self.start_dir, 0)
Expand Down Expand Up @@ -1599,12 +1592,6 @@ def _RealGetContents(self):
zinfo._end_offset = end_offset
end_offset = zinfo.header_offset

@property
def data_offset(self):
    """Offset at which the zip data starts in the file.

    The value is ``None`` when the offset is unavailable.
    """
    offset = self._data_offset
    return offset

def namelist(self):
    """Return a list with the ``filename`` of every entry in ``self.filelist``."""
    names = []
    for entry in self.filelist:
        names.append(entry.filename)
    return names
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Removed the unreleased ``zipfile.ZipFile.data_offset`` property added in 3.14.0a7,
because it was not fully clear which behavior it should have in some situations,
so the result was not always what a user might expect.
Loading