tests/unit/forklift/test_legacy.py (73 additions, 22 deletions)
@@ -72,19 +72,26 @@ def _get_tar_testdata(compression_type=""):
return temp_f.getvalue()


def _get_whl_testdata(name="fake_package", version="1.0"):
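# Build a minimal wheel-shaped zip whose only member is a .dist-info/METADATA file.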
temp_f = io.BytesIO()
with zipfile.ZipFile(file=temp_f, mode="w") as zfp:
zfp.writestr(f"{name}-{version}.dist-info/METADATA", "Fake metadata")
return temp_f.getvalue()


def _storage_hash(data):
return hashlib.blake2b(data, digest_size=256 // 8).hexdigest()
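
For orientation, here is a minimal sketch of how these helpers relate to the hash-fanned storage paths asserted in the tests below; the [:2]/[2:4]/[4:] split is taken from those assertions, while the helper name and example call are made up for illustration:

import hashlib

def _storage_path(data, filename):
    # Same digest as _storage_hash above: 32-byte (256-bit) blake2b, hex-encoded.
    digest = hashlib.blake2b(data, digest_size=256 // 8).hexdigest()
    # Fan out into two short prefix directories, then the rest of the hash.
    return "/".join([digest[:2], digest[2:4], digest[4:], filename])

# e.g. _storage_path(_get_whl_testdata(), "fake_package-1.0-cp34-none-any.whl")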


_TAR_GZ_PKG_TESTDATA = _get_tar_testdata("gz")
_TAR_GZ_PKG_MD5 = hashlib.md5(_TAR_GZ_PKG_TESTDATA).hexdigest()
_TAR_GZ_PKG_SHA256 = hashlib.sha256(_TAR_GZ_PKG_TESTDATA).hexdigest()
_TAR_GZ_PKG_STORAGE_HASH = _storage_hash(_TAR_GZ_PKG_TESTDATA)

_TAR_BZ2_PKG_TESTDATA = _get_tar_testdata("bz2")
_TAR_BZ2_PKG_MD5 = hashlib.md5(_TAR_BZ2_PKG_TESTDATA).hexdigest()
_TAR_BZ2_PKG_SHA256 = hashlib.sha256(_TAR_BZ2_PKG_TESTDATA).hexdigest()
_TAR_BZ2_PKG_STORAGE_HASH = _storage_hash(_TAR_BZ2_PKG_TESTDATA)


class TestExcWithMessage:
@@ -2761,6 +2768,8 @@ def test_upload_succeeds_with_wheel(
RoleFactory.create(user=user, project=project)

filename = f"{project.name}-{release.version}-cp34-none-{plat}.whl"
filebody = _get_whl_testdata(project.name)
file_storage_hash = _storage_hash(filebody)

pyramid_config.testing_securitypolicy(identity=user)
db_request.user = user
@@ -2772,19 +2781,22 @@
"version": release.version,
"filetype": "bdist_wheel",
"pyversion": "cp34",
"md5_digest": _TAR_GZ_PKG_MD5,
"md5_digest": hashlib.md5(filebody).hexdigest(),
"content": pretend.stub(
filename=filename,
file=io.BytesIO(filebody),
type="application/octet-stream",
),
}
)

@pretend.call_recorder
def storage_service_store(path, file_path, *, meta):
with open(file_path, "rb") as fp:
if file_path.endswith(".metadata"):
assert fp.read() == b"Fake metadata"
else:
assert fp.read() == filebody

storage_service = pretend.stub(store=storage_service_store)

@@ -2808,9 +2820,9 @@ def storage_service_store(path, file_path, *, meta):
pretend.call(
"/".join(
[
file_storage_hash[:2],
file_storage_hash[2:4],
file_storage_hash[4:],
filename,
]
),
@@ -2821,7 +2833,24 @@
"package-type": "bdist_wheel",
"python-version": "cp34",
},
),
pretend.call(
"/".join(
[
file_storage_hash[:2],
file_storage_hash[2:4],
file_storage_hash[4:],
filename + ".metadata",
]
),
mock.ANY,
meta={
"project": project.normalized_name,
"version": release.version,
"package-type": "bdist_wheel",
"python-version": "cp34",
},
),
]

# Ensure that a File object has been created.
@@ -2874,6 +2903,8 @@ def test_upload_succeeds_with_wheel_after_sdist(
RoleFactory.create(user=user, project=project)

filename = f"{project.name}-{release.version}-cp34-none-any.whl"
filebody = _get_whl_testdata(project.name)
file_storage_hash = _storage_hash(filebody)

pyramid_config.testing_securitypolicy(identity=user)
db_request.user = user
@@ -2885,19 +2916,22 @@
"version": release.version,
"filetype": "bdist_wheel",
"pyversion": "cp34",
"md5_digest": "335c476dc930b959dda9ec82bd65ef19",
"md5_digest": hashlib.md5(filebody).hexdigest(),
"content": pretend.stub(
filename=filename,
file=io.BytesIO(filebody),
type="application/zip",
),
}
)

@pretend.call_recorder
def storage_service_store(path, file_path, *, meta):
with open(file_path, "rb") as fp:
if file_path.endswith(".metadata"):
assert fp.read() == b"Fake metadata"
else:
assert fp.read() == filebody

storage_service = pretend.stub(store=storage_service_store)
db_request.find_service = pretend.call_recorder(
@@ -2920,9 +2954,9 @@ def storage_service_store(path, file_path, *, meta):
pretend.call(
"/".join(
[
"4e",
"6e",
"fa4c0ee2bbad071b4f5b5ea68f1aea89fa716e7754eb13e2314d45a5916e",
file_storage_hash[:2],
file_storage_hash[2:4],
file_storage_hash[4:],
filename,
]
),
@@ -2933,7 +2967,24 @@
"package-type": "bdist_wheel",
"python-version": "cp34",
},
),
pretend.call(
"/".join(
[
file_storage_hash[:2],
file_storage_hash[2:4],
file_storage_hash[4:],
filename + ".metadata",
]
),
mock.ANY,
meta={
"project": project.normalized_name,
"version": release.version,
"package-type": "bdist_wheel",
"python-version": "cp34",
},
),
]

# Ensure that a File object has been created.
warehouse/forklift/legacy.py (39 additions, 4 deletions)
@@ -10,6 +10,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import base64
import email
import hashlib
import hmac
@@ -790,6 +791,20 @@ def _is_duplicate_file(db_session, filename, hashes):
return None


def extract_wheel_metadata(path):
"""
Extract the METADATA file from a wheel and return its contents. The
name of the .whl file is used to find the corresponding .dist-info
directory.

See https://www.python.org/dev/peps/pep-0658/#specification
"""
filename = os.path.basename(path)
namever = _wheel_file_re.match(filename).group("namever")
[Review comment from a contributor] Can the group ever be None here?

metafile = namever + ".dist-info/METADATA"
with zipfile.ZipFile(path) as zfp:
return zfp.read(metafile)
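
As a rough sketch of the client side this enables (hypothetical helper: PEP 658 specifies that the core metadata is served at the wheel's URL with ".metadata" appended, while the digest scheme here simply mirrors the blake2s hash computed in file_upload below, not anything mandated by the PEP):

import base64
import hashlib
import urllib.request

def fetch_and_verify_metadata(wheel_url, expected_hash):
    # Per PEP 658, metadata for <wheel_url> is served at <wheel_url>.metadata.
    with urllib.request.urlopen(wheel_url + ".metadata") as resp:
        metadata = resp.read()
    # Recompute the base64-encoded 16-byte blake2s digest used by file_upload.
    digest = base64.b64encode(
        hashlib.blake2s(metadata, digest_size=128 // 8).digest()
    ).decode("utf-8")
    if digest != expected_hash:
        raise ValueError("metadata hash mismatch")
    return metadata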


@view_config(
route_name="forklift.legacy.file_upload",
uses_session=True,
@@ -1317,11 +1332,19 @@ def file_upload(request):
"Binary wheel '{filename}' has an unsupported "
"platform tag '{plat}'.".format(filename=filename, plat=plat),
)
wheel_metadata = extract_wheel_metadata(temporary_filename)
with open(temporary_filename + ".metadata", "wb") as fp:
fp.write(wheel_metadata)
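# Record a verification hash for the ".metadata" sidecar: a
# base64-encoded 16-byte (128-bit) blake2s digest of the raw METADATA bytes.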
metadata_hash = base64.b64encode(
hashlib.blake2s(wheel_metadata, digest_size=128 // 8).digest()
).decode("utf-8")
else:
metadata_hash = None

# Also buffer the entire signature file to disk.
if "gpg_signature" in request.POST:
has_signature = True
with open(temporary_filename + ".asc", "wb") as fp:
signature_size = 0
for chunk in iter(
lambda: request.POST["gpg_signature"].file.read(8096), b""
@@ -1332,7 +1355,7 @@
fp.write(chunk)

# Check whether signature is ASCII armored
with open(temporary_filename + ".asc", "rb") as fp:
if not fp.read().startswith(b"-----BEGIN PGP SIGNATURE-----"):
raise _exc_with_message(
HTTPBadRequest, "PGP signature isn't ASCII armored."
@@ -1357,6 +1380,7 @@
md5_digest=file_hashes["md5"],
sha256_digest=file_hashes["sha256"],
blake2_256_digest=file_hashes["blake2_256"],
metadata_hash=metadata_hash,
# Figure out what our filepath is going to be, we're going to use a
# directory structure based on the hash of the file contents. This
# will ensure that the contents of the file cannot change without
@@ -1412,18 +1436,29 @@
storage = request.find_service(IFileStorage, name="primary")
storage.store(
file_.path,
temporary_filename,
meta={
"project": file_.release.project.normalized_name,
"version": file_.release.version,
"package-type": file_.packagetype,
"python-version": file_.python_version,
},
)
if metadata_hash is not None:
storage.store(
file_.path + ".metadata",
temporary_filename + ".metadata",
meta={
"project": file_.release.project.normalized_name,
"version": file_.release.version,
"package-type": file_.packagetype,
"python-version": file_.python_version,
},
)
if has_signature:
storage.store(
file_.pgp_path,
temporary_filename + ".asc",
meta={
"project": file_.release.project.normalized_name,
"version": file_.release.version,
New file: Alembic migration, revision 9b9778779fe2 (33 additions)
@@ -0,0 +1,33 @@
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Add a metadata_hash column to File

Revision ID: 9b9778779fe2
Revises: d582fb87b94c
Create Date: 2021-09-18 07:34:31.828437
"""

import sqlalchemy as sa

from alembic import op

revision = "9b9778779fe2"
down_revision = "d582fb87b94c"


def upgrade():
op.add_column("release_files", sa.Column("metadata_hash", sa.Text(), nullable=True))


def downgrade():
op.drop_column("release_files", "metadata_hash")
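
Assuming a stock Alembic setup (warehouse's own migration wiring may differ), this would be applied with a standard invocation such as "alembic upgrade 9b9778779fe2".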
warehouse/packaging/models.py (1 addition)
@@ -666,6 +666,7 @@ def __table_args__(cls): # noqa
md5_digest = Column(Text, unique=True, nullable=False)
sha256_digest = Column(CIText, unique=True, nullable=False)
blake2_256_digest = Column(CIText, unique=True, nullable=False)
metadata_hash = Column(Text, unique=False, nullable=True)
upload_time = Column(DateTime(timezone=False), server_default=func.now())
uploaded_via = Column(Text)
