Skip to content

Commit cc0f598

Browse files
authored
Merge branch 'master' into master
2 parents 7e1c3f4 + 072d8b2 commit cc0f598

File tree

98 files changed

+4754
-643
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

98 files changed

+4754
-643
lines changed

.circleci/config.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1145,6 +1145,10 @@ workflows:
11451145
cu_version: cpu
11461146
name: cmake_windows_cpu
11471147
python_version: '3.8'
1148+
- cmake_windows_gpu:
1149+
cu_version: cu101
1150+
name: cmake_windows_gpu
1151+
python_version: '3.8'
11481152
- cmake_macos_cpu:
11491153
cu_version: cpu
11501154
name: cmake_macos_cpu

.circleci/regenerate.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -191,16 +191,16 @@ def cmake_workflows(indentation=6):
191191
jobs = []
192192
python_version = '3.8'
193193
for os_type in ['linux', 'windows', 'macos']:
194-
# Right now CMake builds are failling on Windows (GPU)
195-
device_types = ['cpu', 'gpu'] if os_type == 'linux' else ['cpu']
194+
# Skip OSX CUDA
195+
device_types = ['cpu', 'gpu'] if os_type != 'macos' else ['cpu']
196196
for device in device_types:
197197
job = {
198198
'name': f'cmake_{os_type}_{device}',
199199
'python_version': python_version
200200
}
201201

202202
job['cu_version'] = 'cu101' if device == 'gpu' else 'cpu'
203-
if device == 'gpu':
203+
if device == 'gpu' and os_type == 'linux':
204204
job['wheel_docker_image'] = 'pytorch/manylinux-cuda101'
205205
jobs.append({f'cmake_{os_type}_{device}': job})
206206
return indent(indentation, jobs)

.circleci/unittest/linux/scripts/environment.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
channels:
2+
- pytorch
23
- defaults
34
dependencies:
45
- numpy
@@ -8,6 +9,7 @@ dependencies:
89
- pip
910
- libpng
1011
- jpeg
12+
- ffmpeg=4.2
1113
- ca-certificates
1214
- pip:
1315
- future

.circleci/unittest/windows/scripts/environment.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
channels:
2+
- pytorch
23
- defaults
34
dependencies:
45
- numpy

.gitattributes

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,8 @@
11
*.pkl binary
2+
# Jupyter notebook
3+
4+
# For text count
5+
*.ipynb text
6+
7+
# To ignore it use below
8+
# *.ipynb linguist-documentation

.github/workflows/tests-schedule.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ jobs:
3535
run: pip install pytest
3636

3737
- name: Run tests
38-
run: pytest --durations=20 -ra test/test_datasets_download.py
38+
run: pytest -ra -v test/test_datasets_download.py
3939

4040
- uses: JasonEtco/create-an-issue@v2.4.0
4141
name: Create issue if download tests failed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,3 +23,4 @@ gen.yml
2323
.mypy_cache
2424
.vscode/
2525
*.orig
26+
*-checkpoint.ipynb

.travis.yml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,6 @@ before_install:
3838
fi
3939
- conda install av -c conda-forge
4040

41-
4241
install:
4342
# Using pip instead of setup.py ensures we install a non-compressed version of the package
4443
# (as opposed to an egg), which is necessary to collect coverage.
@@ -55,7 +54,7 @@ install:
5554
cd -
5655
5756
script:
58-
- pytest --cov-config .coveragerc --cov torchvision --cov $TV_INSTALL_PATH -k 'not TestVideoReader and not TestVideoTransforms and not TestIO' test --ignore=test/test_datasets_download.py
57+
- pytest --cov-config .coveragerc --cov torchvision --cov $TV_INSTALL_PATH -k 'not TestVideo and not TestVideoReader and not TestVideoTransforms and not TestIO' test --ignore=test/test_datasets_download.py
5958
- pytest test/test_hub.py
6059

6160
after_success:

CMakeLists.txt

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ if(WITH_CUDA)
99
enable_language(CUDA)
1010
add_definitions(-D__CUDA_NO_HALF_OPERATORS__)
1111
add_definitions(-DWITH_CUDA)
12+
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-relaxed-constexpr")
1213
endif()
1314

1415
find_package(Python3 COMPONENTS Development)
@@ -17,6 +18,15 @@ find_package(Torch REQUIRED)
1718
find_package(PNG REQUIRED)
1819
find_package(JPEG REQUIRED)
1920

21+
function(CUDA_CONVERT_FLAGS EXISTING_TARGET)
22+
get_property(old_flags TARGET ${EXISTING_TARGET} PROPERTY INTERFACE_COMPILE_OPTIONS)
23+
if(NOT "${old_flags}" STREQUAL "")
24+
string(REPLACE ";" "," CUDA_flags "${old_flags}")
25+
set_property(TARGET ${EXISTING_TARGET} PROPERTY INTERFACE_COMPILE_OPTIONS
26+
"$<$<BUILD_INTERFACE:$<COMPILE_LANGUAGE:CXX>>:${old_flags}>$<$<BUILD_INTERFACE:$<COMPILE_LANGUAGE:CUDA>>:-Xcompiler=${CUDA_flags}>"
27+
)
28+
endif()
29+
endfunction()
2030

2131
file(GLOB HEADERS torchvision/csrc/*.h)
2232
# Image extension
@@ -29,6 +39,26 @@ endif()
2939
file(GLOB MODELS_HEADERS torchvision/csrc/models/*.h)
3040
file(GLOB MODELS_SOURCES torchvision/csrc/models/*.h torchvision/csrc/models/*.cpp)
3141

42+
if(MSVC)
43+
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4819")
44+
if(WITH_CUDA)
45+
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler=/wd4819")
46+
foreach(diag cc_clobber_ignored integer_sign_change useless_using_declaration
47+
set_but_not_used field_without_dll_interface
48+
base_class_has_different_dll_interface
49+
dll_interface_conflict_none_assumed
50+
dll_interface_conflict_dllexport_assumed
51+
implicit_return_from_non_void_function
52+
unsigned_compare_with_zero
53+
declared_but_not_referenced
54+
bad_friend_decl)
55+
string(APPEND CMAKE_CUDA_FLAGS " -Xcudafe --diag_suppress=${diag}")
56+
endforeach()
57+
CUDA_CONVERT_FLAGS(torch_cpu)
58+
CUDA_CONVERT_FLAGS(torch_cuda)
59+
endif()
60+
endif()
61+
3262
add_library(${PROJECT_NAME} SHARED ${MODELS_SOURCES} ${OPERATOR_SOURCES} ${IMAGE_SOURCES})
3363
target_link_libraries(${PROJECT_NAME} PRIVATE ${TORCH_LIBRARIES} ${PNG_LIBRARY} ${JPEG_LIBRARIES} Python3::Python)
3464
set_target_properties(${PROJECT_NAME} PROPERTIES EXPORT_NAME TorchVision)

cmake/TorchVisionConfig.cmake.in

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ if(NOT TARGET Python3::Python)
3232
find_package(Python3 COMPONENTS Development)
3333
endif()
3434

35-
set_target_properties(TorchVision::TorchVision PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "${PACKAGE_PREFIX_DIR}/@CMAKE_INSTALL_INCLUDEDIR@" INTERFACE_LINK_LIBRARIES "torch;Python3::Python" )
35+
set_target_properties(TorchVision::TorchVision PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "${${PN}_INCLUDE_DIR}" INTERFACE_LINK_LIBRARIES "torch;Python3::Python" )
3636

3737

3838
if(@WITH_CUDA@)

docs/source/io.rst

Lines changed: 57 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,8 @@ torchvision.io
44
.. currentmodule:: torchvision.io
55

66
The :mod:`torchvision.io` package provides functions for performing IO
7-
operations. They are currently specific to reading and writing video.
7+
operations. They are currently specific to reading and writing video and
8+
images.
89

910
Video
1011
-----
@@ -14,3 +15,58 @@ Video
1415
.. autofunction:: read_video_timestamps
1516

1617
.. autofunction:: write_video
18+
19+
20+
Fine-grained video API
21+
----------------------
22+
23+
In addition to the :mod:`read_video` function, we provide a high-performance
24+
lower-level API for more fine-grained control compared to the :mod:`read_video` function.
25+
It does all this whilst fully supporting torchscript.
26+
27+
.. autoclass:: VideoReader
28+
:members: __next__, get_metadata, set_current_stream, seek
29+
30+
31+
Example of inspecting a video:
32+
33+
.. code:: python
34+
35+
import torchvision
36+
video_path = "path to a test video"
37+
# Constructor allocates memory and a threaded decoder
38+
# instance per video. At the moment it takes two arguments:
39+
# path to the video file, and a wanted stream.
40+
reader = torchvision.io.VideoReader(video_path, "video")
41+
42+
# The information about the video can be retrieved using the
43+
# `get_metadata()` method. It returns a dictionary for every stream, with
44+
# duration and other relevant metadata (often frame rate)
45+
reader_md = reader.get_metadata()
46+
47+
# metadata is structured as a dict of dicts with the following structure
48+
# {"stream_type": {"attribute": [attribute per stream]}}
49+
#
50+
# following would print out the list of frame rates for every present video stream
51+
print(reader_md["video"]["fps"])
52+
53+
# we explicitly select the stream we would like to operate on. In
54+
# the constructor we select a default video stream, but
55+
# in practice, we can set whichever stream we would like
56+
reader.set_current_stream("video:0")
57+
58+
59+
Image
60+
-----
61+
62+
.. autofunction:: read_image
63+
64+
.. autofunction:: decode_image
65+
66+
.. autofunction:: encode_jpeg
67+
68+
.. autofunction:: write_jpeg
69+
70+
.. autofunction:: encode_png
71+
72+
.. autofunction:: write_png

docs/source/models.rst

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -350,6 +350,7 @@ the instances set of COCO train2017 and evaluated on COCO val2017.
350350
Network box AP mask AP keypoint AP
351351
================================ ======= ======== ===========
352352
Faster R-CNN ResNet-50 FPN 37.0 - -
353+
RetinaNet ResNet-50 FPN 36.4 - -
353354
Mask R-CNN ResNet-50 FPN 37.9 34.6 -
354355
================================ ======= ======== ===========
355356

@@ -405,6 +406,7 @@ precision-recall.
405406
Network train time (s / it) test time (s / it) memory (GB)
406407
============================== =================== ================== ===========
407408
Faster R-CNN ResNet-50 FPN 0.2288 0.0590 5.2
409+
RetinaNet ResNet-50 FPN 0.2514 0.0939 4.1
408410
Mask R-CNN ResNet-50 FPN 0.2728 0.0903 5.4
409411
Keypoint R-CNN ResNet-50 FPN 0.3789 0.1242 6.8
410412
============================== =================== ================== ===========
@@ -416,6 +418,12 @@ Faster R-CNN
416418
.. autofunction:: torchvision.models.detection.fasterrcnn_resnet50_fpn
417419

418420

421+
RetinaNet
422+
------------
423+
424+
.. autofunction:: torchvision.models.detection.retinanet_resnet50_fpn
425+
426+
419427
Mask R-CNN
420428
----------
421429

docs/source/transforms.rst

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,27 @@ All transformations accept PIL Image, Tensor Image or batch of Tensor Images as
1414
Tensor Images is a tensor of ``(B, C, H, W)`` shape, where ``B`` is a number of images in the batch. Deterministic or
1515
random transformations applied on the batch of Tensor Images identically transform all the images of the batch.
1616

17+
.. warning::
18+
19+
Since v0.8.0, all random transformations use the default torch random generator to sample random parameters.
20+
This is a backward-compatibility-breaking change, and users should set the random state as follows:
21+
22+
.. code:: python
23+
24+
# Previous versions
25+
# import random
26+
# random.seed(12)
27+
28+
# Now
29+
import torch
30+
torch.manual_seed(17)
31+
32+
Please keep in mind that the same seed for the torch random generator and the Python random generator will not
33+
produce the same results.
34+
1735

1836
Scriptable transforms
19-
^^^^^^^^^^^^^^^^^^^^^
37+
---------------------
2038

2139
In order to script the transformations, please use ``torch.nn.Sequential`` instead of :class:`Compose`.
2240

@@ -34,6 +52,9 @@ Make sure to use only scriptable transformations, i.e. that work with ``torch.Te
3452
For any custom transformations to be used with ``torch.jit.script``, they should be derived from ``torch.nn.Module``.
3553

3654

55+
Compositions of transforms
56+
--------------------------
57+
3758
.. autoclass:: Compose
3859

3960
Transforms on PIL Image
@@ -79,6 +100,8 @@ Transforms on PIL Image
79100

80101
.. autoclass:: TenCrop
81102

103+
.. autoclass:: GaussianBlur
104+
82105
Transforms on torch.\*Tensor
83106
----------------------------
84107

examples/python/README.md

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# Python examples
2+
3+
- [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/pytorch/vision/blob/master/examples/python/tensor_transforms.ipynb)
4+
[Examples of Tensor Images transformations](https://github.com/pytorch/vision/blob/master/examples/python/tensor_transforms.ipynb)
5+
- [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/pytorch/vision/blob/master/examples/python/video_api.ipynb)
6+
[Example of VideoAPI](https://github.com/pytorch/vision/blob/master/examples/python/video_api.ipynb)
7+
8+
9+
Prior to v0.8.0, transforms in torchvision have traditionally been PIL-centric and presented multiple limitations due to
10+
that. Now, since v0.8.0, transforms implementations are Tensor and PIL compatible and we can achieve the following new
11+
features:
12+
- transform multi-band torch tensor images (with more than 3-4 channels)
13+
- torchscript transforms together with your model for deployment
14+
- support for GPU acceleration
15+
- batched transformation such as for videos
16+
- read and decode data directly as torch tensor with torchscript support (for PNG and JPEG image formats)
17+
18+
Furthermore, we previously provided a very high-level API for video decoding, which left little control to the user. We're now expanding that API (and will replace it in the future) with a lower-level API that gives the user frame-based access to a video.

0 commit comments

Comments
 (0)