
Commit b415a70

Merge branch 'master' into models/mobilenetv3
2 parents e4d130f + 7b9d30e · commit b415a70

40 files changed: +304 −985 lines

references/detection/group_by_aspect_ratio.py

Lines changed: 1 addition & 1 deletion
@@ -26,7 +26,7 @@ class GroupedBatchSampler(BatchSampler):
     It enforces that the batch only contain elements from the same group.
     It also tries to provide mini-batches which follows an ordering which is
     as close as possible to the ordering from the original sampler.
-    Arguments:
+    Args:
         sampler (Sampler): Base sampler.
         group_ids (list[int]): If the sampler produces indices in range [0, N),
             `group_ids` must be a list of `N` ints which contains the group id of each sample.
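
For context, a minimal usage sketch of the sampler this docstring describes (not part of the diff; values are illustrative, with two hypothetical aspect-ratio groups):

from torch.utils.data import RandomSampler
# GroupedBatchSampler lives in references/detection/group_by_aspect_ratio.py
from group_by_aspect_ratio import GroupedBatchSampler

group_ids = [0, 1, 0, 0, 1, 1, 0, 1]            # group id of each sample
sampler = RandomSampler(range(len(group_ids)))  # base sampler over indices 0..7
batch_sampler = GroupedBatchSampler(sampler, group_ids, batch_size=2)
for batch in batch_sampler:
    print(batch)  # every batch holds indices from a single group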

setup.py

Lines changed: 5 additions & 3 deletions
@@ -29,12 +29,14 @@ def get_dist(pkgname):
     return None


-version = '0.9.0a0'
+cwd = os.path.dirname(os.path.abspath(__file__))
+
+version_txt = os.path.join(cwd, 'version.txt')
+with open(version_txt, 'r') as f:
+    version = f.readline().strip()
 sha = 'Unknown'
 package_name = 'torchvision'

-cwd = os.path.dirname(os.path.abspath(__file__))
-
 try:
     sha = subprocess.check_output(['git', 'rev-parse', 'HEAD'], cwd=cwd).decode('ascii').strip()
 except Exception:

torchvision/csrc/io/image/cpu/readjpeg_cpu.cpp

Lines changed: 1 addition & 1 deletion
@@ -117,7 +117,7 @@ torch::Tensor decodeJPEG(const torch::Tensor& data, ImageReadMode mode) {
       */
     default:
       jpeg_destroy_decompress(&cinfo);
-      TORCH_CHECK(false, "Provided mode not supported");
+      TORCH_CHECK(false, "The provided mode is not supported for JPEG files");
   }

   jpeg_calc_output_dimensions(&cinfo);

torchvision/csrc/io/image/cpu/readpng_cpu.cpp

Lines changed: 1 addition & 1 deletion
@@ -143,7 +143,7 @@ torch::Tensor decodePNG(const torch::Tensor& data, ImageReadMode mode) {
       break;
     default:
       png_destroy_read_struct(&png_ptr, &info_ptr, nullptr);
-      TORCH_CHECK(false, "Provided mode not supported");
+      TORCH_CHECK(false, "The provided mode is not supported for PNG files");
   }

   png_read_update_info(png_ptr, info_ptr);

torchvision/csrc/io/image/image_read_mode.h

Lines changed: 5 additions & 5 deletions
@@ -2,8 +2,8 @@

 /* Should be kept in-sync with Python ImageReadMode enum */
 using ImageReadMode = int64_t;
-#define IMAGE_READ_MODE_UNCHANGED 0
-#define IMAGE_READ_MODE_GRAY 1
-#define IMAGE_READ_MODE_GRAY_ALPHA 2
-#define IMAGE_READ_MODE_RGB 3
-#define IMAGE_READ_MODE_RGB_ALPHA 4
+const ImageReadMode IMAGE_READ_MODE_UNCHANGED = 0;
+const ImageReadMode IMAGE_READ_MODE_GRAY = 1;
+const ImageReadMode IMAGE_READ_MODE_GRAY_ALPHA = 2;
+const ImageReadMode IMAGE_READ_MODE_RGB = 3;
+const ImageReadMode IMAGE_READ_MODE_RGB_ALPHA = 4;
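
The comment above asks that these constants stay in sync with the Python enum; for reference, the matching definition from torchvision/io/image.py (also touched in this commit) is:

from enum import Enum

class ImageReadMode(Enum):
    UNCHANGED = 0
    GRAY = 1
    GRAY_ALPHA = 2
    RGB = 3
    RGB_ALPHA = 4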

torchvision/datasets/samplers/clip_sampler.py

Lines changed: 2 additions & 2 deletions
@@ -111,7 +111,7 @@ class UniformClipSampler(Sampler):
     When number of unique clips in the video is fewer than num_video_clips_per_video,
     repeat the clips until `num_video_clips_per_video` clips are collected

-    Arguments:
+    Args:
         video_clips (VideoClips): video clips to sample from
         num_clips_per_video (int): number of clips to be sampled per video
     """
@@ -151,7 +151,7 @@ class RandomClipSampler(Sampler):
     """
     Samples at most `max_video_clips_per_video` clips for each video randomly

-    Arguments:
+    Args:
         video_clips (VideoClips): video clips to sample from
         max_clips_per_video (int): maximum number of clips to be sampled per video
     """

torchvision/datasets/video_utils.py

Lines changed: 3 additions & 3 deletions
@@ -88,7 +88,7 @@ class VideoClips(object):
     Recreating the clips for different clip lengths is fast, and can be done
     with the `compute_clips` method.

-    Arguments:
+    Args:
         video_paths (List[str]): paths to the video files
         clip_length_in_frames (int): size of a clip in number of frames
         frames_between_clips (int): step (in frames) between each clip
@@ -227,7 +227,7 @@ def compute_clips(self, num_frames, step, frame_rate=None):
         Always returns clips of size `num_frames`, meaning that the
         last few frames in a video can potentially be dropped.

-        Arguments:
+        Args:
            num_frames (int): number of frames for the clip
            step (int): distance between two clips
        """
@@ -285,7 +285,7 @@ def get_clip(self, idx):
        """
        Gets a subclip from a list of videos.

-        Arguments:
+        Args:
            idx (int): index of the subclip. Must be between 0 and num_clips().

        Returns:
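
A minimal construction sketch for VideoClips (hypothetical file names; assumes a working video backend such as PyAV):

from torchvision.datasets.video_utils import VideoClips

video_clips = VideoClips(
    ['a.mp4', 'b.mp4'],        # video_paths
    clip_length_in_frames=16,  # frames per clip
    frames_between_clips=16,   # step between consecutive clips
)
video, audio, info, video_idx = video_clips.get_clip(0)  # first clip overall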

torchvision/io/image.py

Lines changed: 27 additions & 22 deletions
@@ -50,6 +50,15 @@


 class ImageReadMode(Enum):
+    """
+    Support for various modes while reading images.
+
+    Use `ImageReadMode.UNCHANGED` for loading the image as-is,
+    `ImageReadMode.GRAY` for converting to grayscale,
+    `ImageReadMode.GRAY_ALPHA` for grayscale with transparency,
+    `ImageReadMode.RGB` for RGB and `ImageReadMode.RGB_ALPHA` for
+    RGB with transparency.
+    """
     UNCHANGED = 0
     GRAY = 1
     GRAY_ALPHA = 2
@@ -62,7 +71,7 @@ def read_file(path: str) -> torch.Tensor:
     Reads and outputs the bytes contents of a file as a uint8 Tensor
     with one dimension.

-    Arguments:
+    Args:
         path (str): the path to the file to be read

     Returns:
@@ -77,7 +86,7 @@ def write_file(filename: str, data: torch.Tensor) -> None:
     Writes the contents of a uint8 tensor with one dimension to a
     file.

-    Arguments:
+    Args:
         filename (str): the path to the file to be written
         data (Tensor): the contents to be written to the output file
     """
@@ -90,15 +99,13 @@ def decode_png(input: torch.Tensor, mode: ImageReadMode = ImageReadMode.UNCHANGED) -> torch.Tensor:
     Optionally converts the image to the desired format.
     The values of the output tensor are uint8 between 0 and 255.

-    Arguments:
+    Args:
         input (Tensor[1]): a one dimensional uint8 tensor containing
             the raw bytes of the PNG image.
         mode (ImageReadMode): the read mode used for optionally
-            converting the image. Use `ImageReadMode.UNCHANGED` for loading
-            the image as-is, `ImageReadMode.GRAY` for converting to grayscale,
-            `ImageReadMode.GRAY_ALPHA` for grayscale with transparency,
-            `ImageReadMode.RGB` for RGB and `ImageReadMode.RGB_ALPHA` for
-            RGB with transparency. Default: `ImageReadMode.UNCHANGED`
+            converting the image. Default: `ImageReadMode.UNCHANGED`.
+            See `ImageReadMode` class for more information on various
+            available modes.

     Returns:
         output (Tensor[image_channels, image_height, image_width])
@@ -155,13 +162,13 @@ def decode_jpeg(input: torch.Tensor, mode: ImageReadMode = ImageReadMode.UNCHANGED) -> torch.Tensor:
     Optionally converts the image to the desired format.
     The values of the output tensor are uint8 between 0 and 255.

-    Arguments:
+    Args:
         input (Tensor[1]): a one dimensional uint8 tensor containing
             the raw bytes of the JPEG image.
         mode (ImageReadMode): the read mode used for optionally
-            converting the image. Use `ImageReadMode.UNCHANGED` for loading
-            the image as-is, `ImageReadMode.GRAY` for converting to grayscale
-            and `ImageReadMode.RGB` for RGB. Default: `ImageReadMode.UNCHANGED`
+            converting the image. Default: `ImageReadMode.UNCHANGED`.
+            See `ImageReadMode` class for more information on various
+            available modes.

     Returns:
         output (Tensor[image_channels, image_height, image_width])
@@ -229,11 +236,10 @@ def decode_image(input: torch.Tensor, mode: ImageReadMode = ImageReadMode.UNCHANGED) -> torch.Tensor:
         a one dimensional uint8 tensor containing the raw bytes of the
         PNG or JPEG image.
     mode: ImageReadMode
-        the read mode used for optionally converting the image. JPEG
-        and PNG images have different permitted values. The default
-        value is `ImageReadMode.UNCHANGED` and it keeps the image as-is.
-        See `decode_jpeg()` and `decode_png()` for more information.
-        Default: `ImageReadMode.UNCHANGED`
+        the read mode used for optionally converting the image.
+        Default: `ImageReadMode.UNCHANGED`.
+        See `ImageReadMode` class for more information on various
+        available modes.

     Returns
     -------
@@ -254,11 +260,10 @@ def read_image(path: str, mode: ImageReadMode = ImageReadMode.UNCHANGED) -> torch.Tensor:
     path: str
         path of the JPEG or PNG image.
     mode: ImageReadMode
-        the read mode used for optionally converting the image. JPEG
-        and PNG images have different permitted values. The default
-        value is `ImageReadMode.UNCHANGED` and it keeps the image as-is.
-        See `decode_jpeg()` and `decode_png()` for more information.
-        Default: `ImageReadMode.UNCHANGED`
+        the read mode used for optionally converting the image.
+        Default: `ImageReadMode.UNCHANGED`.
+        See `ImageReadMode` class for more information on various
+        available modes.

     Returns
     -------
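
With the per-function mode lists replaced by a pointer to `ImageReadMode`, a typical call site looks like this (hypothetical file name):

from torchvision.io import read_image
from torchvision.io.image import ImageReadMode

img = read_image('photo.jpg', mode=ImageReadMode.RGB)  # decode and force 3 channels
print(img.shape, img.dtype)  # torch.Size([3, H, W]) torch.uint8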

torchvision/models/_utils.py

Lines changed: 1 addition & 1 deletion
@@ -18,7 +18,7 @@ class IntermediateLayerGetter(nn.ModuleDict):
     assigned to the model. So if `model` is passed, `model.feature1` can
     be returned, but not `model.feature1.layer2`.

-    Arguments:
+    Args:
         model (nn.Module): model on which we will extract the features
         return_layers (Dict[name, new_name]): a dict containing the names
             of the modules for which the activations will be returned as
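
For context, a short usage sketch of IntermediateLayerGetter (layer names below are those of a torchvision ResNet, used illustratively):

import torch
import torchvision
from torchvision.models._utils import IntermediateLayerGetter

model = torchvision.models.resnet18()
getter = IntermediateLayerGetter(model, return_layers={'layer1': 'feat1', 'layer2': 'feat2'})
out = getter(torch.rand(1, 3, 224, 224))
print([(name, feat.shape) for name, feat in out.items()])  # OrderedDict of the requested activations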

torchvision/models/detection/_utils.py

Lines changed: 7 additions & 7 deletions
@@ -15,7 +15,7 @@ class BalancedPositiveNegativeSampler(object):
     def __init__(self, batch_size_per_image, positive_fraction):
         # type: (int, float) -> None
         """
-        Arguments:
+        Args:
             batch_size_per_image (int): number of elements to be selected per image
             positive_fraction (float): percentace of positive elements per batch
         """
@@ -25,7 +25,7 @@ def __init__(self, batch_size_per_image, positive_fraction):
     def __call__(self, matched_idxs):
         # type: (List[Tensor]) -> Tuple[List[Tensor], List[Tensor]]
         """
-        Arguments:
+        Args:
             matched idxs: list of tensors containing -1, 0 or positive values.
                 Each tensor corresponds to a specific image.
                 -1 values are ignored, 0 are considered as negatives and > 0 as
@@ -83,7 +83,7 @@ def encode_boxes(reference_boxes, proposals, weights):
     Encode a set of proposals with respect to some
     reference boxes

-    Arguments:
+    Args:
         reference_boxes (Tensor): reference boxes
         proposals (Tensor): boxes to be encoded
     """
@@ -133,7 +133,7 @@ class BoxCoder(object):
     def __init__(self, weights, bbox_xform_clip=math.log(1000. / 16)):
         # type: (Tuple[float, float, float, float], float) -> None
         """
-        Arguments:
+        Args:
            weights (4-element tuple)
            bbox_xform_clip (float)
        """
@@ -153,7 +153,7 @@ def encode_single(self, reference_boxes, proposals):
        Encode a set of proposals with respect to some
        reference boxes

-        Arguments:
+        Args:
            reference_boxes (Tensor): reference boxes
            proposals (Tensor): boxes to be encoded
        """
@@ -183,7 +183,7 @@ def decode_single(self, rel_codes, boxes):
        From a set of original boxes and encoded relative box offsets,
        get the decoded boxes.

-        Arguments:
+        Args:
            rel_codes (Tensor): encoded boxes
            boxes (Tensor): reference boxes.
        """
@@ -361,7 +361,7 @@ def overwrite_eps(model, eps):
     only when the pretrained weights are loaded to maintain compatibility
     with previous versions.

-    Arguments:
+    Args:
         model (nn.Module): The model on which we perform the overwrite.
         eps (float): The new value of eps.
     """

torchvision/models/detection/anchor_utils.py

Lines changed: 1 addition & 1 deletion
@@ -22,7 +22,7 @@ class AnchorGenerator(nn.Module):
     and AnchorGenerator will output a set of sizes[i] * aspect_ratios[i] anchors
     per spatial location for feature map i.

-    Arguments:
+    Args:
         sizes (Tuple[Tuple[int]]):
         aspect_ratios (Tuple[Tuple[float]]):
     """

torchvision/models/detection/backbone_utils.py

Lines changed: 2 additions & 2 deletions
@@ -14,7 +14,7 @@ class BackboneWithFPN(nn.Module):
     Internally, it uses torchvision.models._utils.IntermediateLayerGetter to
     extract a submodel that returns the feature maps specified in return_layers.
     The same limitations of IntermediatLayerGetter apply here.
-    Arguments:
+    Args:
         backbone (nn.Module)
         return_layers (Dict[name, new_name]): a dict containing the names
             of the modules for which the activations will be returned as
@@ -73,7 +73,7 @@ def resnet_fpn_backbone(
     >>> ('3', torch.Size([1, 256, 2, 2])),
     >>> ('pool', torch.Size([1, 256, 1, 1]))]

-    Arguments:
+    Args:
         backbone_name (string): resnet architecture. Possible values are 'ResNet', 'resnet18', 'resnet34', 'resnet50',
             'resnet101', 'resnet152', 'resnext50_32x4d', 'resnext101_32x8d', 'wide_resnet50_2', 'wide_resnet101_2'
         norm_layer (torchvision.ops): it is recommended to use the default value. For details visit:

torchvision/models/detection/faster_rcnn.py

Lines changed: 4 additions & 4 deletions
@@ -49,7 +49,7 @@ class FasterRCNN(GeneralizedRCNN):
     - labels (Int64Tensor[N]): the predicted labels for each image
     - scores (Tensor[N]): the scores or each prediction

-    Arguments:
+    Args:
         backbone (nn.Module): the network used to compute the features for the model.
             It should contain a out_channels attribute, which indicates the number of output
             channels that each feature map has (and it should be the same for all feature maps).
@@ -239,7 +239,7 @@ class TwoMLPHead(nn.Module):
     """
     Standard heads for FPN-based models

-    Arguments:
+    Args:
         in_channels (int): number of input channels
         representation_size (int): size of the intermediate representation
     """
@@ -264,7 +264,7 @@ class FastRCNNPredictor(nn.Module):
     Standard classification + bounding box regression layers
     for Fast R-CNN.

-    Arguments:
+    Args:
         in_channels (int): number of input channels
         num_classes (int): number of output classes (including background)
     """
@@ -341,7 +341,7 @@ def fasterrcnn_resnet50_fpn(pretrained=False, progress=True,
     >>> # optionally, if you want to export the model to ONNX:
     >>> torch.onnx.export(model, x, "faster_rcnn.onnx", opset_version = 11)

-    Arguments:
+    Args:
         pretrained (bool): If True, returns a model pre-trained on COCO train2017
         progress (bool): If True, displays a progress bar of the download to stderr
         pretrained_backbone (bool): If True, returns a model with backbone pre-trained on Imagenet

torchvision/models/detection/generalized_rcnn.py

Lines changed: 2 additions & 2 deletions
@@ -14,7 +14,7 @@ class GeneralizedRCNN(nn.Module):
     """
     Main class for Generalized R-CNN.

-    Arguments:
+    Args:
         backbone (nn.Module):
         rpn (nn.Module):
         roi_heads (nn.Module): takes the features + the proposals from the RPN and computes
@@ -43,7 +43,7 @@ def eager_outputs(self, losses, detections):
     def forward(self, images, targets=None):
         # type: (List[Tensor], Optional[List[Dict[str, Tensor]]]) -> Tuple[Dict[str, Tensor], List[Dict[str, Tensor]]]
         """
-        Arguments:
+        Args:
             images (list[Tensor]): images to be processed
             targets (list[Dict[Tensor]]): ground-truth boxes present in the image (optional)

torchvision/models/detection/image_list.py

Lines changed: 1 addition & 1 deletion
@@ -14,7 +14,7 @@ class ImageList(object):

     def __init__(self, tensors: Tensor, image_sizes: List[Tuple[int, int]]):
         """
-        Arguments:
+        Args:
             tensors (tensor)
             image_sizes (list[tuple[int, int]])
         """

torchvision/models/detection/keypoint_rcnn.py

Lines changed: 2 additions & 2 deletions
@@ -44,7 +44,7 @@ class KeypointRCNN(FasterRCNN):
     - scores (Tensor[N]): the scores or each prediction
     - keypoints (FloatTensor[N, K, 3]): the locations of the predicted keypoints, in [x, y, v] format.

-    Arguments:
+    Args:
         backbone (nn.Module): the network used to compute the features for the model.
             It should contain a out_channels attribute, which indicates the number of output
             channels that each feature map has (and it should be the same for all feature maps).
@@ -309,7 +309,7 @@ def keypointrcnn_resnet50_fpn(pretrained=False, progress=True,
     >>> # optionally, if you want to export the model to ONNX:
     >>> torch.onnx.export(model, x, "keypoint_rcnn.onnx", opset_version = 11)

-    Arguments:
+    Args:
         pretrained (bool): If True, returns a model pre-trained on COCO train2017
         progress (bool): If True, displays a progress bar of the download to stderr
         pretrained_backbone (bool): If True, returns a model with backbone pre-trained on Imagenet
