Source code for matorage.model.torch.manager

# Copyright 2020-present Tae Hwan Jung
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

_KB = 1024
"""The size of a Kilobyte in bytes"""

_MB = 1024 * _KB
"""The size of a Megabyte in bytes"""

import os
import io
import h5py
import torch
from collections import OrderedDict

from matorage.model.manager import Manager


[docs]class ModelManager(Manager):

    """
    Model Manager Pytorch classes. This class overrides ``Manager``.

    Note:
        Unlike Dataset, model weight is loaded entirely into cpu memory.
        Therefore, the `HDF5_CORE` driver using the memory option is default setting.

    Args:
        config (:obj:`matorage.ModelConfig`, **require**):
            A ModelConfig instance object
        num_worker_threads (:obj:`int`, optional, defaults to `4`):
            Number of backend storage worker to upload or download.
        multipart_upload_size (:obj:`integer`, optional, defaults to `5 * 1024 * 1024`):
            size of the incompletely uploaded object.
            You can sync files faster with `multipart upload in MinIO. <https://github.com/minio/minio-py/blob/master/minio/api.py#L1795>`_
            This is because MinIO clients use multi-threading, which improves IO speed more
            efficiently regardless of Python's Global Interpreter Lock(GIL).

    Examples::

        from matorage import ModelConfig
        from matorage.torch import ModelManager

        model_config = ModelConfig(
            endpoint='127.0.0.1:9000',
            access_key='minio',
            secret_key='miniosecretkey',
            model_name='testmodel',
            additional={
                "version" : "1.0.1"
            }
        )

        model_manager = ModelManager(config=model_config)

        import torch.nn as nn
        class Model(nn.Module):
            def __init__(self):
                super(Model, self).__init__()
                self.net1 = nn.Sequential(
                    nn.Linear(5, 10),
                    nn.ReLU(),
                )
                self.net2 = nn.Sequential(
                    nn.Linear(10, 5),
                    nn.ReLU(),
                )
            def forward(self, x):
                x = self.net1(x)
                x = self.net2(x)
                return x

        model = Model()
        model_manager.save(model, step=100)


    """

    def __init__(self, config, num_worker_threads=4, multipart_upload_size=5 * _MB):
        super(ModelManager, self).__init__(
            config, num_worker_threads, multipart_upload_size
        )

    def _save_model(self, model_folder, model):
        for name, weight in model.state_dict().items():
            self._save_layer(model_folder, name, weight.cpu().numpy())

    def _load_model(self, model_folder, layers, model):
        weight = OrderedDict()

        if isinstance(model, str):
            keys = [model]
        else:
            keys = list(model.state_dict().keys())

        for layer in layers:
            name = os.path.basename(layer.object_name)
            if name in keys:
                layer_image = self._client.get_object(
                    bucket_name=self.config.bucket_name,
                    object_name=f"{model_folder}/{name}",
                ).read()

                layer_image = h5py.File(io.BytesIO(layer_image), "r")
                weight[name] = torch.from_numpy(layer_image[self.type][:])

        if isinstance(model, str):
            return weight
        else:
            model.load_state_dict(weight)

[docs]    def save(self, model, **kwargs):
        """
        save weight of model

        Args:
            model (:obj:`torch.nn.Module`, **require**):
                Pytorch model (``torch.nn.Module`` type)
            kwargs (:obj:`**kwargs`, optional):
                metadata about step or epoch for model.

        Examples::

            >>> model_manager.save(model, step=0)
            >>> model_manager.save(model, epoch=0)
            >>> model_manager.save(model, epoch=0, step=0)

        """
        super(ModelManager, self).save(model, **kwargs)

[docs]    def load(self, model, **kwargs):
        """
        load weight of model

        Args:
            model (:obj:`torch.nn.Module` or `string`, **require**):
                Pytorch model(``torch.nn.Module`` type) or layer name(string type).
            kwargs (:obj:`**kwargs`, optional):
                metadata about step or epoch for model.

        Returns:
            :obj:`None or OrderedDict`: If ``model`` is pytorch model, weight is loaded into the model and return None.
            however, If it is a string type with the name of the layer, it returns the weight of the OrderedDict type.

        Examples::

            # Load entire model
            >>> model = Model()
            >>> model_manager.load(model, step=0)
            >>> model
            Model(
              (net1): Sequential(
                (0): Linear(in_features=5, out_features=10, bias=True)
                (1): ReLU()
              )
              (net2): Sequential(
                (0): Linear(in_features=10, out_features=5, bias=True)
                (1): ReLU()
              )
            )

            # Load sub-layer model
            >>> class SubModel(nn.Module):
            >>>   def __init__(self):
            >>>     super(SubModel, self).__init__()
            >>>     self.net1 = nn.Sequential(
            >>>         nn.Linear(5, 10),
            >>>         nn.ReLU(),
            >>>     )

            >>> submodel = SubModel()
            >>> model_manager.load(submodel, step=0)
            >>> model
            Model(
              (net1): Sequential(
                (0): Linear(in_features=5, out_features=10, bias=True)
                (1): ReLU()
              )
            )

            # Load from layer name
            >>> model_manager.load('net1.0.weight', step=0)
            OrderedDict([('net1.0.weight',
            tensor([[ 0.2679, -0.2147, -0.1927, -0.3263,  0.0930],
                [ 0.0144,  0.2935,  0.3614, -0.0493, -0.3772],
                [ 0.4101, -0.1864,  0.1076, -0.3900,  0.3613],
                [-0.2831,  0.3692,  0.3367,  0.2491, -0.2971],
                [-0.3019,  0.1682, -0.3951,  0.1528,  0.1778],
                [-0.1593,  0.3315, -0.2286,  0.1294,  0.2087],
                [-0.3394, -0.2706,  0.1515,  0.0357, -0.4252],
                [ 0.2555, -0.4435, -0.3353,  0.2096, -0.3741],
                [ 0.3950, -0.2630, -0.1730,  0.1393,  0.3678],
                [ 0.3065, -0.0095,  0.0988,  0.4294,  0.3338]]))])

        """
        return super(ModelManager, self).load(model, **kwargs)
Source code for matorage.model.torch.manager

matorage

Navigation

Related Topics