Compare commits

...

15 Commits

| Author | SHA1 | Message | Date |
| --- | --- | --- | --- |
|  | 2ce9a31883 | update service data | 2023-04-19 11:46:42 +08:00 |
|  | 1b90edee59 | add demo to download optimized model on client | 2023-04-19 11:46:33 +08:00 |
|  | e1f143736e | update service data | 2023-04-18 17:13:15 +08:00 |
|  | 3c39c48242 | script to optimize pytorch model on server | 2023-04-18 17:13:05 +08:00 |
|  | ac6400e93a | add one more layer to model dir | 2023-04-11 11:56:56 +08:00 |
|  | 0e6a5b8925 | update service | 2023-04-11 11:56:07 +08:00 |
|  | 578f0ceea1 | update service | 2023-03-10 15:03:31 +08:00 |
|  | 4bf037de15 | package unsupported ml model | 2023-03-10 15:03:21 +08:00 |
|  | 1f86146b12 | replace bash script to docker-compose to build server | 2023-03-07 16:33:45 +08:00 |
|  | ec19042d0d | update file description and reference | 2023-03-01 17:13:17 +08:00 |
|  | a0ac14d0f7 | mlflow 1.30 -> 2.1 | 2023-03-01 14:56:27 +08:00 |
|  | 0327ebf1f4 | mod service file structure | 2023-03-01 14:55:51 +08:00 |
|  | 3c8580f0f4 | update data storage | 2023-02-26 05:10:58 +08:00 |
|  | 8001876359 | test rest api | 2023-02-26 05:10:28 +08:00 |
|  | b31dbcd0f0 | ignore saved model | 2023-02-22 16:27:19 +08:00 |
195 changed files with 336 additions and 2737 deletions

.gitignore (vendored): 4 changes
View File

@ -1 +1,3 @@
.DS_Store
__pycache__
model

View File

@ -1,3 +1,35 @@
# test_mlflow → # Abstract
Test using MLflow to log PyTorch model training and to pull the Production model from the Model Registry for inference. → Use the [MLflow](https://mlflow.org) platform to log PyTorch model training, and pull the Production model from the Model Registry to run inference⛩
# Requirements
* macOS 12.5
* Docker 20.10
# Dirs
* **service**
    * houses MLflow service data, including MLflow artifacts, the backend store, and the model registry
* **env**
    * **mlflow.yaml**
        * conda environment YAML used to run this repo
# Files
* **docker-compose.yaml**
    * a Docker Compose file that starts the MLflow service with a basic configuration (run ```docker-compose -f docker-compose.yaml up```)
* **test_pytorch_m1.py**
    * a script to test PyTorch on the Apple M1 platform with GPU acceleration
* **train.py**
    * sample code that uses PyTorch to train a small fortune-predicting neural network, with MLflow logging
* **predict.py**
    * sample code that calls the registered model to predict on test data and saves the model to the local file system (a hedged sketch follows this list)
* **get_registered_model_via_rest_api.py**
    * a script to test the MLflow REST API (see the REST sketch after this list)
* **log_unsupported_model.py**
    * a sample script that uses mlflow.pyfunc to package an unsupported ML model so that it can be logged and registered by MLflow (see the pyfunc sketch after this list)
* **optimize_model.py**
    * a sample script demonstrating how to use MLflow and the TensorRT libraries to optimize a PyTorch model on edge devices and fetch it on the client (a sketch of the client-side fetch follows this list)
###### tags: `MLOps`
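
A minimal sketch of what `predict.py` might do: pull the Production version of a registered model from the registry and run it on a dummy input. The registry name `fortune_predict_model` and the save path are assumptions, not taken from this diff.

```python
# predict_sketch.py -- a hedged sketch of predict.py (names and paths are assumed).
import mlflow
import mlflow.pytorch
import torch

mlflow.set_tracking_uri('http://127.0.0.1:5000')

# Pull the Production version of a registered PyTorch model from the model registry.
# 'fortune_predict_model' is an assumed registry name.
model = mlflow.pytorch.load_model('models:/fortune_predict_model/Production')
model.eval()

# Run the model on a dummy 10-feature input (matching the toy Net in train.py).
with torch.no_grad():
    prediction = model(torch.randn(1, 10))
print(f'prediction: {prediction.item():.4f}')

# Save the fetched model weights to the local file system.
torch.save(model.state_dict(), 'model.pt')
```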
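For `get_registered_model_via_rest_api.py`, a minimal sketch of querying the MLflow REST API directly with `requests`; the model name is again an assumption.

```python
# rest_api_sketch.py -- a hedged sketch of calling the MLflow REST API.
import requests

TRACKING_URI = 'http://127.0.0.1:5000'

# GET /api/2.0/mlflow/registered-models/get returns metadata for one registered model.
# The model name below is an assumption.
response = requests.get(
    f'{TRACKING_URI}/api/2.0/mlflow/registered-models/get',
    params={'name': 'fortune_predict_model'},
    timeout=10,
)
response.raise_for_status()
print(response.json())
```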
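For `log_unsupported_model.py`, a rough sketch of wrapping an arbitrary predictor in `mlflow.pyfunc.PythonModel` so it can be logged and registered; the wrapper class and its dummy `predict` logic are illustrative only.

```python
# log_unsupported_model_sketch.py -- a hedged sketch of packaging a model that MLflow
# has no built-in flavor for, using mlflow.pyfunc (class name and logic are illustrative).
import mlflow
import mlflow.pyfunc


class WrappedModel(mlflow.pyfunc.PythonModel):
    """Wrap an arbitrary Python predictor so MLflow can log and register it."""

    def predict(self, context, model_input):
        # Stand-in inference logic; a real script would call the unsupported model here.
        return [len(str(row)) for row in model_input]


mlflow.set_tracking_uri('http://127.0.0.1:5000')

with mlflow.start_run():
    mlflow.pyfunc.log_model(
        artifact_path='unsupported_model',
        python_model=WrappedModel(),
        registered_model_name='unsupported_model',  # registers the logged model in one step
    )
```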
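For the client side of `optimize_model.py`, one plausible way to fetch the optimized artifact is `mlflow.artifacts.download_artifacts` (available after the MLflow 2.x bump in this diff); the `optimized_model` artifact path and the run id placeholder are assumptions.

```python
# fetch_optimized_model_sketch.py -- a hedged sketch of the client-side download step;
# the run id placeholder and the 'optimized_model' artifact path are assumptions.
import mlflow
from mlflow.artifacts import download_artifacts

mlflow.set_tracking_uri('http://127.0.0.1:5000')

# Download everything logged under the assumed 'optimized_model' artifact path of a
# given run to local disk, then point the inference runtime at the local copy.
local_dir = download_artifacts(
    run_id='<run-id-of-the-optimization-run>',
    artifact_path='optimized_model',
    dst_path='./downloaded_model',
)
print(f'Optimized model downloaded to {local_dir}')
```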

View File

@ -1,16 +0,0 @@
artifact_path: model
flavors:
python_function:
data: data
env: conda.yaml
loader_module: mlflow.pytorch
pickle_module_name: mlflow.pytorch.pickle_module
python_version: 3.10.9
pytorch:
code: null
model_data: data
pytorch_version: 1.13.1
mlflow_version: 1.30.0
model_uuid: 2382b7a39c064e7b9b1465cfd84140a3
run_id: 24469fc083d6470a9cad7f17a6eeeea0
utc_time_created: '2023-02-21 05:57:41.973454'

View File

@ -1,11 +0,0 @@
channels:
- conda-forge
dependencies:
- python=3.10.9
- pip<=23.0.1
- pip:
- mlflow
- cloudpickle==2.2.1
- torch==1.13.1
- tqdm==4.64.1
name: mlflow-env

View File

@ -1,7 +0,0 @@
python: 3.10.9
build_dependencies:
- pip==23.0.1
- setuptools==67.3.2
- wheel==0.38.4
dependencies:
- -r requirements.txt

View File

@ -1,4 +0,0 @@
mlflow
cloudpickle==2.2.1
torch==1.13.1
tqdm==4.64.1

View File

@ -1,16 +0,0 @@
artifact_path: cls_model
flavors:
python_function:
data: data
env: conda.yaml
loader_module: mlflow.pytorch
pickle_module_name: mlflow.pytorch.pickle_module
python_version: 3.10.9
pytorch:
code: null
model_data: data
pytorch_version: 1.13.1
mlflow_version: 1.30.0
model_uuid: e40643f3e1b9481896e1ae6ed30e8654
run_id: 2820b379bfc945358bfd516e5577846c
utc_time_created: '2023-02-21 05:33:10.779919'

View File

@ -1,10 +0,0 @@
channels:
- conda-forge
dependencies:
- python=3.10.9
- pip<=23.0.1
- pip:
- mlflow
- cloudpickle==2.2.1
- torch==1.13.1
name: mlflow-env

View File

@ -1,7 +0,0 @@
python: 3.10.9
build_dependencies:
- pip==23.0.1
- setuptools==67.3.2
- wheel==0.38.4
dependencies:
- -r requirements.txt

View File

@ -1,3 +0,0 @@
mlflow
cloudpickle==2.2.1
torch==1.13.1

View File

@ -1,16 +0,0 @@
artifact_path: models
flavors:
python_function:
data: data
env: conda.yaml
loader_module: mlflow.pytorch
pickle_module_name: mlflow.pytorch.pickle_module
python_version: 3.10.9
pytorch:
code: null
model_data: data
pytorch_version: 1.13.1
mlflow_version: 1.30.0
model_uuid: faf1bec9ecb64581b22a0b8e09a9cca8
run_id: 3ef01a1e3e3d4ba2be705da789bbb8e1
utc_time_created: '2023-02-21 05:07:17.344052'

View File

@ -1,10 +0,0 @@
channels:
- conda-forge
dependencies:
- python=3.10.9
- pip<=23.0.1
- pip:
- mlflow
- cloudpickle==2.2.1
- torch==1.13.1
name: mlflow-env

View File

@ -1,7 +0,0 @@
python: 3.10.9
build_dependencies:
- pip==23.0.1
- setuptools==67.3.2
- wheel==0.38.4
dependencies:
- -r requirements.txt

View File

@ -1,3 +0,0 @@
mlflow
cloudpickle==2.2.1
torch==1.13.1

View File

@ -1,82 +0,0 @@
# train.py
#
# author: deng
# date : 20230221
import torch
import torch.nn as nn
from torch.optim import SGD
from tqdm import tqdm
import mlflow
class Net(nn.Module):
""" define a simple neural network model """
def __init__(self):
super(Net, self).__init__()
self.fc1 = nn.Linear(10, 5)
self.fc2 = nn.Linear(5, 1)
def forward(self, x):
x = self.fc1(x)
x = torch.relu(x)
x = self.fc2(x)
return x
def train(model, dataloader, criterion, optimizer, epochs):
""" define the training function """
for epoch in tqdm(range(epochs), 'Epochs'):
for i, (inputs, labels) in enumerate(dataloader):
# forwarding
outputs = model(inputs)
loss = criterion(outputs, labels)
# update gradient
optimizer.zero_grad()
loss.backward()
optimizer.step()
# log loss
mlflow.log_metric('train_loss', loss.item(), step=i)
return loss
if __name__ == '__main__':
# set hyper parameters
learning_rate = 1e-2
epochs = 20
# create a dataloader with fake data
dataloader = [(torch.randn(10), torch.randn(1)) for _ in range(100)]
dataloader = torch.utils.data.DataLoader(dataloader, batch_size=10)
# create the model, criterion, and optimizer
model = Net()
criterion = nn.MSELoss()
optimizer = SGD(model.parameters(), lr=learning_rate)
# set the tracking URI to the model registry
mlflow.set_tracking_uri('http://127.0.0.1:5000')
# start the MLflow run
with mlflow.start_run():
# train the model and log the loss
loss = train(model, dataloader, criterion, optimizer, epochs)
# log some additional metrics
mlflow.log_metric('final_loss', loss.item())
mlflow.log_param('learning_rate', learning_rate)
# log trained model
mlflow.pytorch.log_model(model, 'model')
# log training code
mlflow.log_artifact('./train.py', 'code')
print('Completed.')

View File

@ -1,16 +0,0 @@
artifact_path: model
flavors:
python_function:
data: data
env: conda.yaml
loader_module: mlflow.pytorch
pickle_module_name: mlflow.pytorch.pickle_module
python_version: 3.10.9
pytorch:
code: null
model_data: data
pytorch_version: 1.13.1
mlflow_version: 1.30.0
model_uuid: 18b69aa38c064c579c9b465d7a826081
run_id: 410d85525e5f4cfe9839a432d35f9ad2
utc_time_created: '2023-02-22 00:42:48.668457'

View File

@ -1,11 +0,0 @@
channels:
- conda-forge
dependencies:
- python=3.10.9
- pip<=23.0.1
- pip:
- mlflow
- cloudpickle==2.2.1
- torch==1.13.1
- tqdm==4.64.1
name: mlflow-env

View File

@ -1,7 +0,0 @@
python: 3.10.9
build_dependencies:
- pip==23.0.1
- setuptools==67.3.2
- wheel==0.38.4
dependencies:
- -r requirements.txt

View File

@ -1,4 +0,0 @@
mlflow
cloudpickle==2.2.1
torch==1.13.1
tqdm==4.64.1

View File

@ -1,16 +0,0 @@
artifact_path: cls_model
flavors:
python_function:
data: data
env: conda.yaml
loader_module: mlflow.pytorch
pickle_module_name: mlflow.pytorch.pickle_module
python_version: 3.10.9
pytorch:
code: null
model_data: data
pytorch_version: 1.13.1
mlflow_version: 1.30.0
model_uuid: a0ecc970cadb47a9b839283e9514732d
run_id: 63c7363339e042f4848d9041ba8deb82
utc_time_created: '2023-02-21 05:37:55.904472'

View File

@ -1,10 +0,0 @@
channels:
- conda-forge
dependencies:
- python=3.10.9
- pip<=23.0.1
- pip:
- mlflow
- cloudpickle==2.2.1
- torch==1.13.1
name: mlflow-env

View File

@ -1,7 +0,0 @@
python: 3.10.9
build_dependencies:
- pip==23.0.1
- setuptools==67.3.2
- wheel==0.38.4
dependencies:
- -r requirements.txt

View File

@ -1,3 +0,0 @@
mlflow
cloudpickle==2.2.1
torch==1.13.1

View File

@ -1,16 +0,0 @@
artifact_path: models
flavors:
python_function:
data: data
env: conda.yaml
loader_module: mlflow.pytorch
pickle_module_name: mlflow.pytorch.pickle_module
python_version: 3.10.9
pytorch:
code: null
model_data: data
pytorch_version: 1.13.1
mlflow_version: 1.30.0
model_uuid: 4ebe94bd0249452a90b3497d3b00a1c3
run_id: 6845ef0d54024cb3bdb32050f6a46fea
utc_time_created: '2023-02-21 05:25:14.020335'

View File

@ -1,10 +0,0 @@
channels:
- conda-forge
dependencies:
- python=3.10.9
- pip<=23.0.1
- pip:
- mlflow
- cloudpickle==2.2.1
- torch==1.13.1
name: mlflow-env

View File

@ -1,7 +0,0 @@
python: 3.10.9
build_dependencies:
- pip==23.0.1
- setuptools==67.3.2
- wheel==0.38.4
dependencies:
- -r requirements.txt

View File

@ -1,3 +0,0 @@
mlflow
cloudpickle==2.2.1
torch==1.13.1

View File

@ -1,16 +0,0 @@
artifact_path: models
flavors:
python_function:
data: data
env: conda.yaml
loader_module: mlflow.pytorch
pickle_module_name: mlflow.pytorch.pickle_module
python_version: 3.10.9
pytorch:
code: null
model_data: data
pytorch_version: 1.13.1
mlflow_version: 1.30.0
model_uuid: 8cd1e70114e548ea8d9bfb1bf468e285
run_id: 68e8a3cbbafa46538ebb8a60d80f185d
utc_time_created: '2023-02-21 05:05:46.624814'

View File

@ -1,10 +0,0 @@
channels:
- conda-forge
dependencies:
- python=3.10.9
- pip<=23.0.1
- pip:
- mlflow
- cloudpickle==2.2.1
- torch==1.13.1
name: mlflow-env

View File

@ -1,7 +0,0 @@
python: 3.10.9
build_dependencies:
- pip==23.0.1
- setuptools==67.3.2
- wheel==0.38.4
dependencies:
- -r requirements.txt

View File

@ -1,3 +0,0 @@
mlflow
cloudpickle==2.2.1
torch==1.13.1

View File

@ -1,16 +0,0 @@
artifact_path: models
flavors:
python_function:
data: data
env: conda.yaml
loader_module: mlflow.pytorch
pickle_module_name: mlflow.pytorch.pickle_module
python_version: 3.10.9
pytorch:
code: null
model_data: data
pytorch_version: 1.13.1
mlflow_version: 1.30.0
model_uuid: dd1b1e3a6b5f4274a5690a8843751ff3
run_id: 8ba27f225a7442be8816977c2077c510
utc_time_created: '2023-02-21 05:05:04.660670'

View File

@ -1,10 +0,0 @@
channels:
- conda-forge
dependencies:
- python=3.10.9
- pip<=23.0.1
- pip:
- mlflow
- cloudpickle==2.2.1
- torch==1.13.1
name: mlflow-env

View File

@ -1,7 +0,0 @@
python: 3.10.9
build_dependencies:
- pip==23.0.1
- setuptools==67.3.2
- wheel==0.38.4
dependencies:
- -r requirements.txt

View File

@ -1,3 +0,0 @@
mlflow
cloudpickle==2.2.1
torch==1.13.1

View File

@ -1,16 +0,0 @@
artifact_path: model
flavors:
python_function:
data: data
env: conda.yaml
loader_module: mlflow.pytorch
pickle_module_name: mlflow.pytorch.pickle_module
python_version: 3.10.9
pytorch:
code: null
model_data: data
pytorch_version: 1.13.1
mlflow_version: 1.30.0
model_uuid: 957e2f6e4fd048c99aee3150c73c4078
run_id: c4fd84a025e1474d87cdc2919874b88c
utc_time_created: '2023-02-22 00:41:33.282088'

View File

@ -1,11 +0,0 @@
channels:
- conda-forge
dependencies:
- python=3.10.9
- pip<=23.0.1
- pip:
- mlflow
- cloudpickle==2.2.1
- torch==1.13.1
- tqdm==4.64.1
name: mlflow-env

View File

@ -1,7 +0,0 @@
python: 3.10.9
build_dependencies:
- pip==23.0.1
- setuptools==67.3.2
- wheel==0.38.4
dependencies:
- -r requirements.txt

View File

@ -1,4 +0,0 @@
mlflow
cloudpickle==2.2.1
torch==1.13.1
tqdm==4.64.1

View File

@ -1,82 +0,0 @@
# train.py
#
# author: deng
# date : 20230221
import torch
import torch.nn as nn
from torch.optim import SGD
from tqdm import tqdm
import mlflow
class Net(nn.Module):
""" define a simple neural network model """
def __init__(self):
super(Net, self).__init__()
self.fc1 = nn.Linear(10, 5)
self.fc2 = nn.Linear(5, 1)
def forward(self, x):
x = self.fc1(x)
x = torch.relu(x)
x = self.fc2(x)
return x
def train(model, dataloader, criterion, optimizer, epochs):
""" define the training function """
for epoch in tqdm(range(epochs), 'Epochs'):
for i, (inputs, labels) in enumerate(dataloader):
# forwarding
outputs = model(inputs)
loss = criterion(outputs, labels)
# update gradient
optimizer.zero_grad()
loss.backward()
optimizer.step()
# log loss
mlflow.log_metric('train_loss', loss.item(), step=i)
return loss
if __name__ == '__main__':
# set hyper parameters
learning_rate = 1e-2
epochs = 20
# create a dataloader with fake data
dataloader = [(torch.randn(10), torch.randn(1)) for _ in range(100)]
dataloader = torch.utils.data.DataLoader(dataloader, batch_size=10)
# create the model, criterion, and optimizer
model = Net()
criterion = nn.MSELoss()
optimizer = SGD(model.parameters(), lr=learning_rate)
# set the tracking URI to the model registry
mlflow.set_tracking_uri('http://127.0.0.1:5000')
# start the MLflow run
with mlflow.start_run():
# train the model and log the loss
loss = train(model, dataloader, criterion, optimizer, epochs)
# log some additional metrics
mlflow.log_metric('final_loss', loss.item())
mlflow.log_param('learning_rate', learning_rate)
# log trained model
mlflow.pytorch.log_model(model, 'model')
# log training code
mlflow.log_artifact('./train.py')
print('Completed.')

View File

@ -1,16 +0,0 @@
artifact_path: cls_model
flavors:
python_function:
data: data
env: conda.yaml
loader_module: mlflow.pytorch
pickle_module_name: mlflow.pytorch.pickle_module
python_version: 3.10.9
pytorch:
code: null
model_data: data
pytorch_version: 1.13.1
mlflow_version: 1.30.0
model_uuid: aaa800b217da4dd0b8f17e8dbfdc5c45
run_id: f1320882f24c4f489cbf85159627eaf8
utc_time_created: '2023-02-21 05:34:08.242864'

View File

@ -1,10 +0,0 @@
channels:
- conda-forge
dependencies:
- python=3.10.9
- pip<=23.0.1
- pip:
- mlflow
- cloudpickle==2.2.1
- torch==1.13.1
name: mlflow-env

View File

@ -1,7 +0,0 @@
python: 3.10.9
build_dependencies:
- pip==23.0.1
- setuptools==67.3.2
- wheel==0.38.4
dependencies:
- -r requirements.txt

View File

@ -1,3 +0,0 @@
mlflow
cloudpickle==2.2.1
torch==1.13.1

View File

@ -1,83 +0,0 @@
# train.py
#
# author: deng
# date : 20230221
import torch
import torch.nn as nn
from torch.optim import SGD
from tqdm import tqdm
import mlflow
class Net(nn.Module):
""" define a simple neural network model """
def __init__(self):
super(Net, self).__init__()
self.fc1 = nn.Linear(10, 5)
self.fc2 = nn.Linear(5, 1)
def forward(self, x):
x = self.fc1(x)
x = torch.relu(x)
x = self.fc2(x)
return x
def train(model, dataloader, criterion, optimizer, epochs):
""" define the training function """
for epoch in tqdm(range(epochs), 'Epochs'):
for i, (inputs, labels) in enumerate(dataloader):
# forwarding
outputs = model(inputs)
loss = criterion(outputs, labels)
# update gradient
optimizer.zero_grad()
loss.backward()
optimizer.step()
# log loss
mlflow.log_metric('train_loss', loss.item(), step=i)
return loss
if __name__ == '__main__':
# set hyper parameters
learning_rate = 1e-2
epochs = 20
# create a dataloader with fake data
dataloader = [(torch.randn(10), torch.randn(1)) for _ in range(100)]
dataloader = torch.utils.data.DataLoader(dataloader, batch_size=10)
# create the model, criterion, and optimizer
model = Net()
criterion = nn.MSELoss()
optimizer = SGD(model.parameters(), lr=learning_rate)
# set the tracking URI to the model registry
mlflow.set_tracking_uri('http://127.0.0.1:5000')
mlflow.set_experiment('/mlflow_testing')
# start the MLflow run
with mlflow.start_run():
# train the model
loss = train(model, dataloader, criterion, optimizer, epochs)
# log some additional metrics
mlflow.log_metric('final_loss', loss.item())
mlflow.log_param('learning_rate', learning_rate)
# log trained model
mlflow.pytorch.log_model(model, 'model')
# log training code
mlflow.log_artifact('./train.py', 'code')
print('Completed.')

View File

@ -1,16 +0,0 @@
artifact_path: model
flavors:
python_function:
data: data
env: conda.yaml
loader_module: mlflow.pytorch
pickle_module_name: mlflow.pytorch.pickle_module
python_version: 3.10.9
pytorch:
code: null
model_data: data
pytorch_version: 1.13.1
mlflow_version: 1.30.0
model_uuid: ff8b845d6a174ffabfc49a18673c6c04
run_id: c248a4299f97423987a9496a2241ab1a
utc_time_created: '2023-02-22 01:10:55.971443'

View File

@ -1,11 +0,0 @@
channels:
- conda-forge
dependencies:
- python=3.10.9
- pip<=23.0.1
- pip:
- mlflow
- cloudpickle==2.2.1
- torch==1.13.1
- tqdm==4.64.1
name: mlflow-env

View File

@ -1,7 +0,0 @@
python: 3.10.9
build_dependencies:
- pip==23.0.1
- setuptools==67.3.2
- wheel==0.38.4
dependencies:
- -r requirements.txt

View File

@ -1,4 +0,0 @@
mlflow
cloudpickle==2.2.1
torch==1.13.1
tqdm==4.64.1

View File

@ -1,83 +0,0 @@
# train.py
#
# author: deng
# date : 20230221
import torch
import torch.nn as nn
from torch.optim import SGD
from tqdm import tqdm
import mlflow
class Net(nn.Module):
""" define a simple neural network model """
def __init__(self):
super(Net, self).__init__()
self.fc1 = nn.Linear(10, 5)
self.fc2 = nn.Linear(5, 1)
def forward(self, x):
x = self.fc1(x)
x = torch.relu(x)
x = self.fc2(x)
return x
def train(model, dataloader, criterion, optimizer, epochs):
""" define the training function """
for epoch in tqdm(range(epochs), 'Epochs'):
for i, (inputs, labels) in enumerate(dataloader):
# forwarding
outputs = model(inputs)
loss = criterion(outputs, labels)
# update gradient
optimizer.zero_grad()
loss.backward()
optimizer.step()
# log loss
mlflow.log_metric('train_loss', loss.item(), step=i)
return loss
if __name__ == '__main__':
# set hyper parameters
learning_rate = 1e-2
epochs = 20
# create a dataloader with fake data
dataloader = [(torch.randn(10), torch.randn(1)) for _ in range(100)]
dataloader = torch.utils.data.DataLoader(dataloader, batch_size=10)
# create the model, criterion, and optimizer
model = Net()
criterion = nn.MSELoss()
optimizer = SGD(model.parameters(), lr=learning_rate)
# set the tracking URI to the model registry
mlflow.set_tracking_uri('http://127.0.0.1:5000')
mlflow.set_experiment('mlflow_testing')
# start the MLflow run
with mlflow.start_run():
# train the model
loss = train(model, dataloader, criterion, optimizer, epochs)
# log some additional metrics
mlflow.log_metric('final_loss', loss.item())
mlflow.log_param('learning_rate', learning_rate)
# log trained model
mlflow.pytorch.log_model(model, 'model')
# log training code
mlflow.log_artifact('./train.py', 'code')
print('Completed.')

View File

@ -1,16 +0,0 @@
artifact_path: model
flavors:
python_function:
data: data
env: conda.yaml
loader_module: mlflow.pytorch
pickle_module_name: mlflow.pytorch.pickle_module
python_version: 3.10.9
pytorch:
code: null
model_data: data
pytorch_version: 1.13.1
mlflow_version: 1.30.0
model_uuid: 27a96ad04f5a4578a3e1302500ad9a11
run_id: b7d7395b6b53404497f7656b07b71bf8
utc_time_created: '2023-02-22 01:11:36.809812'

View File

@ -1,11 +0,0 @@
channels:
- conda-forge
dependencies:
- python=3.10.9
- pip<=23.0.1
- pip:
- mlflow
- cloudpickle==2.2.1
- torch==1.13.1
- tqdm==4.64.1
name: mlflow-env

View File

@ -1,7 +0,0 @@
python: 3.10.9
build_dependencies:
- pip==23.0.1
- setuptools==67.3.2
- wheel==0.38.4
dependencies:
- -r requirements.txt

View File

@ -1,4 +0,0 @@
mlflow
cloudpickle==2.2.1
torch==1.13.1
tqdm==4.64.1

View File

@ -1,83 +0,0 @@
# train.py
#
# author: deng
# date : 20230221
import torch
import torch.nn as nn
from torch.optim import SGD
from tqdm import tqdm
import mlflow
class Net(nn.Module):
""" define a simple neural network model """
def __init__(self):
super(Net, self).__init__()
self.fc1 = nn.Linear(10, 5)
self.fc2 = nn.Linear(5, 1)
def forward(self, x):
x = self.fc1(x)
x = torch.relu(x)
x = self.fc2(x)
return x
def train(model, dataloader, criterion, optimizer, epochs):
""" define the training function """
for epoch in tqdm(range(epochs), 'Epochs'):
for i, (inputs, labels) in enumerate(dataloader):
# forwarding
outputs = model(inputs)
loss = criterion(outputs, labels)
# update gradient
optimizer.zero_grad()
loss.backward()
optimizer.step()
# log loss
mlflow.log_metric('train_loss', loss.item(), step=i)
return loss
if __name__ == '__main__':
# set hyper parameters
learning_rate = 1e-2
epochs = 20
# create a dataloader with fake data
dataloader = [(torch.randn(10), torch.randn(1)) for _ in range(100)]
dataloader = torch.utils.data.DataLoader(dataloader, batch_size=10)
# create the model, criterion, and optimizer
model = Net()
criterion = nn.MSELoss()
optimizer = SGD(model.parameters(), lr=learning_rate)
# set the tracking URI to the model registry
mlflow.set_tracking_uri('http://127.0.0.1:5000')
mlflow.set_experiment('mlflow_testing')
# start the MLflow run
with mlflow.start_run():
# train the model
loss = train(model, dataloader, criterion, optimizer, epochs)
# log some additional metrics
mlflow.log_metric('final_loss', loss.item())
mlflow.log_param('learning_rate', learning_rate)
# log trained model
mlflow.pytorch.log_model(model, 'model')
# log training code
mlflow.log_artifact('./train.py', 'code')
print('Completed.')

View File

@ -1,16 +0,0 @@
artifact_path: model
flavors:
python_function:
data: data
env: conda.yaml
loader_module: mlflow.pytorch
pickle_module_name: mlflow.pytorch.pickle_module
python_version: 3.10.9
pytorch:
code: null
model_data: data
pytorch_version: 1.13.1
mlflow_version: 1.30.0
model_uuid: 2625ea164ff248c194686ed5afb9a510
run_id: c293e8294f4f46adacd21465be08c608
utc_time_created: '2023-02-22 01:11:28.646127'

View File

@ -1,11 +0,0 @@
channels:
- conda-forge
dependencies:
- python=3.10.9
- pip<=23.0.1
- pip:
- mlflow
- cloudpickle==2.2.1
- torch==1.13.1
- tqdm==4.64.1
name: mlflow-env

View File

@ -1,7 +0,0 @@
python: 3.10.9
build_dependencies:
- pip==23.0.1
- setuptools==67.3.2
- wheel==0.38.4
dependencies:
- -r requirements.txt

View File

@ -1,4 +0,0 @@
mlflow
cloudpickle==2.2.1
torch==1.13.1
tqdm==4.64.1

View File

@ -1,83 +0,0 @@
# train.py
#
# author: deng
# date : 20230221
import torch
import torch.nn as nn
from torch.optim import SGD
from tqdm import tqdm
import mlflow
class Net(nn.Module):
""" define a simple neural network model """
def __init__(self):
super(Net, self).__init__()
self.fc1 = nn.Linear(10, 5)
self.fc2 = nn.Linear(5, 1)
def forward(self, x):
x = self.fc1(x)
x = torch.relu(x)
x = self.fc2(x)
return x
def train(model, dataloader, criterion, optimizer, epochs):
""" define the training function """
for epoch in tqdm(range(epochs), 'Epochs'):
for i, (inputs, labels) in enumerate(dataloader):
# forwarding
outputs = model(inputs)
loss = criterion(outputs, labels)
# update gradient
optimizer.zero_grad()
loss.backward()
optimizer.step()
# log loss
mlflow.log_metric('train_loss', loss.item(), step=i)
return loss
if __name__ == '__main__':
# set hyper parameters
learning_rate = 1e-2
epochs = 20
# create a dataloader with fake data
dataloader = [(torch.randn(10), torch.randn(1)) for _ in range(100)]
dataloader = torch.utils.data.DataLoader(dataloader, batch_size=10)
# create the model, criterion, and optimizer
model = Net()
criterion = nn.MSELoss()
optimizer = SGD(model.parameters(), lr=learning_rate)
# set the tracking URI to the model registry
mlflow.set_tracking_uri('http://127.0.0.1:5000')
mlflow.set_experiment('mlflow_testing')
# start the MLflow run
with mlflow.start_run():
# train the model
loss = train(model, dataloader, criterion, optimizer, epochs)
# log some additional metrics
mlflow.log_metric('final_loss', loss.item())
mlflow.log_param('learning_rate', learning_rate)
# log trained model
mlflow.pytorch.log_model(model, 'model')
# log training code
mlflow.log_artifact('./train.py', 'code')
print('Completed.')

View File

@ -1,16 +0,0 @@
artifact_path: model
flavors:
python_function:
data: data
env: conda.yaml
loader_module: mlflow.pytorch
pickle_module_name: mlflow.pytorch.pickle_module
python_version: 3.10.9
pytorch:
code: null
model_data: data
pytorch_version: 1.13.1
mlflow_version: 1.30.0
model_uuid: 2ee49fdb3ec647a58b1235498b186722
run_id: d548729629634031a93a46d6dab8b7da
utc_time_created: '2023-02-22 01:11:33.149151'

View File

@ -1,11 +0,0 @@
channels:
- conda-forge
dependencies:
- python=3.10.9
- pip<=23.0.1
- pip:
- mlflow
- cloudpickle==2.2.1
- torch==1.13.1
- tqdm==4.64.1
name: mlflow-env

View File

@ -1,7 +0,0 @@
python: 3.10.9
build_dependencies:
- pip==23.0.1
- setuptools==67.3.2
- wheel==0.38.4
dependencies:
- -r requirements.txt

View File

@ -1,4 +0,0 @@
mlflow
cloudpickle==2.2.1
torch==1.13.1
tqdm==4.64.1

View File

@ -1,83 +0,0 @@
# train.py
#
# author: deng
# date : 20230221
import torch
import torch.nn as nn
from torch.optim import SGD
from tqdm import tqdm
import mlflow
class Net(nn.Module):
""" define a simple neural network model """
def __init__(self):
super(Net, self).__init__()
self.fc1 = nn.Linear(10, 5)
self.fc2 = nn.Linear(5, 1)
def forward(self, x):
x = self.fc1(x)
x = torch.relu(x)
x = self.fc2(x)
return x
def train(model, dataloader, criterion, optimizer, epochs):
""" define the training function """
for epoch in tqdm(range(epochs), 'Epochs'):
for i, (inputs, labels) in enumerate(dataloader):
# forwarding
outputs = model(inputs)
loss = criterion(outputs, labels)
# update gradient
optimizer.zero_grad()
loss.backward()
optimizer.step()
# log loss
mlflow.log_metric('train_loss', loss.item(), step=i)
return loss
if __name__ == '__main__':
# set hyper parameters
learning_rate = 1e-2
epochs = 20
# create a dataloader with fake data
dataloader = [(torch.randn(10), torch.randn(1)) for _ in range(100)]
dataloader = torch.utils.data.DataLoader(dataloader, batch_size=10)
# create the model, criterion, and optimizer
model = Net()
criterion = nn.MSELoss()
optimizer = SGD(model.parameters(), lr=learning_rate)
# set the tracking URI to the model registry
mlflow.set_tracking_uri('http://127.0.0.1:5000')
mlflow.set_experiment('train_fortune_predict_model')
# start the MLflow run
with mlflow.start_run():
# train the model
loss = train(model, dataloader, criterion, optimizer, epochs)
# log some additional metrics
mlflow.log_metric('final_loss', loss.item())
mlflow.log_param('learning_rate', learning_rate)
# log trained model
mlflow.pytorch.log_model(model, 'model')
# log training code
mlflow.log_artifact('./train.py', 'code')
print('Completed.')

View File

@ -1,16 +0,0 @@
artifact_path: model
flavors:
python_function:
data: data
env: conda.yaml
loader_module: mlflow.pytorch
pickle_module_name: mlflow.pytorch.pickle_module
python_version: 3.10.9
pytorch:
code: null
model_data: data
pytorch_version: 1.13.1
mlflow_version: 1.30.0
model_uuid: 75aa23c5bb33452c978feeeffcdcb393
run_id: 0be79b1f3f7d480a9c7f497312887a37
utc_time_created: '2023-02-22 01:12:26.682417'

View File

@ -1,11 +0,0 @@
channels:
- conda-forge
dependencies:
- python=3.10.9
- pip<=23.0.1
- pip:
- mlflow
- cloudpickle==2.2.1
- torch==1.13.1
- tqdm==4.64.1
name: mlflow-env

View File

@ -1,7 +0,0 @@
python: 3.10.9
build_dependencies:
- pip==23.0.1
- setuptools==67.3.2
- wheel==0.38.4
dependencies:
- -r requirements.txt

View File

@ -1,4 +0,0 @@
mlflow
cloudpickle==2.2.1
torch==1.13.1
tqdm==4.64.1

View File

@ -1,83 +0,0 @@
# train.py
#
# author: deng
# date : 20230221
import torch
import torch.nn as nn
from torch.optim import SGD
from tqdm import tqdm
import mlflow
class Net(nn.Module):
""" define a simple neural network model """
def __init__(self):
super(Net, self).__init__()
self.fc1 = nn.Linear(10, 5)
self.fc2 = nn.Linear(5, 1)
def forward(self, x):
x = self.fc1(x)
x = torch.relu(x)
x = self.fc2(x)
return x
def train(model, dataloader, criterion, optimizer, epochs):
""" define the training function """
for epoch in tqdm(range(epochs), 'Epochs'):
for i, (inputs, labels) in enumerate(dataloader):
# forwarding
outputs = model(inputs)
loss = criterion(outputs, labels)
# update gradient
optimizer.zero_grad()
loss.backward()
optimizer.step()
# log loss
mlflow.log_metric('train_loss', loss.item(), step=i)
return loss
if __name__ == '__main__':
# set hyper parameters
learning_rate = 1e-2
epochs = 20
# create a dataloader with fake data
dataloader = [(torch.randn(10), torch.randn(1)) for _ in range(100)]
dataloader = torch.utils.data.DataLoader(dataloader, batch_size=10)
# create the model, criterion, and optimizer
model = Net()
criterion = nn.MSELoss()
optimizer = SGD(model.parameters(), lr=learning_rate)
# set the tracking URI to the model registry
mlflow.set_tracking_uri('http://127.0.0.1:5000')
mlflow.set_experiment('train_fortune_predict_model')
# start the MLflow run
with mlflow.start_run():
# train the model
loss = train(model, dataloader, criterion, optimizer, epochs)
# log some additional metrics
mlflow.log_metric('final_loss', loss.item())
mlflow.log_param('learning_rate', learning_rate)
# log trained model
mlflow.pytorch.log_model(model, 'model')
# log training code
mlflow.log_artifact('./train.py', 'code')
print('Completed.')

Some files were not shown because too many files have changed in this diff.