tensorboardX 使用流程
Published:
记录一些 tensorboardX 的学习笔记。不想学的话,可以转用 wandb。
使用torch及tensorboardX 记录实验
step1 : 在训练脚本(main.py)中的设置
#两种import方式.
# 1 :
# from torch.utils.tensorboard import SummaryWriter
# 2 :
from tensorboardX import SummaryWriter
import time
import torch.optim
from torch.optim.lr_scheduler import StepLR
now = time.strftime('%Y%m%d-%H_%M_%S', time.localtime(time.time()))
writer = SummaryWriter(log_dir=f'{LOG_DIR}/{now}')
# set optimizer 设置优化器
optimizer = torch.optim.SGD(...)
# set scheduler 设置学习率调整策略
scheduler = StepLR(optimizer, ...)
...
def train(..., epoch):
...
for i, (input, target) in enumerate(train_loader):
...
writer.add_scalars(main_tag='Loss',tag_scalar_dict={
'train loss' : loss,
}, global_step=epoch)
writer.add_scalars(main_tag='Prec',tag_scalar_dict={
'train top1' : top1,
'train top5' : top5
}, global_step=epoch)
writer.add_scalars(main_tag='time/train', tag_scalar_dict={
'batch_time': batch_time,
'data_time': data_time,
},
global_step=epoch)
def validate(..., epoch):
...
with torch.no_grad():
...
writer.add_scalars(main_tag='Loss', tag_scalar_dict={
'val loss' : losses.avg,
}, global_step=epoch)
writer.add_scalars(main_tag='Prec', tag_scalar_dict={
'validate top1' : top1.avg,
'validate top5' : top5.avg
}, global_step=epoch)
writer.add_scalar('time/val/batch_time', batch_time.avg, epoch)
return ...
def main():
...
for epoch in range(args.start_epoch, args.epochs):
...
train(..., epoch)
validate(..., epoch)
writer.add_scalar('learning rate', optimizer.state_dict()['param_groups'][0]['lr'], epoch)
scheduler.step()
...
writer.close()
# close the writer only once, after all add_* calls are done
# the end of main()
step2 : check results 查看结果
tensorboard --logdir='your_logdir' \
--port XXXX #默认6006端口
然后,在浏览器中打开 `http://localhost:6006/`
(若用 --port 指定了其他端口,则打开 `http://localhost:XXXX/`)
step3(OPT) : 若远程服务器没有图形界面,可映射到本地端口后查看
#本地计算机:
ssh -L 本地端口:127.0.0.1:TensorBoard端口 用户名@服务器的IP地址 -p 服务器登录端口
ssh -L 10086:127.0.0.1:6006 user_name@remote_server_ip -p login_port
然后,本地计算机浏览器打开 http://127.0.0.1:10086/