大致分为:1) 服务器的基础配置,2) 基于conda的多用户环境,3) https协议的开通,包括SSL证书的安装和Apache的转发代理,4) 异常检测,包括断点重连以及发送异常邮件。适用于macOS、Ubuntu。
I. 服务器的基础配置
# --- 本地端 ---
# 生成密钥公钥 公钥copy至server端.ssh/authorized_keys
$ ssh-keygen -t rsa -C "email@qwerty.com"
$ cat .ssh/id_rsa.pub
# --- 移步至server端 ---
# 查看server系统位数
$ getconf LONG_BIT
# 查看本节点ip
$ hostname -I
# 普通用户登陆后 切换至root
$ sudo -i
# 关闭ssh密码访问 关闭root直接登陆
$ vim /etc/ssh/sshd_config
# 将 PasswordAuthentication yes 改为 PasswordAuthentication no
# 将 PermitRootLogin yes 改为 PermitRootLogin no
# 重启ssh服务
$ service sshd restart
# server端的.ssh的权限不能设为777 防止除所有者以外的人对内部key修改
# 添加用户 -m是建立家目录
$ useradd -m {user_name}
# 配置密码
$ passwd {user_name}
# 删除用户 (连带家目录)
$ userdel -r {user_name}
# 查看可用系统shell 查看新用户当前shell 切换至新shell, e.g. bash
$ echo $SHELL
$ cat /etc/passwd | grep {user_name}
$ chsh -s /bin/bash {user_name}
# 查看防火墙状态 开启、关闭防火墙
$ ufw status [verbose]
$ ufw enable
$ ufw disable
# 查看防火墙目前允许应用 查看详细应用信息 添加新规则
$ ufw app list
$ ufw app info '{app_name}'
$ ufw allow {new_app_name}
# 防火墙允许某端口访问
$ /sbin/iptables -A INPUT -p TCP --dport 443 -j ACCEPT
# 后台运行某命令并将输出重定向(追加)至log文件
$ nohup {command} >> {log_file} &
# 查看某端口下进程
$ lsof -i:{port_number}
# 查看相关进程
$ ps -ef | grep {key_words}
II. 基于conda的多用户环境
# 下载anaconda 并安装
$ wget {anaconda_sh_file_url}
$ sh ./{anaconda_sh_file}
# root用户 指定安装路径为`/opt/anaconda`
# 个人用户 指定安装路径为家目录任意位置
# init conda in ~/.bashrc [yes]
# root用户再将~/.bashrc中init conda的部分copy至/etc/profile
# base环境只有root可写
# 安装jupyterHub和相应的包管理器
# 目前最新版anaconda支持python37但jupyterHub支持python38,冲突
$ conda install jupyterhub
$ conda install nb_conda_kernels
# conda新建、查看、激活、退出、删除虚拟环境
$ conda create -n {env_name} python={py_version}
$ conda env list
$ conda activate {env_name}
$ conda deactivate
$ conda remove -n {env_name} --all
# 配置多kernel的jupyter notebook
$ conda install ipykernel
$ ipython kernel install --user --name={kernel_name}
# 配置jupyter notebook密码
$ jupyter notebook --generate-config
$ ipython
In [1]: from notebook.auth import passwd
In [2]: passwd()
Enter password:
Verify password:
Out[2]: '{hash_code}'
$ vim /home/zhufm/.jupyter/jupyter_notebook_config.py
-> c.NotebookApp.password = u'{hash_code}'
# 启动jupyter 监听所有ip
# notebook_url+/lab = jupyter_lab
$ jupyter notebook --ip 0.0.0.0 --port {port_number} --no-browser
III. Https协议的开通 (基于apache)
1. ssl证书的安装
# 安装apache
$ apt-get install apache2
# 创建证书存放目录并拷贝证书至此
$ mkdir /etc/apache2/ssl
$ cp {downloded_ssl_folder}/* /etc/apache2/ssl/
# 启动ssl模块
# 已启动的模块存在于/etc/apache2/mods-enabled中
$ a2enmod ssl
# 修改default-ssl.conf的相关配置
$ vim /etc/apache2/sites-available/default-ssl.conf
->
ServerName {domain_name}
SSLCertificateFile /etc/apache2/ssl/2_{domain_name}.crt
SSLCertificateKeyFile /etc/apache2/ssl/3_{domain_name}.key
SSLCertificateChainFile /etc/apache2/ssl/1_root_bundle.crt
# 在sites-enabled中建立软链接
ln -s /etc/apache2/sites-available/default-ssl.conf /etc/apache2/sites-enabled/default-ssl.conf
# 重新加载配置
$ /etc/init.d/apache2 force-reload
# 重启apache服务
# 其实重启apache服务包括了stop reload start
$ /etc/init.d/apache2 restart
# 浏览器登录https://{domain_name}即可出现apache测试页
# 等同于访问https://{domain_name}:443
# --- debugging ---
# 查看apache服务运行状况
$ systemctl status apache2.service
# 查看apache的访问、错误日志
$ vim /etc/apache2/apache2.conf
-> ErrorLog ${APACHE_LOG_DIR}/error.log
$ vim /etc/apache2/envvars
-> export APACHE_LOG_DIR=/var/log/apache2$SUFFIX
# 则error日志路径为/var/log/apache2/error.log
2. Apache转发代理
本地服务监听7878端口:http://localhost:7878/{api}
想达成目的:通过访问https开头的url访问7878的api
解决方案:apache新增监听7879,配置同一套ssl,再代理至本地7878端口
# 开启proxy模块
$ a2enmod proxy
# 新增 site
$ vim /etc/apache2/sites-available/7879-proxy.conf
->
<VirtualHost *:7879>
ServerAdmin test@test.com
# ServerName www.test.com
ErrorLog ${APACHE_LOG_DIR}/error.log
CustomLog ${APACHE_LOG_DIR}/access.log combined
SSLEngine On
SSLCertificateFile /etc/apache2/ssl/2_{domain_name}.crt
SSLCertificateKeyFile /etc/apache2/ssl/3_{domain_name}.key
SSLCertificateChainFile /etc/apache2/ssl/1_root_bundle.crt
ProxyRequests Off
ProxyPreserveHost On
<Proxy *>
Order deny,allow
Allow from all
</Proxy>
ProxyPass / http://localhost:7878/
ProxyPassReverse / http://localhost:7878/
</VirtualHost>
<VirtualHost *:7879>
ServerAdmin test@test.com
# ServerName www.test.com
ErrorLog ${APACHE_LOG_DIR}/error.log
CustomLog ${APACHE_LOG_DIR}/access.log combined
ProxyRequests Off
ProxyPreserveHost On
<Proxy *>
Order deny,allow
Allow from all
</Proxy>
ProxyPass / http://localhost:7878/
ProxyPassReverse / http://localhost:7878/
</VirtualHost>
# 打开apache的7879监听端口 添加Listen
$ vim /etc/apache2/ports.conf
-> Listen 7879
# 在sites-enabled中建立软链接
ln -s /etc/apache2/sites-available/7879-proxy.conf /etc/apache2/sites-enabled/7879-proxy.conf
# 重载 重启
$ /etc/init.d/apache2 force-reload
$ /etc/init.d/apache2 restart
# 浏览器访问 https://{domain_name}:7879/{api} 即可
IV. 异常检测
后台程序有时候会由于不明原因中断,这时我们需要查看相应的端口或者检索相应的脚本进程来确认程序的状态:
$ lsof -i:{port_number}
# or
$ ps -ef | grep {command}
若检测发现程序中断,一方面我们需要程序自行重连,另一方面最好给管理员发送一封异常邮件,并附上时间戳。以下是相应的python代码:
import os
import time
import smtplib
from email.mime.text import MIMEText
from email.header import Header
# Watchdog script: sends a test email at start-up, then polls a local port
# every few seconds; when nothing is listening on it, the backend is
# restarted and an alert email carrying the restart timestamp is sent.

sender = 'zhufm@shanghaitech.edu.cn'
receivers = ['zfm19980618@icloud.com', 'zhaodj@shanghaitech.edu.cn']

mail_host = "smtp.shanghaitech.edu.cn"  # SMTP server
mail_user = "zhufm@shanghaitech.edu.cn"  # usually the email account
# The SMTP credential (not necessarily the account password) must not be
# stored in this file; export it before launching, e.g.
#   $ export MAIL_PASS='...'
mail_pass = os.environ.get("MAIL_PASS", "")

SMTP_PORT = 25  # 25 is the non-SSL default port for SMTP
SMTP_TIMEOUT_SECONDS = 30  # keeps a hung mail server from blocking the loop
WATCHED_PORT = 7878
RESTART_COMMAND = "nohup ./run.sh >> backend.log &"
CHECK_INTERVAL_SECONDS = 20
# os.system() returns the raw wait status on POSIX; 256 corresponds to exit
# code 1, which the original script treats as "lsof found no process".
LSOF_NOTHING_FOUND_STATUS = 256


def format_timestamp(t):
    """Return *t* (a struct_time or 9-tuple) as 'YYYY MM DD HH:MM:SS'."""
    return time.strftime("%Y %m %d %H:%M:%S", t)


def build_message(subject, body):
    """Build a UTF-8 plain-text email with the given subject and body."""
    # 1. message content 2. text format 3. encoding
    message = MIMEText(body, 'plain', 'utf-8')
    message['From'] = Header("test", 'utf-8')  # sender title
    message['To'] = Header("test", 'utf-8')  # receiver title
    message['Subject'] = Header(subject, 'utf-8')
    return message


def send_to_all(message, success_label, failure_label):
    """Send *message* to every address in ``receivers``.

    Each receiver gets its own SMTP session so that one failure does not
    prevent delivery to the others. Failures are reported on stdout and
    never raised, so the watchdog loop keeps running.

    Returns the number of receivers the message was handed off to.
    """
    delivered = 0
    payload = message.as_string()
    for receiver in receivers:
        try:
            # The context manager issues QUIT and closes the socket.
            with smtplib.SMTP(mail_host, SMTP_PORT,
                              timeout=SMTP_TIMEOUT_SECONDS) as smtp:
                smtp.login(mail_user, mail_pass)
                smtp.sendmail(sender, receiver, payload)
        except (smtplib.SMTPException, OSError) as exc:
            # OSError covers connection/timeout errors raised while connecting.
            print("Error: fail sending " + failure_label + " to ", receiver,
                  "(%s)" % exc)
        else:
            delivered += 1
            print(success_label + " sent to ", receiver, ", successful!")
    return delivered


def port_is_free(port):
    """Return True when ``lsof`` reports no process on TCP/UDP *port*."""
    return os.system("lsof -i:%d" % port) == LSOF_NOTHING_FOUND_STATUS


def main():
    """Send the start-up test email, then watch the port forever."""
    # --- test ---
    print("Loading testing checker...")
    send_to_all(build_message('This is a test.', "Just ignore."),
                "Testing email", "testing email")

    # --- main checker ---
    print("Loading main checker...")
    while True:
        if port_is_free(WATCHED_PORT):
            stamp = format_timestamp(time.localtime(time.time()))
            print(stamp, "DOWN !")
            os.system(RESTART_COMMAND)
            send_to_all(
                build_message('The Server is Down!', "Restarted at " + stamp),
                "error email", "error email")
        time.sleep(CHECK_INTERVAL_SECONDS)


if __name__ == "__main__":
    main()
References:
- https://hkumockup.wixsite.com/14200-01/using-the-gpu-farm
- 10.15.89.41:8899 [inside ShanghaiTech]
- https://zhuanlan.zhihu.com/p/66169551
- https://cloud.tencent.com/document/product/213/2502
- https://cloud.tencent.com/document/product/213/40436