Cersat BigData & Cloud Platform : hardware evolution and statistics¶
#ignore
%matplotlib inline
import pandas as pd
# Read the servers CSV; cells containing '#DIV/0!' are treated as missing values.
data = pd.read_csv(
    '/home/pifgold/tmp/servers_cersat.csv',
    na_values=['#DIV/0!'],
)
#ignore
from datetime import datetime
def to_date(d):
    """Parse a ``DD-MM-YYYY`` string into a ``datetime``.

    Returns ``None`` instead of raising when ``d`` is not a string (for
    example ``None`` or a NaN float) or does not match the expected format.
    """
    try:
        return datetime.strptime(d, '%d-%m-%Y')
    except (TypeError, ValueError):
        # TypeError: non-string input; ValueError: string in another format.
        return None
# Parse every cell of the 'date' column and use the results as the row index.
dates = list(map(to_date, data['date']))
data.index = dates
#data
#print data.index.min(), "->", data.index.max()
#ignore
# Keep only the rows indexed from 2010-01-01 onwards (removes invalid data).
data = data["2010-01-01":]

# Select the hardware columns of interest and give them shorter labels.
dataset = data[["nb cores.1", "ram.1", "net link.1", "nb disks.1", "TB total.1"]]
dataset.columns = ['cores', 'memory', 'network links', 'disks', 'TB']

# Total each column per 6-month bin. The `how=` keyword of resample() was
# deprecated in pandas 0.18 and later removed, so the aggregation is called
# on the resampler object instead.
dataset = dataset.resample('6M').sum()
Capacity added per semester¶
# One bar chart per column of `dataset`, one bar per 6-month bin.
_ = dataset.plot(
    kind='bar',
    subplots=True,
    figsize=(16, 12),
)
Platform capacity over time¶
# Running total of each column over the 6-month bins, one bar chart per column.
_ = dataset.cumsum().plot(
    kind='bar',
    subplots=True,
    figsize=(16, 16),
)
Ratios (memory per core, TB per core, TB per network link)¶
# Per-bin ratios between the aggregated columns; `columns` fixes the
# left-to-right order of the resulting frame.
dataset_stats = pd.DataFrame(
    {
        'mem/core (gb)': dataset['memory'] / dataset['cores'],
        'TB / core': dataset['TB'] / dataset['cores'],
        'TB / network links': dataset['TB'] / dataset['network links'],
    },
    columns=['mem/core (gb)', 'TB / core', 'TB / network links'],
)
_ = dataset_stats.plot(kind='bar', subplots=True, figsize=(16, 12))
Price evolution per TB (in €, all server components included: CPU, memory, system disks, etc.)¶
#ignore
def to_float(s):
    """Extract a number from a price string that uses a decimal comma.

    Every character other than a digit or a comma is discarded (spaces,
    currency symbols, dots, signs), the comma is then read as the decimal
    separator and the remainder is converted to ``float``.

    Returns ``None`` when ``s`` is not a string (for example ``None`` or a
    NaN float) or when what is left after filtering is not a valid number.
    """
    try:
        kept = ''.join(c for c in s if c.isdigit() or c == ',')
        return float(kept.replace(',', '.'))
    except (AttributeError, TypeError, ValueError):
        # TypeError: `s` is not iterable; AttributeError: its items are not
        # strings; ValueError: nothing numeric left, or more than one comma.
        return None
#print data[u'Prix / TB\n(tout compris)']
# Convert the price column to floats; to_float() yields None for cells it
# cannot parse.
price_per_tb = [to_float(s) for s in data[u'Prix / TB\n(tout compris)']]
data['euros / TB'] = price_per_tb

# Mean price per 6-month bin. The `how=` keyword of resample() was deprecated
# in pandas 0.18 and later removed, so the aggregation is called on the
# resampler object instead.
price_per_tb_per6M = data['euros / TB'].resample('6M').mean()
price_per_tb_per6M.plot(kind='bar', figsize=(16,5))
Out[453]:
Comments
comments powered by Disqus