In [1]:
import numpy as np
import pandas as pd
In [13]:
# Load the feature table, keeping only its first 105 columns (selected by
# position) and then removing 'duration' from that slice.
data = (
    pd.read_csv('features.csv')
    .iloc[:, :105]
    .drop('duration', axis=1)
)
# Preview the first rows of the resulting frame.
data.head()
Out[13]:
match_id start_time lobby_type r1_hero r1_level r1_xp r1_gold r1_lh r1_kills r1_deaths ... radiant_first_ward_time dire_bottle_time dire_courier_time dire_flying_courier_time dire_tpscroll_count dire_boots_count dire_ward_observer_count dire_ward_sentry_count dire_first_ward_time radiant_win
0 0 1430198770 7 11 5 2098 1489 20 0 0 ... 35 103 -84 221 3 4 2 2 -52 1
1 1 1430220345 0 42 4 1188 1033 9 0 1 ... -20 149 -84 195 5 4 3 1 -5 1
2 2 1430227081 7 33 4 1319 1270 22 0 0 ... -39 45 -77 221 3 4 3 1 13 0
3 3 1430263531 1 29 4 1779 1056 14 0 0 ... -30 124 -80 184 0 4 2 0 27 0
4 4 1430282290 7 13 4 1431 1090 8 1 0 ... 46 182 -80 225 6 3 3 0 -16 0

5 rows × 104 columns

In [14]:
# Display the column labels of the loaded frame.
data.columns
Out[14]:
Index([u'match_id', u'start_time', u'lobby_type', u'r1_hero', u'r1_level', u'r1_xp', u'r1_gold', u'r1_lh', u'r1_kills', u'r1_deaths', u'r1_items', u'r2_hero', u'r2_level', u'r2_xp', u'r2_gold', u'r2_lh', u'r2_kills', u'r2_deaths', u'r2_items', u'r3_hero', u'r3_level', u'r3_xp', u'r3_gold', u'r3_lh', u'r3_kills', u'r3_deaths', u'r3_items', u'r4_hero', u'r4_level', u'r4_xp', u'r4_gold', u'r4_lh', u'r4_kills', u'r4_deaths', u'r4_items', u'r5_hero', u'r5_level', u'r5_xp', u'r5_gold', u'r5_lh', u'r5_kills', u'r5_deaths', u'r5_items', u'd1_hero', u'd1_level', u'd1_xp', u'd1_gold', u'd1_lh', u'd1_kills', u'd1_deaths', u'd1_items', u'd2_hero', u'd2_level', u'd2_xp', u'd2_gold', u'd2_lh', u'd2_kills', u'd2_deaths', u'd2_items', u'd3_hero', u'd3_level', u'd3_xp', u'd3_gold', u'd3_lh', u'd3_kills', u'd3_deaths', u'd3_items', u'd4_hero', u'd4_level', u'd4_xp', u'd4_gold', u'd4_lh', u'd4_kills', u'd4_deaths', u'd4_items', u'd5_hero', u'd5_level', u'd5_xp', u'd5_gold', u'd5_lh', u'd5_kills', u'd5_deaths', u'd5_items', u'first_blood_time', u'first_blood_team', u'first_blood_player1', u'first_blood_player2', u'radiant_bottle_time', u'radiant_courier_time', u'radiant_flying_courier_time', u'radiant_tpscroll_count', u'radiant_boots_count', u'radiant_ward_observer_count', u'radiant_ward_sentry_count', u'radiant_first_ward_time', u'dire_bottle_time', u'dire_courier_time', u'dire_flying_courier_time', u'dire_tpscroll_count', u'dire_boots_count', ...], dtype='object')
In [15]:
# Remove 'match_id' and 'start_time' so they are not carried along as model
# inputs. `data` is rebound here, so re-running this cell without reloading
# raises because the columns are already gone.
unused_columns = ['match_id', 'start_time']
data = data.drop(labels=unused_columns, axis=1)
In [16]:
# Replace every missing value in the frame with 0.
data = data.fillna(value=0)
In [17]:
# `sklearn.cross_validation` was deprecated in scikit-learn 0.18 and removed in
# 0.20; `train_test_split` now lives in `sklearn.model_selection`. Fall back to
# the old module only on installations that predate the move.
try:
    from sklearn.model_selection import train_test_split
except ImportError:  # scikit-learn < 0.18
    from sklearn.cross_validation import train_test_split

# Fixed seed so the 70/30 split, and therefore the score computed on the
# held-out part, is the same on every run.
SPLIT_SEED = 42

# Every column except 'radiant_win' is a model input; 'radiant_win' is the target.
X, X_test, y, y_test = train_test_split(
    data.drop(['radiant_win'], axis=1),
    data['radiant_win'],
    test_size=0.3,
    random_state=SPLIT_SEED,
)
In [18]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score

# Fit a logistic regression with default hyperparameters on the training split.
clf = LogisticRegression()
clf.fit(X, y)

# Column 1 of predict_proba holds the probability of the second entry in
# clf.classes_, i.e. radiant_win == 1 for a 0/1 target.
test_scores = clf.predict_proba(X_test)[:, 1]

# ROC AUC on the held-out split. The call form of print with a single argument
# prints the same text under Python 2 and is also valid Python 3 (the bare
# `print x` statement is a SyntaxError there).
print(roc_auc_score(y_test, test_scores))
0.713511442472