Source code for torchctr.datasets.avazu

#!/usr/bin/env python
# encoding: utf-8

import numpy as np
import pandas as pd
from .base import BaseDataset


class Avazu(BaseDataset):
    """Dataset wrapper around a local sample of the Avazu click data.

    ``build_data`` reads ``~/.torchctr/avazu/train_mini.data`` and exposes the
    result through the attributes ``data``, ``x_columns``, ``y_column``,
    ``x``, ``y`` and ``feature_dims``.
    """

    def __init__(self):
        super().__init__()

    def build_data(self):
        """Load the CSV sample and populate the feature/label arrays.

        Sets the following attributes on the instance:

        * ``data`` -- the loaded frame without its first column.
        * ``y_column`` -- name of the label column (``"click"``).
        * ``x_columns`` -- every remaining column name, in file order.
        * ``x`` -- feature matrix as ``np.int64``.
        * ``y`` -- label vector as ``np.float32``.
        * ``feature_dims`` -- per-column maximum of ``x``.
        """
        data = pd.read_csv("~/.torchctr/avazu/train_mini.data", engine="python")

        # Drop the first column of the file.
        # NOTE(review): presumably a row identifier -- confirm against the data.
        self.data = data[data.columns[1:]]

        self.y_column = "click"
        self.x_columns = [c for c in self.data.columns if c != self.y_column]

        # preprocess_x / preprocess_y are not defined in this class; they are
        # expected to come from BaseDataset and to leave self.data in a state
        # where the feature columns can be cast to integers below.
        # process x
        self.preprocess_x()
        # build feature
        self.preprocess_y()

        # np.int was removed in NumPy 1.24; use an explicit fixed-width integer
        # type so the cast works on current NumPy and does not depend on the
        # platform's default integer size.
        self.x = self.data[self.x_columns].to_numpy().astype(np.int64)
        self.y = self.data[self.y_column].to_numpy().astype(np.float32)

        # Largest value observed in each feature column.
        # NOTE(review): if features are zero-based category codes, the number
        # of distinct values would be max + 1 -- confirm what consumers expect.
        self.feature_dims = np.max(self.x, axis=0)