Source code for caver.utils

import numpy as np
from torch import nn
import torch
import os


def init_weight(layer):
    """
    Init layer weights and bias
    """
    classname = layer.__class__.__name__
    if classname.find('Conv') != -1:
        nn.init.xavier_normal_(layer.weight.data, gain=np.sqrt(2))
        nn.init.constant_(layer.bias.data, 0.1)
    elif classname.find('BatchNorm') != -1:
        nn.init.constant_(layer.weight.data, 1.0)
        nn.init.constant_(layer.bias.data, 0)
    elif classname.find('Linear') != -1:
        nn.init.eye_(layer.weight.data)
    # else:
    #     nn.init.normal_(layer.weight.data, 0, 0.1)
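# Illustrative usage sketch (not part of the original module): `nn.Module.apply`
# visits every sub-module and passes it to `init_weight`. The toy model below is
# an assumption made for demonstration only.
#
#     model = nn.Sequential(nn.Conv1d(8, 16, kernel_size=3),
#                           nn.BatchNorm1d(16),
#                           nn.Linear(16, 4))
#     model.apply(init_weight)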
def update_config(config, **kwargs):
    """
    Update config attributes with key-value pairs in kwargs.
    Keys not in config will be ignored.
    """
    for key in kwargs:
        if not hasattr(config, key):
            print('Ignore unknown attribute {}.'.format(key))
        else:
            setattr(config, key, kwargs[key])
            print('Attribute {} has been updated.'.format(key))
    return config
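# Illustrative usage sketch (the `Config` class below is hypothetical, not part
# of this module):
#
#     class Config:
#         batch_size = 32
#         lr = 1e-3
#
#     config = update_config(Config(), batch_size=64, dropout=0.5)
#     # prints "Attribute batch_size has been updated." and
#     # "Ignore unknown attribute dropout."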
def zero_padding(x, length):
    """Copy each sequence in x into a zero matrix, padding or truncating to `length`."""
    result = np.zeros((len(x), length))
    for i, row in enumerate(x):
        for j, val in enumerate(row):
            if j >= length:
                break
            result[i][j] = val
    return result


def transform2onehot(y, num_class):
    """Turn lists of label indices into a multi-hot label matrix of shape (len(y), num_class)."""
    label = np.zeros((len(y), num_class))
    for i, index in enumerate(y):
        for j in index:
            label[i][j] = 1
    return label


def scaler(x, minimal=0, maximal=1):
    """Min-max scale x into the range [minimal, maximal]."""
    std = (x - np.min(x)) / (np.max(x) - np.min(x))
    return std * (maximal - minimal) + minimal


def get_top_label_with_logits(logits, index2label, top=5):
    """Return the labels of the `top` highest logits, best first."""
    index = np.argsort(logits)[-top:]
    index = index[::-1]
    return [index2label.get(i, '<WRONG>') for i in index]


def recall_at_k(pred, y, k=5):
    """Share of the true labels y recovered by the top-k predictions."""
    if len(pred) > k:
        pred = pred[:k]
    hits = 0.0
    for p in pred:
        if p in y:
            hits += 1.0
    return hits / min(len(y), k)


def make_batches(size, batch_size):
    """Return (start, end) index pairs that split `size` items into batches."""
    num_batches = int(np.ceil(size / float(batch_size)))
    return [(i * batch_size, min(size, (i + 1) * batch_size))
            for i in range(num_batches)]
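# Illustrative sketches of the helpers above, on made-up toy data:
#
#     zero_padding([[3, 7, 2], [5, 1]], 4)      # -> [[3., 7., 2., 0.], [5., 1., 0., 0.]]
#     transform2onehot([[0, 2]], 4)             # -> [[1., 0., 1., 0.]]
#     scaler(np.array([1., 2., 3.]))            # -> [0., 0.5, 1.]
#     get_top_label_with_logits(np.array([0.1, 0.9, 0.3]), {1: 'sports'}, top=2)
#                                               # -> ['sports', '<WRONG>']
#     recall_at_k([0, 2, 9], {0, 1, 2}, k=3)    # -> 0.666...  (2 of 3 true labels hit)
#     make_batches(10, 4)                       # -> [(0, 4), (4, 8), (8, 10)]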
def load_embedding(embedding_file, dim, vocab_size, index2word):
    """
    :param embedding_file: path of embedding file
    :type embedding_file: str
    :param dim: dimension of vector
    :type dim: int
    :param vocab_size: size of vocabulary
    :type vocab_size: int
    :param index2word: index => word
    :type index2word: dict

    Load a pre-trained embedding file. The first line of the file should be
    the number of words and the dimension of the vectors. Each following line
    consists of a word and its vector values separated by spaces. ::

        1024, 64   # 1024 words and 64-d
        a 0.223 0.566 ......
        b 0.754 0.231 ......
        ......
    """
    word2vec = {}
    with open(embedding_file, 'r', encoding='utf-8') as f:
        print('Embedding file header: {}'.format(f.readline()))  # ignore header
        for line in f.readlines():
            items = line.strip().split(' ')
            word2vec[items[0]] = [float(vec) for vec in items[1:]]

    embedding = [[]] * vocab_size
    bound = np.sqrt(6.0) / np.sqrt(vocab_size)
    count_exist, count_not_exist = 0, 0
    for i in range(vocab_size):
        word = index2word[i]
        try:
            embedding[i] = word2vec[word]
            count_exist += 1
        except KeyError:
            # words without a pre-trained vector get a random uniform vector
            embedding[i] = np.random.uniform(-bound, bound, dim)
            count_not_exist += 1
    print('words with pre-trained embedding:', count_exist,
          '\twords without:', count_not_exist)
    embedding = np.array(embedding)
    return embedding
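# Illustrative usage sketch (the file path and vocabulary below are hypothetical):
#
#     index2word = {0: '<pad>', 1: 'hello', 2: 'world'}
#     vectors = load_embedding('embeddings/wv.txt', dim=64,
#                              vocab_size=len(index2word), index2word=index2word)
#     vectors.shape  # -> (3, 64); rows for out-of-vocabulary words are random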
def check_ensemble_args(args):
    status = True
    if not (
        os.path.exists(args.cnn)
        or os.path.exists(args.lstm)
        or os.path.exists(args.fasttext)
    ):
        status = False
        print("|ERROR| no model directory exists")

    models = list(
        filter(
            lambda x: os.path.exists(x),
            [args.cnn, args.lstm, args.fasttext]
        )
    )
    if len(models) < 2:
        status = False
        print("|ERROR| at least two models are required for ensembling")

    if len(args.model_ratio) > 0:
        if len(models) != len(args.model_ratio):
            status = False
            print("|ERROR| number of model ratios does not match number of models")
        elif sum(args.model_ratio.values()) != 1:
            status = False
            print("|ERROR| model ratios do not sum to one")

    if len(args.sentences) == 0:
        status = False
        print("|ERROR| sentences list cannot be empty")

    return status


def show_ensemble_args(args):
    dict_args = vars(args)
    print("=============== Command Line Tools Args ===============")
    for arg, value in dict_args.items():
        if isinstance(value, dict) and len(value) > 0:
            value = " ".join("{}_{}".format(k, v) for k, v in value.items())
        elif isinstance(value, dict) and len(value) == 0:
            continue
        elif isinstance(value, list):
            value = "'" + "','".join(value) + "'"
        elif isinstance(value, str) and value == "":
            continue
        print("{:>20} <===> {:<20}".format(arg, value))
    print("=======================================================")
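# Illustrative usage sketch with a hand-built namespace; the paths and field
# values are assumptions about how the CLI populates `args`:
#
#     from argparse import Namespace
#     args = Namespace(cnn='checkpoints/cnn', lstm='checkpoints/lstm',
#                      fasttext='checkpoints/fasttext',
#                      model_ratio={'cnn': 0.5, 'lstm': 0.25, 'fasttext': 0.25},
#                      sentences=['an example sentence'])
#     if check_ensemble_args(args):
#         show_ensemble_args(args)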
def set_config(config, args_dict):
    """
    Update config attributes with key-value pairs in args_dict.
    Keys not in config will be ignored.
    """
    for key, value in args_dict.items():
        if hasattr(config, key):
            setattr(config, key, value)
    return config
class MiniBatchWrapper(object):
    """
    Wrap a plain torchtext iterator so each batch yields (x, y) where y stacks
    multiple label fields into a single tensor.
    """
    def __init__(self, dl, x_var, y_vars):
        self.dl, self.x_var, self.y_vars = dl, x_var, y_vars

    def __iter__(self):
        for batch in self.dl:
            x = getattr(batch, self.x_var)  # we assume only one input in this wrapper
            if self.y_vars is not None:
                temp = [getattr(batch, feat).unsqueeze(1) for feat in self.y_vars]
                y = torch.cat(temp, dim=1).float()
            else:
                y = torch.zeros((1))
            yield (x, y)

    def __len__(self):
        return len(self.dl)
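# Illustrative usage sketch, assuming a torchtext BucketIterator whose batches
# expose a `text` field and one attribute per label column (the field names are
# assumptions, not part of this module):
#
#     train_iter = MiniBatchWrapper(train_bucket_iter, 'text', ['toxic', 'obscene'])
#     for x, y in train_iter:
#         ...  # x: token id tensor, y: float tensor of shape (batch_size, 2)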