Commit 30878c4f authored by Alexander Henkel

cluster stuff

parent ca30e076
@@ -157,7 +157,7 @@ def start_training():
     print('Finished predictions in ', time.time()-start, 'seconds')
     # HyperParameter: Epochs, UseWeights, QuerySize, SubsetSize, includeHighConfidencePart
-    for epochs in range(100, 801, 100):
+    for epochs in range(50, 151, 50):
         for use_weights in (False, True):
             for query_size in range(20, 101, 10):
                 for subset_size in range(5, 21, 5):
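The new loop shrinks the epoch sweep from eight values (100 to 800) to three (50, 100, 150), leaving a 3 x 2 x 9 x 4 grid of 216 configurations for the four parameters shown (the includeHighConfidencePart loop named in the comment lies outside this hunk). An equivalent flat enumeration, as a sketch, not repo code:

    import itertools

    # Flat version of the nested sweep above.
    grid = itertools.product(
        range(50, 151, 50),    # epochs: 50, 100, 150
        (False, True),         # use_weights
        range(20, 101, 10),    # query_size: 20 ... 100
        range(5, 21, 5),       # subset_size: 5, 10, 15, 20
    )
    for epochs, use_weights, query_size, subset_size in grid:
        pass  # train and evaluate one configuration here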
@@ -24,7 +24,6 @@ arg_parser.add_argument('BaseModel',
                         type=str,
                         help='path of base model')
 arg_parser.add_argument('Dataset',
-                        metavar='dataset',
                         type=str,
@@ -50,7 +49,6 @@ arg_parser.add_argument('-s', '--skip',
                         action='store_true',
                         help='set to skip already existing models')
-args = arg_parser.parse_args()
 smooth_values = [0, 0.1, 0.2, 0.3, 0.4, 0.49]
 smooth_selects = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.95, 0.98]
@@ -63,12 +61,12 @@ trainings_manager = None
 training_run_name = None
 models_directory = ''
 collection_name = ''
-skip_existing = args.skip
+args = None
+skip_existing = False
 model_predictions = dict()


-def randomize_collection(target_values: Dict[str, float]):
+def randomize_collection(collection, target_values: Dict[str, float]):
     randomized_collection: List[Dataset] = []
     for dataset in collection:
         dataset = copy.deepcopy(dataset)
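Only the head of randomize_collection is visible in this hunk; the change threads the formerly global collection through as a parameter. A plausible completion, mirroring the index-selection logic of smooth_random_noise below but flipping to hard labels (an assumption, not the repo's actual body):

    # Hypothetical body; np, copy, Dict, List, Dataset as imported in this module.
    def randomize_collection(collection, target_values: Dict[str, float]):
        randomized_collection: List[Dataset] = []
        for dataset in collection:
            dataset = copy.deepcopy(dataset)
            target = {'rand_noise': 0.0, 'rand_hw': 0.0}
            target.update(target_values)
            noise_indices = np.where(dataset.y_win[:, 1] <= 0.5)[0]
            hw_indices = np.where(dataset.y_win[:, 1] > 0.5)[0]
            flip_noise = np.random.choice(noise_indices, size=int(target['rand_noise'] * noise_indices.shape[0]), replace=False)
            flip_hw = np.random.choice(hw_indices, size=int(target['rand_hw'] * hw_indices.shape[0]), replace=False)
            dataset.y_win[flip_noise] = (0.0, 1.0)  # noise windows relabeled as hand-wash
            dataset.y_win[flip_hw] = (1.0, 0.0)     # hand-wash windows relabeled as noise
            randomized_collection.append(dataset)
        return randomized_collection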
@@ -125,16 +123,20 @@ def smooth_selected_data(input_collection, target_collection, smooth_selected):
     return smoothen_data


-def smooth_random_noise(input_collection, smooth_value, random_value) -> List[Dataset]:
-    smoothen_data = []
+def smooth_random_noise(input_collection, smooth_value, noise_values) -> List[Dataset]:
+    smoothen_data: List[Dataset] = []
     for i, dataset in enumerate(input_collection):
         dataset = copy.deepcopy(dataset)
-        noise = dataset.y_win[:, 1] <= 0.5
-        noise_indices = np.where(noise)[0]
-        selection = np.random.choice(noise_indices.shape[0], int(noise_indices.shape[0] * random_value), replace=False)
-        noise_selection = np.zeros(noise.shape[0], dtype=bool)
-        noise_selection[noise_indices[selection]] = True
-        dataset.y_win[noise_selection] = (smooth_value, 1 - smooth_value)
+        target = {'rand_noise': 0.0, 'rand_hw': 0.0}
+        target.update(noise_values)
+        noise_indices = np.where(dataset.y_win[:, 1] <= 0.5)[0]
+        hw_indices = np.where(dataset.y_win[:, 1] > 0.5)[0]
+        comb_noise = np.random.choice(noise_indices, size=int(target['rand_noise'] * noise_indices.shape[0]), replace=False)
+        comb_hw = np.random.choice(hw_indices, size=int(target['rand_hw'] * hw_indices.shape[0]), replace=False)
+        dataset.y_win[comb_noise] = (1 - smooth_value, smooth_value)
+        dataset.y_win[comb_hw] = (smooth_value, 1 - smooth_value)
         smoothen_data.append(dataset)
     return smoothen_data
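The rewritten smooth_random_noise takes a dict of per-class fractions instead of a single random_value, and softens the labels of a random subset of each class toward the opposite class. A toy run (sketch; any object with a y_win array of (noise, hand-wash) label pairs stands in for Dataset):

    import numpy as np
    from types import SimpleNamespace

    # Four noise windows (column 1 <= 0.5) and four hand-wash windows.
    ds = SimpleNamespace(y_win=np.array([[1.0, 0.0]] * 4 + [[0.0, 1.0]] * 4))

    # Smooth half of the hand-wash windows; noise windows stay untouched.
    out = smooth_random_noise([ds], smooth_value=0.2, noise_values={'rand_hw': 0.5})
    # Two of the four hand-wash rows now read (0.2, 0.8) instead of (0.0, 1.0);
    # with smooth_value near 0.49 they would become almost uninformative.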
@@ -216,7 +218,7 @@ def train_on_values(randomized_collection, smooth_collections, noise_values):

 def start_training():
     if 'rand_hw' in training_run_name:
-        randomized_collection = randomize_collection({'rand_hw': 0.3})
+        randomized_collection = randomize_collection(collection, {'rand_hw': 0.3})
         smooth_collections = dict()
         for smooth_val in smooth_values:
             smooth_collections[smooth_val] = smooth_data(randomized_collection, collection, smooth_val)
@@ -231,23 +233,40 @@ def start_training():
     elif 'false_smooth' in training_run_name:
         print('Train random noise smoothing')
         smooth_collections = dict()
-        for random_val in false_smooth_randoms:
-            smoothen_data = smooth_random_noise(collection, 0.49, random_val)
-            smooth_collections[random_val] = smoothen_data
+        for rand_value in randomize_values:
+            noise_values = {'rand_hw': rand_value, 'rand_noise': 0}
+            for smooth_val in smooth_values:
+                smooth_collections[smooth_val] = smooth_random_noise(collection, smooth_val, noise_values)
+            train_on_values(collection, smooth_collections, noise_values)
-        train_on_random_smoothed(smooth_collections)
+            noise_values = {'rand_hw': 0, 'rand_noise': rand_value}
+            for smooth_val in smooth_values:
+                smooth_collections[smooth_val] = smooth_random_noise(collection, smooth_val, noise_values)
+            train_on_values(collection, smooth_collections, noise_values)
+    elif 'both' in training_run_name:
+        for rand_value in randomize_values:
+            noise_values = {'rand_hw': 0, 'rand_noise': rand_value}
+            randomized_collection = randomize_collection(collection, noise_values)
+            smooth_collections = dict()
+            for smooth_val in smooth_values:
+                smooth_collections_step1 = smooth_data(randomized_collection, collection, smooth_val)
+                smooth_collections[smooth_val] = smooth_random_noise(smooth_collections_step1, smooth_val, noise_values)
+            train_on_values(randomized_collection, smooth_collections, noise_values)
     else:
         for rand_value in randomize_values:
             noise_values = {'rand_hw': rand_value, 'rand_noise': 0}
-            randomized_collection = randomize_collection(noise_values)
+            randomized_collection = randomize_collection(collection, noise_values)
             smooth_collections = dict()
             for smooth_val in smooth_values:
                 smooth_collections[smooth_val] = smooth_data(randomized_collection, collection, smooth_val)
             train_on_values(randomized_collection, smooth_collections, noise_values)
             noise_values = {'rand_hw': 0, 'rand_noise': rand_value}
-            randomized_collection = randomize_collection(noise_values)
+            randomized_collection = randomize_collection(collection, noise_values)
             smooth_collections = dict()
             for smooth_val in smooth_values:
                 smooth_collections[smooth_val] = smooth_data(randomized_collection, collection, smooth_val)
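The new 'both' branch and the default branch now pass collection explicitly to randomize_collection. One sweep step of the 'both' branch composes the corruptions like this (sketch with hypothetical fractions; function signatures as in this file):

    noise_values = {'rand_hw': 0, 'rand_noise': 0.2}
    randomized = randomize_collection(collection, noise_values)  # randomize 20% of noise windows
    step1 = smooth_data(randomized, collection, 0.3)             # smooth toward the clean targets
    combined = smooth_random_noise(step1, 0.3, noise_values)     # additionally smooth a random 20%
    train_on_values(randomized, {0.3: combined}, noise_values)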
@@ -256,7 +275,8 @@ def start_training():

 if __name__ == '__main__':
+    args = arg_parser.parse_args()
     skip_existing = args.skip
     training_run_name = args.RunName
     personalizer.initialize(args.BaseModel)
     dataset_manager = DatasetManager(args.Dataset)
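Since argument parsing now happens inside the main guard, importing the module no longer touches sys.argv. A hypothetical invocation (the script's file name and the position of the RunName argument are assumptions; BaseModel, Dataset and --skip are defined above):

    python train_smoothing.py models/base.h5 data/collection rand_hw_run --skip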
(The diff of one additional file in this commit is too large to display.)