Commit 30878c4f authored by Alexander Henkel

cluster stuff

parent ca30e076
@@ -157,7 +157,7 @@ def start_training():
     print('Finished predictions in ', time.time()-start, 'seconds')
     # HyperParameter: Epochs, UseWeights, QuerySize, SubsetSize, includeHighConfidencePart
-    for epochs in range(100, 801, 100):
+    for epochs in range(50, 151, 50):
         for use_weights in (False, True):
             for query_size in range(20, 101, 10):
                 for subset_size in range(5, 21, 5):
...
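The narrowed sweep above is a plain Cartesian product over four hyperparameters. A minimal sketch of the same grid, assuming the loops nest in the order given in the comment (the includeHighConfidencePart dimension is not visible in this hunk):

```python
from itertools import product

# Equivalent sweep over the new grid: 3 epoch values x 2 weight modes
# x 9 query sizes x 4 subset sizes = 216 configurations.
for epochs, use_weights, query_size, subset_size in product(
        range(50, 151, 50),    # epochs: 50, 100, 150
        (False, True),         # use_weights
        range(20, 101, 10),    # query_size: 20, 30, ..., 100
        range(5, 21, 5)):      # subset_size: 5, 10, 15, 20
    pass  # train and evaluate one configuration here
```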
@@ -24,7 +24,6 @@ arg_parser.add_argument('BaseModel',
                         type=str,
                         help='path of base model')
 arg_parser.add_argument('Dataset',
                         metavar='dataset',
                         type=str,
@@ -50,7 +49,6 @@ arg_parser.add_argument('-s', '--skip',
                         action='store_true',
                         help='set to skip already existing models')
-args = arg_parser.parse_args()
 smooth_values = [0, 0.1, 0.2, 0.3, 0.4, 0.49]
 smooth_selects = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.95, 0.98]
@@ -63,12 +61,12 @@ trainings_manager = None
 training_run_name = None
 models_directory = ''
 collection_name = ''
-skip_existing = args.skip
+args = None
+skip_existing = False
 model_predictions = dict()

-def randomize_collection(target_values: Dict[str, float]):
+def randomize_collection(collection, target_values: Dict[str, float]):
     randomized_collection: List[Dataset] = []
     for dataset in collection:
         dataset = copy.deepcopy(dataset)
@@ -125,16 +123,20 @@ def smooth_selected_data(input_collection, target_collection, smooth_selected):
     return smoothen_data

-def smooth_random_noise(input_collection, smooth_value, random_value) -> List[Dataset]:
-    smoothen_data = []
+def smooth_random_noise(input_collection, smooth_value, noise_values) -> List[Dataset]:
+    smoothen_data: List[Dataset] = []
     for i, dataset in enumerate(input_collection):
         dataset = copy.deepcopy(dataset)
-        noise = dataset.y_win[:, 1] <= 0.5
-        noise_indices = np.where(noise)[0]
-        selection = np.random.choice(noise_indices.shape[0], int(noise_indices.shape[0] * random_value), replace=False)
-        noise_selection = np.zeros(noise.shape[0], dtype=bool)
-        noise_selection[noise_indices[selection]] = True
-        dataset.y_win[noise_selection] = (smooth_value, 1 - smooth_value)
+        target = {'rand_noise': 0.0, 'rand_hw': 0.0}
+        target.update(noise_values)
+        noise_indices = np.where(dataset.y_win[:, 1] <= 0.5)[0]
+        hw_indices = np.where(dataset.y_win[:, 1] > 0.5)[0]
+        comb_noise = np.random.choice(noise_indices, size=int(target['rand_noise'] * noise_indices.shape[0]), replace=False)
+        comb_hw = np.random.choice(hw_indices, size=int(target['rand_hw'] * hw_indices.shape[0]), replace=False)
+        dataset.y_win[comb_noise] = (1 - smooth_value, smooth_value)
+        dataset.y_win[comb_hw] = (smooth_value, 1 - smooth_value)
         smoothen_data.append(dataset)
     return smoothen_data
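The rewritten smooth_random_noise draws separate random subsets on each side of the 0.5 decision boundary and assigns each subset a smoothed two-class target, with the subset sizes controlled by the rand_noise/rand_hw fractions. A minimal, self-contained sketch of that selection logic; the (N, 2) y_win layout with the hand-washing score in column 1 is inferred from this diff:

```python
import numpy as np

def smooth_random_subset(y_win: np.ndarray, smooth_value: float,
                         rand_noise: float = 0.0, rand_hw: float = 0.0) -> np.ndarray:
    y = y_win.copy()
    noise_indices = np.where(y[:, 1] <= 0.5)[0]  # windows on the noise side
    hw_indices = np.where(y[:, 1] > 0.5)[0]      # windows on the hand-washing side
    comb_noise = np.random.choice(noise_indices,
                                  size=int(rand_noise * noise_indices.shape[0]),
                                  replace=False)
    comb_hw = np.random.choice(hw_indices,
                               size=int(rand_hw * hw_indices.shape[0]),
                               replace=False)
    y[comb_noise] = (1 - smooth_value, smooth_value)  # smoothed target for picked noise windows
    y[comb_hw] = (smooth_value, 1 - smooth_value)     # smoothed target for picked hand-washing windows
    return y
```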
@@ -216,7 +218,7 @@ def train_on_values(randomized_collection, smooth_collections, noise_values):

 def start_training():
     if 'rand_hw' in training_run_name:
-        randomized_collection = randomize_collection({'rand_hw': 0.3})
+        randomized_collection = randomize_collection(collection, {'rand_hw': 0.3})
         smooth_collections = dict()
         for smooth_val in smooth_values:
             smooth_collections[smooth_val] = smooth_data(randomized_collection, collection, smooth_val)
@@ -231,23 +233,40 @@ def start_training():
     elif 'false_smooth' in training_run_name:
         print('Train random noise smoothing')
         smooth_collections = dict()
-        for random_val in false_smooth_randoms:
-            smoothen_data = smooth_random_noise(collection, 0.49, random_val)
-            smooth_collections[random_val] = smoothen_data
-        train_on_random_smoothed(smooth_collections)
+        for rand_value in randomize_values:
+            noise_values = {'rand_hw': rand_value, 'rand_noise': 0}
+            for smooth_val in smooth_values:
+                smooth_collections[smooth_val] = smooth_random_noise(collection, smooth_val, noise_values)
+            train_on_values(collection, smooth_collections, noise_values)
+            noise_values = {'rand_hw': 0, 'rand_noise': rand_value}
+            for smooth_val in smooth_values:
+                smooth_collections[smooth_val] = smooth_random_noise(collection, smooth_val, noise_values)
+            train_on_values(collection, smooth_collections, noise_values)
+    elif 'both' in training_run_name:
+        for rand_value in randomize_values:
+            noise_values = {'rand_hw': 0, 'rand_noise': rand_value}
+            randomized_collection = randomize_collection(collection, noise_values)
+            smooth_collections = dict()
+            for smooth_val in smooth_values:
+                smooth_collections_step1 = smooth_data(randomized_collection, collection, smooth_val)
+                smooth_collections[smooth_val] = smooth_random_noise(smooth_collections_step1, smooth_val, noise_values)
+            train_on_values(randomized_collection, smooth_collections, noise_values)
     else:
         for rand_value in randomize_values:
             noise_values = {'rand_hw': rand_value, 'rand_noise': 0}
-            randomized_collection = randomize_collection(noise_values)
+            randomized_collection = randomize_collection(collection, noise_values)
             smooth_collections = dict()
             for smooth_val in smooth_values:
                 smooth_collections[smooth_val] = smooth_data(randomized_collection, collection, smooth_val)
             train_on_values(randomized_collection, smooth_collections, noise_values)
             noise_values = {'rand_hw': 0, 'rand_noise': rand_value}
-            randomized_collection = randomize_collection(noise_values)
+            randomized_collection = randomize_collection(collection, noise_values)
             smooth_collections = dict()
             for smooth_val in smooth_values:
                 smooth_collections[smooth_val] = smooth_data(randomized_collection, collection, smooth_val)
@@ -256,7 +275,8 @@ def start_training():

 if __name__ == '__main__':
+    args = arg_parser.parse_args()
+    skip_existing = args.skip
     training_run_name = args.RunName
     personalizer.initialize(args.BaseModel)
     dataset_manager = DatasetManager(args.Dataset)
...
[Two collapsed file diffs not shown.]
@@ -81,7 +81,6 @@ class EvaluationManager:
         training_db = self.get_config_entry('training_db', collection_config)
         base_model_prefix = self.get_config_entry('model_prefix', collection_config, './')
         models_directory = self.get_config_entry('models_directory', collection_config, './')
         if dataset_db is None or training_db is None:
             raise KeyError
@@ -96,6 +95,7 @@ class EvaluationManager:
         self.test_collections[collection_config['name']] = test_collection
         self.train_collections[collection_config['name']] = train_collection
         self.model_evaluation.add_collection(test_collection)
         self.model_evaluation.add_collection(train_collection)
         self.model_evaluation.add_model(os.path.join(base_model_prefix, collection_config['base_model']))
@@ -191,5 +191,6 @@ class EvaluationManager:
         print('Training sets:', train_collection)
         print('Test sets:', test_collection)
-        print('Runs:', training_manager.get_all_training_runs().keys())
+        for training_manager in self.training_managers.values():
+            print('Runs:', training_manager.get_all_training_runs().keys())
@@ -62,7 +62,7 @@ def get_triggers_on_running_mean(dataset, r_mean, kernel_threshold=0.59):
     prev_pos_trigger_spot = 0
     for trigger_spot in trigger_spots[:]:
         for region in dataset.feedback_areas.labeled_regions_hw:
-            if region[0] <= trigger_spot <= region[1] + 14:
+            if region[0] <= trigger_spot <= region[1] + 18:
                 if trigger_spot - prev_pos_trigger_spot < 18:
                     trigger_spots.remove(trigger_spot)
                 prev_pos_trigger_spot = trigger_spot
...
@@ -17,16 +17,17 @@ from personalization_tools.dataset import Dataset, RecordedDataset

 class SensorRecorderDataReader:

-    def __init__(self, data_set_path, window_length=150, window_shift=75):
+    def __init__(self, data_set_path, window_length=150, window_shift=75, use_numpy_caching=False):
         self.data_set_path = data_set_path
         self.window_length = window_length
         self.window_shift = window_shift
+        self.use_numpy_caching = use_numpy_caching

     def get_data_set(self, data_set_name, participant_name='user', transform_hand=False):
         processor = DataProcessor(os.path.join(self.data_set_path, data_set_name), init_all=False)
-        processor.read_entry(RecordingEntry.ACCELERATION)
-        processor.read_entry(RecordingEntry.GYROSCOPE)
+        processor.read_entry(RecordingEntry.ACCELERATION, use_numpy_caching=True)
+        processor.read_entry(RecordingEntry.GYROSCOPE, use_numpy_caching=True)
         processor.read_entry(RecordingEntry.MARKERS)
         processor.read_entry(RecordingEntry.MANUALWHTS)
         processor.read_entry(RecordingEntry.EVALUATIONS)
...
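DataProcessor's implementation is not part of this commit, so the exact effect of use_numpy_caching is not shown here. A hedged sketch of what such a flag typically does for the large acceleration/gyroscope entries: cache the parsed array as a .npy file next to the source so later runs skip text parsing.

```python
import os
import numpy as np

def load_with_cache(csv_path: str, use_numpy_caching: bool = False) -> np.ndarray:
    # Hypothetical helper illustrating the caching idea, not the repo's API.
    cache_path = csv_path + '.npy'
    if use_numpy_caching and os.path.exists(cache_path):
        return np.load(cache_path)              # fast path: reuse cached array
    data = np.loadtxt(csv_path, delimiter=',')  # slow path: parse the text file
    if use_numpy_caching:
        np.save(cache_path, data)               # save cache for the next run
    return data
```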
@@ -73,7 +73,7 @@ def train(model, X_train, y_train, repetitions=10, batch_size=256, es=False,
             if base_regularization_parameters is not None:
                 regularization_term = 0
                 for i, (name, param) in enumerate(model.named_parameters()):
-                    regularization_term += 0.001 / 2 * torch.linalg.norm(param - base_regularization_parameters[i]) ** 2
+                    regularization_term += 0.002 / 2 * torch.linalg.norm(param - base_regularization_parameters[i]) ** 2
                 # print(loss, regularization_term)
                 loss += regularization_term
             loss.backward()
...
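The changed line doubles (0.001 to 0.002) the weight of a quadratic penalty that anchors each parameter to the corresponding base-model parameter, discouraging the personalized model from drifting far from its starting point during fine-tuning. A standalone sketch of the term; the function and argument names here are illustrative, not the repo's API:

```python
import torch

def regularized_loss(loss: torch.Tensor, model: torch.nn.Module,
                     base_params: list, weight: float = 0.002) -> torch.Tensor:
    reg = 0.0
    for i, (name, param) in enumerate(model.named_parameters()):
        # Quadratic penalty on the distance to the frozen base-model weights.
        reg = reg + weight / 2 * torch.linalg.norm(param - base_params[i]) ** 2
    return loss + reg
```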
@@ -116,12 +116,14 @@ def run_training(train_collection: List[RecordedDataset], test_collection: List[RecordedDataset],
                                                      prediction_buffer=prediction_buffer, do_print=False)
     mean_kernel_width = best_model_settings[0][1][0]
     mean_threshold = best_model_settings[0][1][1]
+    false_diff_relative = best_model_settings[0][1][2]
+    correct_diff_relative = best_model_settings[0][1][3]
     sensitivity, precision, f1 = calc_quality_comparison(test_collection, based_model, mean_kernel_width,
                                                          mean_threshold, prediction_buffer)
-    information = {'based_model_index': based_index, 'mean_kernel_width': mean_kernel_width,
-                   'mean_threshold': mean_threshold, 'false_diff_relative': 0,
-                   'correct_diff_relative': 0, 'sensitivity': sensitivity,
+    information = {'my_index': based_index, 'based_model_index': based_index, 'mean_kernel_width': mean_kernel_width,
+                   'mean_threshold': mean_threshold, 'false_diff_relative': false_diff_relative,
+                   'correct_diff_relative': correct_diff_relative, 'sensitivity': sensitivity,
                    'precision': precision, 'f1': f1}
     model_name = f'{personalization["name"]}_base.pt'
     trainings_manager.add_model_information(model_name, information)
@@ -150,7 +152,8 @@ def run_training(train_collection: List[RecordedDataset], test_collection: List[RecordedDataset],
             trainings_manager.add_model_information(model_name, information)
             trainings_manager.db_update()

-            if f1 > best_f1:
+            if f1 > best_f1 or not personalization.get('base_on_best', False):
+                best_f1 = f1
                 based_model = model_path
                 based_index = i
@@ -159,7 +162,15 @@ def start_personalization():
     for personalization in config['collection_configs']['personalizations'].values():
         print(personalization['name'])
         if args.skip and not personalization.get('enforce', False) and personalization['name'] in trainings_manager.database['training_runs']:
-            continue
+            new_recording = False
+            covered_train_sets = trainings_manager.database['training_runs'][personalization['name']]
+            for train_set in personalization['train_sets']:
+                if train_set not in covered_train_sets:
+                    new_recording = True
+                    break
+            if not new_recording:
+                print('Nothing new')
+                continue
         test_collection = load_collection(personalization['test_sets'])
         train_collection = load_collection(personalization['train_sets'])
         train_collection = clean_collection(train_collection)
...
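The new skip logic only bypasses a personalization when every configured train set is already covered by the stored training run; one uncovered set is enough to retrain. The check reduces to a membership test, as in this sketch (names mirror the diff; the database layout is assumed from it):

```python
def has_new_recording(train_sets, covered_train_sets) -> bool:
    # True if at least one configured train set has not been trained on yet.
    return any(train_set not in covered_train_sets for train_set in train_sets)

# Usage mirroring the diff:
# covered = trainings_manager.database['training_runs'][personalization['name']]
# if args.skip and not has_new_recording(personalization['train_sets'], covered):
#     print('Nothing new')
#     continue
```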