# -*- coding: utf-8 -*-
import os
import stat
import time
import shutil
from subprocess import call, Popen
from datetime import datetime
import magic
import multiprocessing
from django.conf import settings
from django.utils.translation import ugettext_lazy as _
from gsi.settings import EXECUTE_FE_COMMAND, PROCESS_NUM, STATIC_DIR, FE_SUBMIT, EXEC_RUNS
from core.multithreaded import MultiprocessingCards
[docs]class UnicodeNameMixin(object):
"""**Class inheritance for other classes of models.**
:Functions:
When inheriting displays the model name
"""
def __unicode__(self):
return _(u"%s") % self.name
[docs]def validate_status(status):
"""**The method makes the validation status for the API.**
:Functions:
Checks whether there is obtaining the status of the list of possible statuses.
:Arguments:
* *status*: Current status
"""
from gsi.models import STATES
states = [st[0] for st in STATES]
if not status or status not in states:
return {
'status': False,
'message': 'Invalid or missing "status" GET parameter.'
}
return {'status': status}
[docs]def get_copy_name(name):
"""**The method returns the principal name.**
:Functions:
It is used when creating a copy card in the object CardSequence.
:Arguments:
* *name*: Card name
"""
if '*cp' in name:
return name.split('*cp')[0]
else:
return name
[docs]def execute_fe_command(params, flag='cards'):
"""**The method returns the principal name.**
:Functions:
It is used when creating a copy card in the object CardSequence.
:Arguments:
* *params*: Current options for the execute
* *flag*: Specifies to which the entity of the process is performed is started
"""
queue = multiprocessing.JoinableQueue() # create queue of the tasks
num_process = len(params) / 2
processes = num_process
for param in params:
queue.put(param)
for i in xrange(processes):
t = MultiprocessingCards(queue, flag) # Create process
t.start()
time.sleep(0.1)
queue.join() # suspend the execution of code until the queue is emptied
[docs]def slash_remove_from_path(path):
"""**The method removes superfluous slashes in path.**
:Functions:
Checks whether there is in the transmission path of the extra slashes. If found, it replaces them with the standard one.
:Arguments:
* *path*: Path (string)
"""
result = path
if '//' in path:
result = path.replace('//', '/')
elif '///' in path:
result = path.replace('///', '/')
return result
[docs]def create_symlink(src, dest, path):
"""**The method creates a symlink to transmit directory.**
:Functions:
Check whether there is already such a symlink. If not, create.
:Arguments:
* *src*: Folder which do symlink
* *dest*: The folder from which make a symlink
* *path*: The path for check the existence of symbolic link
"""
if not os.path.exists(path):
symlink = call("ln -s {0} {1}".format(dest, src), shell=True)
else:
pass
[docs]def get_dir_root_static_path():
"""**Method for get the symbolic link.**"""
from gsi.models import HomeVariables
home_var = HomeVariables.objects.all()
user_dir_root = home_var[0].USER_DATA_DIR_ROOT
static_dir_root = user_dir_root.split('/')[-1]
if not static_dir_root:
static_dir_root = user_dir_root.split('/')[-2:-1]
static_dir_root_path = STATIC_DIR + '/' + static_dir_root
static_dir_root_path = slash_remove_from_path(static_dir_root_path)
return {
'static_dir_root': static_dir_root,
'static_dir_root_path': static_dir_root_path,
}
[docs]def convert_size_file(size):
"""**Method convert a numeric value of the file size in the text designations: B, KB, MB.**"""
kb = 1024.0
mb = 1024.0 * 1024.0
if size < kb:
size_file = "%.2f B" % (size)
if size > mb:
size = size / mb
size_file = "%.2f MB" % (size)
if size > kb:
size = float(size) / kb
size_file = "%.2f KB" % (size)
return size_file
[docs]def get_type_file(mime_type):
"""**The method determines the mime-type of file: image, text, pdf, msword, doc, archive.**"""
if mime_type[0] == 'image':
type_file = mime_type[0]
elif mime_type[0] == 'text':
type_file = mime_type[0]
elif mime_type[0] == 'application':
if mime_type[1] == 'pdf':
type_file = mime_type[1]
elif mime_type[1] == 'msword':
type_file = 'doc'
elif mime_type[1] == 'octet-stream':
type_file = 'bin'
else:
type_file = 'archive'
return type_file
[docs]def get_files_dirs(url_path, full_path):
"""**The method to get a list of all files and directories from a given path.**"""
dict_dirs = {}
all_dirs = {}
dict_files = {}
all_files = {}
info_message = False
try:
root, dirs, files = os.walk(full_path).next()
for d in dirs:
date_modification = datetime.fromtimestamp(os.path.getmtime(full_path))
format_date_modification = datetime.strftime(date_modification, "%Y/%m/%d %H:%M:%S")
dict_dirs['name'] = d
dict_dirs['date'] = format_date_modification
all_dirs[d] = dict_dirs
dict_dirs = {}
for f in files:
file_path = os.path.join(url_path, f)
full_file_path = os.path.join(full_path, f)
size_file = convert_size_file(os.path.getsize(full_file_path))
date_modification = datetime.fromtimestamp(os.path.getmtime(full_file_path))
format_date_modification = datetime.strftime(date_modification, "%Y/%m/%d %H:%M:%S")
mime_type = magic.from_file(full_file_path, mime=True)
type_file = get_type_file(mime_type.split('/'))
dict_files['name'] = f
dict_files['path'] = file_path
dict_files['size'] = size_file
dict_files['date'] = format_date_modification
dict_files['type'] = type_file
all_files[f] = dict_files
dict_files = {}
except StopIteration, e:
info_message = True
except OSError, e:
info_message = True
return all_dirs, all_files, info_message
[docs]def create_sub_dir(path):
"""**Method to create directorys: Results, Scores, Trees.**"""
from gsi.models import HomeVariables
error_message = ''
try:
home_variables = HomeVariables.objects.all()
path_root = home_variables[0].USER_DATA_DIR_ROOT
sub_directories = ['Results', 'Scores', 'Trees']
path_sub_directories = os.path.join(str(path_root), str(path))
try:
for d in sub_directories:
full_path_sub_directories = os.path.join(str(path_sub_directories), str(d))
os.makedirs(full_path_sub_directories)
except OSError, e:
error_message = 'Permission denied: "{0}""'.format(path)
except Exception, e:
error_message = 'Error creating a sub-directorys. Please check "Top Level for user data dir" in the Home Variables.'
return error_message
[docs]def create_new_folder(dir):
"""The method to create a new directory for the model Input Data Directory"""
from gsi.models import HomeVariables as Home
try:
home_var = Home.objects.all()
if home_var[0].RF_AUXDATA_DIR:
full_path = os.path.join(home_var[0].RF_AUXDATA_DIR, dir)
else:
full_path = '/' + dir
os.makedirs(full_path)
except OSError, e:
print '*** FOLDER EXIST ***: ', e
return full_path
[docs]def get_files(path, file_extension):
"""**The method to get a list of files from a specified directory and file extension.**
Returns a list of files and errors in the preparation of a list of files
:Arguments:
* *path*: The path to the file
* *file_extension*: The file extension
"""
list_files = []
error = None
try:
root, dirs, files = os.walk(path).next()
list_files = filter(lambda x: x.endswith(file_extension), files)
except StopIteration, e:
error = e
except OSError, e:
error = e
return list_files, error
[docs]def update_root_list_files():
"""**The method updates the list of files to ListTestFiles model.**"""
from gsi.models import HomeVariables as Home
from gsi.models import ListTestFiles
home_var = Home.objects.all()
root_path = home_var[0].RF_AUXDATA_DIR
try:
files, errors = get_files(root_path, '.tif')
tif_files = filter(lambda x: x.endswith('.tif'), files)
files_exclude = ListTestFiles.objects.filter(input_data_directory=None).exclude(name__in=tif_files).delete()
files_include = ListTestFiles.objects.filter(input_data_directory=None).values_list('name')
for f in tif_files:
file_path = os.path.join(root_path, f)
if (f,) not in files_include:
obj = ListTestFiles.objects.create(name=f, input_data_directory=None)
obj.size = convert_size_file(os.path.getsize(file_path))
obj.date_modified = datetime.fromtimestamp(os.path.getmtime(file_path))
obj.save()
except StopIteration, e:
pass
except OSError, e:
pass
[docs]def update_list_files(obj_dir):
"""**The method updates the list of files from a specified directory to the model ListTestFiles.**"""
from gsi.models import HomeVariables as Home
from gsi.models import ListTestFiles
update_list_dirs()
home_var = Home.objects.all()
root_path = home_var[0].RF_AUXDATA_DIR
if obj_dir is not None:
full_dir_path = os.path.join(obj_dir.full_path)
else:
full_dir_path = os.path.join(root_path)
try:
root, dirs, files = os.walk(full_dir_path).next()
tif_files = filter(lambda x: x.endswith('.tif'), files)
files_exclude = ListTestFiles.objects.filter(input_data_directory=obj_dir).exclude(name__in=tif_files).delete()
files_include = ListTestFiles.objects.filter(input_data_directory=obj_dir).values_list('name')
for f in tif_files:
full_file_path = os.path.join(full_dir_path, f)
if (f,) not in files_include:
obj = ListTestFiles.objects.create(name=f, input_data_directory=obj_dir)
file_path = os.path.join(full_dir_path, f)
obj.size = convert_size_file(os.path.getsize(file_path))
obj.date_modified = datetime.fromtimestamp(os.path.getmtime(file_path))
obj.save()
except StopIteration, e:
pass
except OSError, e:
pass
[docs]def update_list_dirs():
"""The method updates the list of files and directory for the ListTestFiles and the InputDataDirectory models."""
from gsi.models import InputDataDirectory, ListTestFiles
from gsi.models import HomeVariables as Home
home_var = Home.objects.all()
root_path = home_var[0].RF_AUXDATA_DIR
all_dirs = InputDataDirectory.objects.all()
try:
for dir in all_dirs:
dir_path = os.path.join(root_path, dir.name)
if not os.path.exists(dir_path):
ListTestFiles.objects.filter(input_data_directory=dir).delete()
InputDataDirectory.objects.filter(name=dir.name).delete()
except OSError, e:
pass
[docs]def get_path_folder_run(run):
"""**The method receives to the logs folder a run.**"""
from gsi.models import HomeVariables as Home
home_var = Home.objects.all()
# home dir scripts
GSI_HOME = settings.SCRIPTS_HOME
path_runs_logs = GSI_HOME + 'scripts/runs/R_{0}/LOGS'.format(run.id)
return path_runs_logs
[docs]def copy_file(src, dest, card_name):
"""**The method copies the file specification.**"""
message_error = None
err_mess = 'No specification file "PreprocSpec location"'
try:
shutil.copy2(src, dest)
except AttributeError, e:
message_error = 'For Card "{0}": {1}'.format(card_name, err_mess)
# eg. src and dest are the same file
except shutil.Error as e:
err_mess = str(e).split(']')[1]
err_mess = err_mess.replace('`', '"')
message_error = 'For Card "{0}": {1}'.format(card_name, err_mess)
# eg. source or destination doesn't exist
except IOError as e:
message_error = 'For Card "{0}": {1}'.format(card_name, err_mess)
return message_error
[docs]def create_open_master_script(path_runs, card_id, num):
"""**The method creates a script file and set it permissions 777.**"""
num_file = 1
execute_master_script = '{0}_master_{1}'.format(card_id, num)
master_script_name = 'card_{0}_master_{1}.sh'.format(card_id, num)
master_script_path = path_runs + master_script_name
master_script = open(master_script_path, 'w+')
os.chmod(master_script_path, 0777)
return master_script, execute_master_script
[docs]def make_run(run_base, user):
"""**The method starts work on card processing script.**"""
from gsi.models import Run, Log, RunStep, OrderedCardItem, SubCardItem
from gsi.models import HomeVariables as Home
now = datetime.now()
step = RunStep.objects.none()
scripts = []
first_script = {}
path_test_data = ''
message_error = None
run = Run.objects.create(run_base=run_base, user=user)
home_var = Home.objects.all()
resolution = run.run_base.resolution
directory_path = run.run_base.directory_path
all_card = OrderedCardItem.objects.filter(sequence__runbase=run.run_base).order_by('order')
try:
# <USER_DATA_DIR_ROOT>/<resolution>
path_test_data = home_var[0].USER_DATA_DIR_ROOT + '/' + str(resolution) + '/' + str(directory_path) + '/'
path_test_data = path_test_data.replace('//', '/')
try:
os.makedirs(path_test_data, 0777)
except OSError, e:
pass
except Exception, e:
pass
for card in all_card:
step = RunStep.objects.create(parent_run=run, card_item=card)
#TODO: make scripts for each step
sequence = step.parent_run.run_base.card_sequence
# create the scripts for the each cards
script = create_scripts(run, sequence, card, step)
if script['error']:
message_error = script['error']
# if variable script is empty than remove the Run object
if not script:
run.delete()
step.delete()
return False
script['step'] = step
scripts.append(script)
# if variable script is not empty than execute first element script variable
# the other scripts are run in the api
if scripts:
first_script = scripts[0]
params = []
try:
if first_script['card'].run_parallel:
for n in first_script['execute_master_scripts']:
ex_fe_com = Popen(
'nohup {0} {1} {2} &'.format(
EXECUTE_FE_COMMAND,
first_script['run'].id,
n
),
shell=True,
)
first_script['step'].state = 'running'
first_script['step'].save()
first_script['run'].state = 'running'
first_script['run'].save()
else:
ex_fe_com = Popen(
'nohup {0} {1} {2} &'.format(
EXECUTE_FE_COMMAND,
first_script['run'].id,
first_script['card'].id
),
shell=True,
)
first_script['step'].state = 'running'
first_script['step'].save()
first_script['run'].state = 'running'
first_script['run'].save()
except Exception, e:
pass
# record in the log model of gsi app path to script
log_name = '{0}_{1}.log'.format(run.id, first_script['card'].id)
path_log = first_script['path_runs_logs']
write_log(log_name, run, path_log)
return {'run': run, 'step': step, 'error': message_error}
[docs]def create_scripts(run, sequence, card, step):
"""**The method for create a scripts at startup RunBase object for the each cards.**"""
from gsi.models import HomeVariables as Home
card_model = None
message_error = None
execute_master_scripts = []
home_var = Home.objects.all()
export_home_var = ''
LOCAL_VAR_GROUPS = ''
# home dir scripts
GSI_HOME = settings.SCRIPTS_HOME
# <RESOLUTION_ENV_SCRIPT>
resolution = run.run_base.resolution
RESOLUTION_ENV_SCRIPT = GSI_HOME + 'bin/' + str(resolution) + '_config'
# <HOME_ENV_OVERRIDES>
for hv in home_var:
export_home_var += 'export SAT_TIF_DIR=' + hv.SAT_DIF_DIR_ROOT + '\n'
export_home_var += 'export RF_DIR=' + hv.RF_DIR_ROOT + '\n'
export_home_var += 'export USER_DATA_DIR=' + hv.USER_DATA_DIR_ROOT + '\n'
export_home_var += 'export MODIS_DIR=' + hv.MODIS_DIR_ROOT + '\n'
export_home_var += 'export RF_AUXDATA_DIR=' + hv.RF_AUXDATA_DIR + '\n'
export_home_var += 'export SAT_DIF_DIR=' + hv.SAT_DIF_DIR_ROOT
# <LOCAL_ENV_OVERRIDES>
try:
local_var_groups = (run.run_base.card_sequence.environment_base.environment_variables).replace('\r\n', '\n')
local_var_groups = local_var_groups.splitlines()
LOCAL_VAR_GROUPS = ''
for line in local_var_groups:
if line != '':
ln = line.replace('export ', '')
LOCAL_VAR_GROUPS += u'export {0}\n'.format(ln)
except Exception, e:
LOCAL_VAR_GROUPS = ''
# <ENVIROMENT OVERRIDE>
try:
env_override = (run.run_base.card_sequence.environment_override).replace('\r\n', '\n')
env_override = env_override.splitlines()
ENVIROMENT_OVERRIDE = ''
for line in env_override:
if line != '':
ln = line.replace('export ', '')
ENVIROMENT_OVERRIDE += u'export {0}\n'.format(ln)
except Exception, e:
ENVIROMENT_OVERRIDE = ''
# <EXECUTABLE>
try:
card_item = step.card_item.card_item
card_model = card_item.content_type.model
run_parallel, EXECUTABLE = get_executable(run, sequence, card, card_item)
except Exception, e:
EXECUTABLE = ''
run_parallel = False
# path to scripts for runs and steps
path_runs = GSI_HOME + 'scripts/runs/R_{0}/'.format(run.id)
path_runs_logs = GSI_HOME + 'scripts/runs/R_{0}/LOGS'.format(run.id)
# <USER_DATA_DIR_ROOT>/<resolution>
try:
os.makedirs(path_runs)
os.makedirs(path_runs_logs)
except OSError, e:
pass
finally:
try:
if card_model == 'preproc' or card_model == 'calcstats':
card_name = card.card_item.content_object
cur_card = get_card_model(card_model, card_name)
from_path_spec_location = cur_card.path_spec_location
to_path_spec_location = path_runs
message_error = copy_file(from_path_spec_location, to_path_spec_location, card_name)
if run_parallel:
params = []
num_file = 1
count = 0
master_script, execute_master = create_open_master_script(path_runs, card.id, num_file)
execute_master_scripts.append(execute_master)
for n in EXECUTABLE:
file_contents = ''
script_name = 'card_{0}.sh'.format(n)
script_path = path_runs + script_name
card_line = '{0} {1} {2}\n'.format(FE_SUBMIT, run.id, n)
execute_runs = count % EXEC_RUNS
if not execute_runs and count:
num_file += 1
master_script.close()
master_script, execute_master = create_open_master_script(path_runs, card.id, num_file)
execute_master_scripts.append(execute_master)
master_script.write(card_line)
file_contents += '# Sequence: {0}, card: {1} - Generated {2}\n\n'.\
format(sequence.name, card.card_item, step.start_date)
file_contents += 'umask 000\n\n'
file_contents += 'cd {0}\n\n'.format(path_runs)
file_contents += '. ' + RESOLUTION_ENV_SCRIPT + '\n\n'
file_contents += export_home_var + '\n\n'
file_contents += LOCAL_VAR_GROUPS + '\n\n'
file_contents += ENVIROMENT_OVERRIDE + '\n\n'
file_contents += EXECUTABLE[n]
fd = open(script_path, 'w+')
fd.write(file_contents)
fd.close()
os.chmod(script_path, 0777)
os.chmod(path_runs_logs, 0777)
count += 1
master_script.close()
else:
script_name = 'card_{0}.sh'.format(step.card_item.id)
script_path = path_runs + script_name
fd = open(script_path, 'w+')
fd.write('# Sequence: {0}, card: {1} - Generated {2} \n\n'.\
format(sequence.name, card.card_item, step.start_date))
fd.writelines('umask 000\n\n')
fd.writelines('cd {0}\n\n'.format(path_runs))
fd.writelines('. ' + RESOLUTION_ENV_SCRIPT + '\n\n')
fd.writelines(export_home_var + '\n\n')
fd.writelines(LOCAL_VAR_GROUPS + '\n\n')
fd.writelines(ENVIROMENT_OVERRIDE + '\n\n')
fd.writelines(EXECUTABLE)
os.chmod(script_path, 0777)
os.chmod(path_runs_logs, 0777)
fd.close()
except OSError, e:
pass
return False
return {
'script_path': script_path,
'path_runs_logs': path_runs_logs,
'script_name': script_name,
'run': run,
'card': card,
'error': message_error,
'execute_master_scripts': execute_master_scripts
}
[docs]def write_log(log_name, run, path_log):
"""**The method writes a Log model GSI app.**"""
from gsi.models import Log
log = Log.objects.create(name=log_name)
log.log_file_path = path_log
log.log_file = log_name
log.save()
run.log = log
run.save()
[docs]def get_years(name):
"""**The method geting all year from an object YearGroup.**"""
from gsi.models import YearGroup
year_group = YearGroup.objects.get(name=name)
return year_group.years.through.objects.filter(yeargroup=year_group)
[docs]def get_area_tiles(name):
"""**The method geting all tiles from an object Area.**"""
from gsi.models import Area
card_area = Area.objects.get(name=name)
return card_area.tiles.through.objects.filter(area=card_area)
[docs]def get_statistical_method(remap_obj):
"""**The method geting statistical method.**"""
stat_methods = []
if remap_obj.conditional_mean:
stat_methods.append('ConditionalMean')
if remap_obj.conditional_min:
stat_methods.append('ConditionalMin')
if remap_obj.conditional_median:
stat_methods.append('ConditionalMedian')
if remap_obj.conditional_max:
stat_methods.append('ConditionalMax')
if remap_obj.lower_quartile:
stat_methods.append('LowerQuartile')
if remap_obj.upper_quartile:
stat_methods.append('UpperQuartile')
return stat_methods
[docs]def get_card_model(card_model, card_name):
"""**The method geting type model for the card.**"""
from cards.models import (RFScore, RFTrain, QRF,
Remap, YearFilter, PreProc,
Collate, MergeCSV, RandomForest,
CalcStats)
card = None
if card_model == 'rfscore':
card = RFScore.objects.get(name=card_name)
if card_model == 'rftrain':
card = RFTrain.objects.get(name=card_name)
if card_model == 'qrf':
card = QRF.objects.get(name=card_name)
if card_model == 'remap':
card = Remap.objects.get(name=card_name)
if card_model == 'yearfilter':
card = YearFilter.objects.get(name=card_name)
if card_model == 'preproc':
card = PreProc.objects.get(name=card_name)
if card_model == 'collate':
card = Collate.objects.get(name=card_name)
if card_model == 'randomforest':
card = RandomForest.objects.get(name=card_name)
if card_model == 'calcstats':
card = CalcStats.objects.get(name=card_name)
return card
[docs]def is_run_parallel(card):
"""**The method checks the type of start-up cards: parallel or in series.**"""
run_parallel = False
try:
run_parallel = card.run_parallel
except Exception:
pass
return run_parallel
[docs]def create_sub_card_item(name, run_id, card_id):
"""**The method creates a new object model SubCardItem.**"""
from gsi.models import SubCardItem
try:
sub_card_item = SubCardItem.objects.create(
name=name,
run_id=run_id,
card_id=card_id
)
except Exception, e:
pass
[docs]def get_executable(run, sequence, card, card_item):
"""**The method gets a value for the variable EXECUTABLE for the each card for the create_scripts method.**"""
from cards.models import (RFScore, RFTrain, QRF, Remap, YearFilter, PreProc,
Collate, MergeCSV, RandomForest, CalcStats)
from gsi.models import Year, Tile, ListTestFiles
card_model = card_item.content_type.model
EXECUTABLE = ''
EXECUTABLE_DICT = {}
EXEC = ''
pid = 1
all_num = 1
run_parallel = False
if card_model == 'rfscore':
# u'RFscore <Tile> [[MyDir]] [<BiasCorrn>] [<QRFopts>] [<RefTarget>] [<CleanName>]'
data_card = RFScore.objects.get(name=card.card_item.content_object)
years = get_years(data_card.year_group.name)
area_tiles = get_area_tiles(data_card.area)
all_num = len(years) * len(area_tiles)
run_parallel = is_run_parallel(data_card)
for year in years:
year_card = Year.objects.get(id=year.year_id)
for tile in area_tiles:
tile_card = Tile.objects.get(id=tile.tile_id)
if run_parallel:
EXEC = '$RF_EXEC_DIR/RFscore {0} {1} {2} {3} {4} {5} -s {6}.{7}.{8}.{9}.{10}\n'.format(
tile_card,
run.run_base.directory_path,
data_card.bias_corrn,
year_card,
data_card.number_of_threads,
data_card.QRFopts,
run.id,
sequence.id,
card.id,
pid,
all_num
)
script_name = '{0}_{1}'.format(card.id, pid)
EXECUTABLE_DICT[script_name] = EXEC
create_sub_card_item(script_name, run.id, card.id)
pid += 1
else:
EXECUTABLE += '$RF_EXEC_DIR/RFscore {0} {1} {2} {3} {4} {5} -s {6}.{7}.{8}.{9}.{10}\n'.format(
tile_card,
run.run_base.directory_path,
data_card.bias_corrn,
year_card,
data_card.number_of_threads,
data_card.QRFopts,
run.id,
sequence.id,
card.id,
pid,
all_num
)
pid += 1
if card_model == 'rftrain':
# u'RFtrain <Tile> [<Ntrees>] [<training>] [<Nvar>] [<Nthread>]'
data_card = RFTrain.objects.get(name=card.card_item.content_object)
EXECUTABLE += '$RF_EXEC_DIR/RFtrain {0} {1} {2} {3} {4} -s {5}.{6}.{7}.{8}.{9}\n'.format(
data_card.value,
data_card.number_of_trees,
data_card.training,
data_card.number_of_variable,
data_card.number_of_thread,
run.id,
sequence.id,
card.id,
pid,
1)
pid += 1
if card_model == 'qrf':
# u'QRF [<QRFinterval>] [<ntrees>] [<nthreads>] [<MyDir>]'
data_card = QRF.objects.get(name=card.card_item.content_object)
EXECUTABLE += '$RF_EXEC_DIR/QRF {0} {1} {2} {3} -s {4}.{5}.{6}.{7}.{8}\n'.format(
data_card.interval,
data_card.number_of_trees,
data_card.number_of_threads,
data_card.directory,
run.id,
sequence.id,
card.id,
pid,
1,
)
pid += 1
if card_model == 'remap':
# Remap <FileSpec> <RoI> <OutRoot>[,<OutSuffix>] [<Scale>[,<Xsize>,<Ysize>]] [<Output>] [<ColourTable>] [<RefStatsFile>] [<RefStatsScale>]
years = None
data_card = Remap.objects.get(name=card.card_item.content_object)
stat_methods = get_statistical_method(data_card)
run_parallel = is_run_parallel(data_card)
model_name_suff = ''
file_spec = data_card.file_spec
output_root = data_card.output_root
refstats_scale = data_card.refstats_scale or ''
all_num = 1
if data_card.model_name and data_card.output_suffix:
model_name_suff = str(data_card.model_name) + data_card.output_suffix
if data_card.model_name and not data_card.output_suffix:
model_name_suff = str(data_card.model_name)
if not data_card.model_name and data_card.output_suffix:
model_name_suff = data_card.output_suffix
if data_card.year_group is not None:
years = get_years(data_card.year_group.name)
if years is not None:
years_num = len(years) or 1
methods_num = len(stat_methods) or 1
all_num = years_num * methods_num
for year in years:
year_card = Year.objects.get(id=year.year_id)
if stat_methods:
for m in stat_methods:
if model_name_suff:
method_file_spec = str(year_card) + '_' + m + '_' + model_name_suff
else:
method_file_spec = str(year_card) + '_' + m
method = str(year_card) + '_' + m
cur_file_spec = os.path.join(str(file_spec), method_file_spec)
cur_output_root = os.path.join(str(output_root), method)
if run_parallel:
EXEC = '$RF_EXEC_DIR/Remap {0} {1} {2} {3} {4} {5} {6} {7} -s {8}.{9}.{10}.{11}.{12}\n'.format(
cur_file_spec,
data_card.roi,
cur_output_root,
data_card.scale,
data_card.output,
data_card.color_table,
data_card.refstats_file,
refstats_scale,
run.id,
sequence.id,
card.id,
pid,
all_num,
)
script_name = '{0}_{1}'.format(card.id, pid)
EXECUTABLE_DICT[script_name] = EXEC
create_sub_card_item(script_name, run.id, card.id)
pid += 1
else:
EXECUTABLE += '$RF_EXEC_DIR/Remap {0} {1} {2} {3} {4} {5} {6} {7} -s {8}.{9}.{10}.{11}.{12}\n'.format(
cur_file_spec,
data_card.roi,
cur_output_root,
data_card.scale,
data_card.output,
data_card.color_table,
data_card.refstats_file,
refstats_scale,
run.id,
sequence.id,
card.id,
pid,
all_num,
)
pid += 1
else:
if model_name_suff:
full_path = str(year_card) + '_' + model_name_suff
cur_file_spec = os.path.join(str(file_spec), full_path)
else:
cur_file_spec = os.path.join(str(file_spec), str(year_card))
cur_output_root = os.path.join(str(output_root), str(year_card))
if run_parallel:
EXEC = '$RF_EXEC_DIR/Remap {0} {1} {2} {3} {4} {5} {6} {7} -s {8}.{9}.{10}.{11}.{12}\n'.format(
cur_file_spec,
data_card.roi,
cur_output_root,
data_card.scale,
data_card.output,
data_card.color_table,
data_card.refstats_file,
refstats_scale,
run.id,
sequence.id,
card.id,
pid,
all_num,
)
script_name = '{0}_{1}'.format(card.id, pid)
EXECUTABLE_DICT[script_name] = EXEC
create_sub_card_item(script_name, run.id, card.id)
pid += 1
else:
EXECUTABLE += '$RF_EXEC_DIR/Remap {0} {1} {2} {3} {4} {5} {6} {7} -s {8}.{9}.{10}.{11}.{12}\n'.format(
cur_file_spec,
data_card.roi,
cur_output_root,
data_card.scale,
data_card.output,
data_card.color_table,
data_card.refstats_file,
refstats_scale,
run.id,
sequence.id,
card.id,
pid,
all_num,
)
pid += 1
else:
if stat_methods:
all_num = len(stat_methods)
for m in stat_methods:
if model_name_suff:
full_path = m + '_' + model_name_suff
cur_file_spec = os.path.join(str(file_spec), full_path)
else:
cur_file_spec = os.path.join(str(file_spec), m)
cur_output_root = os.path.join(str(output_root), m)
if run_parallel:
EXEC = '$RF_EXEC_DIR/Remap {0} {1} {2} {3} {4} {5} {6} {7} -s {8}.{9}.{10}.{11}.{12}\n'.format(
cur_file_spec,
data_card.roi,
cur_output_root,
data_card.scale,
data_card.output,
data_card.color_table,
data_card.refstats_file,
refstats_scale,
run.id,
sequence.id,
card.id,
pid,
all_num,
)
script_name = '{0}_{1}'.format(card.id, pid)
EXECUTABLE_DICT[script_name] = EXEC
create_sub_card_item(script_name, run.id, card.id)
pid += 1
else:
EXECUTABLE += '$RF_EXEC_DIR/Remap {0} {1} {2} {3} {4} {5} {6} {7} -s {8}.{9}.{10}.{11}.{12}\n'.format(
cur_file_spec,
data_card.roi,
cur_output_root,
data_card.scale,
data_card.output,
data_card.color_table,
data_card.refstats_file,
refstats_scale,
run.id,
sequence.id,
card.id,
pid,
all_num,
)
pid += 1
else:
if model_name_suff:
cur_file_spec = os.path.join(str(file_spec), model_name_suff)
else:
cur_file_spec = str(file_spec)
if run_parallel:
EXEC = '$RF_EXEC_DIR/Remap {0} {1} {2} {3} {4} {5} {6} {7} -s {8}.{9}.{10}.{11}.{12}\n'.format(
cur_file_spec,
data_card.roi,
data_card.output_root,
data_card.scale,
data_card.output,
data_card.color_table,
data_card.refstats_file,
refstats_scale,
run.id,
sequence.id,
card.id,
pid,
1,
)
script_name = '{0}_{1}'.format(card.id, pid)
EXECUTABLE_DICT[script_name] = EXEC
create_sub_card_item(script_name, run.id, card.id)
pid += 1
else:
EXECUTABLE += '$RF_EXEC_DIR/Remap {0} {1} {2} {3} {4} {5} {6} {7} -s {8}.{9}.{10}.{11}.{12}\n'.format(
cur_file_spec,
data_card.roi,
data_card.output_root,
data_card.scale,
data_card.output,
data_card.color_table,
data_card.refstats_file,
refstats_scale,
run.id,
sequence.id,
card.id,
pid,
1,
)
pid += 1
if card_model == 'yearfilter':
# u'YearFilter <Tile> <FileType> [<Filter>] [<FiltOut>] [<ExtendStart>] [<InpFourier>] [<OutDir>] [<InpDir>]'
data_card = YearFilter.objects.get(name=card.card_item.content_object)
area_tiles = get_area_tiles(data_card.area)
all_num = len(area_tiles)
run_parallel = is_run_parallel(data_card)
for tile in area_tiles:
tile_card = Tile.objects.get(id=tile.tile_id)
if run_parallel:
EXEC = '$RF_EXEC_DIR/YearFilter {0} {1} {2} {3} {4} {5} {6} {7} -s {8}.{9}.{10}.{11}.{12}\n'.format(
tile_card,
data_card.filetype,
data_card.filter,
data_card.filter_output,
data_card.extend_start,
data_card.input_fourier,
data_card.output_directory,
data_card.input_directory,
run.id,
sequence.id,
card.id,
pid,
all_num,
)
script_name = '{0}_{1}'.format(card.id, pid)
EXECUTABLE_DICT[script_name] = EXEC
create_sub_card_item(script_name, run.id, card.id)
pid += 1
else:
EXECUTABLE += '$RF_EXEC_DIR/YearFilter {0} {1} {2} {3} {4} {5} {6} {7} -s {8}.{9}.{10}.{11}.{12}\n'.format(
tile_card,
data_card.filetype,
data_card.filter,
data_card.filter_output,
data_card.extend_start,
data_card.input_fourier,
data_card.output_directory,
data_card.input_directory,
run.id,
sequence.id,
card.id,
pid,
all_num,
)
pid += 1
if card_model == 'preproc':
# u'PreProc [<Tile>|<file.hdf>] [<Year>] [<Mode>]'
data_card = PreProc.objects.get(name=card.card_item.content_object)
years = None
area_tiles = None
if data_card.year_group:
years = get_years(data_card.year_group.name)
if data_card.area:
area_tiles = get_area_tiles(data_card.area)
run_parallel = is_run_parallel(data_card)
if years:
len_years = len(years)
else:
len_years = 1
if area_tiles:
len_area_tiles = len(area_tiles)
else:
len_area_tiles = 1
all_num = len_years * len_area_tiles
if run_parallel:
card.run_parallel = True
card.number_sub_cards = all_num
card.save()
if years and area_tiles:
for year in years:
year_card = Year.objects.get(id=year.year_id)
for tile in area_tiles:
tile_card = Tile.objects.get(id=tile.tile_id)
if run_parallel:
EXEC = '$RF_EXEC_DIR/PreProc {0} {1} {2} -s {3}.{4}.{5}.{6}.{7}\n'.format(
tile_card,
year_card,
data_card.mode,
run.id,
sequence.id,
card.id,
pid,
all_num,
)
script_name = '{0}_{1}'.format(card.id, pid)
EXECUTABLE_DICT[script_name] = EXEC
create_sub_card_item(script_name, run.id, card.id)
pid += 1
else:
EXECUTABLE += '$RF_EXEC_DIR/PreProc {0} {1} {2} -s {3}.{4}.{5}.{6}.{7}\n'.format(
tile_card,
year_card,
data_card.mode,
run.id,
sequence.id,
card.id,
pid,
all_num,
)
pid += 1
if years and not area_tiles:
if run_parallel:
EXEC = '$RF_EXEC_DIR/PreProc {0} {1} -s {2}.{3}.{4}.{5}.{6}\n'.format(
year_card,
data_card.mode,
run.id,
sequence.id,
card.id,
pid,
all_num,
)
script_name = '{0}_{1}'.format(card.id, pid)
EXECUTABLE_DICT[script_name] = EXEC
create_sub_card_item(script_name, run.id, card.id)
pid += 1
else:
EXECUTABLE += '$RF_EXEC_DIR/PreProc {0} {1} -s {2}.{3}.{4}.{5}.{6}\n'.format(
year_card,
data_card.mode,
run.id,
sequence.id,
card.id,
pid,
all_num,
)
pid += 1
elif area_tiles and not years:
for tile in area_tiles:
tile_card = Tile.objects.get(id=tile.tile_id)
if run_parallel:
EXEC = '$RF_EXEC_DIR/PreProc {0} {1} -s {2}.{3}.{4}.{5}.{6}\n'.format(
tile_card,
data_card.mode,
run.id,
sequence.id,
card.id,
pid,
all_num,
)
script_name = '{0}_{1}'.format(card.id, pid)
EXECUTABLE_DICT[script_name] = EXEC
create_sub_card_item(script_name, run.id, card.id)
pid += 1
else:
EXECUTABLE += '$RF_EXEC_DIR/PreProc {0} {1} -s {2}.{3}.{4}.{5}.{6}\n'.format(
tile_card,
data_card.mode,
run.id,
sequence.id,
card.id,
pid,
all_num,
)
pid += 1
else:
if run_parallel:
EXEC = '$RF_EXEC_DIR/PreProc {0} -s {1}.{2}.{3}.{4}.{5}\n'.format(
data_card.mode,
run.id,
sequence.id,
card.id,
pid,
all_num,
)
script_name = '{0}_{1}'.format(card.id, pid)
EXECUTABLE_DICT[script_name] = EXEC
create_sub_card_item(script_name, run.id, card.id)
else:
EXECUTABLE += '$RF_EXEC_DIR/PreProc {0} -s {1}.{2}.{3}.{4}.{5}\n'.format(
data_card.mode,
run.id,
sequence.id,
card.id,
pid,
all_num,
)
if card_model == 'collate':
# u'Collate <Tile> [<Mode>] [<InpFile>] [<OutDirFile>] [<InpScale>]'
from gsi.models import HomeVariables as Home
home_var = Home.objects.all()
root_path = home_var[0].RF_AUXDATA_DIR
files = []
data_card = Collate.objects.get(name=card.card_item.content_object)
area_tiles = get_area_tiles(data_card.area)
files_list = Collate.input_files.through.objects.filter(collate=data_card)
run_parallel = is_run_parallel(data_card)
all_num = len(area_tiles)
for f in files_list:
file_obj = ListTestFiles.objects.get(id=f.listtestfiles_id)
f_name = file_obj.name.split('.')
f_subdir = os.path.join(data_card.output_tile_subdir, f_name[0])
temp = [file_obj.name, f_subdir]
files.append(temp)
if files:
all_num *= len(files)
for tile in area_tiles:
tile_card = Tile.objects.get(id=tile.tile_id)
if files:
for f in files:
if run_parallel:
EXEC = '$RF_EXEC_DIR/Collate {0} {1} {2} {3} {4} -s {5}.{6}.{7}.{8}.{9}\n'.format(
tile_card,
data_card.mode,
f[0],
f[1],
data_card.input_scale_factor,
run.id,
sequence.id,
card.id,
pid,
all_num,
)
script_name = '{0}_{1}'.format(card.id, pid)
EXECUTABLE_DICT[script_name] = EXEC
create_sub_card_item(script_name, run.id, card.id)
pid += 1
else:
EXECUTABLE += '$RF_EXEC_DIR/Collate {0} {1} {2} {3} {4} -s {5}.{6}.{7}.{8}.{9}\n'.format(
tile_card,
data_card.mode,
f[0],
f[1],
data_card.input_scale_factor,
run.id,
sequence.id,
card.id,
pid,
all_num,
)
pid += 1
else:
if run_parallel:
EXEC = '$RF_EXEC_DIR/Collate {0} {1} {2} -s {3}.{4}.{5}.{6}.{7}\n'.format(
tile_card,
data_card.mode,
data_card.input_scale_factor,
run.id,
sequence.id,
card.id,
pid,
all_num,
)
script_name = '{0}_{1}'.format(card.id, pid)
EXECUTABLE_DICT[script_name] = EXEC
create_sub_card_item(script_name, run.id, card.id)
pid += 1
else:
EXECUTABLE += '$RF_EXEC_DIR/Collate {0} {1} {2} -s {3}.{4}.{5}.{6}.{7}\n'.format(
tile_card,
data_card.mode,
data_card.input_scale_factor,
run.id,
sequence.id,
card.id,
pid,
all_num,
)
pid += 1
if card_model == 'randomforest':
# RunRandomForestModels.sh <AoI_Name> <Satellite> <ParamSet> <RunSet>
data_card = RandomForest.objects.get(name=card.card_item.content_object)
EXECUTABLE += 'export MODELDIR=/lustre/w23/mattgsi/satdata/RF/Projects/Models\n'
EXECUTABLE += '''export CSVFILE=/lustre/w23/mattgsi/satdata/RF/Projects/Lane/Data/ref/Model${0}.csv >> $MODELDIR/ParamSet_CSVbands.sh\n'''.format(data_card.model)
EXECUTABLE += 'export MVRF_TOTAL={0} >> $MODELDIR/ParamSet_CSV"$XSET".sh\n'.format(data_card.mvrf)
EXECUTABLE += 'RunRandomForestModels.sh {0} {1} {2} {3} -s {4}.{5}.{6}.{7}.{8}\n'.format(
data_card.aoi_name,
data_card.satellite,
data_card.param_set,
data_card.run_set,
run.id,
sequence.id,
card.id,
pid,
1,
)
pid += 1
if card_model == 'calcstats':
#CalcStats <Tile> [<Year>] [<Period>] [<Filter>] [<FiltOut>] [OutDir]
data_card = CalcStats.objects.get(name=card.card_item.content_object)
period = data_card.period
run_parallel = is_run_parallel(data_card)
if period == 'doy':
period = data_card.doy_variable
try:
years = get_years(data_card.year_group.name)
except Exception, e:
years_num = 1
years = None
try:
areas = get_area_tiles(data_card.area.name)
except Exception, e:
areas_num = 1
areas = None
if years and areas:
all_num = len(years) * len(areas)
for year in years:
year_card = Year.objects.get(id=year.year_id)
for area in areas:
area_card = Tile.objects.get(id=area.tile_id)
if run_parallel:
EXEC = '$RF_EXEC_DIR/CalcStats {0} {1} {2} {3} {4} {5} {6} -s {7}.{8}.{9}.{10}.{11}\n'.format(
area_card,
year_card,
period,
data_card.filter,
data_card.filter_out,
data_card.input_fourier,
data_card.out_dir,
run.id,
sequence.id,
card.id,
pid,
all_num
)
script_name = '{0}_{1}'.format(card.id, pid)
EXECUTABLE_DICT[script_name] = EXEC
create_sub_card_item(script_name, run.id, card.id)
pid += 1
else:
EXECUTABLE += '$RF_EXEC_DIR/CalcStats {0} {1} {2} {3} {4} {5} {6} -s {7}.{8}.{9}.{10}.{11}\n'.format(
area_card,
year_card,
period,
data_card.filter,
data_card.filter_out,
data_card.input_fourier,
data_card.out_dir,
run.id,
sequence.id,
card.id,
pid,
all_num
)
pid += 1
elif years and not areas:
all_num = len(years)
for year in years:
year_card = Year.objects.get(id=year.year_id)
if run_parallel:
EXEC = '$RF_EXEC_DIR/CalcStats {0} {1} {2} {3} {4} {5} -s {6}.{7}.{8}.{9}.{10}\n'.format(
year_card,
period,
data_card.filter,
data_card.filter_out,
data_card.input_fourier,
data_card.out_dir,
run.id,
sequence.id,
card.id,
pid,
all_num
)
script_name = '{0}_{1}'.format(card.id, pid)
EXECUTABLE_DICT[script_name] = EXEC
create_sub_card_item(script_name, run.id, card.id)
pid += 1
else:
EXECUTABLE += '$RF_EXEC_DIR/CalcStats {0} {1} {2} {3} {4} {5} -s {6}.{7}.{8}.{9}.{10}\n'.format(
year_card,
period,
data_card.filter,
data_card.filter_out,
data_card.input_fourier,
data_card.out_dir,
run.id,
sequence.id,
card.id,
pid,
all_num
)
pid += 1
elif not years and areas:
all_num = len(areas)
for area in areas:
area_card = Tile.objects.get(id=area.tile_id)
if run_parallel:
EXEC = '$RF_EXEC_DIR/CalcStats {0} {1} {2} {3} {4} {5} -s {6}.{7}.{8}.{9}.{10}\n'.format(
area_card,
period,
data_card.filter,
data_card.filter_out,
data_card.input_fourier,
data_card.out_dir,
run.id,
sequence.id,
card.id,
pid,
all_num
)
script_name = '{0}_{1}'.format(card.id, pid)
EXECUTABLE_DICT[script_name] = EXEC
create_sub_card_item(script_name, run.id, card.id)
pid += 1
else:
EXECUTABLE += '$RF_EXEC_DIR/CalcStats {0} {1} {2} {3} {4} {5} -s {6}.{7}.{8}.{9}.{10}\n'.format(
area_card,
period,
data_card.filter,
data_card.filter_out,
data_card.input_fourier,
data_card.out_dir,
run.id,
sequence.id,
card.id,
pid,
all_num
)
pid += 1
else:
if run_parallel:
EXEC = '$RF_EXEC_DIR/CalcStats {0} {1} {2} {3} {4} {5} -s {6}.{7}.{8}.{9}.{10}\n'.format(
data_card.output_tile_subdir,
period,
data_card.filter,
data_card.filter_out,
data_card.input_fourier,
data_card.out_dir,
run.id,
sequence.id,
card.id,
pid,
all_num
)
script_name = '{0}_{1}'.format(card.id, pid)
EXECUTABLE_DICT[script_name] = EXEC
create_sub_card_item(script_name, run.id, card.id)
pid += 1
else:
EXECUTABLE += '$RF_EXEC_DIR/CalcStats {0} {1} {2} {3} {4} {5} -s {6}.{7}.{8}.{9}.{10}\n'.format(
data_card.output_tile_subdir,
period,
data_card.filter,
data_card.filter_out,
data_card.input_fourier,
data_card.out_dir,
run.id,
sequence.id,
card.id,
pid,
all_num
)
pid += 1
if run_parallel:
card.run_parallel = True
card.number_sub_cards = all_num
elif not run_parallel:
card.run_parallel = False
card.number_sub_cards = 0
card.save()
if run_parallel:
return run_parallel, EXECUTABLE_DICT
else:
return run_parallel, EXECUTABLE