Source code for pipeline.hsd.tasks.baseline.validation

import collections
import math
import time
from math import sqrt

import numpy
import numpy.linalg as LA
import scipy.cluster.hierarchy as HIERARCHY
import scipy.cluster.vq as VQ

import pipeline.infrastructure as infrastructure
import pipeline.infrastructure.basetask as basetask
import pipeline.infrastructure.vdp as vdp
from pipeline.domain.datatable import DataTableIndexer
from . import rules
from .. import common
from ..common import utils

_LOG = infrastructure.get_logger(__name__)
LOG = utils.OnDemandStringParseLogger(_LOG)


def ValidationFactory(pattern):
    if pattern == 'RASTER':
        return ValidateLineRaster
    elif pattern in ('SINGLE-POINT', 'MULTI-POINT'):
        return ValidateLineSinglePointing
    else:
        raise ValueError('Invalid observing pattern')
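# Illustrative sketch (not part of the pipeline): the factory maps an
# observing-pattern string to a task class, e.g.
#
#     >>> ValidationFactory('RASTER')
#     <class '...ValidateLineRaster'>
#     >>> ValidationFactory('SINGLE-POINT')
#     <class '...ValidateLineSinglePointing'>
#
# Any other pattern raises ValueError('Invalid observing pattern').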
class ValidateLineInputs(vdp.StandardInputs):
    window = vdp.VisDependentProperty(default=[])
    edge = vdp.VisDependentProperty(default=(0, 0))
    nsigma = vdp.VisDependentProperty(default=3.0)
    xorder = vdp.VisDependentProperty(default=-1.0)
    yorder = vdp.VisDependentProperty(default=-1.0)
    broad_component = vdp.VisDependentProperty(default=False)
    clusteringalgorithm = vdp.VisDependentProperty(default=rules.ClusterRule['ClusterAlgorithm'])

    @property
    def group_desc(self):
        return self.context.observing_run.ms_reduction_group[self.group_id]

    @property
    def reference_member(self):
        return self.group_desc[self.member_list[0]]

    @property
    def windowmode(self):
        return getattr(self, '_windowmode', 'replace')

    @windowmode.setter
    def windowmode(self, value):
        if value not in ['replace', 'merge']:
            raise ValueError("linewindowmode must be either 'replace' or 'merge'.")
        self._windowmode = value

    def __init__(self, context, group_id, member_list, iteration, grid_ra, grid_dec,
                 window=None, windowmode=None, edge=None, nsigma=None, xorder=None,
                 yorder=None, broad_component=None, clusteringalgorithm=None):
        super(ValidateLineInputs, self).__init__()
        self.context = context
        self.group_id = group_id
        self.member_list = member_list
        self.iteration = iteration
        self.grid_ra = grid_ra
        self.grid_dec = grid_dec
        self.window = window
        self.windowmode = windowmode
        self.edge = edge
        self.nsigma = nsigma
        self.xorder = xorder
        self.yorder = yorder
        self.broad_component = broad_component
        self.clusteringalgorithm = clusteringalgorithm
class ValidateLineResults(common.SingleDishResults):
    def __init__(self, task=None, success=None, outcome=None):
        super(ValidateLineResults, self).__init__(task, success, outcome)

    def merge_with_context(self, context):
        super(ValidateLineResults, self).merge_with_context(context)
        # exporting datatable should be done within the parent task
        # datatable = self.outcome.pop('datatable')
        # datatable.exportdata(minimal=True)

    def _outcome_name(self):
        return ''
class ValidateLineSinglePointing(basetask.StandardTaskTemplate):
    Inputs = ValidateLineInputs

    def prepare(self, datatable_dict=None, index_list=None, grid_table=None, detect_signal=None):
        """
        ValidateLine class for single-pointing or multi-pointing
        (collection of fields with single-pointing).

        Accept all detected lines without clustering analysis.

        detect_signal = {ID1: [RA, DEC, [[LineStartChannel1, LineEndChannel1],
                                         [LineStartChannel2, LineEndChannel2],
                                         [LineStartChannelN, LineEndChannelN]]],
                         IDn: [RA, DEC, [[LineStartChannel1, LineEndChannel1],
                                         [LineStartChannelN, LineEndChannelN]]]}

        lines: output parameter
               [LineCenter, LineWidth, Validity]  OK: Validity = True; NG: Validity = False
        """
        window = self.inputs.window
        windowmode = self.inputs.windowmode
        assert datatable_dict is not None
        assert index_list is not None
        assert detect_signal is not None

        # indexer translates serial index into per-MS index
        indexer = DataTableIndexer(self.inputs.context)

        # for pre-defined spectrum window
        if len(window) != 0 and windowmode == 'replace':
            LOG.info('Skip clustering analysis since predefined line window is set.')
            lines = _to_validated_lines(detect_signal)
            # TODO: review whether this relies on the order of dictionary values.
            signal = list(detect_signal.values())[0]
            for i in index_list:
                vis, row = indexer.serial2perms(i)
                datatable = datatable_dict[vis]
                datatable.putcell('MASKLIST', row, signal[2])
                datatable.putcell('NOCHANGE', row, False)
            outcome = {'lines': lines,
                       'channelmap_range': lines,
                       'cluster_info': {},
                       'flag_digits': {}}
            result = ValidateLineResults(task=self.__class__,
                                         success=True,
                                         outcome=outcome)
            result.task = self.__class__
            return result

        # Dictionary for final output
        lines = []

        # register manually specified line windows to lines
        for w in window:
            center = float(sum(w)) / 2
            width = max(w) - min(w)
            lines.append([center, width, True])

        LOG.info('Accept all detected lines without clustering analysis.')

        iteration = self.inputs.iteration

        # First cycle
        #if len(grid_table) == 0:
        if iteration == 0:
            for i in index_list:
                vis, row = indexer.serial2perms(i)
                datatable = datatable_dict[vis]
                mask_list = datatable.getcell('MASKLIST', row)
                no_change = datatable.getcell('NOCHANGE', row)
                #LOG.debug('DataTable = %s, detect_signal = %s, OldFlag = %s' % (mask_list, detect_signal[row][2], no_change))
                #datatable.putcell('MASKLIST', row, detect_signal[row][2])
                datatable.putcell('MASKLIST', row, detect_signal[0][2])
                datatable.putcell('NOCHANGE', row, False)
        # Iteration case
        else:
            for i in index_list:
                vis, row = indexer.serial2perms(i)
                datatable = datatable_dict[vis]
                mask_list = datatable.getcell('MASKLIST', row)
                no_change = datatable.getcell('NOCHANGE', row)
                #LOG.debug('DataTable = %s, detect_signal = %s, OldFlag = %s' % (mask_list, detect_signal[0][2], no_change))
                if mask_list == detect_signal[0][2]:
                    #if type(no_change) != int:
                    if no_change < 0:
                        # 2013/05/17 TN
                        # Put iteration itself instead of iteration - 1 since the
                        # iteration counter is incremented *after* the baseline
                        # subtraction in the refactored code.
                        datatable.putcell('NOCHANGE', row, iteration)
                else:
                    datatable.putcell('MASKLIST', row, detect_signal[0][2])
                    datatable.putcell('NOCHANGE', row, False)
        outcome = {'lines': lines,
                   'channelmap_range': lines,
                   'cluster_info': {},
                   'flag_digits': {}}
        result = ValidateLineResults(task=self.__class__,
                                     success=True,
                                     outcome=outcome)
        result.task = self.__class__
        return result
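    # Illustrative sketch (not part of the pipeline): a minimal detect_signal
    # dictionary of the shape documented above, with hypothetical IDs 0 and 1
    # and line ranges given in channels:
    #
    #     detect_signal = {
    #         0: [123.45, -67.89, [[10, 25], [40, 52]]],   # RA, DEC, line ranges
    #         1: [123.46, -67.89, [[11, 24]]],
    #     }
    #
    # A range of [-1, -1] is used throughout this module to mean "no line".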
    def analyse(self, result):
        return result
class ValidateLineRaster(basetask.StandardTaskTemplate):
    Inputs = ValidateLineInputs

    CLUSTER_WHITEN = 1.0
    #CLUSTER_WHITEN = 4.0  # sensitivity of line width compared to center position -> 1/4

    # as of 2017/7/4 Valid=0.5, Marginal=0.35, Questionable=0.2
    # should be Valid=0.7, Marginal=0.5, Questionable=0.3
    Valid = rules.ClusterRule['ThresholdValid']
    Marginal = rules.ClusterRule['ThresholdMarginal']
    Questionable = rules.ClusterRule['ThresholdQuestionable']
    MinFWHM = rules.LineFinderRule['MinFWHM']
    #MaxFWHM = rules.LineFinderRule['MaxFWHM']
    #Clustering_Algorithm = rules.ClusterRule['ClusterAlgorithm']
    DebugOutName = '%05d' % (int(time.time()) % 100000)
    DebugOutVer = [0, 0]

    @property
    def MaxFWHM(self):
        num_edge = sum(self.inputs.edge)
        spw = self.inputs.reference_member.spw
        nchan = spw.num_channels
        return int(max(0, nchan - num_edge) // 3)
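    # Illustrative sketch (not part of the pipeline): with hypothetical values
    # nchan = 4096 and edge = (100, 100), the maximum allowed line FWHM is one
    # third of the usable band:
    #
    #     >>> nchan, num_edge = 4096, 200
    #     >>> int(max(0, nchan - num_edge) // 3)
    #     1298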
    def validate_cluster(self, clustering_algorithm, clustering_result, index_list, detect_signal, PosList, Region2):
        # input parameters
        grid_ra = self.inputs.grid_ra
        grid_dec = self.inputs.grid_dec
        broad_component = self.inputs.broad_component
        xorder = self.inputs.xorder
        yorder = self.inputs.yorder

        # decompose clustering result
        (Ncluster, Bestlines, BestCategory, Region) = clustering_result

        # Calculate Parameters for Gridding
        DecCorrection = 1.0 / math.cos(PosList[1][0] / 180.0 * math.pi)
        grid_ra *= DecCorrection
        wra = PosList[0].max() - PosList[0].min()
        wdec = PosList[1].max() - PosList[1].min()
        cra = PosList[0].min() + wra / 2.0
        cdec = PosList[1].min() + wdec / 2.0
        # 2010/6/11 +1.0 -> +1.01: if wra is n x grid_ra (n is an integer),
        # int(wra/grid_ra) is not n in some cases because of the lack of accuracy.
        nra = 2 * (int((wra / 2.0 - grid_ra / 2.0) / grid_ra) + 1) + 1
        ndec = 2 * (int((wdec / 2.0 - grid_dec / 2.0) / grid_dec) + 1) + 1
        x0 = cra - grid_ra / 2.0 - grid_ra * (nra - 1) / 2.0
        y0 = cdec - grid_dec / 2.0 - grid_dec * (ndec - 1) / 2.0
        LOG.debug('Grid = {} x {}\n', nra, ndec)
        if 'grid' not in self.cluster_info:
            self.cluster_info['grid'] = {
                'ra_min': x0,
                'dec_min': y0,
                'grid_ra': grid_ra,
                'grid_dec': grid_dec
            }

        # Create space for storing the list of spectrum (ID) in the grid.
        # 2013/03/27 TN
        # Grid2SpectrumID stores index of index_list instead of row numbers
        # that are held by index_list.
        Grid2SpectrumID = [[[] for y in range(ndec)] for x in range(nra)]
        for i in range(len(PosList[0])):
            Grid2SpectrumID[int((PosList[0][i] - x0) / grid_ra)][int((PosList[1][i] - y0) / grid_dec)].append(i)

        # Sort lines and Category by LineCenter: lines[][0]
        LineIndex = numpy.argsort([line[0] for line in Bestlines[:Ncluster]])
        lines = [Bestlines[i] for i in LineIndex]
        LOG.debug('Ncluster, lines: {} {}', Ncluster, lines)
        LOG.debug('LineIndex: {}', LineIndex)

        ### 2011/05/17 anti-scaling of the line width
        #Region2[:, 0] = Region2[:, 0] * self.CLUSTER_WHITEN
        for Nc in range(Ncluster):
            lines[Nc][1] *= self.CLUSTER_WHITEN

        LineIndex2 = numpy.argsort(LineIndex)
        LOG.debug('LineIndex2: {}', LineIndex2)
        LOG.debug('BestCategory: {}', BestCategory)
        category = [LineIndex2[bc] for bc in BestCategory]

        ######## Clustering: Detection Stage ########
        ProcStartTime = time.time()
        LOG.info('Clustering: Detection Stage Start')
        (GridCluster, GridMember, cluster_flag) = self.detection_stage(Ncluster, nra, ndec, x0, y0,
                                                                       grid_ra, grid_dec, category,
                                                                       Region, detect_signal)
        ProcEndTime = time.time()
        LOG.info('Clustering: Detection Stage End: Elapsed time = {} sec', (ProcEndTime - ProcStartTime))

        ######## Clustering: Validation Stage ########
        ProcStartTime = time.time()
        LOG.info('Clustering: Validation Stage Start')
        (GridCluster, GridMember, lines, cluster_flag) = self.validation_stage(GridCluster, GridMember,
                                                                               lines, cluster_flag)
        ProcEndTime = time.time()
        LOG.info('Clustering: Validation Stage End: Elapsed time = {} sec', (ProcEndTime - ProcStartTime))

        ######## Clustering: Smoothing Stage ########
        # Rating:  [0.0, 0.4, 0.5, 0.4, 0.0]
        #          [0.4, 0.7, 1.0, 0.7, 0.4]
        #          [0.5, 1.0, 6.0, 1.0, 0.5]
        #          [0.4, 0.7, 1.0, 0.7, 0.4]
        #          [0.0, 0.4, 0.5, 0.4, 0.0]
        # Rating = 1.0 / (Dx**2 + Dy**2)**(0.5) : if (Dx, Dy) == (0, 0) rating = 6.0
        ProcStartTime = time.time()
        LOG.info('Clustering: Smoothing Stage Start')
        (GridCluster, lines, cluster_flag) = self.smoothing_stage(GridCluster, lines, cluster_flag)
        ProcEndTime = time.time()
        LOG.info('Clustering: Smoothing Stage End: Elapsed time = {} sec', (ProcEndTime - ProcStartTime))

        ######## Clustering: Final Stage ########
        ProcStartTime = time.time()
        LOG.info('Clustering: Final Stage Start')
        # create virtual index_list
        (RealSignal, lines, channelmap_range, cluster_flag) = self.final_stage(GridCluster, GridMember, Region,
                                                                               Region2, lines, category, grid_ra,
                                                                               grid_dec, broad_component, xorder,
                                                                               yorder, x0, y0, Grid2SpectrumID,
                                                                               index_list, PosList, cluster_flag)
        ProcEndTime = time.time()
        LOG.info('Clustering: Final Stage End: Elapsed time = {} sec', (ProcEndTime - ProcStartTime))

        return RealSignal, lines, channelmap_range, cluster_flag
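    # Illustrative sketch (not part of the pipeline): the grid-count formula
    # above, evaluated with hypothetical numbers. For a map 0.1 deg wide in RA
    # (wra = 0.1) and 0.02 deg cells (grid_ra = 0.02),
    # nra = 2 * (int((0.05 - 0.01) / 0.02) + 1) + 1 = 7, i.e. an odd number of
    # cells guaranteed to cover the full RA extent.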
    def prepare(self, datatable_dict=None, index_list=None, grid_table=None, detect_signal=None):
        """
        2D fit line characteristics calculated in Process3.

        Sigma clipping iterations will be applied if nsigma is positive.
        order < 0 : automatic determination of fitting order (max = 5)

        detect_signal = {ID1: [RA, DEC, [[LineStartChannel1, LineEndChannel1, Binning],
                                         [LineStartChannel2, LineEndChannel2, Binning],
                                         [LineStartChannelN, LineEndChannelN, Binning]]],
                         IDn: [RA, DEC, [[LineStartChannel1, LineEndChannel1, Binning],
                                         [LineStartChannelN, LineEndChannelN, Binning]]]}

        lines: output parameter
               [LineCenter, LineWidth, Validity]  OK: Validity = True; NG: Validity = False
        """
        window = self.inputs.window
        windowmode = self.inputs.windowmode
        LOG.debug('{}: window={}, windowmode={}'.format(self.__class__.__name__, window, windowmode))
        assert datatable_dict is not None
        assert grid_table is not None
        assert index_list is not None
        assert detect_signal is not None

        # indexer translates serial index into per-MS index
        indexer = DataTableIndexer(self.inputs.context)

        # for pre-defined spectrum window
        if len(window) != 0 and windowmode == 'replace':
            LOG.info('Skip clustering analysis since predefined line window is set.')
            lines = _to_validated_lines(detect_signal)
            # TODO: review whether this relies on the order of dictionary values.
            signal = list(detect_signal.values())[0]
            for i in index_list:
                vis, row = indexer.serial2perms(i)
                datatable = datatable_dict[vis]
                datatable.putcell('MASKLIST', row, signal[2])
                datatable.putcell('NOCHANGE', row, False)
            outcome = {'lines': lines,
                       'channelmap_range': lines,
                       'cluster_info': {},
                       'flag_digits': {}}
            result = ValidateLineResults(task=self.__class__,
                                         success=True,
                                         outcome=outcome)
            return result

        manual_window = []
        # register manually specified line windows to lines
        for w in window:
            center = float(sum(w)) / 2
            width = max(w) - min(w)
            manual_window.append([center, width, True, 0.0])

        iteration = self.inputs.iteration

        _vis, _row = indexer.serial2perms(index_list[0])
        self.nchan = datatable_dict[_vis].getcell('NCHAN', _row)
        self.nsigma = self.inputs.nsigma

        ProcStartTime = time.time()
        LOG.info('2D fit the line characteristics...')

        #tSFLAG = datatable.getcol('FLAG_SUMMARY')
        Totallines = 0
        RMS0 = 0.0
        lines = []
        self.cluster_info = {}
        self.flag_digits = {'detection': 1, 'validation': 10, 'smoothing': 100, 'final': 1000}

        # RASTER CASE
        # Read data from Table to generate ID -> RA, DEC conversion table
        Region = []
        dummy = []
        flag = 1
        Npos = 0

        # 2017/7/4 clean up detect_signal
        LOG.trace('Before: Detect_Signal = {}', detect_signal)
        detect_signal = self.clean_detect_signal(detect_signal)
        LOG.trace('After: Detect_Signal = {}', detect_signal)

        for row in range(len(grid_table)):
            # detect_signal[row][2]: [[LineStartChannelN, LineEndChannelN, Binning],[],,,[]]
            if len(detect_signal[row][2]) != 0 and detect_signal[row][2][0][0] != -1:
                Npos += 1
                for line in detect_signal[row][2]:
                    # Check statistics flag. tSFLAG[row]==1 => Valid Spectra 2008/1/17
                    # Bug fix 2008/5/29
                    #if (line[0] != line[1]) and ((len(grid_table) == 0 and tSFLAG[row] == 1) or len(grid_table) != 0):
                    # referring to tSFLAG is not correct
                    if line[0] != line[1]:  #and tSFLAG[row] == 1:
                        #2014/11/28 add Binning info into Region
                        Region.append([row, line[0], line[1], detect_signal[row][0], detect_signal[row][1], flag, line[2]])
                        ### 2011/05/17 make cluster insensitive to the line width: Whiten
                        dummy.append([float(line[1] - line[0]) / self.CLUSTER_WHITEN, 0.5 * float(line[0] + line[1])])
        Region2 = numpy.array(dummy)  # [FullWidth, Center]

        ### 2015/04/22 save Region to file for test
        if infrastructure.logging.logging_level == infrastructure.logging.LOGGING_LEVELS['trace'] or \
                infrastructure.logging.logging_level == infrastructure.logging.LOGGING_LEVELS['debug']:
            self.DebugOutVer[0] += 1
            with open('ClstRegion.%s.%02d.txt' % (self.DebugOutName, self.DebugOutVer[0]), 'w') as fp:
                for i in range(len(Region)):
                    fp.writelines('%d %f %f %f %f %d %d\n' % (Region[i][0], Region[i][1], Region[i][2],
                                                              Region[i][3], Region[i][4], Region[i][5],
                                                              Region[i][6]))

        del dummy
        LOG.debug('Npos = {}', Npos)

        # 2010/6/9 for non-detection
        if Npos == 0 or len(Region2) == 0:
            if len(manual_window) == 0:
                signal = [[-1, -1]]
            else:
                signal = manual_window
            for i in index_list:
                vis, row = indexer.serial2perms(i)
                datatable = datatable_dict[vis]
                datatable.putcell('MASKLIST', row, signal)
                datatable.putcell('NOCHANGE', row, False)
            outcome = {'lines': manual_window,
                       'channelmap_range': manual_window,
                       'cluster_info': {},
                       'flag_digits': {}}
            result = ValidateLineResults(task=self.__class__,
                                         success=True,
                                         outcome=outcome)
            result.task = self.__class__
            return result

        # 2008/9/20 Dec Effect was corrected
        def _g(colname):
            for i in index_list:
                vis, j = indexer.serial2perms(i)
                datatable = datatable_dict[vis]
                yield datatable.getcell(colname, j)

        ras = numpy.fromiter(_g('OFS_RA'), dtype=numpy.float64)
        decs = numpy.fromiter(_g('OFS_DEC'), dtype=numpy.float64)
        PosList = numpy.asarray([ras, decs])

        # The gridding set-up (DecCorrection, nra/ndec, x0/y0, Grid2SpectrumID)
        # has been moved to self.validate_cluster.

        ProcEndTime = time.time()
        LOG.info('Clustering: Initialization End: Elapsed time = {} sec', ProcEndTime - ProcStartTime)

        ######## Clustering: K-mean Stage ########
        # K-mean Clustering Analysis with LineWidth and LineCenter
        # Max number of protect regions are SDC.SDParam['Cluster']['MaxCluster'] (Max lines)
        ProcStartTime = time.time()
        LOG.info('Clustering Analysis Start')

        # Bestlines: [[center, width, T/F],[],,,[]]
        clustering_algorithm = self.inputs.clusteringalgorithm
        LOG.info('clustering algorithm is \'{}\'', clustering_algorithm)
        clustering_results = collections.OrderedDict()
        if clustering_algorithm == 'kmean':
            clustering_results[clustering_algorithm] = self.clustering_kmean(Region, Region2)
        elif clustering_algorithm == 'hierarchy':
            clustering_results[clustering_algorithm] = self.clustering_hierarchy(
                Region, Region2,
                nThreshold=rules.ClusterRule['ThresholdHierarchy'],
                nThreshold2=rules.ClusterRule['ThresholdHierarchy2'],
                method='single')
        elif clustering_algorithm == 'both':
            clustering_results['kmean'] = self.clustering_kmean(Region, Region2)
            clustering_results['hierarchy'] = self.clustering_hierarchy(
                Region, Region2,
                nThreshold=rules.ClusterRule['ThresholdHierarchy'],
                nThreshold2=rules.ClusterRule['ThresholdHierarchy2'],
                method='single')
        else:
            LOG.error('Invalid clustering algorithm: {}'.format(clustering_algorithm))

        ProcEndTime = time.time()
        LOG.info('Clustering Analysis End: Elapsed time = {} sec', (ProcEndTime - ProcStartTime))

        # 2017/8/15 for non-detection after cleaning
        if sum([r[0] for r in clustering_results.values()]) == 0:
            if len(manual_window) == 0:
                signal = [[-1, -1]]
            else:
                signal = manual_window
            for i in index_list:
                vis, row = indexer.serial2perms(i)
                datatable = datatable_dict[vis]
                datatable.putcell('MASKLIST', row, signal)
                datatable.putcell('NOCHANGE', row, False)
            outcome = {'lines': manual_window,
                       'channelmap_range': manual_window,
                       'cluster_info': {},
                       'flag_digits': {}}
            result = ValidateLineResults(task=self.__class__,
                                         success=True,
                                         outcome=outcome)
            result.task = self.__class__
            return result

        ### 2011/05/17 anti-scaling of the line width
        Region2[:, 0] = Region2[:, 0] * self.CLUSTER_WHITEN

        # The line sorting and the detection, validation, smoothing, and final
        # stages have been moved to self.validate_cluster.

        # validate cluster
        assert clustering_algorithm in ['kmean', 'hierarchy', 'both']
        validated = [
            self.validate_cluster(k, v, index_list, detect_signal, PosList, Region2)
            for k, v in clustering_results.items()
        ]

        # Merge results from multiple clustering analyses.
        # If more than one result exists, minimum contents of RealSignal will be
        # merged and remaining items (PosList) will be lost.
        # Original RealSignal data will be stored in validated[x][0].
        (RealSignal, lines, channelmap_range, cluster_flag) = self._merge_cluster_result(validated)
        self.cluster_info['cluster_flag'] = cluster_flag

        # Merge masks if possible
        ProcStartTime = time.time()
        LOG.info('Clustering: Merging Start')
        # RealSignal should have all rows as its keys
        tmp_index = 0
        for vrow in index_list:
            if vrow in RealSignal:
                signal = self.__merge_lines(RealSignal[vrow][2], self.nchan)
                signal.extend(window)
            else:
                signal = window
            if len(signal) == 0:
                signal = [[-1, -1]]
            #RealSignal[row] = [PosList[0][tmp_index], PosList[1][tmp_index], signal]
            tmp_index += 1

            vis, row = indexer.serial2perms(vrow)
            datatable = datatable_dict[vis]
            # Note that MASKLIST [[-1,-1]] represents that no masks are
            # available, while NOCHANGE -1 indicates NOCHANGE is False.
            tMASKLIST = datatable.getcell('MASKLIST', row)
            if len(tMASKLIST) == 0 or tMASKLIST[0][0] < 0:
                tMASKLIST = []
            else:
                tMASKLIST = tMASKLIST.tolist()
            tNOCHANGE = datatable.getcell('NOCHANGE', row)
            #LOG.debug('DataTable = %s, RealSignal = %s' % (tMASKLIST, signal))
            if tMASKLIST == signal:
                if tNOCHANGE < 0:
                    # 2013/05/17 TN
                    # Put iteration itself instead of iteration - 1 since the
                    # iteration counter is incremented *after* baseline
                    # subtraction in the refactored code.
                    datatable.putcell('NOCHANGE', row, iteration)
            else:
                datatable.putcell('MASKLIST', row, signal)
                datatable.putcell('NOCHANGE', row, -1)
        del RealSignal
        ProcEndTime = time.time()
        LOG.info('Clustering: Merging End: Elapsed time = {} sec', (ProcEndTime - ProcStartTime))

        lines.extend(manual_window)
        channelmap_range.extend(manual_window)

        outcome = {'lines': lines,
                   'channelmap_range': channelmap_range,
                   'cluster_info': self.cluster_info,
                   'flag_digits': self.flag_digits}
        result = ValidateLineResults(task=self.__class__,
                                     success=True,
                                     outcome=outcome)
        result.task = self.__class__
        return result
    def analyse(self, result):
        return result
    def _merge_cluster_info(self, algorithm, cluster_score, detected_lines, cluster_property, cluster_scale):
        actions = {
            'cluster_score': ('kmean', cluster_score),
            'detected_lines': ('skip', detected_lines),
            'cluster_property': ('append', cluster_property),
            'cluster_scale': ('skip', cluster_scale)
        }
        for key, (action, value) in actions.items():
            if key not in self.cluster_info:
                self.cluster_info[key] = value
            elif action == 'append':
                self.cluster_info[key].extend(value)
            elif action == algorithm:
                self.cluster_info[key] = value

    def _merge_cluster_result(self, result_list):
        if len(result_list) == 1:
            return tuple(result_list[0])

        merged_RealSignal = collections.defaultdict(lambda: [None, None, []])
        for r in result_list:
            RealSignal = r[0]
            for k, v in RealSignal.items():
                merged_RealSignal[k][2].extend(v[2])
        merged_lines = [l for r in result_list for l in r[1]]
        merged_channelmap_ranges = [l for r in result_list for l in r[2]]
        merged_flag = numpy.concatenate([r[3] for r in result_list], axis=0)
        return merged_RealSignal, merged_lines, merged_channelmap_ranges, merged_flag
    def clean_detect_signal(self, DS):
        """
        Spectra at each grid position are split into 3 groups along the time
        series. Each group of spectra is then combined into one spectrum, so
        one grid position has 3 combined spectra. Assuming that the real
        signal is correlated while the error is not, false (uncorrelated)
        signals can be cleaned out in advance of the validation stage.

        detect_signal = {ID1: [RA, DEC, [[LineStartChannel1, LineEndChannel1, Binning],
                                         [LineStartChannel2, LineEndChannel2, Binning],
                                         [LineStartChannelN, LineEndChannelN, Binning]]],
                         IDn: [RA, DEC, [[LineStartChannel1, LineEndChannel1, Binning],
                                         [LineStartChannelN, LineEndChannelN, Binning]]]}
        """
        # grouping by position
        Gthreshold = 1.0 / 3600.
        # TODO: review whether the following relies on a specific order of keys.
        DSkey = list(DS.keys())
        PosGroup = []  # PosGroup: [[ID,ID,ID],[ID,ID,ID],...,[ID,ID,ID]]
        for ID in list(DS.keys()):
            if ID in DSkey:
                del DSkey[DSkey.index(ID)]
                DStmp = DSkey[:]
                PosGroup.append([ID])
                for nID in DStmp:
                    if abs(DS[ID][0] - DS[nID][0]) < Gthreshold and abs(DS[ID][1] - DS[nID][1]) < Gthreshold:
                        del DSkey[DSkey.index(nID)]
                        PosGroup[-1].append(nID)
        LOG.debug('clean_detect_signal: PosGroup = {}', PosGroup)
        for PList in PosGroup:
            if len(PList) > 2:
                data = collections.OrderedDict()
                for i in PList:
                    data[i] = DS[i][2]
                # threshold 0.7: identical line is detected in all 3 data: strict checking
                # threshold 0.6: identical line is detected in 2 data out of 3
                ret = self.clean_detect_line(data.copy(), threshold=0.7)
                for i in PList:
                    DS[i][2] = ret[i]
        return DS
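    # Illustrative sketch (not part of the pipeline): positions closer than
    # Gthreshold = 1/3600 deg (1 arcsec) in both RA and DEC end up in the same
    # PosGroup, e.g. with hypothetical entries
    #
    #     DS = {0: [10.00000, -5.0, [[20, 30, 1]]],
    #           1: [10.00010, -5.0, [[21, 29, 1]]],   # within 1 arcsec of ID 0
    #           2: [10.10000, -5.0, [[-1, -1, 1]]]}   # a separate position
    #
    # the grouping pass yields PosGroup = [[0, 1], [2]]; only groups with more
    # than two members are passed on to clean_detect_line.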
    def clean_detect_line(self, data, threshold=0.6):
        """
        Select only line candidates with good possibility by checking all
        spectra taken at the same position.

        data: {ID1: [[LineStart, LineEnd, Binning],,,],
               ID2: [[LineStart, LineEnd, Binning],,,],
               IDn: [[LineStart, LineEnd, Binning],,,]}
        """
        ret = {}
        NSP = float(len(data))
        for ID in list(data.keys()):
            ret[ID] = []
        for ID in list(data.keys()):
            for i in range(len(data[ID])):
                if data[ID][i][0] != -1:
                    ref = data[ID][i][:]
                    tmp = []
                    # if binning > 1: expected number of lines in the grid doubles, i.e., two different offsets
                    for nID in list(data.keys()):
                        Lines = [0.0, 0.0, 0.0]
                        for j in range(len(data[nID])):
                            # check that binning is the same and the lines overlap
                            if data[nID][j][2] == ref[2] and self.CheckLineIdentity(ref, data[nID][j], overlap=0.7):
                                Lines[0] += data[nID][j][0]
                                Lines[1] += data[nID][j][1]
                                Lines[2] += 1.0
                                # clear identical line
                                data[nID][j] = [-1, -1, 1]
                        if Lines[2] > 0:
                            tmp.append([nID, [Lines[0] / Lines[2], Lines[1] / Lines[2], ref[2]]])
                    if len(tmp) / NSP >= threshold:
                        for nID, ref in tmp:
                            ret[nID].append(ref)
        for ID in list(data.keys()):
            if len(ret[ID]) == 0:
                ret[ID].append([-1, -1, 1])
        return ret
    def CheckLineIdentity(self, old, new, overlap=0.7):
        """
        True if the overlap of two lines is greater than the threshold

          1L         1R      1L         1R       1L        1R     1L       1R
           [          ]       [          ]        [         ]      [        ]
           xxxxxxxxxxxx       xxxxxxxxxxxx        xxxxxxxxxxx      xxxxxxxxxxx
         oooooooooooooo         oooooooooooo     ooooooooooo         ooooooooo
         [            ]         [          ]     [         ]         [       ]
         2L          2R         2L        2R     2L        2R        2L      2R

        True if Num(x) / Num(o) >= overlap

        old: [left, right, binning]
        new: [left, right, binning]
        """
        if (old[0] <= new[0] < old[1] or
            old[0] < new[1] <= old[1] or
            new[0] <= old[0] < new[1] or
            new[0] < old[1] <= new[1]) and \
                (min(old[1], new[1]) - max(old[0], new[0]) + 1) / float(max((old[1] - old[0] + 1), (new[1] - new[0] + 1))) >= overlap:
            #print('Identical', old, new)
            return True
        else:
            return False
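    # Illustrative sketch (not part of the pipeline): for hypothetical ranges
    # old = [10, 29, 1] and new = [14, 31, 1], the overlap fraction is
    # (min(29, 31) - max(10, 14) + 1) / max(20, 18) = 16 / 20 = 0.8, so the
    # two lines are considered identical at the default overlap = 0.7.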
    def clustering_kmean(self, Region, Region2):
        # Region = [[row, chan0, chan1, RA, DEC, flag, Binning],[],[],,,[]]
        # Region2 = [[Width, Center],[],[],,,[]]
        MedianWidth = numpy.median(Region2[:, 0])
        LOG.trace('MedianWidth = {}', MedianWidth)

        MaxCluster = int(rules.ClusterRule['MaxCluster'])
        LOG.info('Maximum number of clusters (MaxCluster) = {}', MaxCluster)

        # Determine the optimum number of clusters
        BestScore = -1.0
        # 2010/6/15 Plot the score along the number of the clusters
        ListNcluster = []
        ListScore = []
        ListBestScore = []
        converged = False
        self.DebugOutVer[1] += 1
        for Ncluster in range(1, MaxCluster + 1):
            index0 = len(ListScore)
            # Fix the random seed 2008/5/23
            numpy.random.seed((1234, 567))
            # Try multiple times to suppress random selection effect 2007/09/04
            for Multi in range(min(Ncluster + 1, 10)):
                codebook, diff = VQ.kmeans(Region2, Ncluster, iter=50)
                # codebook = [[clstCentX, clstCentY],[clstCentX,clstCentY],,[]] len=Ncluster
                # diff <= distortion
                NclusterNew = 0
                LOG.trace('codebook={}', codebook)
                # Iterate until no more merging of clusters is found
                while NclusterNew != len(codebook):
                    category, distance = VQ.vq(Region2, codebook)
                    # category = [c0, c0, c1, c2, c0,,,] cluster index assigned to each element (starts with 0)
                    # distance = [d1, d2, d3,,,,,] distance from the center of the belonging cluster
                    LOG.trace('Cluster Category&Distance {}, distance = {}', category, distance)

                    # remove empty lines in codebook
                    codebook = codebook.take([x for x in range(0, len(codebook)) if any(category == x)], axis=0)
                    NclusterNew = len(codebook)

                    # Clear Flag
                    for i in range(len(Region)):
                        Region[i][5] = 1

                    Outlier = 0.0
                    lines = []
                    for Nc in range(NclusterNew):
                        ### 2011/05/17 Tighten the threshold; clean up each cluster by nsigma clipping/flagging
                        ValueList = distance[(category == Nc).nonzero()[0]]
                        Stddev = ValueList.std()
                        Threshold = ValueList.mean() + Stddev * self.nsigma
                        del ValueList
                        LOG.trace('Cluster Clipping Threshold = {}, Stddev = {}', Threshold, Stddev)
                        for i in ((distance * (category == Nc)) > Threshold).nonzero()[0]:
                            Region[i][5] = 0  # set flag to 0
                            Outlier += 1.0
                        # Calculate Cluster Characteristics
                        MaxDistance = max(distance * ((distance < Threshold) * (category == Nc)))
                        indices = [x for x in range(len(category)) if category[x] == Nc and Region[x][5] != 0]
                        properties = Region2.take(indices, axis=0)
                        median_props = numpy.median(properties, axis=0)
                        lines.append([median_props[1], median_props[0], True, MaxDistance])
                    MemberRate = (len(Region) - Outlier) / float(len(Region))
                    MeanDistance = (distance * numpy.transpose(numpy.array(Region))[5]).mean()
                    LOG.trace('lines = {}, MemberRate = {}', lines, MemberRate)

                # 2010/6/15 Plot the score along the number of the clusters
                ListNcluster.append(Ncluster)
                Score = self.clustering_kmean_score(MeanDistance, MedianWidth, NclusterNew, MemberRate)
                ListScore.append(Score)
                LOG.debug('NclusterNew = {}, Score = {}', NclusterNew, Score)

                ### 2017/07/06 save Score to file for test
                if infrastructure.logging.logging_level == infrastructure.logging.LOGGING_LEVELS['trace'] or \
                        infrastructure.logging.logging_level == infrastructure.logging.LOGGING_LEVELS['debug']:
                    with open('ClstProp.%s.%02d.txt' % (self.DebugOutName, self.DebugOutVer[1]), 'a') as fp:
                        fp.writelines('%d,%f,%f,%f,%f,%s\n' % (NclusterNew, Score, MeanDistance,
                                                               MedianWidth, MemberRate, lines))

                if BestScore < 0 or Score < BestScore:
                    BestNcluster = NclusterNew
                    BestScore = Score
                    BestCategory = category.copy()
                    BestCodebook = codebook.copy()
                    BestRegion = Region[:]
                    Bestlines = lines[:]

            ListBestScore.append(min(ListScore[index0:]))
            LOG.debug('Ncluster = {}, BestScore = {}', NclusterNew, ListBestScore[-1])
            # iteration end if Score(N) < Score(N+1),Score(N+2),Score(N+3)
            #if len(ListBestScore) > 3 and \
            #   ListBestScore[-4] <= ListBestScore[-3] and \
            #   ListBestScore[-4] <= ListBestScore[-2] and \
            #   ListBestScore[-4] <= ListBestScore[-1]:
            if len(ListBestScore) >= 10:
                LOG.info('Determined the Number of Clusters to be {}', BestNcluster)
                converged = True
                break

        if converged is False:
            LOG.warn('Clustering analysis not converged. Number of clusters may be greater than upper limit'
                     ' (MaxCluster={})', MaxCluster)

        cluster_info = {
            'algorithm': 'kmean',
            'cluster_score': [ListNcluster, ListScore],
            'detected_lines': Region2,
            'cluster_property': Bestlines,  # [[Center, Width/WHITEN, T/F, ClusterRadius],[],,,[]]
            'cluster_scale': self.CLUSTER_WHITEN
        }
        self._merge_cluster_info(**cluster_info)
        #SDP.ShowClusterScore(ListNcluster, ListScore, ShowPlot, FigFileDir, FigFileRoot)
        #SDP.ShowClusterInchannelSpace(Region2, Bestlines, self.CLUSTER_WHITEN, ShowPlot, FigFileDir, FigFileRoot)
        LOG.info('Final: Ncluster = {}, Score = {}, lines = {}', BestNcluster, BestScore, Bestlines)
        LOG.debug('Category = {}, CodeBook = {}', category, BestCodebook)

        return (BestNcluster, Bestlines, BestCategory, BestRegion)
    def clustering_hierarchy(self, Region, Region2, nThreshold=3.0, nThreshold2=4.5, method='single'):
        """
        Hierarchical clustering.

        method = 'ward'    : Ward's linkage method
                 'single'  : nearest point linkage method
                 'complete': farthest point linkage method
                 'average' : average linkage method
                 'centroid': centroid/UPGMC linkage method
                 'median'  : median/WPGMC linkage method

        1st threshold is set to nThreshold x stddev(distance matrix)
        2nd threshold is set to nThreshold2 x stddev of sub-cluster distance matrix

        in: self.Data -> Region2
            self.Ndata
        out: self.Threshold
             self.Nthreshold
             self.Category
             self.Ncluster
        """
        Data = self.set_data(Region2, ordering=[0, 1])  # Data: numpy[[width, center],[w,c],,,]
        Repeat = 3  # Number of artificial detection points to normalize the cluster distance
        # Calculate LinkMatrix from given data set
        if method.lower() == 'single':  # nearest point linkage method
            H_Clustering = HIERARCHY.single
        elif method.lower() == 'complete':  # farthest point linkage method
            H_Clustering = HIERARCHY.complete
        elif method.lower() == 'average':  # average linkage method
            H_Clustering = HIERARCHY.average
        elif method.lower() == 'centroid':  # centroid/UPGMC linkage method
            H_Clustering = HIERARCHY.centroid
        elif method.lower() == 'median':  # median/WPGMC linkage method
            H_Clustering = HIERARCHY.median
        else:  # Ward's linkage method: default
            H_Clustering = HIERARCHY.ward

        # temporarily add artificial detection points to normalize the cluster distance
        tmp = numpy.zeros((Data.shape[0] + Repeat * 2, Data.shape[1]), float)
        tmp[Repeat * 2:] = Data.copy()
        for i in range(Repeat):
            tmp[i] = [self.nchan // 2, 0]
            tmp[Repeat + i] = [self.nchan // 2, self.nchan - 1]
        #LOG.debug('tmp[:10] = {}', tmp[:10])
        tmpLinkMatrix = H_Clustering(tmp)
        MedianDistance = numpy.median(tmpLinkMatrix.T[2])
        MeanDistance = tmpLinkMatrix.T[2].mean()
        Stddev = tmpLinkMatrix.T[2].std()
        del tmp, tmpLinkMatrix
        LOG.debug('MedianDistance = {}, MeanDistance = {}, Stddev = {}', MedianDistance, MeanDistance, Stddev)
        LOG.debug('Ndata = {}', Data.shape[0])

        # Divide the data set into several clusters.
        # LinkMatrix[n][2]: distance between two data/clusters
        # 1st classification
        LinkMatrix = H_Clustering(Data)
        Nthreshold = nThreshold
        Threshold = MeanDistance + Nthreshold * Stddev
        Category = HIERARCHY.fcluster(LinkMatrix, Threshold, criterion='distance')
        Ncluster = Category.max()
        LOG.debug('nThreshold = {}, nThreshold2 = {}, method = {}', nThreshold, nThreshold2, method)
        LOG.debug('Init Threshold = {}, Init Ncluster = {}', Threshold, Ncluster)

        IDX = numpy.array([x for x in range(len(Data))])
        for k in range(Ncluster):
            C = Category.max()
            NewData = Data[Category == (k + 1)]  # Category starts with 1 (not 0)
            if len(NewData) < 2:
                LOG.debug('skip({}): {}', k, len(NewData))
                continue
            NewIDX = IDX[Category == (k + 1)]
            LinkMatrix = H_Clustering(NewData)  # selected linkage method
            MedianDistance = numpy.median(LinkMatrix.T[2])
            MeanDistance = LinkMatrix.T[2].mean()
            Stddev = LinkMatrix.T[2].std()
            LOG.debug('MedianDistance = {}, MeanDistance = {}, Stddev = {}', MedianDistance, MeanDistance, Stddev)
            # 2nd classification of each sub-cluster
            NewThreshold = MeanDistance + nThreshold2 * Stddev
            LOG.debug('Threshold({}): {}', k, NewThreshold)
            NewCategory = HIERARCHY.fcluster(LinkMatrix, NewThreshold, criterion='distance')
            NewNcluster = NewCategory.max()
            LOG.debug('NewNcluster({}): {}, # of Members({}): {}, subcluster sizes: {}',
                      k, NewNcluster, k, ((Category == k + 1) * 1).sum(),
                      [((NewCategory == kk + 1) * 1).sum() for kk in range(NewNcluster)])
            if NewNcluster > 1:
                for i in range(len(NewData)):
                    if NewCategory[i] > 1:
                        Category[NewIDX[i]] = C + NewCategory[i] - 1
            Ncluster = Category.max()  # update Ncluster

        (Region, Range, Stdev, Category) = self.clean_cluster(Data, Category, Region, nThreshold2, 2)  # nThreshold, NumParam
        # 2017/7/25 renumbering is done in clean_cluster
        Bestlines = []
        Ncluster = len(Range)
        for j in range(Ncluster):
            Bestlines.append([Range[j][1], Range[j][0], True, Range[j][4]])
        LOG.info('Final: Ncluster = {}, lines = {}', Ncluster, Bestlines)
        cluster_info = {
            'algorithm': 'hierarchy',
            'cluster_score': [[1, 2, 3, 4, 5], [1, 2, 3, 4, 5]],
            'detected_lines': Region2,
            'cluster_property': Bestlines,  # [[Center, Width, T/F, ClusterRadius],[],,,[]]
            'cluster_scale': self.CLUSTER_WHITEN
        }
        self._merge_cluster_info(**cluster_info)
        return (Ncluster, Bestlines, Category, Region)
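    # Illustrative sketch (not part of the pipeline): the two scipy calls at
    # the core of the method above, on hypothetical [width, center] data.
    #
    #     import numpy
    #     import scipy.cluster.hierarchy as HIERARCHY
    #     data = numpy.array([[10., 100.], [11., 102.], [50., 900.]])
    #     Z = HIERARCHY.single(data)   # linkage matrix; Z[:, 2] holds merge distances
    #     category = HIERARCHY.fcluster(Z, t=100.0, criterion='distance')
    #     # category -> array([1, 1, 2]): members 0 and 1 join below the
    #     # distance threshold, member 2 forms its own cluster (IDs start at 1)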
    def set_data(self, Observation, ordering='none'):
        """
        Observation: numpy.array([[val1, val2, val3,..,valN],
                                  [val1, val2, val3,..,valN],
                                  ........................
                                  [val1, val2, val3,..,valN]], float)
                     where N is the number of dimensions of the parameter space
        ordering: 'none' or list of ordering of columns
                  e.g., ordering=[2,3,1,0] => [col2,col3,col1,col0]

        Returns the re-ordered 2D data array used for the clustering analysis.
        """
        if ordering != 'none':
            NumParam = len(ordering)
            OrderList = ordering
        else:
            NumParam = len(Observation[0])
            OrderList = list(range(NumParam))
        if isinstance(Observation, list):
            Obs = numpy.array(Observation, float)
        else:
            Obs = Observation.copy()

        if len(Obs.shape) == 2:
            Data = numpy.zeros((Obs.shape[0], NumParam), float)
            for i in range(Obs.shape[0]):
                for j in range(NumParam):
                    Data[i][j] = Obs[i][OrderList[j]]
        else:
            LOG.error('Data should be 2-dimensional. {}-dimensional data was given'.format(len(Obs.shape)))
            raise ValueError('Data should be 2-dimensional!')
        del Obs, OrderList
        return Data
    def clean_cluster(self, Data, Category, Region, Nthreshold, NumParam):
        """
        Clean up clusters by eliminating outliers.
        Radius = StandardDeviation * nThreshold (circle/sphere)

        in: Data
            Category
            Nthreshold
            Ncluster
        out: Region: flag information is added
             Range: Range[Ncluster][5]: [ClusterCenterX, ClusterCenterY, 0, 0, Threshold]
             Stdev: Stdev[Ncluster][5]: [ClusterStddevX, ClusterStddevY, 0, 0, 0]
             Category: renumbered category
        """
        IDX = numpy.array([x for x in range(len(Data))])
        Ncluster = Category.max()
        C = Ncluster + 1
        ValidClusterID = []
        ValidRange = []
        ValidStdev = []
        ReNumber = {}
        Range = numpy.zeros((C, 5), float)
        Stdev = numpy.zeros((C, 5), float)
        for k in range(Ncluster):
            NewData = Data[Category == k + 1].T
            NewIDX = IDX[Category == k + 1]
            for i in range(NumParam):
                Range[k][i] = NewData[i].mean()
                Stdev[k][i] = NewData[i].std()
            if NumParam == 4:
                Tmp = ((NewData - numpy.array([[Range[k][0]], [Range[k][1]], [Range[k][2]], [Range[k][3]]]))**2).sum(axis=0)**0.5
            elif NumParam == 3:
                Tmp = ((NewData - numpy.array([[Range[k][0]], [Range[k][1]], [Range[k][2]]]))**2).sum(axis=0)**0.5
            else:  # NumParam == 2
                Tmp = ((NewData - numpy.array([[Range[k][0]], [Range[k][1]]]))**2).sum(axis=0)**0.5
            Threshold = numpy.median(Tmp) + Tmp.std() * Nthreshold
            #Threshold = Tmp.mean() + Tmp.std() * Nthreshold
            Range[k][4] = Threshold
            LOG.trace('Threshold({}) = {}', k, Threshold)
            Out = NewIDX[Tmp > Threshold]
            #if (len(NewIDX) - len(Out)) < 6:  # max 3 detections for each binning: detected in two binning patterns
            if (len(NewIDX) - len(Out)) < 3:
                # max 3 detections for each binning: detected in at least one
                # binning pattern: sensitive to very narrow lines
                LOG.trace('Non Cluster: {}', len(NewIDX))
                for i in NewIDX:
                    Region[i][5] = 0
                ReNumber[k + 1] = 0
                continue
            LOG.trace('Out Of Cluster ({}): {}', k, len(Out))
            if len(Out) > 0:
                for i in Out:
                    Region[i][5] = 0
            ReNumber[k + 1] = len(ValidClusterID)
            ValidClusterID.append(k)
        for k in ValidClusterID:
            ValidRange.append(Range[k])
            ValidStdev.append(Stdev[k])
        LOG.debug('ReNumbering Table: {}', ReNumber)
        for j in range(len(Category)):
            Category[j] = ReNumber[Category[j]]
        return (Region, numpy.array(ValidRange), numpy.array(ValidStdev), Category)
    def clustering_kmean_score(self, MeanDistance, MedianWidth, Ncluster, MemberRate):
        # Rating
        ### 2011/05/12 modified for (distance==0)
        ### 2014/11/28 further modified for (distance==0)
        ### 2017/07/05 modified to be sensitive to MemberRate
        # MeanDistance: average distance from each cluster center
        #return math.sqrt(((distance * numpy.transpose(numpy.array(Region))[5]).mean())**2.0 + (MedianWidth/2.0)**2.0) \
        #       * (Ncluster + 1.0/Ncluster) * (((1.0 - MemberRate)**0.5 + 1.0)**2.0)
        return (math.sqrt(MeanDistance**2.0 + (MedianWidth / 2.0)**2.0)
                * (Ncluster + 1.0 / Ncluster)
                * ((1.0 - MemberRate) * 100.0 + 1.0))
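    # Illustrative sketch (not part of the pipeline): with hypothetical values
    # MeanDistance = 3.0, MedianWidth = 8.0, Ncluster = 2, MemberRate = 0.9,
    # the score is sqrt(3.0**2 + 4.0**2) * (2 + 0.5) * (0.1 * 100 + 1)
    # = 5.0 * 2.5 * 11.0 = 137.5; lower scores are preferred when selecting
    # the number of clusters in clustering_kmean.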
    def detection_stage(self, Ncluster, nra, ndec, ra0, dec0, grid_ra, grid_dec, category, Region, detect_signal):
        """
        Region = [[row, chan0, chan1, RA, DEC, flag, Binning],[],[],,,[]]
        detect_signal = {ID1: [RA, DEC, [[LineStartChannel1, LineEndChannel1, Binning],
                                         [LineStartChannel2, LineEndChannel2, Binning],
                                         [LineStartChannelN, LineEndChannelN, Binning]]],
                         IDn: [RA, DEC, [[LineStartChannel1, LineEndChannel1, Binning],
                                         [LineStartChannelN, LineEndChannelN, Binning]]]}
        """
        # Create Grid Parameter Space (Ncluster * nra * ndec)
        MinChanBinSp = 50.0
        BinningVariation = 1 + int(math.ceil(math.log(self.nchan / MinChanBinSp) / math.log(4)))
        GridClusterWithBinning = numpy.zeros((Ncluster, BinningVariation, nra, ndec), dtype=numpy.float32)
        GridMember = numpy.zeros((nra, ndec))

        # Set the number of spectra belonging to each gridding position
        for row in range(len(detect_signal)):
            GridMember[int((detect_signal[row][0] - ra0) / grid_ra)][int((detect_signal[row][1] - dec0) / grid_dec)] += 1

        for i in range(len(category)):
            if Region[i][5] == 1:  # valid spectrum
                # binning = 4**n
                n = int(math.log(Region[i][6]) / math.log(4.) + 0.1)
                # if binning is larger than 1, detection is done twice: m => 0.5
                if n == 0:
                    m = 1.0  # case binning=1
                else:
                    m = 0.5
                try:
                    #2014/11/28 Counting is done for each binning separately
                    GridClusterWithBinning[category[i]][n][int((Region[i][3] - ra0) / grid_ra)][int((Region[i][4] - dec0) / grid_dec)] += m
                except IndexError:
                    pass
        #2014/11/28 select the largest value among the different binnings
        GridCluster = GridClusterWithBinning.max(axis=1)

        LOG.trace('GridClusterWithBinning = {}', GridClusterWithBinning)
        LOG.trace('GridCluster = {}', GridCluster)
        LOG.trace('GridMember = {}', GridMember)
        del GridClusterWithBinning

        # 2013/05/29 TN
        # cluster_flag is data for plotting clustering analysis results.
        # It stores GridCluster quantized by given thresholds.
        # It is defined as an integer array and one digit is assigned to
        # one clustering stage in each integer value:
        #
        #     1st digit: detection
        #     2nd digit: validation
        #     3rd digit: smoothing
        #     4th digit: final
        #
        # If the GridCluster value exceeds any threshold, the corresponding
        # digit is incremented. For example, flag 3210 stands for:
        #
        #     value didn't exceed any thresholds in detection, and
        #     exceeded one (out of three) threshold in validation, and
        #     exceeded two (out of three) thresholds in smoothing, and
        #     exceeded three (out of four) thresholds in final.
        #
        threshold = [1.5, 0.5]
        cluster_flag = numpy.zeros(GridCluster.shape, dtype=numpy.uint16)
        flag_digit = self.flag_digits['detection']
        cluster_flag = self.__update_cluster_flag(cluster_flag, 'detection', GridCluster, threshold, flag_digit)

        return (GridCluster, GridMember, cluster_flag)
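    # Illustrative sketch (not part of the pipeline): decoding the per-stage
    # digits of a cluster_flag value with the flag_digits mapping above.
    #
    #     >>> flag = 3210
    #     >>> flag_digits = {'detection': 1, 'validation': 10, 'smoothing': 100, 'final': 1000}
    #     >>> {stage: (flag // digit) % 10 for stage, digit in flag_digits.items()}
    #     {'detection': 0, 'validation': 1, 'smoothing': 2, 'final': 3}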
    def validation_stage(self, GridCluster, GridMember, lines, cluster_flag):
        # A cluster is validated when the number of spectra with a feature
        # belonging to the cluster is greater than or equal to half the number
        # of spectra in the grid.
        # Normally, 3 spectra are created for each grid position, therefore
        # GridMember[ra][dec] = 3 for most of the cases.
        # Normalized validity can be
        #     1/3 (0.2 < V) -> only one detection      -> Questionable
        #     2/3 (0.5 < V) -> two detections          -> Marginal
        #     3/3 (0.7 < V) -> detected in all spectra -> Valid
        # ThresholdValid should be 0.5 -> 0.7 in the future
        (Ncluster, nra, ndec) = GridCluster.shape
        MinChanBinSp = 50.0
        BinningVariation = 1 + int(math.ceil(math.log(self.nchan / MinChanBinSp) / math.log(4)))

        for Nc in range(Ncluster):
            LOG.trace('GridCluster[Nc]: {}', GridCluster[Nc])
            LOG.trace('GridMember: {}', GridMember)
            for x in range(nra):
                for y in range(ndec):
                    if GridMember[x][y] == 0:
                        GridCluster[Nc][x][y] = 0.0
                    elif GridMember[x][y] == 1 and GridCluster[Nc][x][y] > 0.9:
                        GridCluster[Nc][x][y] = 1.0
                    ### 2014/11/28 binning variation is taken into account in the previous stage
                    # normalize validity
                    #else: GridCluster[Nc][x][y] /= float(GridMember[x][y])
                    else:
                        GridCluster[Nc][x][y] = min(GridCluster[Nc][x][y] / float(GridMember[x][y]), 1.0)
            if ((GridCluster[Nc] > self.Questionable) * 1).sum() == 0:
                lines[Nc][2] = False

        threshold = [self.Valid, self.Marginal, self.Questionable]
        flag_digit = self.flag_digits['validation']
        cluster_flag = self.__update_cluster_flag(cluster_flag, 'validation', GridCluster, threshold, flag_digit)
        LOG.trace('GridCluster {}', GridCluster)
        LOG.trace('GridMember {}', GridMember)
        self.GridClusterValidation = GridCluster.copy()

        return (GridCluster, GridMember, lines, cluster_flag)
    def smoothing_stage(self, GridCluster, lines, cluster_flag):
        # Rating:     [0.0, 0.4, 0.5, 0.4, 0.0]
        #             [0.4, 0.7, 1.0, 0.7, 0.4]
        #             [0.5, 1.0, 6.0, 1.0, 0.5]
        #             [0.4, 0.7, 1.0, 0.7, 0.4]
        #             [0.0, 0.4, 0.5, 0.4, 0.0]
        # Rating = 1.0 / (Dx**2 + Dy**2)**(0.5) : if (Dx, Dy) == (0, 0) rating = 6.0
        # NewRating:  [0.0, 0.2, 0.3, 0.2, 0.0]
        #             [0.2, 0.5, 1.0, 0.5, 0.2]
        #             [0.3, 1.0, 6.0, 1.0, 0.3]
        #             [0.2, 0.5, 1.0, 0.5, 0.2]
        #             [0.0, 0.2, 0.3, 0.2, 0.0]
        # NewRating = 1.0 / (Dx**2 + Dy**2) : if (Dx, Dy) == (0, 0) rating = 6.0
        (Ncluster, nra, ndec) = GridCluster.shape
        GridScore = numpy.zeros((2, nra, ndec), dtype=numpy.float32)
        LOG.trace('GridCluster = {}', GridCluster)
        LOG.trace('lines = {}', lines)
        for Nc in range(Ncluster):
            if lines[Nc][2] != False:
                GridScore[:] = 0.0
                for x in range(nra):
                    range_x = list(range(-min(2, x), 0)) + list(range(1, min(3, nra - x)))
                    for y in range(ndec):
                        range_y = list(range(-min(2, y), 0)) + list(range(1, min(3, ndec - y)))
                        # TN refactoring: split the smoothing loop
                        # dx = 0 and dy = 0
                        GridScore[0][x][y] += 6.0 * GridCluster[Nc][x][y]
                        GridScore[1][x][y] += 6.0
                        # dx = 0
                        for dy in range_y:
                            ny = y + dy
                            #Rating = 1.0 / abs(dy)
                            Rating = 1.0 / abs(dy * dy)
                            GridScore[0][x][y] += Rating * GridCluster[Nc][x][ny]
                            GridScore[1][x][y] += Rating
                        # dy = 0
                        for dx in range_x:
                            nx = x + dx
                            #Rating = 1.0 / abs(dx)
                            Rating = 1.0 / abs(dx * dx)
                            GridScore[0][x][y] += Rating * GridCluster[Nc][nx][y]
                            GridScore[1][x][y] += Rating
                        # dx != 0 and dy != 0
                        for dx in range_x:
                            for dy in range_y:
                                if (abs(dx) + abs(dy)) <= 3:
                                    (nx, ny) = (x + dx, y + dy)
                                    #Rating = 1.0 / sqrt(dx*dx + dy*dy)
                                    Rating = 1.0 / (dx * dx + dy * dy)
                                    GridScore[0][x][y] += Rating * GridCluster[Nc][nx][ny]
                                    GridScore[1][x][y] += Rating
                LOG.trace('Score : GridScore[{}][0] = {}', Nc, GridScore[0])
                LOG.trace('Rating: GridScore[{}][1] = {}', Nc, GridScore[1])
                #GridCluster[Nc] = GridScore[0] / GridScore[1]
                GridCluster[Nc] = GridScore[0] / GridScore[1] * 2.0  # for single valid detection
            if ((GridCluster[Nc] > self.Questionable) * 1).sum() < 0.1:
                lines[Nc][2] = False

        threshold = [self.Valid, self.Marginal, self.Questionable]
        flag_digit = self.flag_digits['smoothing']
        cluster_flag = self.__update_cluster_flag(cluster_flag, 'smoothing', GridCluster, threshold, flag_digit)
        LOG.trace('threshold = {}', threshold)
        LOG.trace('GridCluster = {}', GridCluster)

        return (GridCluster, lines, cluster_flag)
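    # Illustrative sketch (not part of the pipeline): the NewRating kernel
    # above follows 1/(dx**2 + dy**2) with 6.0 at the center, e.g.
    # (dx, dy) = (1, 0) -> 1.0, (1, 1) -> 0.5, (2, 0) -> 0.25, (2, 1) -> 0.2,
    # matching (to one decimal) the 5 x 5 matrix in the comment; corner cells
    # with abs(dx) + abs(dy) > 3 are skipped.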
[docs] def final_stage(self, GridCluster, GridMember, Region, Region2, lines, category, grid_ra, grid_dec, broad_component, xorder, yorder, x0, y0, Grid2SpectrumID, index_list, PosList, cluster_flag): (Ncluster, nra, ndec) = GridCluster.shape xorder0 = xorder yorder0 = yorder LOG.trace('GridCluster = {}', GridCluster) LOG.trace('GridMember = {}', GridMember) LOG.trace('lines = {}', lines) LOG.info('Ncluster={}', Ncluster) # Dictionary for final output RealSignal = collections.OrderedDict() HalfGrid = 0.5 * sqrt(grid_ra*grid_ra + grid_dec*grid_dec) MinFWHM = self.MinFWHM MaxFWHM = self.MaxFWHM # for Channel Map velocity range determination 2014/1/12 channelmap_range = [] for i in range(len(lines)): channelmap_range.append(lines[i][:]) for Nc in range(Ncluster): LOG.trace('------00------ Exec for Nth Cluster: Nc={}', Nc) LOG.trace('lines[Nc] = {}', lines[Nc]) if lines[Nc][2] == False: continue Plane = (GridCluster[Nc] > self.Marginal) * 1 if Plane.sum() == 0: lines[Nc][2] = False channelmap_range[Nc][2] = False continue Original = GridCluster[Nc].copy() # Clear GridCluster Nc-th plane GridCluster[Nc] *= 0.0 # Clean isolated grids MemberList, Realmember = self.CleanIsolation(nra, ndec, Original, Plane, GridMember) if len(MemberList) == 0: continue # Blur each SubCluster with the radius of sqrt(Nmember/Pi) * ratio ratio = rules.ClusterRule['BlurRatio'] # Set-up SubCluster for Ns in range(len(MemberList)): # Ns: SubCluster number LOG.trace('------01------ Ns={}', Ns) SubPlane = numpy.zeros((nra, ndec), dtype=numpy.float32) for (x, y) in MemberList[Ns]: SubPlane[x][y] = Original[x][y] ValidPlane, BlurPlane = self.DoBlur(Realmember[Ns], nra, ndec, SubPlane, ratio) LOG.debug('GridCluster.shape = {}', list(GridCluster.shape)) LOG.trace('Original {}', Original) LOG.debug('SubPlane.shape = {}', list(SubPlane.shape)) LOG.trace('SubPlane {}', SubPlane) LOG.debug('BlurPlane.shape = {}', list(BlurPlane.shape)) LOG.trace('BlurPlane {}', BlurPlane) LOG.trace('ValidPlane {}', ValidPlane) LOG.trace('GridClusterV {}', self.GridClusterValidation[Nc]) # 2D fit for each Plane # Use the data for fit if GridCluster[Nc][x][y] > self.Valid # Not use for fit but apply the value at the border if GridCluster[Nc][x][y] > self.Marginal (ylen, xlen) = self.GridClusterValidation[Nc].shape # 2017/9/21 GridClusterValidation should not be used for order determination # 0 <= xorder0,yorder0 <= 5, swap xorder0 and yorder0 if xorder < 0: xorder0 = max(min(numpy.sum(ValidPlane, axis=0).max()-1, 5), 0) if yorder < 0: yorder0 = max(min(numpy.sum(ValidPlane, axis=1).max()-1, 5), 0) #if xorder < 0: xorder0 = max(min(numpy.sum(ValidPlane, axis=1).max()-1, 5), 0) #if yorder < 0: yorder0 = max(min(numpy.sum(ValidPlane, axis=0).max()-1, 5), 0) LOG.trace('(X,Y)order, order0 = ({}, {}) ({}, {})', xorder, yorder, xorder0, yorder0) # clear Flag for i in range(len(category)): Region[i][5] = 1 #while ExceptionLinAlg: FitData = [] LOG.trace('------02-1---- category={}, len(category)={}, ClusterNumber(Nc)={}, SubClusterNumber(Ns)={}', category, len(category), Nc, Ns) #Region format:([row, line[0], line[1], RA, DEC, flag]) dummy = [tuple(Region[i][:5]) for i in range(len(category)) if category[i] == Nc and Region[i][5] == 1 and SubPlane[int((Region[i][3] - x0)/grid_ra)][int((Region[i][4] - y0)/grid_dec)] > self.Valid] LOG.trace('------02-2----- len(dummy)={}, dummy={}', len(dummy), dummy) if len(dummy) == 0: continue # same signal can be detected in a single row with multiple binning # in that case, take maximum width as a representative width 
(Lmax-Lmin) (Lrow, Lmin, Lmax, LRA, LDEC) = dummy[0] for i in range(1, len(dummy)): if Lrow == dummy[i][0]: Lmin, Lmax = (min(Lmin, dummy[i][1]), max(Lmax, dummy[i][2])) else: FitData.append([Lmin, Lmax, LRA, LDEC, 1]) (Lrow, Lmin, Lmax, LRA, LDEC) = dummy[i] FitData.append([Lmin, Lmax, LRA, LDEC, 1]) del dummy # FitData format: [Chan0, Chan1, RA, DEC, flag] LOG.trace('FitData = {}', FitData) # Instantiate SVD solver #LOG.trace('2D Fit Order: xorder0={} yorder0={}', xorder0, yorder0) #solver = SVDSolver2D(xorder0, yorder0) # TN refactoring # Comment out the following if statement since # 1) len(FitData) is always greater than 0. # Lee the code just above start of iteration. # 2) SingularMatrix is always False in this # loop. Exit the loop whenever SingularMatrix # is set to False. See code below. #if len(FitData) == 0 or SingularMatrix: break SingularMatrix = False for iteration in range(3): # iteration loop for 2D fit sigma flagging LOG.trace('------03------ iteration={}', iteration) # effective components of FitData effective = [i for i in range(len(FitData)) if FitData[i][4] == 1] # assertion assert len(effective) > 0 # prepare data for SVD fit #xdata = numpy.array([FitData[i][2] for i in effective], dtype=numpy.float64) #ydata = numpy.array([FitData[i][3] for i in effective], dtype=numpy.float64) #lmindata = numpy.array([FitData[i][0] for i in effective], dtype=numpy.float64) #lmaxdata = numpy.array([FitData[i][1] for i in effective], dtype=numpy.float64) # 2017/9/26 Repeat solver.find_good_solution until not through exception by reducing xorder and yorder SVDsolver = True while(1): LOG.trace('2D Fit Order: xorder0={} yorder0={}', xorder0, yorder0) if(SVDsolver): # Instantiate SVD solver solver = SVDSolver2D(xorder0, yorder0) # prepare data for SVD fit xdata = numpy.array([FitData[i][2] for i in effective], dtype=numpy.float64) ydata = numpy.array([FitData[i][3] for i in effective], dtype=numpy.float64) lmindata = numpy.array([FitData[i][0] for i in effective], dtype=numpy.float64) lmaxdata = numpy.array([FitData[i][1] for i in effective], dtype=numpy.float64) else: # 2017/9/28 old code is incerted for validation purpose # make arrays for coefficient calculation # Matrix MM x A = B -> A = MM^-1 x B M0 = numpy.zeros((xorder0 * 2 + 1) * (yorder0 * 2 + 1), dtype=numpy.float64) M1 = numpy.zeros((xorder0 * 2 + 1) * (yorder0 * 2 + 1), dtype=numpy.float64) B0 = numpy.zeros((xorder0 + 1) * (yorder0 + 1), dtype=numpy.float64) B1 = numpy.zeros((xorder0 + 1) * (yorder0 + 1), dtype=numpy.float64) MM0 = numpy.zeros([(xorder0 + 1) * (yorder0 + 1), (xorder0 + 1) * (yorder0 + 1)], dtype=numpy.float64) MM1 = numpy.zeros([(xorder0 + 1) * (yorder0 + 1), (xorder0 + 1) * (yorder0 + 1)], dtype=numpy.float64) for (Width, Center, x, y, flag) in FitData: if flag == 1: for k in range(yorder0 * 2 + 1): for j in range(xorder0 * 2 + 1): M0[j + k * (xorder0 * 2 + 1)] += math.pow(x, j) * math.pow(y, k) for k in range(yorder0 + 1): for j in range(xorder0 + 1): B0[j + k * (xorder0 + 1)] += math.pow(x, j) * math.pow(y, k) * Center for k in range(yorder0 * 2 + 1): for j in range(xorder0 * 2 + 1): M1[j + k * (xorder0 * 2 + 1)] += math.pow(x, j) * math.pow(y, k) for k in range(yorder0 + 1): for j in range(xorder0 + 1): B1[j + k * (xorder0 + 1)] += math.pow(x, j) * math.pow(y, k) * Width # make Matrix MM0,MM1 and calculate A0,A1 for K in range((xorder0 + 1) * (yorder0 + 1)): k0 = K % (xorder0 + 1) k1 = int(K // (xorder0 + 1)) for J in range((xorder0 + 1) * (yorder0 + 1)): j0 = J % (xorder0 + 1) j1 = int(J // (xorder0 + 1)) 
                                    MM0[J, K] = M0[j0 + k0 + (j1 + k1) * (xorder0 * 2 + 1)]
                                    MM1[J, K] = M1[j0 + k0 + (j1 + k1) * (xorder0 * 2 + 1)]
                            LOG.trace('OLD:MM0 = {}', MM0.tolist())
                            LOG.trace('OLD:B0 = {}', B0.tolist())
                        try:
                            if SVDsolver:
                                solver.set_data_points(xdata, ydata)
                                A0 = solver.find_good_solution(lmaxdata)
                                A1 = solver.find_good_solution(lmindata)
                                LOG.trace('SVD: A0={}', A0.tolist())
                                LOG.trace('SVD: A1={}', A1.tolist())
                                break
                                # verification
                                #diff_lmin = numpy.zeros(len(effective), dtype=numpy.float64)
                                #diff_lmax = numpy.zeros(len(effective), dtype=numpy.float64)
                                #for ieff in range(len(effective)):
                                #    eff = effective[ieff]
                                #    lmin_ex = FitData[eff][0]
                                #    lmax_ex = FitData[eff][1]
                                #    x = FitData[eff][2]
                                #    y = FitData[eff][3]
                                #    lmin_fit, lmax_fit = _eval_poly(xorder0+1, yorder0+1, x, y, A0, A1)
                                #    diff_lmin[ieff] = abs((lmin_fit - lmin_ex) / lmin_ex)
                                #    diff_lmax[ieff] = abs((lmax_fit - lmax_ex) / lmax_ex)
                                #LOG.trace('SVD: Lmin difference: max %s, min %s, mean %s, std %s' % (diff_lmin.max(), diff_lmin.min(), diff_lmin.mean(), diff_lmin.std()))
                                #LOG.trace('SVD: Lmax difference: max %s, min %s, mean %s, std %s' % (diff_lmax.max(), diff_lmax.min(), diff_lmax.mean(), diff_lmax.std()))
                            else:
                                A0 = LA.solve(MM0, B0)
                                A1 = LA.solve(MM1, B1)
                                LOG.trace('OLD: A0={}', A0.tolist())
                                LOG.trace('OLD: A1={}', A1.tolist())
                                del MM0, MM1, B0, B1, M0, M1
                                break
                        except Exception as e:
                            LOG.trace('------04------ in exception loop SingularMatrix={}', SingularMatrix)
                            import traceback
                            LOG.trace(traceback.format_exc())
                            if xorder0 != 0 or yorder0 != 0:
                                xorder0 = max(xorder0 - 1, 0)
                                yorder0 = max(yorder0 - 1, 0)
                                LOG.info('Fit failed. Trying lower order ({}, {})', xorder0, yorder0)
                            else:
                                SingularMatrix = True
                                break
                    if SingularMatrix:
                        break

                    # Calculate Sigma
                    # Sigma should be calculated in the upper stage
                    # Fit0: Center or Lmax, Fit1: Width or Lmin
                    Diff = []
                    # TN refactoring
                    # Calculation of Diff is duplicated here and in the
                    # following clipping stage, so evaluate Diff only once
                    # here and reuse it in clipping.
                    for (Width, Center, x, y, flag) in FitData:
                        LOG.trace('{} {} {} {} {} {}', xorder0, yorder0, x, y, A0, A1)
                        (Fit0, Fit1) = _eval_poly(xorder0 + 1, yorder0 + 1, x, y, A0, A1)
                        Fit0 -= Center
                        Fit1 -= Width
                        Diff.append(sqrt(Fit0 * Fit0 + Fit1 * Fit1))
                    if len(effective) > 1:
                        npdiff = numpy.array(Diff)[effective]
                        Threshold = npdiff.mean()
                        Threshold += sqrt(numpy.square(npdiff - Threshold).mean()) * self.nsigma
                    else:
                        Threshold = Diff[effective[0]] * 2.0
                    LOG.trace('Diff = {}', [Diff[i] for i in effective])
                    LOG.trace('2D Fit Threshold = {}', Threshold)

                    # Sigma Clip
                    NFlagged = 0
                    Number = len(FitData)
                    ### 2011/05/15
                    for i in range(Number):
                        # Reuse Diff
                        if Diff[i] <= Threshold:
                            FitData[i][4] = 1
                        else:
                            FitData[i][4] = 0
                            NFlagged += 1
                    LOG.trace('2D Fit Flagged/All = ({}, {})', NFlagged, Number)
                    #2009/10/15 compare the number of the remainder and the fitting order
                    if (Number - NFlagged) <= max(xorder0, yorder0) or Number == NFlagged:
                        SingularMatrix = True
                        break
                    #2017/9/27 exit the loop if there is no update (nothing is
                    # flagged, or the number of flagged data is the same as in
                    # the previous iteration)
                    if NFlagged == 0:
                        break
                    if iteration == 0:
                        NFlagOrg = NFlagged
                    elif NFlagOrg == NFlagged:
                        break
                # Iteration End
                ### 2011/05/15 Fitting is no longer (Width, Center) but (minChan, maxChan)
                LOG.trace('------06------ End of Iteration: SingularMatrix={}', SingularMatrix)
                if SingularMatrix:
                    # skip to the next Ns (SubCluster)
                    LOG.trace('------06b----- Skip to the next SubCluster')
                    continue

                LOG.trace('------07------ SingularMatrix=False')
                # Clear FitData and restore all relevant data
                # FitData: [(Chan0, Chan1, RA, DEC, Flag)]
                FitData = []
                for i in range(len(category)):
                    if category[i] == Nc and Region[i][5] == 1 and SubPlane[int((Region[i][3] - x0) / grid_ra)][int((Region[i][4] - y0) / grid_dec)] > self.Valid:
                        FitData.append(tuple(Region2[i][:5]))
                if len(FitData) == 0:
                    continue

                # for Channel Map velocity range determination 2014/1/12
                (MaskMin, MaskMax) = (10000.0, 0.0)
                # Calculate Fit for each position
                LOG.trace('------08------ Calc Fit for each pos')
                for x in range(nra):
                    for y in range(ndec):
                        if ValidPlane[x][y] == 1:
                            LOG.trace('------09------ in ValidPlane x={} y={}', x, y)
                            for PID in Grid2SpectrumID[x][y]:
                                ID = index_list[PID]
                                ### 2011/05/15 (Width, Center) -> (minChan, maxChan)
                                (Chan1, Chan0) = _eval_poly(xorder0 + 1, yorder0 + 1, PosList[0][PID], PosList[1][PID], A0, A1)
                                Fit0 = 0.5 * (Chan0 + Chan1)
                                Fit1 = (Chan1 - Chan0) + 1.0
                                LOG.trace('Fit0, Fit1 = {}, {}', Fit0, Fit1)
                                # 2015/04/23 remove MaxFWHM check
                                if (Fit1 >= MinFWHM):  # and (Fit1 <= MaxFWHM):
                                    ProtectMask = self.calc_ProtectMask(Fit0, Fit1, self.nchan, MinFWHM, MaxFWHM)
                                    # for Channel map velocity range determination 2014/1/12
                                    MaskCen = (ProtectMask[0] + ProtectMask[1]) / 2.0
                                    if MaskMin > MaskCen:
                                        MaskMin = max(0, MaskCen)
                                    if MaskMax < MaskCen:
                                        MaskMax = min(self.nchan - 1, MaskCen)
                                    if ID in RealSignal:
                                        RealSignal[ID][2].append(ProtectMask)
                                    else:
                                        RealSignal[ID] = [PosList[0][PID], PosList[1][PID], [ProtectMask]]
                                else:
                                    LOG.trace('------10------ out of range Fit0={} Fit1={}', Fit0, Fit1)
                        elif BlurPlane[x][y] == 1:
                            LOG.trace('------11------ in BlurPlane x={} y={}', x, y)
                            # in the Blur Plane, Fit is not extrapolated;
                            # use the nearest value in the Valid Plane instead
                            # Search the nearest Valid Grid
                            Nearest = []
                            square_aspect = grid_ra / grid_dec
                            square_aspect *= square_aspect
                            Dist2 = numpy.inf
                            for xx in range(nra):
                                for yy in range(ndec):
                                    if ValidPlane[xx][yy] == 1:
                                        Dist3 = (xx - x) * (xx - x) * square_aspect + (yy - y) * (yy - y)
                                        if Dist2 > Dist3:
                                            Nearest = [xx, yy]
                                            Dist2 = Dist3
                            (RA0, DEC0) = (x0 + grid_ra * (x + 0.5), y0 + grid_dec * (y + 0.5))
                            (RA1, DEC1) = (x0 + grid_ra * (Nearest[0] + 0.5), y0 + grid_dec * (Nearest[1] + 0.5))

                            # Setup the position near the border
                            RA2 = RA1 - (RA1 - RA0) * HalfGrid / sqrt(Dist2)
                            DEC2 = DEC1 - (DEC1 - DEC0) * HalfGrid / sqrt(Dist2)
                            LOG.trace('[X,Y],[XX,YY] = [{},{}],{}', x, y, Nearest)
                            LOG.trace('(RA0,DEC0),(RA1,DEC1),(RA2,DEC2) = ({:.5},{:.5}),({:.5},{:.5}),({:.5},{:.5})',
                                      RA0, DEC0, RA1, DEC1, RA2, DEC2)

                            # Calculate Fit and apply the same value to all the spectra in the Blur Grid
                            ### 2011/05/15 (Width, Center) -> (minChan, maxChan)
                            # Border case
                            #(Chan0, Chan1) = _eval_poly(xorder0+1, yorder0+1, RA2, DEC2, A0, A1)
                            # Center case
                            (Chan1, Chan0) = _eval_poly(xorder0 + 1, yorder0 + 1, RA1, DEC1, A0, A1)
                            Fit0 = 0.5 * (Chan0 + Chan1)
                            Fit1 = (Chan1 - Chan0)
                            LOG.trace('Fit0, Fit1 = {}, {}', Fit0, Fit1)
                            # 2015/04/23 remove MaxFWHM check
                            if (Fit1 >= MinFWHM):  # and (Fit1 <= MaxFWHM):
                                ProtectMask = self.calc_ProtectMask(Fit0, Fit1, self.nchan, MinFWHM, MaxFWHM)
                                for PID in Grid2SpectrumID[x][y]:
                                    ID = index_list[PID]
                                    if ID in RealSignal:
                                        RealSignal[ID][2].append(ProtectMask)
                                    else:
                                        RealSignal[ID] = [PosList[0][PID], PosList[1][PID], [ProtectMask]]
                            else:
                                LOG.trace('------12------ out of range Fit0={} Fit1={}', Fit0, Fit1)
                                continue

                # Add every SubCluster to GridCluster just for the Plot
                GridCluster[Nc] += BlurPlane
                #if not SingularMatrix: GridCluster[Nc] += BlurPlane

            if ((GridCluster[Nc] > 0.5) * 1).sum() < self.Questionable or MaskMax == 0.0:
                lines[Nc][2] = False
                channelmap_range[Nc][2] = False
            else:
                # for Channel map velocity range determination 2014/1/12 arbitrary factor 0.8
                #channelmap_range[Nc][1] = (MaskMax - MaskMin - 10) * 0.8
                #channelmap_range[Nc][1] = MaskMax - MaskMin + lines[Nc][1] / 2.0
                # MaskMax - MaskMin is a maximum offset of the line center
                channelmap_range[Nc][1] = MaskMax - MaskMin + lines[Nc][1]
                LOG.info('Nc, MaskMax, Min: {}, {}, {}', Nc, MaskMax, MaskMin)
                LOG.info('channelmap_range[Nc]: {}', channelmap_range[Nc])
                LOG.info('lines[Nc]: {}', lines[Nc])

            for x in range(nra):
                for y in range(ndec):
                    if Original[x][y] > self.Valid:
                        GridCluster[Nc][x][y] = 2.0
                    elif GridCluster[Nc][x][y] > 0.5:
                        GridCluster[Nc][x][y] = 1.0

        threshold = [1.5, 0.5, 0.5, 0.5]
        flag_digit = self.flag_digits['final']
        cluster_flag = self.__update_cluster_flag(cluster_flag, 'final', GridCluster, threshold, flag_digit)

        return (RealSignal, lines, channelmap_range, cluster_flag)
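    # A minimal sketch (illustration only, not called anywhere in the
    # pipeline) of the clipping rule used in the iteration loop above:
    # residuals between the fitted and detected line edges are thresholded
    # at mean + nsigma * rms. The method name and sample residuals are
    # hypothetical.
    def _example_sigma_clip_threshold(self):
        npdiff = numpy.array([1.0, 2.0, 3.0, 10.0])  # hypothetical fit residuals
        threshold = npdiff.mean()
        threshold += sqrt(numpy.square(npdiff - threshold).mean()) * self.nsigma
        # with nsigma=3: threshold ~ 4.0 + 3 * 3.54 ~ 14.6; FitData entries
        # whose residual exceeds the threshold get flag=0
        return threshold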
[docs]
    def CleanIsolation(self, nra, ndec, Original, Plane, GridMember):
        """
        Clean isolated clusters.

        Returns (MemberList, Realmember); if no cluster is left after the
        cleaning, both lists are empty.

        MemberList contains the positions of cluster members with
        Marginal+Valid detection:
            MemberList[n]: [(x00,y00), (x01,y01), ..., (x0k-1,y0k-1)]

        Realmember contains the number of cluster members with Valid
        detection only, one count per SubCluster:
            Realmember[n]: number of Valid members in the n-th SubCluster
        """
        Nmember = []     # number of positions where value > self.Marginal in each SubCluster
        Realmember = []  # number of positions where value > self.Valid in each SubCluster
        MemberList = []
        NsubCluster = 0
        # Separate cluster members into several SubClusters by spatial connection
        for x in range(nra):
            for y in range(ndec):
                if Plane[x][y] == 1:
                    Plane[x][y] = 2
                    SearchList = [(x, y)]
                    M = 1
                    if Original[x][y] > self.Valid:
                        MM = 1
                    else:
                        MM = 0
                    MemberList.append([(x, y)])
                    while len(SearchList) != 0:
                        cx, cy = SearchList[0]
                        #for dx in [-1, 0, 1]:
                        for dx in range(-min(1, cx), min(2, nra - cx)):
                            #for dy in [-1, 0, 1]:
                            for dy in range(-min(1, cy), min(2, ndec - cy)):
                                (nx, ny) = (cx + dx, cy + dy)
                                #if 0 <= nx < nra and 0 <= ny < ndec and Plane[nx][ny] == 1:
                                if Plane[nx][ny] == 1:
                                    Plane[nx][ny] = 2
                                    SearchList.append((nx, ny))
                                    M += 1
                                    if Original[nx][ny] > self.Valid:
                                        MM += 1
                                    MemberList[NsubCluster].append((nx, ny))
                        del SearchList[0]
                    Nmember.append(M)
                    Realmember.append(MM)
                    NsubCluster += 1

        if len(Nmember) > 0:
            # Threshold is set to half the number of the largest cluster in the plane
            #Threshold = max(Nmember) / 2.0
            Threshold = min(0.5 * max(Realmember), 3)
            for n in range(NsubCluster - 1, -1, -1):
                # isolated cluster made from a single spectrum should be omitted
                if Nmember[n] == 1:
                    (x, y) = MemberList[n][0]
                    if GridMember[x][y] <= 1:
                        Nmember[n] = 0
                # Sub-Cluster whose number of members is below the threshold is cleaned
                if Nmember[n] < Threshold:
                    for (x, y) in MemberList[n]:
                        Plane[x][y] = 0  # was 'Plane[x][y] == 0', a no-op comparison
                    del Nmember[n], Realmember[n], MemberList[n]

        return MemberList, Realmember
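    # A minimal sketch (illustration only, hypothetical helper) of the
    # connectivity rule CleanIsolation relies on: each cell is linked to its
    # 8 neighbours, with the range() bounds clipping dx/dy at the plane edges
    # so no explicit bounds check is needed during the search.
    @staticmethod
    def _example_eight_connected_neighbours(cx, cy, nra, ndec):
        # mirrors the dx/dy loops above; yields only in-bounds cells,
        # including (cx, cy) itself (already marked as visited there)
        for dx in range(-min(1, cx), min(2, nra - cx)):
            for dy in range(-min(1, cy), min(2, ndec - cy)):
                yield (cx + dx, cy + dy)
    # e.g. for the corner cell (0, 0) of a 4x4 plane this yields only the
    # four cells of the 2x2 corner block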
[docs]
    def DoBlur(self, Realmember, nra, ndec, SubPlane, ratio):
        # Calculate the Blur radius
        BlurF = sqrt(Realmember / 3.141592653) * ratio + 1.5
        Blur = int(BlurF)
        # Set up the kernel for convolution
        # caution: if nra < (Blur*2+1) and ndec < (Blur*2+1),
        #  the dimension of convolve2d(SubPlane, kernel) becomes
        #  not (nra, ndec) but (Blur*2+1, Blur*2+1)
        if nra < (Blur * 2 + 1) and ndec < (Blur * 2 + 1):
            Blur = int((max(nra, ndec) - 1) // 2)
        kernel = numpy.zeros((Blur * 2 + 1, Blur * 2 + 1), dtype=int)
        for x in range(Blur * 2 + 1):
            dx = Blur - x
            for y in range(Blur * 2 + 1):
                dy = Blur - y
                if sqrt(dx * dx + dy * dy) <= BlurF:
                    kernel[x][y] = 1
        # ValidPlane is used for the fitting parameters;
        # BlurPlane is not used for fitting but just extends the
        # parameters determined in ValidPlane
        return (SubPlane > self.Valid) * 1, (convolve2d(SubPlane, kernel) > self.Marginal) * 1
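    # A worked example (illustrative values only, hypothetical helper) of the
    # blur radius computed in DoBlur: the radius grows with the square root
    # of the number of Valid members, so the blurred footprint scales roughly
    # with the linear size of the cluster.
    @staticmethod
    def _example_blur_radius(Realmember=12, ratio=0.3):
        BlurF = sqrt(Realmember / 3.141592653) * ratio + 1.5
        # sqrt(12/pi) ~ 1.95, so BlurF ~ 1.95 * 0.3 + 1.5 ~ 2.09 and Blur = 2
        return int(BlurF)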
[docs]
    def calc_ProtectMask(self, Center, Width, nchan, MinFWHM, MaxFWHM):
        """
        Return ProtectMask: [MaskL, MaskR]
        """
        #Allowance = Fit1 / 2.0 * 1.3
        # To keep the broad line region, make the allowance larger
        ### 2011/05/16 allowance + 0.5 -> 2.5 for a sharp line
        ### 2011/05/16 factor 1.5 -> 2.0 for a broad line
        #Allowance = min(Fit1 / 2.0 * 2.0, MaxFWHM / 2.0)
        ### 2011/11/22 Allowance is too narrow for the new line finding algorithm
        #Allowance = min(Fit1 + 5.0, self.MaxFWHM / 2.0)
        ### 2015/04/23 Allowance = MaxFWHM at Width=MaxFWHM, Allowance = 2*MinFWHM+10 at Width=MinFWHM
        Allowance = ((MaxFWHM - Width) * (2.0 * MinFWHM + 10.0) + (Width - MinFWHM) * MaxFWHM) / (MaxFWHM - MinFWHM) / 2.0
        ### 2011/10/21 the left-side mask exceeded nchan
        ProtectMask = [min(max(int(Center - Allowance), 0), nchan - 1),
                       min(int(Center + Allowance), nchan - 1)]
        #Allowance = Fit1 / 2.0 * 1.5
        #ProtectMask = [max(int(Fit0 - Allowance + 0.5), 0), min(int(Fit0 + Allowance + 0.5), nchan - 1)]
        LOG.trace('Allowance = {} ProtectMask = {}', Allowance, ProtectMask)
        return ProtectMask
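    # A worked example (illustrative values only, hypothetical helper) of the
    # Allowance interpolation in calc_ProtectMask: it varies linearly from
    # (2*MinFWHM+10)/2 at Width=MinFWHM to MaxFWHM/2 at Width=MaxFWHM.
    @staticmethod
    def _example_allowance(MinFWHM=5.0, MaxFWHM=40.0):
        allowances = []
        for Width in (MinFWHM, MaxFWHM):
            Allowance = ((MaxFWHM - Width) * (2.0 * MinFWHM + 10.0)
                         + (Width - MinFWHM) * MaxFWHM) / (MaxFWHM - MinFWHM) / 2.0
            allowances.append(Allowance)
        # with MinFWHM=5 and MaxFWHM=40: Allowance is 10.0 at Width=5
        # (=(2*5+10)/2) and 20.0 at Width=40 (=40/2)
        return allowances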
    def __merge_lines(self, lines, nchan):
        nlines = len(lines)
        if nlines < 1:
            return []
        elif nlines < 2:
            return lines
        else:
            # TN refactoring
            # New line merge algorithm that doesn't require a 1-d array with
            # length of nchan+2. It will be faster when nchan is large, while
            # it would be slow when the number of lines is (extremely) large.
            nlines *= 2
            flat_lines = numpy.array(lines).reshape((nlines))
            sorted_index = flat_lines.argsort()
            flag = -1
            left_edge = flat_lines[sorted_index[0]]
            nedges = 0
            for i in range(1, nlines - 2):
                if sorted_index[i] % 2 == 0:
                    flag -= 1
                else:
                    #flag = min(0, flag + 1)
                    flag += 1
                if flag == 0 and flat_lines[sorted_index[i]] != flat_lines[sorted_index[i + 1]]:
                    sorted_index[nedges] = left_edge
                    sorted_index[nedges + 1] = flat_lines[sorted_index[i]]
                    nedges += 2
                    left_edge = flat_lines[sorted_index[i + 1]]
            sorted_index[nedges] = left_edge
            sorted_index[nedges + 1] = flat_lines[sorted_index[-1]]
            nedges += 2
            return sorted_index[:nedges].reshape((nedges // 2, 2)).tolist()
            #region = numpy.ones(nchan + 2, dtype=int)
            #for [chan0, chan1] in lines:
            #    region[chan0 + 1:chan1 + 1] = 0
            #dummy = (region[1:] - region[:-1]).nonzero()[0]
            #return dummy.reshape((len(dummy) // 2, 2)).tolist()

    def __update_cluster_flag(self, cluster_flag, stage, GridCluster, threshold, factor):
        #cluster_flag = self.cluster_info['cluster_flag']
        for t in threshold:
            cluster_flag = cluster_flag + factor * (GridCluster > t)
        #self.cluster_info['cluster_flag'] = cluster_flag
        self.cluster_info['%s_threshold' % (stage)] = threshold
        #LOG.trace('cluster_flag = {}', cluster_flag)
        return cluster_flag
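# A minimal sketch (illustration only, hypothetical function) of what
# __merge_lines computes: the union of overlapping [chmin, chmax] intervals.
# This plain formulation is for clarity; the method itself uses a sorted
# edge-counting algorithm to avoid allocating an nchan-sized work array.
def _example_merge_lines(lines):
    merged = []
    for chmin, chmax in sorted(lines):
        if merged and chmin <= merged[-1][1]:
            merged[-1][1] = max(merged[-1][1], chmax)  # overlap: extend interval
        else:
            merged.append([chmin, chmax])              # gap: start a new interval
    return merged
# e.g. _example_merge_lines([[10, 20], [15, 30], [40, 50]]) -> [[10, 30], [40, 50]]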
[docs]
def convolve2d(data, kernel, mode='nearest', cval=0.0):
    """
    2-D convolution function.

    mode = 'nearest'   use the nearest pixel value for pixels beyond the edge
           'constant'  use cval for pixels beyond the edge
    """
    (ndx, ndy) = data.shape
    (nkx, nky) = kernel.shape
    edgex = int(0.5 * (nkx - 1))
    edgey = int(0.5 * (nky - 1))
    dummy = numpy.ones((ndx + 2 * edgex, ndy + 2 * edgey), dtype=numpy.float64) * cval
    dummy[edgex:ndx + edgex, edgey:ndy + edgey] = data
    if mode == 'nearest':
        dummy[0:edgex, 0:edgey] = data[0][0]
        dummy[0:edgex, edgey + ndy:] = data[0][ndy - 1]
        dummy[edgex + ndx:, 0:edgey] = data[ndx - 1][0]
        dummy[edgex + ndx:, edgey + ndy:] = data[ndx - 1][ndy - 1]
        for i in range(ndx):
            dummy[i + edgex, 0:edgey] = data[i][0]
            dummy[i + edgex, edgey + ndy:] = data[i][ndy - 1]
        for i in range(ndy):
            dummy[0:edgex, i + edgey] = data[0][i]
            dummy[edgex + ndx:, i + edgey] = data[ndx - 1][i]
    cdata = numpy.zeros((ndx, ndy), dtype=numpy.float64)
    for ix in range(ndx):
        for iy in range(ndy):
            for jx in range(nkx):
                for jy in range(nky):
                    idx = ix + jx
                    idy = iy + jy
                    val = dummy[idx][idy]
                    cdata[ix][iy] += kernel[jx][jy] * val
    return cdata
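# A minimal check (illustration only, hypothetical function) of convolve2d
# behaviour: with the default mode 'nearest' the input is padded by
# replicating edge pixels, and a unit impulse convolved with an all-ones
# 3x3 kernel spreads uniformly over its 3x3 neighbourhood.
def _example_convolve2d_impulse():
    data = numpy.zeros((5, 5), dtype=numpy.float64)
    data[2, 2] = 1.0                        # unit impulse in the centre
    kernel = numpy.ones((3, 3), dtype=int)
    smoothed = convolve2d(data, kernel)
    assert smoothed[1:4, 1:4].sum() == 9.0  # impulse smeared over 3x3 cells
    return smoothed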
def _eval_poly(xorder, yorder, x, y, xcoeff, ycoeff):
    xpoly = 0.0
    ypoly = 0.0
    yk = 1.0
    idx = 0
    for k in range(yorder):
        xjyk = yk
        for j in range(xorder):
            #xjyk = math.pow(x, j) * math.pow(y, k)
            #xpoly += xjyk * xcoeff[j + k * xorder]
            #ypoly += xjyk * ycoeff[j + k * xorder]
            xpoly += xjyk * xcoeff[idx]
            ypoly += xjyk * ycoeff[idx]
            xjyk *= x
            idx += 1
        yk *= y
    return xpoly, ypoly


def _to_validated_lines(detect_lines):
    # conversion from [chmin, chmax] to [center, width, T/F]
    lines = []
    for line_prop in detect_lines.values():
        for line in line_prop[2]:
            if line not in lines:
                lines.append(line)
    lines_withflag = [[0.5 * sum(x), x[1] - x[0], True] for x in lines]
    return lines_withflag
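# A minimal sketch (illustration only, hypothetical input) of the conversion
# performed by _to_validated_lines: each detected [chmin, chmax] window
# becomes a [center, width, True] entry, with duplicates removed first.
def _example_to_validated_lines():
    detect_lines = {0: (12.3, 45.6, [[10, 20], [40, 50]])}  # (RA, DEC, windows)
    lines = _to_validated_lines(detect_lines)
    assert lines == [[15.0, 10, True], [45.0, 10, True]]
    return lines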
[docs]
class SVDSolver2D(object):
    CONDITION_NUMBER_LIMIT = 1.0e-12

    def __init__(self, xorder, yorder):
        self.xorder = xorder
        self.yorder = yorder

        assert 0 <= self.xorder
        assert 0 <= self.yorder

        # internal storage for solver
        self.N = 0
        self.L = (self.xorder + 1) * (self.yorder + 1)

        # design matrix
        self.storage = numpy.empty(self.N * self.L, dtype=numpy.float64)
        self.G = None

        # for SVD
        self.Vs = numpy.empty((self.L, self.L), dtype=numpy.float64)
        self.B = numpy.empty(self.L, dtype=numpy.float64)
        self.U = None
[docs]
    def set_data_points(self, x, y):
        nx = len(x)
        ny = len(y)
        LOG.trace('nx, ny = {}, {}', nx, ny)
        assert nx == ny
        if self.N < nx:
            self.storage.resize(nx * self.L)
            #self.G.resize((nx, self.L))
        self.N = nx
        assert self.L <= self.N
        self.G = self.storage[:self.N * self.L].reshape((self.N, self.L))
        # matrix operation
        self._set_design_matrix(x, y)
        #self._svd()

    def _set_design_matrix(self, x, y):
        # The design matrix G is a basis array that stores gj(xi)
        # where g0 = 1, g1 = x, g2 = x^2, g3 = x^3,
        #       g4 = y, g5 = x y, g6 = x^2 y, g7 = x^3 y,
        #       g8 = y^2, g9 = x y^2, g10 = x^2 y^2, g11 = x^3 y^2,
        #       g12 = y^3, g13 = x y^3, g14 = x^2 y^3, g15 = x^3 y^3
        # if xorder = 3 and yorder = 3
        for k in range(self.N):
            yp = 1.0
            for i in range(self.yorder + 1):
                xp = 1.0
                for j in range(self.xorder + 1):
                    l = j + (self.xorder + 1) * i
                    self.G[k, l] = xp * yp
                    xp *= x[k]
                yp *= y[k]

    def _do_svd(self):
        LOG.trace('G.shape={}', self.G.shape)
        self.U, self.s, self.Vh = LA.svd(self.G, full_matrices=False)
        LOG.trace('U.shape={} (N,L)=({},{})', self.U.shape, self.N, self.L)
        LOG.trace('s.shape={}', self.s.shape)
        LOG.trace('Vh.shape={}', self.Vh.shape)
        LOG.trace('s = {}', self.s)
        assert self.U.shape == (self.N, self.L)
        assert len(self.s) == self.L
        assert self.Vh.shape == (self.L, self.L)

    def _svd_with_mask(self, nmask=0):
        if not hasattr(self, 's'):
            # do SVD
            self._do_svd()
        assert nmask < self.L
        for icol in range(self.L):
            if self.L - 1 - icol < nmask:
                sinv = 0.0
            else:
                sinv = 1.0 / self.s[icol]
            for irow in range(self.L):
                self.Vs[irow, icol] = self.Vh[icol, irow] * sinv

    def _svd_with_eps(self, eps=1.0e-7):
        if not hasattr(self, 's'):
            # do SVD
            self._do_svd()
        assert 0.0 < eps
        # the max value of s is always s[0] since it is sorted
        # in descending order
        #threshold = self.s.max() * eps
        threshold = self.s[0] * eps
        for icol in range(self.L):
            if self.s[icol] < threshold:
                sinv = 0.0
            else:
                sinv = 1.0 / self.s[icol]
            for irow in range(self.L):
                self.Vs[irow, icol] = self.Vh[icol, irow] * sinv

    def _svd(self, eps):
        LOG.trace('G.shape={}', self.G.shape)
        self.U, s, Vh = LA.svd(self.G, full_matrices=False)
        LOG.trace('U.shape={} (N,L)=({},{})', self.U.shape, self.N, self.L)
        LOG.trace('s.shape={}', s.shape)
        LOG.trace('Vh.shape={}', Vh.shape)
        #LOG.trace('U=%s'%(self.U))
        #LOG.trace('s=%s'%(s))
        #LOG.trace('Vh=%s'%(Vh))
        assert self.U.shape == (self.N, self.L)
        assert len(s) == self.L
        assert Vh.shape == (self.L, self.L)
        assert 0.0 < eps

        # absolute_s = abs(s)
        # condition_number = absolute_s.min() / absolute_s.max()
        # if condition_number < self.CONDITION_NUMBER_LIMIT:
        #     LOG.trace('smax {}, smin {}, condition_number is {}',
        #               absolute_s.max(), absolute_s.min(), condition_number)
        #     raise RuntimeError('singular matrix')

        threshold = s.max() * eps
        for i in range(self.L):
            if s[i] < threshold:
                s[i] = 0.0
            else:
                s[i] = 1.0 / s[i]
        for icol in range(self.L):
            for irow in range(self.L):
                self.Vs[irow, icol] = Vh[icol, irow] * s[icol]

    def _eval_poly_from_G(self, row, coeff):
        idx = 0
        poly = 0.0
        for k in range(self.yorder + 1):
            for j in range(self.xorder + 1):
                poly += self.G[row, idx] * coeff[idx]
                idx += 1
        #LOG.trace('poly=%s'%(poly))
        return poly
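    # A minimal cross-check (illustration only, hypothetical helper) of what
    # the _svd* methods above build: Vs holds V * diag(1/s) with small
    # singular values zeroed, so the solve_* methods below compute
    # A = V diag(1/s) U^T z, i.e. the least-squares solution via the
    # pseudo-inverse. numpy.linalg.pinv applies the same construction.
    @staticmethod
    def _example_pseudo_inverse_equivalence():
        rng = numpy.random.RandomState(0)  # fixed seed for a reproducible sketch
        G = rng.rand(10, 4)                # hypothetical 10-point design matrix
        z = rng.rand(10)
        U, s, Vh = LA.svd(G, full_matrices=False)
        A = Vh.T @ ((U.T @ z) / s)         # V diag(1/s) U^T z
        assert numpy.allclose(A, LA.pinv(G) @ z)
        return A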
[docs]
    def solve_with_mask(self, z, out=None, nmask=0):
        nz = len(z)
        assert nz == self.N
        self._svd_with_mask(nmask)
        if out is None:
            A = numpy.zeros(self.L, dtype=numpy.float64)
        else:
            A = out
            A[:] = 0
        assert len(A) == self.L
        self.B[:] = 0
        for i in range(self.L):
            for k in range(self.N):
                self.B[i] += self.U[k, i] * z[k]
        for i in range(self.L):
            for k in range(self.L):
                A[i] += self.Vs[i, k] * self.B[k]
        #fit = numpy.fromiter((self._eval_poly_from_G(i, A) for i in range(self.N)), dtype=numpy.float64)
        #LOG.trace('fit=%s'%(fit))
        #LOG.trace('diff=%s'%(abs(fit - z)/z))
        return A
[docs]
    def solve_with_eps(self, z, out=None, eps=1.0e-7):
        assert 0.0 <= eps
        nz = len(z)
        assert nz == self.N
        self._svd_with_eps(eps)
        if out is None:
            A = numpy.zeros(self.L, dtype=numpy.float64)
        else:
            A = out
            A[:] = 0
        assert len(A) == self.L
        self.B[:] = 0
        for i in range(self.L):
            for k in range(self.N):
                self.B[i] += self.U[k, i] * z[k]
        for i in range(self.L):
            for k in range(self.L):
                A[i] += self.Vs[i, k] * self.B[k]
        #fit = numpy.fromiter((self._eval_poly_from_G(i, A) for i in range(self.N)), dtype=numpy.float64)
        #LOG.trace('fit=%s'%(fit))
        #LOG.trace('diff=%s'%(abs(fit - z)/z))
        return A
[docs]
    def solve_for(self, z, out=None, eps=1.0e-7):
        assert 0.0 <= eps
        nz = len(z)
        assert nz == self.N
        self._svd(eps)
        if out is None:
            A = numpy.zeros(self.L, dtype=numpy.float64)
        else:
            A = out
            A[:] = 0
        assert len(A) == self.L
        self.B[:] = 0
        for i in range(self.L):
            for k in range(self.N):
                self.B[i] += self.U[k, i] * z[k]
        for i in range(self.L):
            for k in range(self.L):
                A[i] += self.Vs[i, k] * self.B[k]
        #fit = numpy.fromiter((self._eval_poly_from_G(i, A) for i in range(self.N)), dtype=numpy.float64)
        #LOG.trace('fit=%s'%(fit))
        #LOG.trace('diff=%s'%(abs(fit - z)/z))
        return A
[docs]
    def find_good_solution(self, z, threshold=0.05):
        assert 0.0 <= threshold
        eps_list = [10**x for x in range(-11, -3)]
        best_score = 1e30
        best_eps = eps_list[0]
        intlog = lambda x: int(numpy.log10(x))
        ans = numpy.empty(self.L, dtype=numpy.float64)
        best_ans = numpy.empty(self.L, dtype=numpy.float64)
        diff = numpy.empty(self.N, dtype=numpy.float64)
        # do SVD
        self._do_svd()
        for eps in eps_list:
            ans = self.solve_with_eps(z, out=ans, eps=eps)
            for i in range(self.N):
                fit = self._eval_poly_from_G(i, ans)
                if z[i] != 0:
                    diff[i] = abs((fit - z[i]) / z[i])
                else:
                    diff[i] = fit
            #score = diff.max()
            score = diff.mean()
            LOG.trace('eps={}, score={}', intlog(eps), score)
            if best_ans is None or score < best_score:
                best_ans[:] = ans
                best_score = score
                best_eps = eps
        if 1.0 <= best_score:
            raise RuntimeError('No good solution is found.')
        elif threshold < best_score:
            LOG.trace('Score is higher than the given threshold (threshold {}, score {})', threshold, best_score)
        LOG.trace('best eps: {} (score {})', intlog(best_eps), best_score)
        return best_ans
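# A minimal end-to-end usage sketch (illustration only, hypothetical data)
# mirroring how the validation code above drives SVDSolver2D: set the sampled
# positions, then let find_good_solution scan eps values for the most stable
# fit. The coefficient order follows _set_design_matrix: [1, x, y, x*y] for
# xorder = yorder = 1.
def _example_svdsolver2d_usage():
    x = numpy.array([0.0, 0.0, 1.0, 1.0, 0.5])  # hypothetical RA-like axis
    y = numpy.array([0.0, 1.0, 0.0, 1.0, 0.5])  # hypothetical DEC-like axis
    z = 2.0 + 3.0 * x + 4.0 * y + 5.0 * x * y   # exact bilinear surface
    solver = SVDSolver2D(1, 1)
    solver.set_data_points(x, y)
    coeff = solver.find_good_solution(z)
    assert numpy.allclose(coeff, [2.0, 3.0, 4.0, 5.0], atol=1e-3)
    return coeff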