qb.backend.logParser

"""
Implement a grep-like mechanism to extract information out of the log files.
"""
#======================================
# $Revision: #13 $
# $Change: 16065 $
# $File: //depot/main/qube/src/api/python/qb/backend/logParser.py $
#======================================

import sys
import re
import logging

import qb
import qb.backend.utils as backendUtils

# Matches the message written to the log when a piece of work is retried or requeued.
# LogParser.parse() discards everything that precedes the last such message.
# A raw string is used so that "\[" reaches the regex engine as an escaped bracket without
# Python treating it as an (invalid) string escape sequence.
RGX_QUBE_RETRY_MSG = re.compile(r'qube! - retry/requeue|work\[.*auto-retry\[')

class LogParser(object):
    """
    The base class for log parsers in Qube, it provides all the functionality needed with the
    exception of calculating in-frame progress.

    Since calculating in-frame progress requires interpreting the extracted string from the logs,
    derived classes should override this class' calcProgress() method.  See the docstring for
    this class' calcProgress()
    """

    def __init__(self, job):
        """
        Compile every string-valued 'regex*' entry of the job's package into self.regex, keyed
        by the package key name.

        @param job: the job whose package holds the regex patterns.

        @type job: qb.Job
        """
        self.logging = logging.getLogger(self.__class__.__name__)

        self.regex = {}

        self.dev = backendUtils.getDevBoolean(job)
        if self.dev:
            self.logging.warning('Running in "dev" mode, all log matches are skipped.')

        # these regex's are actually just looking for a match, not trying to extract a
        # sub-string, so the whole pattern is wrapped in a group and the entire matched string
        # ends up in the "matches" dictionary in self.parse()
        rgxMatchesEntireString = ['regex_highlights', 'regex_errors']

        package = job.get('package', {})

        # put all the job's regex's into a dict keyed by the job regex's name
        for jobRegex, rawPattern in package.items():
            if not (jobRegex.startswith('regex') and isinstance(rawPattern, str)):
                continue

            # handle multi-line regex patterns from the GUI: one alternative per line, empty
            # lines in the regex_* boxes are dropped
            patterns = [line.strip() for line in rawPattern.splitlines()]
            rgxPattern = '|'.join([pattern for pattern in patterns if pattern])

            # tweak the rgx pattern if we're simply looking for a match and not extracting a
            # substring
            if jobRegex in rgxMatchesEntireString:
                rgxPattern = '(%s)' % rgxPattern

            self.regex[jobRegex] = re.compile(rgxPattern, re.MULTILINE | re.IGNORECASE)

    def parse(self, data, qbTokens=None):
        """
        Scan the log stream for matches to the job's various regexes.

        Return a dict keyed by the regex name as specified in the job package with 'regex_'
        stripped.  Each value is the list of all non-empty group matches for that regex, with
        two exceptions: 'outputPaths' is returned as a comma-delimited string, and 'progress'
        is replaced by the return value of calcProgress() for the last match.  Regexes without
        matches are omitted.  In "dev" mode no matches are collected at all.

        @param data: the log contents to scan.

        @type data: str

        @param qbTokens: a dictionary containing all the QB_FRAME* values to aid in calculating
        the in-chunk progress.  When empty or None, calcProgress() is called without it.

        @type qbTokens: C{dict}

        @rtype: dict
        """
        matches = {}

        # toss everything that precedes a 'retry/requeue' match
        # it's from a previous run of this instance, don't bother matching it again
        if RGX_QUBE_RETRY_MSG.search(data):
            self.logging.debug('Log parsing skipping sections that precede a "retry"')
            self.logging.debug('BEFORE: data length: %s', len(data))

            data = RGX_QUBE_RETRY_MSG.split(data)[-1]

            self.logging.debug('AFTER: data length: %s', len(data))

        # find all the matches for each regex
        for jobRegex, rgx in self.regex.items():
            # trim the 'regex_' from the name, maintains b-ward compatibility with the
            # original resultpackage keys
            rgxName = jobRegex.replace('regex_', '')
            found = matches.setdefault(rgxName, [])

            self.logging.debug('rgx name:%s pattern: %s', rgxName, rgx.pattern)

            if self.dev:
                # dev mode never records a match, so there is no point scanning the data
                continue

            for m in rgx.finditer(data):
                # groups of alternatives that did not take part in the match are None
                found.extend([grp for grp in m.groups() if grp])

        self.logging.debug('Regex matches: %s', matches)

        # cull the empty lists; iterate over a snapshot of the keys, deleting from a dict while
        # iterating over its live key view raises RuntimeError on Python 3
        for rgxName in list(matches):
            if not matches[rgxName]:
                del matches[rgxName]
            elif rgxName == 'outputPaths':
                # outputPaths is unique in the resultpackage, as it's a comma-delimited string,
                # not a list.
                matches[rgxName] = ','.join(matches[rgxName])

        # the 'progress' only uses the last match, and it is necessary to compute the amount of
        # the work that is done when the work is in a chunk or partition.
        if 'progress' in matches:
            lastMatch = matches['progress'][-1]
            if qbTokens:
                matches['progress'] = self.calcProgress(lastMatch, qbTokens)
            else:
                matches['progress'] = self.calcProgress(lastMatch)

        return matches

    def calcProgress(self, progressMatchStr, *args):
        """
        Calculate the internal progress of a piece of work (or a cmdline job) and return it as
        a float between 0 and 1.

        The extraction of the progress value via the job's "progress" regular expression is only
        the first aspect of determining how close a particular chunk is to completing; the other
        part is figuring out what to do with the frame or the "% complete" string that's been
        pulled out of the log stream and converting it into the expected numerical range so that
        the QubeGUI knows what to do with it.

        2-digit precision is preferred, more precision will be truncated in the QubeGUI.

        It is expected that this method be overridden for any log parser that is expected to
        calculate a "done" value from the log contents.  This base implementation does nothing
        and returns None.

        @param progressMatchStr: the value used to derive the progress within the chunk, as
        extracted from the application output by the "progress" regex.

        @type progressMatchStr: C{str}

        @return: The amount of work complete for the item, expressed as a float between 0 and
        1.0, 1.0 being completely done (None in this base class).

        @rtype: C{float}
        """
        return None

155 156

class ProgressPercentageLogParser(LogParser):
    """
    This LogParser class' version of calcProgress() is for the most trivial case:

    A render returns a "% complete" string between 1 and 100.  The portion of the string
    containing the integer is passed to calcProgress as the progressMatchStr.

    This class converts that percentage value to a fraction between 0 and 1 and returns it.
    """

    def calcProgress(self, progressMatchStr, *args):
        """
        Calculate the internal progress of a piece of work (or a cmdline job) and return it as
        a value between 0 and 1.

        eg: A progressMatchStr of "23" is converted to "0.23".

        @param progressMatchStr: the value used to derive the progress within the chunk.  It is
        simply the progress percentage integer matched in the application output.

        @type progressMatchStr: C{str}

        @return: The amount of work complete for the item, 1.0 being completely done.  On
        success this is the fraction formatted as a 2-decimal string (eg: '0.23'); when
        progressMatchStr cannot be converted to a number a warning is printed to stderr and
        the float 0.0 is returned.

        @rtype: C{str} on success, C{float} 0.0 on failure
        """
        try:
            fraction = float(progressMatchStr) / 100
        except (TypeError, ValueError):
            # the literal percent sign must be doubled, a bare "% v" is itself an invalid
            # conversion and would raise ValueError while building the warning
            backendUtils.flushPrint(
                'WARNING: the logParser %s did not extract a progress %% value: "%s"'
                % (self.__class__.__name__, progressMatchStr),
                fhList=[sys.stderr])
            backendUtils.flushPrint(backendUtils.formatExc(), fhList=[sys.stderr])
            return 0.0

        progress = '%0.2f' % (fraction,)
        self.logging.debug('progress = %s', progress)

        return progress

190 191

class CmdRangeChunkLogParser(LogParser):
    """
    This LogParser class is expected to be used for jobs whose agenda contains frame chunks or
    partitions.  It returns the value of currentFrame/chunkLength as a fraction between 0 and 1.
    """

    def calcProgress(self, progressMatchStr, qbTokens=None):
        """
        Calculate the internal progress of a frame chunk or partition.

        This particular example expects to have only a frame number in the progressMatchStr, and
        compares it to the frame range for the individual work item that was passed to the
        parse() method.

        Determine the extent to which the chunk has progressed by determining the index of the
        current frame in a list comprised of all frames for the chunk (QB_FRAME_RANGE), then
        comparing that index against the size of the chunk.

        eg: progressMatchStr = "23", and the frame range for this piece of work (as determined
        from the qbTokens dict) is 21-30.  So the index for this frame would be 3, and
        3/10 = 0.3

        @param progressMatchStr: the value used to derive the progress within the chunk.  It is
        the frame number matched in the application output.

        @type progressMatchStr: C{str}

        @param qbTokens: a dictionary containing all the QB_FRAME* values to aid in calculating
        the in-chunk progress; only 'QB_FRAME_RANGE' is read here.  It is optional because
        parse() calls calcProgress() without it when it was given no tokens.

        @type qbTokens: C{dict}

        @return: The amount of work complete for the item, 1.0 being completely done.  On
        success this is the fraction formatted as a 2-decimal string (eg: '0.30'); when the
        frame range is unavailable or the frame is not part of it, a warning is printed to
        stderr and the float 0.0 is returned.

        @rtype: C{str} on success, C{float} 0.0 on failure
        """
        progress = 0.0

        frameRange = (qbTokens or {}).get('QB_FRAME_RANGE')
        if not frameRange:
            # without the chunk's frame range there is nothing to compare the frame against
            backendUtils.flushPrint(
                'WARNING: the logParser %s was not given a QB_FRAME_RANGE, unable to calculate progress for: "%s"'
                % (self.__class__.__name__, progressMatchStr),
                fhList=[sys.stderr])
            return progress

        try:
            chunkFrames = [x['name'] for x in qb.genframes(frameRange)]
            # 1-based position, so the last frame of the chunk yields 1.0
            idx = chunkFrames.index(progressMatchStr) + 1
            progress = '%0.2f' % (float(idx) / len(chunkFrames))
        except ValueError:
            backendUtils.flushPrint(
                'WARNING: the logParser %s did not extract a valid frame number: "%s"'
                % (self.__class__.__name__, progressMatchStr),
                fhList=[sys.stderr])
            backendUtils.flushPrint('WARNING: Traceback from error:', fhList=[sys.stderr])
            backendUtils.flushPrint(backendUtils.formatExc(), fhList=[sys.stderr])

        return progress

240

Source Code for Module qb.backend.logParser