Changeset 29

Show
Ignore:
Timestamp:
27/11/07 17:40:17 (2 years ago)
Author:
daedalus
Message:

* Manual bailout when in authoritarian mode now works correctly.
* Fixed up some of the log level and stats reporting.
* Added 'backout' mode, which runs only the backout portion of changes. More
  useful for testing, but can also be used to undo provisioning by repurposing
  the same set of templates used for provisioning.

Location:
trunk
Files:
2 added
3 modified

Legend:

Unmodified
Added
Removed
  • trunk/confloader.py

    r28 r29  
    1414 
    1515from device import Device 
    16 from change import CHANGE_STATE 
     16from change import CHANGE_STATE, ChangeConditionFailure 
     17from provisioner import UserBailout 
    1718import util 
    1819 
     
    186187            self.add_namespace(nsnode) 
    187188        except IndexError: 
    188             log.warn("Cannot find global namespace") 
    189189            self.global_namespace = {} 
    190190 
     
    604604                    if prereq not in self.change_success: 
    605605 
     606                        # If we're in backout mode, a prereq in 'backout_ok' state 
     607                        # is treated as complete. 
     608                        if prereq.state in [ CHANGE_STATE['backout_ok'], ]: 
     609                            continue 
     610 
    606611                        # If a prereq has failed, doesn't need to retry, and is 
    607612                        # marked as 'onfail:continue', then we treat it as if 
    608613                        # this prereq has been met. 
    609                         if prereq.state not in [ CHANGE_STATE['pending'], 
     614                        elif prereq.state not in [ CHANGE_STATE['pending'], 
    610615                                                 CHANGE_STATE['retry'], 
    611616                                                 ] and prereq.on_fail_continue: 
     
    631636        Move a change from pending to complete. 
    632637        """ 
    633         log.debug("Change completed.") 
     638        log.info("Change %s completed.", change.name) 
    634639        if change.state == CHANGE_STATE['success']: 
    635640            self.change_success.append(change) 
     
    657662            # Change will be retried 
    658663            pass 
    659          
     664 
    660665        else: 
    661666            log.error("Unknown/unhandled change state '%s'", change.state) 
     
    703708 
    704709        # deal with errors in stats 
    705         #self.alldone.addErrback(self.stats_error) 
     710        #self.alldone.addErrback(self.print_stats) 
    706711                                 
    707712        return self.alldone 
    708713 
    709     def get_next_changes(self, ignored): 
     714    def get_next_changes(self, results): 
    710715        """ 
    711716        Fetch the next set of changes to run. 
     
    716721        have no interdependencies on one another. 
    717722        """ 
     723        log.debug("Results in get_next_changes are: %s", results) 
    718724        log.debug("Fetching outstanding changes...") 
    719725        self.current_changelist = self.cfgldr.get_available_changes() 
     
    747753            log.debug("adding change '%s' to implementation queue", change.name) 
    748754                 
    749             d = change.provisioner.perform_change(None, change, self.cfgldr.global_namespace) 
     755            d = change.provisioner.perform_change(None, change, self.cfgldr.global_namespace, backout=self.cfgldr.options.backout) 
    750756            d.addCallback(self.change_complete, change) 
    751757            d.addErrback(self.change_failure, change) 
     
    753759            pass 
    754760 
    755         dl = defer.DeferredList(dlist) 
     761        dl = defer.DeferredList(dlist, fireOnOneErrback=True, consumeErrors=True) 
    756762        # After the current crop of changes has been completed, find if there are more 
    757763        dl.addCallback( self.get_next_changes ) 
     764        dl.addErrback( self.bailout ) 
    758765        return dl 
    759766 
     767    def bailout(self, failure): 
     768        """ 
     769        Bailout detects the first error from the deferred list, 
     770        and bails out. 
     771        """ 
     772        log.debug("Running bailout...") 
     773        if failure.value.subFailure.type == UserBailout: 
     774            self.alldone.callback('Bailout') 
     775 
     776        else: 
     777            log.debug("Unhandled failure: %s", failure) 
     778            self.alldone.errback(failure) 
     779 
    760780    def change_complete(self, ignored, change): 
    761781        """ 
     
    765785 
    766786    def change_failure(self, failure, change): 
    767         log.error("Major change failure!") 
    768         tlog.err(failure) 
     787        if failure.type == UserBailout: 
     788            log.debug("Controller detected UserBailout.") 
     789            return failure 
     790 
     791        elif failure.type == ChangeConditionFailure: 
     792            pass 
     793 
     794        else: 
     795            log.error("Major change failure!") 
     796            tlog.err(failure) 
     797            pass 
     798         
    769799        self.change_complete(failure, change) 
    770800 
     
    780810 
    781811        log.info("--- Final results ---") 
    782          
    783         if not (success_count > 0 or failure_count > 0): 
    784             log.error("no changes succeeded or failed!") 
    785         else: 
    786             log.info("%d ok, %d failed (%d%% success rate)" % (success_count, failure_count, 100 * success_count / (success_count+failure_count) ) ) 
    787  
    788         if success_count > 0: 
    789             log.info( "Successful changes: %s" % ', '.join([ x.name for x in self.cfgldr.change_success])) 
    790  
    791         if failure_count > 0: 
    792             log.info( "Failed changes: %s" % ', '.join([ x.name for x in self.cfgldr.change_failure]) ) 
     812 
     813        # Don't print some stats when in backout mode 
     814        if not self.cfgldr.options.backout: 
     815         
     816            if not (success_count > 0 or failure_count > 0): 
     817                log.error("no changes succeeded or failed!") 
     818            else: 
     819                log.info("%d ok, %d failed (%d%% success rate)" % (success_count, failure_count, 100 * success_count / (success_count+failure_count) ) ) 
     820 
     821            if success_count > 0: 
     822                log.info( "Successful changes: %s" % ', '.join([ x.name for x in self.cfgldr.change_success])) 
     823 
     824            if failure_count > 0: 
     825                log.info( "Failed changes: %s" % ', '.join([ x.name for x in self.cfgldr.change_failure]) ) 
     826                pass 
     827            pass 
     828 
     829        # These stats are always printed 
    793830 
    794831        if backout_success_count > 0 or backout_failure_count > 0: 
    795             log.error( "%d backed out ok, %d failed (%d%% backout success rate)" % (backout_success_count, backout_failure_count, 100 * backout_success_count / (backout_success_count+backout_failure_count) )) 
     832            log.info( "%d backed out ok, %d failed (%d%% backout success rate)" % (backout_success_count, backout_failure_count, 100 * backout_success_count / (backout_success_count+backout_failure_count) )) 
    796833             
    797834        if backout_success_count > 0: 
  • trunk/options.py

    r27 r29  
    7979        help_configfile = "The configuration file to load" 
    8080        help_loadonly = "Load the configuration file and exit. Used to test parsing." 
    81          
     81        help_backout = "Run the backout portion of the changes." 
    8282 
    8383        self.add_option('-a', '--authoritarian',  dest='authoritarian', action='store_true', default=False, help=help_authoritarian)         
     84        self.add_option('-b', '--backout',  dest='backout', action='store_true', default=False, help=help_backout)         
    8485        self.add_option('-c', '--configfile', dest='configfile', type='string', help=help_configfile) 
    8586        self.add_option('', '--loadonly', dest='loadonly', action='store_true', default=False, help=help_loadonly) 
  • trunk/provisioner.py

    r28 r29  
    8080        pass 
    8181 
    82     def perform_change(self, ignored, change, namespace={}): 
     82    def perform_change(self, ignored, change, namespace={}, backout=False): 
    8383        """ 
    8484        Perform a change on one or more (potentially) remote entities. 
     
    120120                namespace.update(device.namespace) 
    121121 
    122                 log.debug("applying change with namespace: %s", namespace) 
    123                 d.addCallback(self.apply_change, device, change, namespace) 
    124  
    125                 d.addCallback(self.change_complete_success, change, namespace) 
    126                 d.addErrback(self.change_failure, change, namespace) 
     122                if backout: 
     123                    # Just do the backout portion 
     124                    d.addCallback(self.backout_change, device, change, namespace) 
     125 
     126                else: 
     127                    log.debug("applying change with namespace: %s", namespace) 
     128                    d.addCallback(self.apply_change, device, change, namespace) 
     129                     
     130                    d.addCallback(self.change_complete_success, change, namespace) 
     131                    d.addErrback(self.change_failure, change, namespace) 
     132                    pass 
    127133                pass 
    128134            pass 
     
    158164        e = failure.check( ChangeConditionFailure, UserBailout ) 
    159165        if e: 
    160             log.error("  failure was: %s", failure.value ) 
    161             if isinstance(e, UserBailout): 
    162                 log.info("User bailout detected.") 
    163                 return defer.succeed('bailout') 
     166            log.error("  failure was: %s, %s", failure.type, failure.value ) 
     167 
     168            # If we want to bail out, bail now 
     169            if failure.type == UserBailout: 
     170                return defer.fail(failure) 
    164171        else: 
     172            log.error("Unhandled failure in provisioner.change_apply_failed()") 
    165173            tlog.err(failure) 
    166174         
     
    193201                change.state = CHANGE_STATE['total_failure'] 
    194202        else: 
    195             change.state = CHANGE_STATE['backout_failed'] 
    196             pass 
     203            log.error("change_failure unhandled change state: %s", change.state) 
     204            raise ValueError("Invalid change state: %s" % change.state) 
     205 
     206        # Propogate a UserBailout failure 
     207        if failure.type == UserBailout: 
     208            return failure 
     209         
    197210        #tlog.err(failure) 
    198211 
     
    202215            log.info("Change '%s' was a success!", change.name) 
    203216        else: 
     217            log.error("change_complete_success in weird state: %s", change.state) 
     218            raise ValueError("wrong change state: %s" % change.state) 
    204219            change.state = CHANGE_STATE['backout_ok'] 
    205220            log.info("Change '%s' backed out successfully", change.name) 
     
    241256    def backout_success(self, ignored, device, change, namespace): 
    242257        log.info("Successfully backed out change: '%s' from device '%s'", change.name, device) 
     258        change.state = CHANGE_STATE['backout_ok'] 
    243259        self.backout_ok[device] = change 
    244260 
    245261    def backout_failure(self, failure, device, change, namespace): 
    246262        log.critical("Backout of change '%s' failed for device '%s'!", change.name, device) 
     263        change.state = CHANGE_STATE['backout_failed'] 
    247264        self.backout_failed[device] = change 
    248         tlog.err(failure) 
     265        #tlog.err(failure) 
    249266        return failure 
    250267 
     
    611628            pass 
    612629 
    613         d.addCallback(self.all_commands_done) 
    614          
     630        d.addCallbacks(self.all_commands_done, self.all_commands_failure) 
    615631        return self.all_commands_defer 
    616632 
     
    662678        log.error("Command failed: %s", errorstr) 
    663679        #self.all_commands_defer.errback( Exception(errorstr) ) 
    664  
     680        return failure 
     681     
    665682    def all_commands_done(self, result): 
    666683        """ 
     
    669686        log.debug("All commands have finished.") 
    670687        self.all_commands_defer.callback( (self.exitcode, self.cmdoutput) ) 
     688 
     689    def all_commands_failure(self, failure): 
     690        """ 
     691        Some kind of failure in the commands occurred. 
     692        """ 
     693        e = failure.check( UserBailout ) 
     694        if e: 
     695            #log.error("User bailout detected.") 
     696            self.all_commands_defer.errback( failure ) 
    671697 
    672698class CommandFailure(Exception):