Changeset 2081


Ignore:
Timestamp:
08/14/12 13:21:47 (12 years ago)
Author:
sanmai
Message:
  • Further development on the Nagios monitoring plugin for Discojuice (JSON).
    • It now checks each of the two Tomcat instances, which serve the JSON content at different URLs, and returns the 'worst exit status' (highest exit code) among those suggested by the individual checks.
    • Error reporting to logs in /tmp was expanded and improved significantly.
Location:
monitoring/plugins/mpi
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • monitoring/plugins/mpi/check_clarin_discojuice_json.py

    r2065 r2081  
    2424                  "usage: %s -h host" % (sys.argv[0]))
    2525
    26 def nagios_return(code, response) :
    27     """ prints the response message
    28         and exits the script with one
    29         of the defined exit codes
    30         DOES NOT RETURN
    31     """
    32     print code + ": " + response
    33     sys.exit(nagios_codes[code])
     26def nagios_return_complex(results) :
     27    def deal_with_result(result) :
     28        print result['code'] + ": " + result['message']
     29        return result['code']
    3430
    35 def check_response_data_well_formedness(data) :
     31    # Scan all condition/status check results and create a list of appropriate exit codes.
     32    exit_code_keys          = map(lambda result     : deal_with_result(result), results)
     33    suggested_exit_codes    = list(map(lambda key   : nagios_codes[key], exit_code_keys))
     34
     35    # Exit with the highest suggested exit code, because the higher the exit code the more problematic the status is and problems have priority over harmony.
     36    sys.exit(max(suggested_exit_codes))
     37
     38def check_response_data_well_formedness(data, descriptive_string) :
    3639
    3740    timestamp = datetime.datetime.today().isoformat()
     
    4043        json.loads(data)
    4144    except :
     45        traceback_string = traceback.format_exc()
    4246        #pdb.set_trace()
    4347
    44         traceback_string = traceback.format_exc()
     48        err_log_file_path = os.path.normpath("/tmp/err_unparseable." + descriptive_string.replace('/' , '__') + "_" + timestamp + ".log")
    4549
    46         with open(name = "/tmp/err_unparseable_discojuice_JSON_" + timestamp + ".log", mode = "wt") as debugging_output_file :
    47                 debugging_output_file.write(traceback_string)
     50        with open(name = err_log_file_path, mode = "wt") as debugging_output_file :
     51            debugging_output_file.write(traceback_string)
     52       
    4853        return False
    4954    else :
    5055        return True
    5156
    52 def check_condition(host) :
    53     timestamp = datetime.datetime.today().isoformat()
    54     UP_URL  = '/discojuice/metadata_clarin.json'
     57def check_condition(host, UP_URL) :
    5558
    56     conn    = httplib.HTTPConnection(host)
     59    def handle_connection_failure(problem_description) :
     60        err_log_file_path = os.path.normpath("/tmp/err_connection_failure." + UP_URL.replace('/' , '__') + "_" + timestamp + ".log")
     61
     62        with open(name = err_log_file_path, mode = "wt") as debugging_output_file :
     63           debugging_output_file.write(problem_description)
     64   
     65    timestamp                = datetime.datetime.today().isoformat()
    5766   
    5867    try :
    59         request = conn.request("GET", UP_URL)
     68        conn                 = httplib.HTTPConnection(host)   
     69
     70        request              = conn.request("GET", UP_URL)
    6071    except :
    61         #pdb.set_trace()
     72        traceback_string     = traceback.format_exc()
     73
     74        handle_connection_failure(traceback_string + "\nThis problem originates from location 1 in 'check_clarin_discojuice_json.py'.")
    6275
    6376        return {
    6477                "code"      : "CRITICAL",
    65                 "message"   : 'Host %s, service %s has a problem.' % (host, DESCRIPTION)
     78                "message"   : '[%s] HTTP connection to host %s failed.' % (DESCRIPTION, host)
    6679               }
     80    else :
     81        try :
     82            response         = conn.getresponse()
    6783
    68     else :   
    69         r1      = conn.getresponse()
     84            data             = response.read()
     85           
     86            conn.close()
     87        except :
     88            traceback_string = traceback.format_exc()
    7089
    71         data    = r1.read()
    72        
    73         conn.close()
    74        
    75         if r1.status == 200 :
    76             well_formed = check_response_data_well_formedness(data)
     90            handle_connection_failure(traceback_string + "\nThis problem originates from location 2 in 'check_clarin_discojuice_json.py'.")
     91        else :
     92            if response.status == 200 :
     93                well_formed  = check_response_data_well_formedness(data, UP_URL)
    7794
    78             if well_formed :
    79                 with open(name = "/tmp/err_unparseable_discojuice_" + timestamp + ".json" , mode = "wt") as debugging_output_file :
    80                     debugging_output_file.write(data)
     95                if well_formed :
     96                    return {
     97                            "code"      : "OK",
     98                            "message"   : '[%s] Host %s is up and returns parseable JSON data at "%s".' % (DESCRIPTION, host, UP_URL)
     99                           }
     100                else :
     101                    return {
     102                            "code"      : "CRITICAL",
     103                            "message"   : '[%s] Host %s is up but returns unparseable JSON data at "%s".' % (DESCRIPTION, host, UP_URL)
     104                            }
     105            else :
    81106
    82                 return {
    83                         "code"      : "OK",
    84                         "message"   : 'Host %s, service %s is up and returns parseable JSON data.' % (host, DESCRIPTION)
    85                        }
    86             else :
    87                 with open(name = "/tmp/err_unparseable_discojuice_" + timestamp + ".json" , mode = "wt") as debugging_output_file :
    88                     debugging_output_file.write(data)
    89 
     107                handle_connection_failure("Unreachable URL: HTTP response code" + str(response.status) + "\nThis problem originates from location 3 in 'check_clarin_discojuice_json.py'.")
    90108                return {
    91109                        "code"      : "CRITICAL",
    92                         "message"   : 'Host %s, service %s is up but returns unparseable JSON data. ' % (host, DESCRIPTION)
    93                         }
    94         else :
    95             return {
    96                     "code"      : "CRITICAL",
    97                     "message"   : 'Host %s, service %s has a problem.' % (host, DESCRIPTION)
    98                    }
     110                        "message"   : '[%s] Host %s has a problem with the URL path component "%s".' % (DESCRIPTION, host, UP_URL)
     111                       }
    99112
    100113def main() :
     
    117130        else :
    118131            usage()
     132
     133    UP_URLs          = ("/discojuice/metadata_clarin1.json", "/discojuice/metadata_clarin2.json",)
    119134   
    120     #result = test_case(host);
    121     result          = check_condition(host)
    122     nagios_return(result['code'], result['message'])
     135    # Check status for all UP_URLs.
     136    results          = map(lambda UP_URL : check_condition(host = host, UP_URL = UP_URL),
     137                           UP_URLs)
     138   
     139    nagios_return_complex(results)
    123140
    124141if __name__ == "__main__" :
    125142    main()
    126 
  • monitoring/plugins/mpi/check_clarin_saml.py

    r2034 r2081  
    7171                                                           stdout = subprocess.PIPE,
    7272                                                           stderr = subprocess.PIPE,
    73                                                            env    = environment_variables)
     73                                                                                       env    = environment_variables)
    7474
    7575    stdout, stderr                      = process.communicate()
Note: See TracChangeset for help on using the changeset viewer.