- Timestamp:
- 08/07/12 14:23:49 (12 years ago)
- Location:
- metadata/trunk/toolkit/scripts
- Files:
-
- 4 edited
Legend:
- Unmodified
- Added
- Removed
-
metadata/trunk/toolkit/scripts/check-tools-integrity.py
r2061 r2063 6 6 ## Since: 1-8-2012 7 7 ## Description: 8 ## Check the CLARIN tools registry CSV file for problematic 9 ## URLs (dead links, etc.) and problematic e-mail-addresses. 10 ## Export a table for each record in the registry with the 11 ## check results. 8 12 ## 9 ##10 11 13 12 14 __author__ = "Sander Maijers" 13 15 14 16 import pdb 15 import comm17 import comm 16 18 import csv, sys 17 19 import urllib.request … … 42 44 COMMASPACE = ', ' 43 45 44 #.format(user, socket.gethostname(), experiment_name)45 46 # header_original_recipient = "Original-Recipient:{0}".format(recorded_email_address)47 # "Disposition-Notification-To:{0}".format(options.test_mailbox)48 49 46 msg = MIMEMultipart() 50 msg['Subject'] = "Automatic message for CLARIN database e-mail check."47 msg['Subject'] = "Automatic message for CLARIN database e-mail address check." 51 48 msg['From'] = from_addr 52 49 msg['To'] = COMMASPACE.join([to_addrs]) 53 50 msg['Disposition-Notification-To'] = from_addr 54 51 msg['Original-Recipient'] = to_addr 55 #msg.preamble = "Experiment " + experiment_name + " completed."56 57 # DEBUG:58 #import pdb59 #pdb.set_trace()60 61 # body_MIME = MIMEText(body)62 # print(body)63 #_text = stdout.decode("utf-8"), _subtype = 'plain', _charset = 'utf-8')64 65 # msg.attach(body_MIME)66 52 67 53 return(msg.as_string()) … … 97 83 98 84 def download_file(destination_directory_path = None, 99 destination_file_name = "", 100 file_mode = None, 101 base_URL = "", 102 URL = "") : 103 104 pdb.set_trace(); 85 destination_file_name = "", 86 file_mode = None, 87 base_URL = "", 88 URL = "") : 105 89 106 90 if base_URL == "" and URL == "" : … … 144 128 with open(options.tools_CSV_file_path, 145 129 newline = '') as CSV_file : 146 CSV_reader = csv.DictReader(CSV_file) #DictReader(CSV_file)130 CSV_reader = csv.DictReader(CSV_file) 147 131 148 132 CSV_data = list(CSV_reader) 149 #for row in CSV_reader : 150 151 152 return(CSV_data) # tools_CSV_file_path 153 154 # if not DEBUG : 155 # communicate(level = 0, 156 # message = table_str, 157 # output_streams = sys.stdout) 133 134 135 return(CSV_data) 158 136 159 137 … … 164 142 urllib.request.urlopen(url = URL) 165 143 except HTTPError as HTTP_error_obj : 166 #print(dir(HTTP_error_obj))167 144 #HTTP_error_obj.getcode() 168 145 return(False) … … 173 150 else : 174 151 return(True) 175 # ['_HTTPError__super_init', '__cause__', '__class__', '__context__', '__delattr__', '__dict__', '__doc__', '__enter__', '__eq__', '__exit__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__iter__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__setstate__', '__sizeof__', '__str__', '__subclasshook__', '__traceback__', '__weakref__', 'args', 'close', 'code', 'errno', 'filename', 'fileno', 'fp', 'getcode', 'geturl', 'hdrs', 'headers', 'info', 'msg', 'read', 'readline', 'readlines', 'reason', 'strerror', 'url', 'with_traceback']176 # X-177 # test:178 # recorded_URLs =179 # ['http://www.google.com/testtesttest',180 # 'http://bla',181 # 'http://google.com',182 # 'ftp://ftp.kernel.org/',183 # 'http://www.mpi.nl']184 185 # recorded_URLs186 187 # # Reduce to unique URLs188 # URLs = frozenset(recorded_URLs)189 190 # test_results = list(map(test_URL, URLs))191 192 # check_results = dict(zip(URLs, test_results))193 152 194 153 assert(len(recorded_URLs) == len(URL_record_columns)) 195 154 results = [''] * len(URL_record_columns) 196 155 197 #pdb.set_trace()198 199 156 for recorded_URL_index, URL in enumerate(recorded_URLs) : # X- check unique urls once 200 #URL = recorded_URLs[recorded_URL_index].strip()201 157 202 158 URL = URL.strip() … … 210 166 else : 211 167 results[recorded_URL_index] = 'problematic' 212 # pdb.set_trace()213 168 214 169 return(results) … … 266 221 267 222 parser.set_defaults(tools_CSV_file_path = '/tmp/export_tools') 268 parser.set_defaults(output_CSV_file_path = '/tmp/output.tab') 269 # X- 270 # X- !!! date_checksumoftoolsCSV.tab 271 223 parser.set_defaults(output_CSV_file_path = '/tmp/output.tab') # X- !!! date_checksumoftoolsCSV.tab 272 224 273 225 parser.add_option("--check_mail", … … 357 309 tools_CSV_data = read_tools_CSV() 358 310 359 # 'URL check result (field_tool_urlcheck)'360 361 311 test_results = [] 362 363 312 progress_bar_obj = progressbar.ProgressBar() 364 313 … … 370 319 #URL = list(filter(None, URL)) 371 320 372 #test_results = []373 374 321 if len(URL) > 0 : 375 #test_results += test_URLs(URL) 376 #test_results = dict(zip(URL, test_URLs(URL))) 377 # pdb.set_trace() 322 378 323 test_results += [dict(zip(URL_record_columns, test_URLs(URL)))] # list(zip(URL_record_columns, )) #, test_results) 379 324 380 325 data = test_results 381 326 382 #pdb.set_trace()383 # from collections import defaultdict384 # data = defaultdict(list)385 # for URL_record_column, test_result in test_results :386 # data[URL_record_column].append(test_result)387 388 # for URL_record_column in URL_record_columns :389 390 391 # recorded_URLs = tools_CSV_data[URL_record_column]392 393 # test_URLs(recorded_URLs)394 327 return(data) 395 328 … … 406 339 retrieve_tools_CSV() 407 340 408 # pdb.set_trace()409 410 341 # check_results is a list row objects for DictWriter 411 342 check_results = check() … … 413 344 414 345 415 # assert(options.source_file_path is not None)416 417 418 346 if __name__ == '__main__' : 419 347 signal.signal(signal.SIGINT, signal_handler) -
metadata/trunk/toolkit/scripts/cmdi-lrt-template.xml
r2058 r2063 16 16 17 17 <Components> 18 18 19 <LrtInventoryResource> 19 20 … … 51 52 <Description></Description> 52 53 <ContactPerson></ContactPerson> 53 </LrtIPR> 54 55 <Tags/> 54 </LrtIPR> 56 55 57 56 </LrtInventoryResource> 57 58 <tags/> 58 59 </Components> 59 60 </CMD> -
metadata/trunk/toolkit/scripts/filter_tools_csv.R
r2061 r2063 4 4 ## for stale/irrelevant/problematic records. 5 5 6 tools_registry <- read.csv("/tmp/export_tools", header = TRUE);6 tools_registry <- read.csv("/tmp/export_tools", check.names = FALSE, header = TRUE); 7 7 checks_output_table <- read.table("/tmp/output.tab", sep = '\t', check.names = FALSE, header = TRUE); 8 colnames(checks_output_table) <- paste(colnames(checks_output_table), "check"); 8 9 9 records_to_be_kept <- subset(output_table, `Reference link (field_tool_reference_link)` != "unspecified"); 10 ## Records whose contact person should be warned because the "Reference link" URL value is problematic. 11 records_problematic <- subset(records_to_be_kept, `Reference link (field_tool_reference_link)` == "problematic"); 10 records_to_be_kept <- subset(checks_output_table, `Reference link (field_tool_reference_link) check` != "unspecified"); 11 12 13 14 15 ## Records whose contact person should be warned. 16 records_any_unspecified <- subset(records_to_be_kept, `Reference link (field_tool_reference_link) check` == "unspecified" 17 | `Documentation link (field_tool_document_link) check` == "unspecified" 18 | `Webservice link (field_tool_webservice_link) check` == "unspecified"); 19 20 21 complete_extended_table <- cbind(tools_registry, checks_output_table); 22 23 write.table(complete_extended_table, 24 file = "/run/media/sanmai/SAMSUNG/3,MPI/export_tools__complete_extended__7-8-2012.csv", 25 sep = ',', 26 row.names = FALSE, 27 col.names = TRUE); 28 29 30 records_relevant_links_specified <- subset(records_to_be_kept, (`Reference link (field_tool_reference_link) check` != "unspecified" 31 | `Webservice link (field_tool_webservice_link) check` != "unspecified") 32 & `Documentation link (field_tool_document_link) check` != "unspecified"); 33 links_specified_table <- cbind(tools_registry[row.names(records_relevant_links_specified),], records_relevant_links_specified); 34 write.table(links_specified_table, 35 file = "/run/media/sanmai/SAMSUNG/3,MPI/export_tools__relevant_links_specified__7-8-2012.csv", 36 sep = ',', 37 row.names = FALSE, 38 col.names = TRUE); 39 40 41 records_relevant_links_work <- subset(records_to_be_kept, (`Reference link (field_tool_reference_link) check` == "works" 42 | `Webservice link (field_tool_webservice_link) check` == "works") 43 & `Documentation link (field_tool_document_link) check` == "works"); 44 links_work_table <- cbind(tools_registry[row.names(records_relevant_links_work),], records_relevant_links_work); 45 write.table(links_work_table, 46 file = "/run/media/sanmai/SAMSUNG/3,MPI/export_tools__relevant_links_work__7-8-2012.csv", 47 sep = ',', 48 row.names = FALSE, 49 col.names = TRUE); 50 51 12 52 13 53 URLs <- tools_registry[row.names(records_problematic),17] -
metadata/trunk/toolkit/scripts/lrt2cmdi.py
r2058 r2063 212 212 213 213 def addTags(self, tags_string) : 214 tags_XML_element = self.xmlTree.find(".// Tags")214 tags_XML_element = self.xmlTree.find(".//tags") 215 215 assert(tags_XML_element is not None) 216 216
Note: See TracChangeset
for help on using the changeset viewer.