Context Navigation

← Previous Change
Next Change →

Changeset 2063 for metadata

Timestamp:

08/07/12 14:23:49 (12 years ago)

Author:

sanmai

Message:

Fix <tags/> location in cmdi-lrt-template.xml so as to make it valid CMDI.
Minor further work on the LRT tools registry checking script. (See filter_tools_csv.R for filtering based on the link checks.)

Location:

metadata/trunk/toolkit/scripts

Files:

: 4 edited

check-tools-integrity.py (modified) (12 diffs)
cmdi-lrt-template.xml (modified) (2 diffs)
filter_tools_csv.R (modified) (1 diff)
lrt2cmdi.py (modified) (1 diff)

Legend:

: Unmodified
: Added
: Removed

metadata/trunk/toolkit/scripts/check-tools-integrity.py

-                      r2061
+                      r2063
 ##   Since:      1-8-2012
 ##   Description:
+##   Check the CLARIN tools registry CSV file for problematic
+##   URLs (dead links, etc.) and problematic e-mail addresses.
+##   Export a table for each record in the registry with the
+##   check results.
 ##
-##
 __author__          = "Sander Maijers"
 import              pdb
 import                          comm
+import                                comm
 import              csv, sys
 import              urllib.request
 …
         COMMASPACE      = ', '
-        #.format(user, socket.gethostname(), experiment_name)
-    #    header_original_recipient = "Original-Recipient:{0}".format(recorded_email_address)
-    #    "Disposition-Notification-To:{0}".format(options.test_mailbox)
         msg             = MIMEMultipart()
         msg['Subject']  = "Automatic message for CLARIN database e-mail check."
+        msg['Subject']  = "Automatic message for CLARIN database e-mail address check."
         msg['From']     = from_addr
         msg['To']       = COMMASPACE.join([to_addrs])
         msg['Disposition-Notification-To']  = from_addr
         msg['Original-Recipient']           = to_addr
-        #msg.preamble    = "Experiment " + experiment_name + " completed."
-        # DEBUG:
-        #import pdb
-        #pdb.set_trace()
-    #    body_MIME       = MIMEText(body)
-    #    print(body)
-        #_text = stdout.decode("utf-8"), _subtype = 'plain', _charset = 'utf-8')
-    #    msg.attach(body_MIME)
         return(msg.as_string())
 …
 def download_file(destination_directory_path  = None,
+                  destination_file_name  = "",
+                  file_mode  = None,
+                  base_URL   = "",
+                  URL        = "") :
+    pdb.set_trace();
+                  destination_file_name       = "",
+                  file_mode                   = None,
+                  base_URL                    = "",
+                  URL                         = "") :
     if base_URL == "" and URL == "" :
 …
     with open(options.tools_CSV_file_path,
               newline = '') as CSV_file :
         CSV_reader          = csv.DictReader(CSV_file) #DictReader(CSV_file)
+        CSV_reader          = csv.DictReader(CSV_file)
         CSV_data            = list(CSV_reader)
+        #for row in CSV_reader :
+    return(CSV_data) # tools_CSV_file_path
+    # if not DEBUG :
+    #     communicate(level           = 0,
+    #                 message         = table_str,
+    #                 output_streams  = sys.stdout)
+    return(CSV_data)
 …
             urllib.request.urlopen(url = URL)
         except HTTPError as HTTP_error_obj :
-            #print(dir(HTTP_error_obj))
             #HTTP_error_obj.getcode()
             return(False)
 …
         else :
             return(True)
-            # ['_HTTPError__super_init', '__cause__', '__class__', '__context__', '__delattr__', '__dict__', '__doc__', '__enter__', '__eq__', '__exit__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__iter__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__setstate__', '__sizeof__', '__str__', '__subclasshook__', '__traceback__', '__weakref__', 'args', 'close', 'code', 'errno', 'filename', 'fileno', 'fp', 'getcode', 'geturl', 'hdrs', 'headers', 'info', 'msg', 'read', 'readline', 'readlines', 'reason', 'strerror', 'url', 'with_traceback']
-            # X-
-    # test:
-    # recorded_URLs =
-    # ['http://www.google.com/testtesttest',
-    # 'http://bla',
-    # 'http://google.com',
-    # 'ftp://ftp.kernel.org/',
-    # 'http://www.mpi.nl']
-    # recorded_URLs
-    # # Reduce to unique URLs
-    # URLs            = frozenset(recorded_URLs)
-    # test_results    = list(map(test_URL, URLs))
-    # check_results   = dict(zip(URLs, test_results))
     assert(len(recorded_URLs) == len(URL_record_columns))
     results         = [''] * len(URL_record_columns)
-    #pdb.set_trace()
     for recorded_URL_index, URL in enumerate(recorded_URLs) : # X- check unique urls once
-        #URL         = recorded_URLs[recorded_URL_index].strip()
         URL         = URL.strip()
 …
             else :
                 results[recorded_URL_index] = 'problematic'
-#    pdb.set_trace()
     return(results)
 …
     parser.set_defaults(tools_CSV_file_path     = '/tmp/export_tools')
+    parser.set_defaults(output_CSV_file_path    = '/tmp/output.tab')
+     # X-
+     # X- !!! date_checksumoftoolsCSV.tab
+    parser.set_defaults(output_CSV_file_path    = '/tmp/output.tab') # X- !!! date_checksumoftoolsCSV.tab
     parser.add_option("--check_mail",
 …
     tools_CSV_data          = read_tools_CSV()
-    # 'URL check result (field_tool_urlcheck)'
     test_results = []
     progress_bar_obj = progressbar.ProgressBar()
 …
         #URL = list(filter(None, URL))
-        #test_results        = []
         if len(URL) > 0 :
+            #test_results    += test_URLs(URL)
+            #test_results = dict(zip(URL, test_URLs(URL)))
+#            pdb.set_trace()
             test_results    += [dict(zip(URL_record_columns, test_URLs(URL)))]   # list(zip(URL_record_columns, )) #, test_results)
             data = test_results
-            #pdb.set_trace()
-        # from collections import defaultdict
-        # data = defaultdict(list)
-        # for URL_record_column, test_result in test_results :
-        #     data[URL_record_column].append(test_result)
-    # for URL_record_column in URL_record_columns :
-    #     recorded_URLs   = tools_CSV_data[URL_record_column]
-    #     test_URLs(recorded_URLs)
     return(data)
 …
     retrieve_tools_CSV()
-#    pdb.set_trace()
     # check_results is a list of row objects for DictWriter
     check_results           = check()
 …
-    # assert(options.source_file_path         is not None)
 if __name__ == '__main__' :
     signal.signal(signal.SIGINT, signal_handler)

metadata/trunk/toolkit/scripts/cmdi-lrt-template.xml

-                      r2058
+                      r2063
     <Components>
         <LrtInventoryResource>
 …
                 <Description></Description>
                 <ContactPerson></ContactPerson>
+            </LrtIPR>
+            <Tags/>
+            </LrtIPR>
         </LrtInventoryResource>
+        <tags/>
     </Components>
 </CMD>

metadata/trunk/toolkit/scripts/filter_tools_csv.R

-                      r2061
+                      r2063
 ## for stale/irrelevant/problematic records.
 tools_registry          <- read.csv("/tmp/export_tools", header = TRUE);
+tools_registry          <- read.csv("/tmp/export_tools", check.names = FALSE, header = TRUE);
 checks_output_table <- read.table("/tmp/output.tab", sep = '\t', check.names = FALSE, header = TRUE);
+colnames(checks_output_table) <- paste(colnames(checks_output_table), "check");
+records_to_be_kept      <- subset(output_table, `Reference link (field_tool_reference_link)` != "unspecified");
+## Records whose contact person should be warned because the "Reference link" URL value is problematic.
+records_problematic <- subset(records_to_be_kept, `Reference link (field_tool_reference_link)` == "problematic");
+records_to_be_kept      <- subset(checks_output_table, `Reference link (field_tool_reference_link) check` != "unspecified");
+## Records whose contact person should be warned.
+records_any_unspecified <- subset(records_to_be_kept, `Reference link (field_tool_reference_link) check`                        == "unspecified"
+                                                                                                        | `Documentation link (field_tool_document_link) check`                 == "unspecified"
+                                                                                                        | `Webservice link (field_tool_webservice_link) check`          == "unspecified");
+complete_extended_table                                         <- cbind(tools_registry, checks_output_table);
+write.table(complete_extended_table,
+                                file = "/run/media/sanmai/SAMSUNG/3,MPI/export_tools__complete_extended__7-8-2012.csv",
+                                sep = ',',
+                                row.names = FALSE,
+                                col.names = TRUE);
+records_relevant_links_specified        <- subset(records_to_be_kept, (`Reference link (field_tool_reference_link) check`           != "unspecified"
+                                                                                                                                        | `Webservice link (field_tool_webservice_link) check`  != "unspecified")
+                                                                                                                                        & `Documentation link (field_tool_document_link) check`         != "unspecified");
+links_specified_table                           <- cbind(tools_registry[row.names(records_relevant_links_specified),], records_relevant_links_specified);
+write.table(links_specified_table,
+                        file = "/run/media/sanmai/SAMSUNG/3,MPI/export_tools__relevant_links_specified__7-8-2012.csv",
+                        sep = ',',
+                        row.names = FALSE,
+                        col.names = TRUE);
+records_relevant_links_work         <- subset(records_to_be_kept, (`Reference link (field_tool_reference_link) check`       == "works"
+                                                                                                                                        | `Webservice link (field_tool_webservice_link) check`  == "works")
+                                                                                                                                        & `Documentation link (field_tool_document_link) check`         == "works");
+links_work_table                                        <- cbind(tools_registry[row.names(records_relevant_links_work),], records_relevant_links_work);
+write.table(links_work_table,
+                        file = "/run/media/sanmai/SAMSUNG/3,MPI/export_tools__relevant_links_work__7-8-2012.csv",
+                        sep = ',',
+                        row.names = FALSE,
+                        col.names = TRUE);
 URLs                            <- tools_registry[row.names(records_problematic),17]

metadata/trunk/toolkit/scripts/lrt2cmdi.py

r2058	r2063
212	212
213	213	def addTags(self, tags_string) :
214		tags_XML_element = self.xmlTree.find(".//Tags")
	214	tags_XML_element = self.xmlTree.find(".//tags")
215	215	assert(tags_XML_element is not None)
216	216

Note: See TracChangeset for help on using the changeset viewer.

Context Navigation

Changeset 2063 for metadata

Legend:

metadata/trunk/toolkit/scripts/check-tools-integrity.py

metadata/trunk/toolkit/scripts/cmdi-lrt-template.xml

metadata/trunk/toolkit/scripts/filter_tools_csv.R

metadata/trunk/toolkit/scripts/lrt2cmdi.py

Download in other formats: