Changeset 2884


Ignore:
Timestamp:
05/10/13 12:36:29 (11 years ago)
Author:
sanmai
Message:
  • lrt2cmdi.py: CMDI files generated for 'web service' resources were invalid. FIX: fill the Date element in LrtServiceDetails.
  • Some code style improvements.
Location:
metadata/trunk/toolkit/scripts
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • metadata/trunk/toolkit/scripts/check-tools-integrity.py

    r2063 r2884  
    22# -*- coding: utf-8 -*-
    33
    4 ##   Title:      Check tools integry
     4##   Title:      Check tools integrity
    55##   Author:     Sander Maijers <sanmai @@ mpi.nl>
    66##   Since:      1-8-2012
     7##   Status:     ALPHA
    78##   Description:
    89##   Check the CLARIN tools registry CSV file for problematic
  • metadata/trunk/toolkit/scripts/filter_tools_csv.R

    r2078 r2884  
    44## for stale/irrelevant/problematic records.
    55
    6 tools_registry          <- read.csv("/tmp/export_tools", check.names = FALSE, header = TRUE);
    7 checks_output_table <- read.table("/tmp/output.tab", sep = '\t', check.names = FALSE, header = TRUE);
    8 colnames(checks_output_table) <- paste(colnames(checks_output_table), "check");
     6tools_registry <-
     7        read.csv("/tmp/export_tools",
     8                         check.names = FALSE,
     9                         header = TRUE);
     10checks_output_table <-
     11        read.table("/tmp/output.tab",
     12                           sep = '\t',
     13                           check.names = FALSE,
     14                           header = TRUE);
    915
    10 tools_registry <- tools_registry[, -1 * which(colnames(tools_registry) == "URL check result (field_tool_urlcheck)")];
     16colnames(checks_output_table) <-
     17        paste(colnames(checks_output_table), "check");
    1118
    12 records_to_be_kept      <- subset(checks_output_table, `Reference link (field_tool_reference_link) check` != "unspecified");
     19tools_registry <-
     20        tools_registry[, -1 * which(colnames(tools_registry) == "URL check result (field_tool_urlcheck)")];
     21
     22records_to_be_kept <-
     23        subset(checks_output_table,
     24                        `Reference link (field_tool_reference_link) check` != "unspecified");
    1325
    1426
     
    1628
    1729## Records whose contact person should be warned.
    18 records_any_unspecified <- subset(records_to_be_kept, `Reference link (field_tool_reference_link) check`                        == "unspecified"
    19                                                                                                         | `Documentation link (field_tool_document_link) check`                 == "unspecified"
    20                                                                                                         | `Webservice link (field_tool_webservice_link) check`          == "unspecified");
     30records_any_unspecified <-
     31        subset(records_to_be_kept, `Reference link (field_tool_reference_link) check`                   == "unspecified"
     32                                                                | `Documentation link (field_tool_document_link) check`         == "unspecified"
     33                                                                | `Webservice link (field_tool_webservice_link) check`          == "unspecified");
    2134
    2235
    23 complete_extended_table                                         <- cbind(tools_registry, checks_output_table);
     36complete_extended_table <-
     37        cbind(tools_registry, checks_output_table);
    2438
    2539write.table(complete_extended_table,
     
    3044
    3145
    32 records_relevant_links_specified        <- subset(records_to_be_kept, (`Reference link (field_tool_reference_link) check`           != "unspecified"
    33                                                                                                                                         | `Webservice link (field_tool_webservice_link) check`  != "unspecified")
    34                                                                                                                                         & `Documentation link (field_tool_document_link) check`         != "unspecified");
    35 links_specified_table                           <- cbind(tools_registry[row.names(records_relevant_links_specified),], records_relevant_links_specified);
     46records_relevant_links_specified <-
     47        subset(records_to_be_kept, (`Reference link (field_tool_reference_link) check`      != "unspecified"
     48                                                                | `Webservice link (field_tool_webservice_link) check`  != "unspecified")
     49                                                        & `Documentation link (field_tool_document_link) check`         != "unspecified");
     50links_specified_table <-
     51        cbind(tools_registry[row.names(records_relevant_links_specified),], records_relevant_links_specified);
     52
    3653write.table(links_specified_table,
    3754                        file                    = "/tmp/export_tools__relevant_links_specified__7-8-2012.csv",
     
    4158
    4259
    43 records_relevant_links_work         <- subset(records_to_be_kept, (`Reference link (field_tool_reference_link) check`       == "works"
    44                                                                                                                                         | `Webservice link (field_tool_webservice_link) check`  == "works")
    45                                                                                                                                         & `Documentation link (field_tool_document_link) check`         == "works");
    46 links_work_table                                        <- cbind(tools_registry[row.names(records_relevant_links_work),], records_relevant_links_work);
     60records_relevant_links_work <-
     61        subset(records_to_be_kept, (`Reference link (field_tool_reference_link) check`  == "works"
     62                                                        | `Webservice link (field_tool_webservice_link) check`  == "works")
     63                                                        & `Documentation link (field_tool_document_link) check` == "works");
     64links_work_table <-
     65        cbind(tools_registry[row.names(records_relevant_links_work),], records_relevant_links_work);
     66
    4767write.table(links_work_table,
    4868                        file                    = "/tmp/export_tools__relevant_links_work__7-8-2012.csv",
     
    5171                        col.names               = TRUE);
    5272
    53 URLs                            <- tools_registry[row.names(records_problematic),17]
     73URLs <-
     74        tools_registry[row.names(records_problematic),17];
    5475
    5576## To inspect the problematic records manually:
    56 edit(records_problematic)
     77edit(records_problematic);
    5778
    5879## Bar plot of frequencies of problematic Reference link values by country.
    59 plot(factor(tools_registry[row.names(records_problematic),10]))
     80plot(factor(tools_registry[row.names(records_problematic),10]));
  • metadata/trunk/toolkit/scripts/lrt2cmdi.py

    r2615 r2884  
    77from curses.ascii import ascii
    88
    9 if sys.version_info < (2, 7) :
     9if sys.version_info < (2, 7) or sys.version_info >= (3, 0):
    1010    sys.stderr.write("WARNING: this script was only tested with Python version 2.7.3! You are running version " + str(sys.version_info[1]) + "." + str(sys.version_info[2]) + " instead.\n")
    1111
     
    1616        self.xmlTree        = ElementTree.ElementTree(ElementTree.fromstring(template))
    1717        self.parentmap      = dict((c, p) for p in self.xmlTree.getiterator() for c in p)
    18         self.fillElement("//MdCreationDate", datetime.datetime.now().strftime("%Y-%m-%d"))
     18        self.current_date   = datetime.datetime.now().strftime("%Y-%m-%d")
     19        self.fillElement("//MdCreationDate", self.current_date)
    1920        self.fillElement("//MdSelfLink", "http://lrt.clarin.eu/node/%s" % nodeId)
    2021
     
    151152            collectionList  = frozenset(("Spoken Corpus", "Written Corpus", "Multimodal Corpus", "Aligned Corpus", "Treebank", "N-Gram Model",))
    152153            lexiconList     = frozenset(("Grammar", "Lexicon / Knowledge Source", "Terminological Resource",))
    153 
     154           
    154155            if typeList.intersection(collectionList):
    155156                self.addCollectionDetails(record, isoList)
    156157            if typeList.intersection(lexiconList):
    157158                self.addLexiconDetails(record, isoList)
    158             #if "Web Service" in typeList:
    159             #    self.addServiceDetails(record)
     159            if "Web Service" in typeList:
     160                #pdb.set_trace()
     161                self.addServiceDetails(record)
    160162
    161163    def addCollectionDetails(self, record, isoList):
     
    163165
    164166
    165         self.fillOptionalElement(LrtCollectionDetails_XPath + "/LongTermPreservationBy",    record["field_longterm_preservation"])
    166         self.fillOptionalElement(LrtCollectionDetails_XPath + "/Location",                  record["field_location_0"])
    167         self.fillOptionalElement(LrtCollectionDetails_XPath + "/ContentType",               record["field_content_type"])
    168         self.fillOptionalElement(LrtCollectionDetails_XPath + "/FormatDetailed",            record["field_format_detailed"])
    169         self.fillOptionalElement(LrtCollectionDetails_XPath + "/Quality",                   record["field_quality"])
    170         self.fillOptionalElement(LrtCollectionDetails_XPath + "/Applications",              record["field_applications"])
    171         self.fillOptionalElement(LrtCollectionDetails_XPath + "/Size",                      record["field_size"])
    172         self.fillOptionalElement(LrtCollectionDetails_XPath + "/DistributionForm",          record["field_distribution_form"])
    173         self.fillOptionalElement(LrtCollectionDetails_XPath + "/Size",                      record["field_size"])
    174         self.fillOptionalElement(LrtCollectionDetails_XPath + "/Access",                    record["field_access"])
    175         self.fillOptionalElement(LrtCollectionDetails_XPath + "/Source",                    record["field_source_0"])
     167        self.fillOptionalElement(LrtCollectionDetails_XPath + "/LongTermPreservationBy",   
     168                                 record["field_longterm_preservation"])
     169        self.fillOptionalElement(LrtCollectionDetails_XPath + "/Location",                 
     170                                 record["field_location_0"])
     171        self.fillOptionalElement(LrtCollectionDetails_XPath + "/ContentType",               
     172                                 record["field_content_type"])
     173        self.fillOptionalElement(LrtCollectionDetails_XPath + "/FormatDetailed",           
     174                                 record["field_format_detailed"])
     175        self.fillOptionalElement(LrtCollectionDetails_XPath + "/Quality",                   
     176                                 record["field_quality"])
     177        self.fillOptionalElement(LrtCollectionDetails_XPath + "/Applications",             
     178                                 record["field_applications"])
     179        self.fillOptionalElement(LrtCollectionDetails_XPath + "/Size",                     
     180                                 record["field_size"])
     181        self.fillOptionalElement(LrtCollectionDetails_XPath + "/DistributionForm",         
     182                                 record["field_distribution_form"])
     183        self.fillOptionalElement(LrtCollectionDetails_XPath + "/Size",                     
     184                                 record["field_size"])
     185        self.fillOptionalElement(LrtCollectionDetails_XPath + "/Access",                   
     186                                 record["field_access"])
     187        self.fillOptionalElement(LrtCollectionDetails_XPath + "/Source",                   
     188                                 record["field_source_0"])
    176189
    177190        # ok - this can be done in a cleaner way
    178         self.addLanguages(isoList, record["field_working_languages"], 1, LrtCollectionDetails_XPath + "/WorkingLanguages")
     191        self.addLanguages(isoList,
     192                          record["field_working_languages"],
     193                          1,
     194                          LrtCollectionDetails_XPath + "/WorkingLanguages")
    179195
    180196    def addLexiconDetails(self, record, isoList):
    181         LrtLexiconDetails_XPath     = "//LrtInventoryResource/LrtLexiconDetails"
    182 
    183         self.fillOptionalElement(LrtLexiconDetails_XPath + "/Date",                         record["field_date_0"])
    184         self.fillOptionalElement(LrtLexiconDetails_XPath + "/Type",                         record["field_type"])
    185         self.fillOptionalElement(LrtLexiconDetails_XPath + "/FormatDetailed",               record["field_format_detailed_1"])
    186         self.fillOptionalElement(LrtLexiconDetails_XPath + "/SchemaReference",              record["field_schema_reference"])
    187         self.fillOptionalElement(LrtLexiconDetails_XPath + "/Size",                         record["field_size_0"])
    188         self.fillOptionalElement(LrtLexiconDetails_XPath + "/Access",                       record["field_access_1"])
    189         self.addLanguages(isoList, record["field_working_languages_0"], 1, LrtLexiconDetails_XPath + "/WorkingLanguages")
     197        LrtLexiconDetails_XPath = "//LrtInventoryResource/LrtLexiconDetails"
     198
     199        self.fillOptionalElement(LrtLexiconDetails_XPath + "/Date",                         
     200                                 record["field_date_0"])
     201        self.fillOptionalElement(LrtLexiconDetails_XPath + "/Type",                         
     202                                 record["field_type"])
     203        self.fillOptionalElement(LrtLexiconDetails_XPath + "/FormatDetailed",               
     204                                 record["field_format_detailed_1"])
     205        self.fillOptionalElement(LrtLexiconDetails_XPath + "/SchemaReference",             
     206                                 record["field_schema_reference"])
     207        self.fillOptionalElement(LrtLexiconDetails_XPath + "/Size",                         
     208                                 record["field_size_0"])
     209        self.fillOptionalElement(LrtLexiconDetails_XPath + "/Access",                       
     210                                 record["field_access_1"])
     211        self.addLanguages(isoList,
     212                          record["field_working_languages_0"],
     213                          1,
     214                          LrtLexiconDetails_XPath + "/WorkingLanguages")
    190215
    191216    def addServiceDetails(self, record):
     217
     218        #pdb.set_trace()
    192219        LrtLexiconDetails_XPath  = "//LrtInventoryResource/LrtServiceDetails"
    193220
    194         self.fillOptionalElement(LrtLexiconDetails_XPath + "/Date",                         record["field_date_0"])
     221        if str(record["field_date_0"]).strip() == '' :
     222            service_date = self.current_date
     223        else :
     224            service_date = record["field_date_0"]
     225
     226        self.fillElement(LrtLexiconDetails_XPath + "/Date",
     227                         service_date)
    195228
    196229    def addResourceProxy(self, link) :
     
    298331        cmdi.fillElement("//LrtCommon/MetadataLink", record["field_metadata_link"])
    299332        cmdi.fillElement("//LrtCommon/Publications", record["field_publications"])
    300         cmdi.fillElement("//LrtCommon/ReadilyAvailable", record["field_resource_available"].replace("Yes","true").replace("No","false"))
     333        cmdi.fillElement("//LrtCommon/ReadilyAvailable", record["field_resource_available"].replace("Yes", "true").replace("No", "false"))
    301334        cmdi.fillElement("//LrtCommon/ReferenceLink", record["field_reference_link"])
    302335
    303336        cmdi.fillElement("//LrtDistributionClassification/DistributionType", record["distribution_type"])
    304         cmdi.fillElement("//LrtDistributionClassification/ModificationsRequireRedeposition", record["modifications_require_redeposition"].replace("1","true").replace("0","false"))
    305         cmdi.fillElement("//LrtDistributionClassification/NonCommercialUsageOnly", record["non-commercial_usage_only"].replace("1","true").replace("0","false"))
    306         cmdi.fillElement("//LrtDistributionClassification/UsageReportRequired", record["usage_report_required"].replace("1","true").replace("0","false"))
     337        cmdi.fillElement("//LrtDistributionClassification/ModificationsRequireRedeposition", record["modifications_require_redeposition"].replace("1", "true").replace("0","false"))
     338        cmdi.fillElement("//LrtDistributionClassification/NonCommercialUsageOnly", record["non-commercial_usage_only"].replace("1", "true").replace("0", "false"))
     339        cmdi.fillElement("//LrtDistributionClassification/UsageReportRequired", record["usage_report_required"].replace("1", "true").replace("0", "false"))
    307340        cmdi.fillElement("//LrtDistributionClassification/OtherDistributionRestrictions", record["other_distribution_restrictions"])
    308341
Note: See TracChangeset for help on using the changeset viewer.