Changeset 2884
- Timestamp:
- 05/10/13 12:36:29 (11 years ago)
- Location:
- metadata/trunk/toolkit/scripts
- Files:
- 3 edited
Legend:
- Unmodified
- Added
- Removed
-
metadata/trunk/toolkit/scripts/check-tools-integrity.py
r2063 r2884 2 2 # -*- coding: utf-8 -*- 3 3 4 ## Title: Check tools integr y4 ## Title: Check tools integrity 5 5 ## Author: Sander Maijers <sanmai @@ mpi.nl> 6 6 ## Since: 1-8-2012 7 ## Status: ALPHA 7 8 ## Description: 8 9 ## Check the CLARIN tools registry CSV file for problematic -
metadata/trunk/toolkit/scripts/filter_tools_csv.R
r2078 r2884 4 4 ## for stale/irrelevant/problematic records. 5 5 6 tools_registry <- read.csv("/tmp/export_tools", check.names = FALSE, header = TRUE); 7 checks_output_table <- read.table("/tmp/output.tab", sep = '\t', check.names = FALSE, header = TRUE); 8 colnames(checks_output_table) <- paste(colnames(checks_output_table), "check"); 6 tools_registry <- 7 read.csv("/tmp/export_tools", 8 check.names = FALSE, 9 header = TRUE); 10 checks_output_table <- 11 read.table("/tmp/output.tab", 12 sep = '\t', 13 check.names = FALSE, 14 header = TRUE); 9 15 10 tools_registry <- tools_registry[, -1 * which(colnames(tools_registry) == "URL check result (field_tool_urlcheck)")]; 16 colnames(checks_output_table) <- 17 paste(colnames(checks_output_table), "check"); 11 18 12 records_to_be_kept <- subset(checks_output_table, `Reference link (field_tool_reference_link) check` != "unspecified"); 19 tools_registry <- 20 tools_registry[, -1 * which(colnames(tools_registry) == "URL check result (field_tool_urlcheck)")]; 21 22 records_to_be_kept <- 23 subset(checks_output_table, 24 `Reference link (field_tool_reference_link) check` != "unspecified"); 13 25 14 26 … … 16 28 17 29 ## Records whose contact person should be warned. 
18 records_any_unspecified <- subset(records_to_be_kept, `Reference link (field_tool_reference_link) check` == "unspecified" 19 | `Documentation link (field_tool_document_link) check` == "unspecified" 20 | `Webservice link (field_tool_webservice_link) check` == "unspecified"); 30 records_any_unspecified <- 31 subset(records_to_be_kept, `Reference link (field_tool_reference_link) check` == "unspecified" 32 | `Documentation link (field_tool_document_link) check` == "unspecified" 33 | `Webservice link (field_tool_webservice_link) check` == "unspecified"); 21 34 22 35 23 complete_extended_table <- cbind(tools_registry, checks_output_table); 36 complete_extended_table <- 37 cbind(tools_registry, checks_output_table); 24 38 25 39 write.table(complete_extended_table, … … 30 44 31 45 32 records_relevant_links_specified <- subset(records_to_be_kept, (`Reference link (field_tool_reference_link) check` != "unspecified" 33 | `Webservice link (field_tool_webservice_link) check` != "unspecified") 34 & `Documentation link (field_tool_document_link) check` != "unspecified"); 35 links_specified_table <- cbind(tools_registry[row.names(records_relevant_links_specified),], records_relevant_links_specified); 46 records_relevant_links_specified <- 47 subset(records_to_be_kept, (`Reference link (field_tool_reference_link) check` != "unspecified" 48 | `Webservice link (field_tool_webservice_link) check` != "unspecified") 49 & `Documentation link (field_tool_document_link) check` != "unspecified"); 50 links_specified_table <- 51 cbind(tools_registry[row.names(records_relevant_links_specified),], records_relevant_links_specified); 52 36 53 write.table(links_specified_table, 37 54 file = "/tmp/export_tools__relevant_links_specified__7-8-2012.csv", … … 41 58 42 59 43 records_relevant_links_work <- subset(records_to_be_kept, (`Reference link (field_tool_reference_link) check` == "works" 44 | `Webservice link (field_tool_webservice_link) check` == "works") 45 & `Documentation link 
(field_tool_document_link) check` == "works"); 46 links_work_table <- cbind(tools_registry[row.names(records_relevant_links_work),], records_relevant_links_work); 60 records_relevant_links_work <- 61 subset(records_to_be_kept, (`Reference link (field_tool_reference_link) check` == "works" 62 | `Webservice link (field_tool_webservice_link) check` == "works") 63 & `Documentation link (field_tool_document_link) check` == "works"); 64 links_work_table <- 65 cbind(tools_registry[row.names(records_relevant_links_work),], records_relevant_links_work); 66 47 67 write.table(links_work_table, 48 68 file = "/tmp/export_tools__relevant_links_work__7-8-2012.csv", … … 51 71 col.names = TRUE); 52 72 53 URLs <- tools_registry[row.names(records_problematic),17] 73 URLs <- 74 tools_registry[row.names(records_problematic),17]; 54 75 55 76 ## To inspect the problematic records manually: 56 edit(records_problematic) 77 edit(records_problematic); 57 78 58 79 ## Bar plot of frequencies of problematic Reference link values by country. 59 plot(factor(tools_registry[row.names(records_problematic),10])) 80 plot(factor(tools_registry[row.names(records_problematic),10])); -
metadata/trunk/toolkit/scripts/lrt2cmdi.py
r2615 r2884 7 7 from curses.ascii import ascii 8 8 9 if sys.version_info < (2, 7) :9 if sys.version_info < (2, 7) or sys.version_info >= (3, 0): 10 10 sys.stderr.write("WARNING: this script was only tested with Python version 2.7.3! You are running version " + str(sys.version_info[1]) + "." + str(sys.version_info[2]) + " instead.\n") 11 11 … … 16 16 self.xmlTree = ElementTree.ElementTree(ElementTree.fromstring(template)) 17 17 self.parentmap = dict((c, p) for p in self.xmlTree.getiterator() for c in p) 18 self.fillElement("//MdCreationDate", datetime.datetime.now().strftime("%Y-%m-%d")) 18 self.current_date = datetime.datetime.now().strftime("%Y-%m-%d") 19 self.fillElement("//MdCreationDate", self.current_date) 19 20 self.fillElement("//MdSelfLink", "http://lrt.clarin.eu/node/%s" % nodeId) 20 21 … … 151 152 collectionList = frozenset(("Spoken Corpus", "Written Corpus", "Multimodal Corpus", "Aligned Corpus", "Treebank", "N-Gram Model",)) 152 153 lexiconList = frozenset(("Grammar", "Lexicon / Knowledge Source", "Terminological Resource",)) 153 154 154 155 if typeList.intersection(collectionList): 155 156 self.addCollectionDetails(record, isoList) 156 157 if typeList.intersection(lexiconList): 157 158 self.addLexiconDetails(record, isoList) 158 #if "Web Service" in typeList: 159 # self.addServiceDetails(record) 159 if "Web Service" in typeList: 160 #pdb.set_trace() 161 self.addServiceDetails(record) 160 162 161 163 def addCollectionDetails(self, record, isoList): … … 163 165 164 166 165 self.fillOptionalElement(LrtCollectionDetails_XPath + "/LongTermPreservationBy", record["field_longterm_preservation"]) 166 self.fillOptionalElement(LrtCollectionDetails_XPath + "/Location", record["field_location_0"]) 167 self.fillOptionalElement(LrtCollectionDetails_XPath + "/ContentType", record["field_content_type"]) 168 self.fillOptionalElement(LrtCollectionDetails_XPath + "/FormatDetailed", record["field_format_detailed"]) 169 self.fillOptionalElement(LrtCollectionDetails_XPath + 
"/Quality", record["field_quality"]) 170 self.fillOptionalElement(LrtCollectionDetails_XPath + "/Applications", record["field_applications"]) 171 self.fillOptionalElement(LrtCollectionDetails_XPath + "/Size", record["field_size"]) 172 self.fillOptionalElement(LrtCollectionDetails_XPath + "/DistributionForm", record["field_distribution_form"]) 173 self.fillOptionalElement(LrtCollectionDetails_XPath + "/Size", record["field_size"]) 174 self.fillOptionalElement(LrtCollectionDetails_XPath + "/Access", record["field_access"]) 175 self.fillOptionalElement(LrtCollectionDetails_XPath + "/Source", record["field_source_0"]) 167 self.fillOptionalElement(LrtCollectionDetails_XPath + "/LongTermPreservationBy", 168 record["field_longterm_preservation"]) 169 self.fillOptionalElement(LrtCollectionDetails_XPath + "/Location", 170 record["field_location_0"]) 171 self.fillOptionalElement(LrtCollectionDetails_XPath + "/ContentType", 172 record["field_content_type"]) 173 self.fillOptionalElement(LrtCollectionDetails_XPath + "/FormatDetailed", 174 record["field_format_detailed"]) 175 self.fillOptionalElement(LrtCollectionDetails_XPath + "/Quality", 176 record["field_quality"]) 177 self.fillOptionalElement(LrtCollectionDetails_XPath + "/Applications", 178 record["field_applications"]) 179 self.fillOptionalElement(LrtCollectionDetails_XPath + "/Size", 180 record["field_size"]) 181 self.fillOptionalElement(LrtCollectionDetails_XPath + "/DistributionForm", 182 record["field_distribution_form"]) 183 self.fillOptionalElement(LrtCollectionDetails_XPath + "/Size", 184 record["field_size"]) 185 self.fillOptionalElement(LrtCollectionDetails_XPath + "/Access", 186 record["field_access"]) 187 self.fillOptionalElement(LrtCollectionDetails_XPath + "/Source", 188 record["field_source_0"]) 176 189 177 190 # ok - this can be done in a cleaner way 178 self.addLanguages(isoList, record["field_working_languages"], 1, LrtCollectionDetails_XPath + "/WorkingLanguages") 191 self.addLanguages(isoList, 192 
record["field_working_languages"], 193 1, 194 LrtCollectionDetails_XPath + "/WorkingLanguages") 179 195 180 196 def addLexiconDetails(self, record, isoList): 181 LrtLexiconDetails_XPath = "//LrtInventoryResource/LrtLexiconDetails" 182 183 self.fillOptionalElement(LrtLexiconDetails_XPath + "/Date", record["field_date_0"]) 184 self.fillOptionalElement(LrtLexiconDetails_XPath + "/Type", record["field_type"]) 185 self.fillOptionalElement(LrtLexiconDetails_XPath + "/FormatDetailed", record["field_format_detailed_1"]) 186 self.fillOptionalElement(LrtLexiconDetails_XPath + "/SchemaReference", record["field_schema_reference"]) 187 self.fillOptionalElement(LrtLexiconDetails_XPath + "/Size", record["field_size_0"]) 188 self.fillOptionalElement(LrtLexiconDetails_XPath + "/Access", record["field_access_1"]) 189 self.addLanguages(isoList, record["field_working_languages_0"], 1, LrtLexiconDetails_XPath + "/WorkingLanguages") 197 LrtLexiconDetails_XPath = "//LrtInventoryResource/LrtLexiconDetails" 198 199 self.fillOptionalElement(LrtLexiconDetails_XPath + "/Date", 200 record["field_date_0"]) 201 self.fillOptionalElement(LrtLexiconDetails_XPath + "/Type", 202 record["field_type"]) 203 self.fillOptionalElement(LrtLexiconDetails_XPath + "/FormatDetailed", 204 record["field_format_detailed_1"]) 205 self.fillOptionalElement(LrtLexiconDetails_XPath + "/SchemaReference", 206 record["field_schema_reference"]) 207 self.fillOptionalElement(LrtLexiconDetails_XPath + "/Size", 208 record["field_size_0"]) 209 self.fillOptionalElement(LrtLexiconDetails_XPath + "/Access", 210 record["field_access_1"]) 211 self.addLanguages(isoList, 212 record["field_working_languages_0"], 213 1, 214 LrtLexiconDetails_XPath + "/WorkingLanguages") 190 215 191 216 def addServiceDetails(self, record): 217 218 #pdb.set_trace() 192 219 LrtLexiconDetails_XPath = "//LrtInventoryResource/LrtServiceDetails" 193 220 194 self.fillOptionalElement(LrtLexiconDetails_XPath + "/Date", record["field_date_0"]) 221 if 
str(record["field_date_0"]).strip() == '' : 222 service_date = self.current_date 223 else : 224 service_date = record["field_date_0"] 225 226 self.fillElement(LrtLexiconDetails_XPath + "/Date", 227 service_date) 195 228 196 229 def addResourceProxy(self, link) : … … 298 331 cmdi.fillElement("//LrtCommon/MetadataLink", record["field_metadata_link"]) 299 332 cmdi.fillElement("//LrtCommon/Publications", record["field_publications"]) 300 cmdi.fillElement("//LrtCommon/ReadilyAvailable", record["field_resource_available"].replace("Yes", "true").replace("No","false"))333 cmdi.fillElement("//LrtCommon/ReadilyAvailable", record["field_resource_available"].replace("Yes", "true").replace("No", "false")) 301 334 cmdi.fillElement("//LrtCommon/ReferenceLink", record["field_reference_link"]) 302 335 303 336 cmdi.fillElement("//LrtDistributionClassification/DistributionType", record["distribution_type"]) 304 cmdi.fillElement("//LrtDistributionClassification/ModificationsRequireRedeposition", record["modifications_require_redeposition"].replace("1", "true").replace("0","false"))305 cmdi.fillElement("//LrtDistributionClassification/NonCommercialUsageOnly", record["non-commercial_usage_only"].replace("1", "true").replace("0","false"))306 cmdi.fillElement("//LrtDistributionClassification/UsageReportRequired", record["usage_report_required"].replace("1", "true").replace("0","false"))337 cmdi.fillElement("//LrtDistributionClassification/ModificationsRequireRedeposition", record["modifications_require_redeposition"].replace("1", "true").replace("0","false")) 338 cmdi.fillElement("//LrtDistributionClassification/NonCommercialUsageOnly", record["non-commercial_usage_only"].replace("1", "true").replace("0", "false")) 339 cmdi.fillElement("//LrtDistributionClassification/UsageReportRequired", record["usage_report_required"].replace("1", "true").replace("0", "false")) 307 340 cmdi.fillElement("//LrtDistributionClassification/OtherDistributionRestrictions", 
record["other_distribution_restrictions"]) 308 341
Note: See TracChangeset for help on using the changeset viewer.