#!/usr/bin/Rscript

## Inspect the table produced by the check-tools-integrity.py script, with the
## aim of filtering the original CLARIN tools registry CSV for
## stale/irrelevant/problematic records.
##
## Reads the registry export and the tab-separated check results, appends
## " check" to every check column name, and writes three CSV files:
##   * the complete registry extended with all check columns,
##   * the records whose relevant links are specified,
##   * the records whose relevant links work.

## Input and output locations ----

registry_path <- "/tmp/export_tools"
checks_path <- "/tmp/output.tab"
output_dir <- "/run/media/sanmai/SAMSUNG/3,MPI"

## Helpers ----

## Write `table` as a comma-separated file named `file_name` inside
## `output_dir`, with a header row and without row names.
write_output_csv <- function(table, file_name) {
  write.table(table,
              file = file.path(output_dir, file_name),
              sep = ",",
              row.names = FALSE,
              col.names = TRUE)
  invisible(table)
}

## Load the data ----

tools_registry <- read.csv(registry_path, check.names = FALSE, header = TRUE)
checks_output_table <- read.table(checks_path,
                                  sep = "\t",
                                  check.names = FALSE,
                                  header = TRUE)
colnames(checks_output_table) <- paste(colnames(checks_output_table), "check")

## Both tables are combined purely by row position below, so they must have
## the same number of rows; otherwise cbind() may recycle or fail obscurely.
if (nrow(tools_registry) != nrow(checks_output_table)) {
  stop("Registry has ", nrow(tools_registry), " rows but check table has ",
       nrow(checks_output_table), "; they must match row for row.",
       call. = FALSE)
}

## Filter the records ----

## Only records with a specified reference link are kept at all.
records_to_be_kept <- subset(
  checks_output_table,
  `Reference link (field_tool_reference_link) check` != "unspecified"
)

## Records whose contact person should be warned.
## NOTE(review): the first condition can never be TRUE here, because
## records_to_be_kept already excludes unspecified reference links. It is kept
## as written; confirm whether checks_output_table was the intended input.
records_any_unspecified <- subset(
  records_to_be_kept,
  `Reference link (field_tool_reference_link) check` == "unspecified"
  | `Documentation link (field_tool_document_link) check` == "unspecified"
  | `Webservice link (field_tool_webservice_link) check` == "unspecified"
)

## Complete registry extended with every check column ----

complete_extended_table <- cbind(tools_registry, checks_output_table)
write_output_csv(complete_extended_table,
                 "export_tools__complete_extended__7-8-2012.csv")

## Records with (reference or webservice) and documentation links specified ----

records_relevant_links_specified <- subset(
  records_to_be_kept,
  (`Reference link (field_tool_reference_link) check` != "unspecified"
   | `Webservice link (field_tool_webservice_link) check` != "unspecified")
  & `Documentation link (field_tool_document_link) check` != "unspecified"
)
## subset() preserves the original row names, which are used to pick the
## matching registry rows.
links_specified_table <- cbind(
  tools_registry[row.names(records_relevant_links_specified), ],
  records_relevant_links_specified
)
write_output_csv(links_specified_table,
                 "export_tools__relevant_links_specified__7-8-2012.csv")

## Records with (reference or webservice) and documentation links working ----

records_relevant_links_work <- subset(
  records_to_be_kept,
  (`Reference link (field_tool_reference_link) check` == "works"
   | `Webservice link (field_tool_webservice_link) check` == "works")
  & `Documentation link (field_tool_document_link) check` == "works"
)
links_work_table <- cbind(
  tools_registry[row.names(records_relevant_links_work), ],
  records_relevant_links_work
)
write_output_csv(links_work_table,
                 "export_tools__relevant_links_work__7-8-2012.csv")

## Problematic records ----

## NOTE(review): records_problematic was used below without ever being
## defined, so the script stopped with "object not found". It is reconstructed
## here as the kept records whose reference link check is not "works" --
## confirm that this matches the intended meaning of "problematic".
records_problematic <- subset(
  records_to_be_kept,
  `Reference link (field_tool_reference_link) check` != "works"
)

## Column 17 of the registry is presumably the URL field -- TODO confirm.
URLs <- tools_registry[row.names(records_problematic), 17]

## To inspect the problematic records manually. edit() needs an interactive
## session, so it is skipped when the script is run through Rscript.
if (interactive()) {
  edit(records_problematic)
}

## Bar plot of frequencies of problematic Reference link values by country
## (column 10 of the registry is presumably the country -- TODO confirm).
plot(factor(tools_registry[row.names(records_problematic), 10]))