1 | package eu.clarin.cmdi.vlo.importer; |
---|
2 | |
---|
3 | import java.io.File; |
---|
4 | import java.io.FileInputStream; |
---|
5 | import java.io.InputStream; |
---|
6 | import java.util.Map; |
---|
7 | |
---|
8 | import org.slf4j.Logger; |
---|
9 | import org.slf4j.LoggerFactory; |
---|
10 | |
---|
11 | import eu.clarin.cmdi.vlo.normalization.NormalizationService; |
---|
12 | import eu.clarin.cmdi.vlo.normalization.NormalizationVocabulary; |
---|
13 | import eu.clarin.cmdi.vlo.normalization.VocabularyEntry; |
---|
14 | import eu.clarin.cmdi.vlo.pojo.VariantsMap; |
---|
15 | import eu.clarin.cmdi.vlo.transformers.VariantsMapMarshaller; |
---|
16 | |
---|
17 | /* |
---|
18 | * abstract class that encapsulates common map creation from mapping files |
---|
19 | * for some postprocessors like LanguageCodePostProcessor* |
---|
20 | * |
---|
21 | * brings one more level in class hierarchy between interface PostPorcessor and concrete implementations |
---|
22 | * |
---|
23 | * @author dostojic |
---|
24 | * |
---|
25 | */ |
---|
26 | |
---|
27 | public abstract class PostProcessorsWithVocabularyMap implements PostProcessor, NormalizationService { |
---|
28 | |
---|
29 | private final static Logger _logger = LoggerFactory.getLogger(PostProcessorsWithVocabularyMap.class); |
---|
30 | |
---|
31 | private NormalizationVocabulary vocabulary; |
---|
32 | |
---|
33 | public String normalize(String value) { |
---|
34 | return normalize(value, value); |
---|
35 | } |
---|
36 | |
---|
37 | public String normalize(String value, String fallBackValue) { |
---|
38 | if (vocabulary == null) |
---|
39 | initVocabulary(); |
---|
40 | |
---|
41 | return vocabulary.normalize(value, fallBackValue); |
---|
42 | } |
---|
43 | |
---|
44 | public Map<String, String> getCrossMappings(String value) { |
---|
45 | if (vocabulary == null) |
---|
46 | initVocabulary(); |
---|
47 | |
---|
48 | return vocabulary.getCrossMappings(value); |
---|
49 | } |
---|
50 | |
---|
51 | public abstract String getNormalizationMapURL(); |
---|
52 | |
---|
53 | private void initVocabulary() { |
---|
54 | VariantsMap varinatsRawMap = getMappingFromFile(getNormalizationMapURL()); |
---|
55 | vocabulary = varinatsRawMap.getMap(); |
---|
56 | |
---|
57 | //printMap(); |
---|
58 | } |
---|
59 | |
---|
60 | protected VariantsMap getMappingFromFile(String mapUrl) { |
---|
61 | |
---|
62 | |
---|
63 | _logger.info("Reading vocabulary file from: {}", mapUrl); |
---|
64 | // load records from file |
---|
65 | // in the future this should be loaded from CLAVAS directly and the |
---|
66 | // file only used as fallback |
---|
67 | |
---|
68 | |
---|
69 | //try from file first |
---|
70 | try { |
---|
71 | InputStream is = new FileInputStream(new File(mapUrl)); |
---|
72 | } catch (Exception e) { |
---|
73 | _logger.warn("File {} not found, trying to fetch it from classpath ...", mapUrl); |
---|
74 | |
---|
75 | |
---|
76 | } |
---|
77 | |
---|
78 | //try from classpath |
---|
79 | |
---|
80 | InputStream is = PostProcessorsWithVocabularyMap.class.getClassLoader().getResourceAsStream(mapUrl); |
---|
81 | if(is == null) |
---|
82 | throw new RuntimeException("Cannot instantiate postProcessor, " + mapUrl + " is not on the classpath"); |
---|
83 | |
---|
84 | try{ |
---|
85 | return VariantsMapMarshaller.unmarshal(is); |
---|
86 | } catch (Exception e) { |
---|
87 | throw new RuntimeException("Cannot instantiate postProcessor: ", e); |
---|
88 | } |
---|
89 | |
---|
90 | |
---|
91 | } |
---|
92 | |
---|
93 | // for debug |
---|
94 | public void printMap() { |
---|
95 | _logger.info("map contains {} entries", vocabulary.getEntries().length); |
---|
96 | for(int i = 0; i < vocabulary.getEntries().length; i++) |
---|
97 | _logger.info(vocabulary.getEntries()[i].toString()); |
---|
98 | |
---|
99 | |
---|
100 | } |
---|
101 | } |
---|