- Timestamp: 10/30/15 10:19:12 (9 years ago)
- Location: vlo/branches/vlo-3.3-oeaw/vlo-vocabularies/src/main/java/eu/clarin/cmdi/vlo
- Files: 3 edited
Legend:
- Unmodified
- Added
- Removed
vlo/branches/vlo-3.3-oeaw/vlo-vocabularies/src/main/java/eu/clarin/cmdi/vlo/main/TransformAllMaps.java
r6505 r6708 8 8 import eu.clarin.cmdi.vlo.pojo.Constants; 9 9 import eu.clarin.cmdi.vlo.transformers.CSVTransformer; 10 import eu.clarin.cmdi.vlo.transformers.XML2CSVTransformer;11 10 12 11 public class TransformAllMaps { -
vlo/branches/vlo-3.3-oeaw/vlo-vocabularies/src/main/java/eu/clarin/cmdi/vlo/pojo/Constants.java
r6589 r6708 8 8 //PATHS 9 9 public static final String CSV_PATH = "maps/csv/"; 10 public static final String EXCEL_PATH = "maps/excel/"; 10 11 public static final String MAPS_PATH = "maps/uniform_maps/"; 11 12 … … 18 19 public static final String LANGUAGE_CODE = "LanguageNameVariantsMap"; 19 20 public static final String LICENCE = "LicenseAvailabilityMap"; 20 public static final String RESOURCE_TYPE = " resourceTypeMap";21 public static final String RESOURCE_TYPE = "ACDH_resourceType"; 21 22 public static final String NATIONAL_PROJECT = "nationalProjectsMapping"; 22 23 public static final String ORGANISATION = "OrganisationControlledVocabulary"; … … 25 26 26 27 static{ 27 maps.add(LANGUAGE_CODE);28 maps.add(LICENCE);29 maps.add(NATIONAL_PROJECT);30 maps.add(ORGANISATION);28 // maps.add(LANGUAGE_CODE); 29 // maps.add(LICENCE); 30 // maps.add(NATIONAL_PROJECT); 31 // maps.add(ORGANISATION); 31 32 maps.add(RESOURCE_TYPE); 32 33 } -
vlo/branches/vlo-3.3-oeaw/vlo-vocabularies/src/main/java/eu/clarin/cmdi/vlo/transformers/CSVTransformer.java
r6631 r6708 17 17 import javax.xml.bind.JAXBException; 18 18 19 import org.apache.poi.util.ArrayUtil; 19 20 import org.slf4j.Logger; 20 21 import org.slf4j.LoggerFactory; … … 52 53 _logger.info("reading from file {}{}.xml.", Constants.MAPS_PATH, map); 53 54 InputStream input = new FileInputStream(Constants.MAPS_PATH + map + ".xml"); 54 _logger.info("output will be saved into {}{}. csv.", Constants.CSV_PATH, map);55 OutputStream output = new FileOutputStream(Constants.CSV_PATH + map + ". csv");55 _logger.info("output will be saved into {}{}.txt.", Constants.CSV_PATH, map); 56 OutputStream output = new FileOutputStream(Constants.CSV_PATH + map + ".txt"); 56 57 57 58 xml2csv(input, output); … … 80 81 81 82 public synchronized void csv2xml(String map) throws IOException, JAXBException{ 82 _logger.info("reading from file {}{}. csv.", Constants.CSV_PATH, map);83 InputStream input = new FileInputStream(Constants.CSV_PATH + map + ". csv");83 _logger.info("reading from file {}{}.txt.", Constants.CSV_PATH, map); 84 InputStream input = new FileInputStream(Constants.CSV_PATH + map + ".txt"); 84 85 _logger.info("output will be saved into {}{}.xml.", Constants.MAPS_PATH, map); 85 86 OutputStream output = new FileOutputStream(Constants.MAPS_PATH + map + ".xml"); … … 180 181 private VariantsMap csv2xmlTransformer(BufferedReader br) throws IOException{ 181 182 boolean regExpresionExists = false; 183 boolean remarksExists = false; 182 184 VariantsMap map = new VariantsMap(); 183 185 map.setMappings(new ArrayList<Mapping>()); … … 187 189 188 190 String line = br.readLine(); 189 190 //process column names 191 if(!line.contains(Constants.NORMALIZED_VALUE)) 192 throw new RuntimeException("The first row of CSV file must contain column names"); 193 _logger.info("CSV file contains following headers {}", line); 194 195 for(String col: line.split("\\t")){ 196 if(col.equals(Constants.REMARKS)) 197 break;//last column - don't need it in xml map 198 199 
if(col.equals(Constants.REGULAR_EXPRESSION)) 200 regExpresionExists = true; 201 columnNames.add(col.toLowerCase()); 202 203 } 204 205 while ((line = br.readLine()) != null) { 206 csvRowCnt++; 207 while(line.charAt(line.length() - 1) != '\t'){//line must end with a tab, otherwise means that value contains line break -> 
 208 _logger.debug("line {} contains line break!", csvRowCnt); 209 line += "\n" + br.readLine(); 210 } 211 212 String[] tokens = line.split("\\t"); 213 String normalizedVal = regExpresionExists? tokens[3] : tokens[2]; 214 215 if(normalizedVal.isEmpty()) 216 throw new RuntimeException("Normalized value (the 4th column) is mandatory"); 217 218 Variant var = null; 219 220 if(!tokens[0].isEmpty()){ // Organization vocab has normalized values without variants 221 var = new Variant(); 222 var.setValue(tokens[0]); 223 _logger.debug("new varinat was found, variant value is {}. ", var.getValue()); 224 225 //we are setting regEx attr only if it's true 226 if(regExpresionExists && tokens[1].toLowerCase().equals("true")) 227 var.setRegExp(true); 228 List<CrossMapping> crossMappings = new ArrayList<CrossMapping>(); 229 for(int i = regExpresionExists? 4 : 3; i < tokens.length; i++){ 230 if(tokens[i].isEmpty()) //skip when val is null (empty) 231 continue; 191 try{ 192 //process column names 193 if(!line.contains(Constants.NORMALIZED_VALUE)) 194 throw new RuntimeException("The first row of CSV file must contain column names"); 195 _logger.info("CSV file contains following headers {}", line); 196 197 for(String col: line.split("\\t")){ 198 if(col.toUpperCase().equals(Constants.REMARKS)){ 199 remarksExists = true; 200 break;//last column - don't need it in xml map 201 } 202 203 if(col.toUpperCase().equals(Constants.REGULAR_EXPRESSION)) 204 regExpresionExists = true; 205 columnNames.add(col.toLowerCase()); 206 207 } 208 209 while ((line = br.readLine()) != null) { 210 csvRowCnt++; 211 while(!remarksExists && line.charAt(line.length() - 1) != '\t'){//line must end with a tab, otherwise means that value contains line break -> 
 212 _logger.debug("line {} contains line break!", csvRowCnt); 213 line += "\n" + br.readLine(); 214 } 232 215 233 CrossMapping crossMapping = new CrossMapping(); 234 crossMapping.setFacet(columnNames.get(i)); 235 crossMapping.setValue(tokens[i]); 236 crossMappings.add(crossMapping); 237 _logger.debug("new cross-mapping was found {}:{} ", crossMapping.getFacet(), crossMapping.getValue()); 238 }//end cross-mappings 239 240 if(crossMappings.size() > 0) 241 var.setCrossMappings(crossMappings); 242 } 243 244 Mapping mapping = processedNormalizedVals.get(normalizedVal); 245 if(mapping == null){ 246 mapping = new Mapping(); 247 mapping.setValue(normalizedVal); 248 processedNormalizedVals.put(normalizedVal, mapping); 249 _logger.debug("new mapping was found, normalized value is {}", mapping.getValue()); 250 csvNormValsCnt++; 251 } 252 if(var != null){ 253 mapping.getVariants().add(var); 254 }else{ 255 mapping.setVariants(null); //to skip it in XML 256 } 257 258 259 }//end while 260 261 map.setField(columnNames.get(0)); 262 for(String s: processedNormalizedVals.keySet()){ 263 map.getMappings().add(processedNormalizedVals.get(s)); 216 String[] tokens = line.split("\\t"); 217 if(remarksExists && tokens.length == columnNames.size() + 1){ //remove last col (remarks) 218 String[] tokenstWithoutRemarks = new String[tokens.length - 1]; 219 System.arraycopy(tokens, 0, tokenstWithoutRemarks, 0, tokenstWithoutRemarks.length); 220 tokens = tokenstWithoutRemarks; 221 } 222 223 String normalizedVal = regExpresionExists? tokens[3] : tokens[2]; 224 225 if(normalizedVal.isEmpty()) 226 throw new RuntimeException("Normalized value (the 4th column) is mandatory"); 227 228 Variant var = null; 229 230 if(!tokens[0].isEmpty()){ // Organization vocab has normalized values without variants 231 var = new Variant(); 232 var.setValue(tokens[0]); 233 _logger.debug("new varinat was found, variant value is {}. 
", var.getValue()); 234 235 //we are setting regEx attr only if it's true 236 if(regExpresionExists && tokens[1].toLowerCase().equals("true")) 237 var.setRegExp(true); 238 List<CrossMapping> crossMappings = new ArrayList<CrossMapping>(); 239 for(int i = regExpresionExists? 4 : 3; i < tokens.length; i++){ 240 if(tokens[i].isEmpty()) //skip when val is null (empty) 241 continue; 242 243 CrossMapping crossMapping = new CrossMapping(); 244 crossMapping.setFacet(columnNames.get(i)); 245 crossMapping.setValue(tokens[i]); 246 crossMappings.add(crossMapping); 247 _logger.debug("new cross-mapping was found {}:{} ", crossMapping.getFacet(), crossMapping.getValue()); 248 }//end cross-mappings 249 250 if(crossMappings.size() > 0) 251 var.setCrossMappings(crossMappings); 252 } 253 254 Mapping mapping = processedNormalizedVals.get(normalizedVal); 255 if(mapping == null){ 256 mapping = new Mapping(); 257 mapping.setValue(normalizedVal); 258 processedNormalizedVals.put(normalizedVal, mapping); 259 _logger.debug("new mapping was found, normalized value is {}", mapping.getValue()); 260 csvNormValsCnt++; 261 } 262 if(var != null){ 263 mapping.getVariants().add(var); 264 }else{ 265 mapping.setVariants(null); //to skip it in XML 266 } 267 268 269 }//end while 270 271 map.setField(columnNames.get(0)); 272 for(String s: processedNormalizedVals.keySet()){ 273 map.getMappings().add(processedNormalizedVals.get(s)); 274 } 275 }catch(Exception e){ 276 _logger.error("Error on line {}", line, e); 264 277 } 278 279 265 280 266 281
Note: See TracChangeset
for help on using the changeset viewer.