Changeset 1619
- Timestamp:
- 11/10/11 17:13:49 (13 years ago)
- Location:
- metadata/trunk/toolkit/scripts
- Files:
-
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
metadata/trunk/toolkit/scripts/dir2cmdicollection.py
r990 r1619 3 3 # generates CMDI collection file hierarchy for collections of CMDI records 4 4 # support and questions: Dieter Van Uytvanck <dietuyt@mpi.nl> 5 # rework by Matej id@vronk.net : 5 # rework by Matej id@vronk.net : 6 6 # - already filling ResourceRef with handles read from the MdSelfLink of the mdrecords 7 7 # - also reading ProviderURL-file and filling as ID in the basic collection-profile … … 9 9 10 10 # params: 11 # ref_flag := what to put into ResourceRef: 12 # 'handle' := PID (read from MdSelfLink of the resources), 11 # ref_flag := what to put into ResourceRef: 12 # 'handle' := PID (read from MdSelfLink of the resources), 13 13 # 'path' := the relative path of the record 14 14 … … 29 29 if not os.path.isdir(target_dir): 30 30 os.mkdir(target_dir) 31 31 32 32 if not simple_dir_structure: 33 root = os.getcwd() 34 dirs = filter (os.path.isdir, os.listdir(root)) 33 root = os.getcwd() 34 dirs = filter (os.path.isdir, os.listdir(root)) 35 35 dirs.remove(target_dir.replace("/","")) 36 36 #for root, dirs, files in os.walk(os.getcwd()): 37 print dirs 38 for d in dirs: 37 print dirs 38 for d in dirs: 39 39 rootList.append(generate_branch(root, [d], d)) 40 #startpath = os.getcwd() 40 #startpath = os.getcwd() 41 41 #for d in dirs: 42 42 #if d == "0": 43 43 44 44 else: 45 45 rootList.append(generate_branch(os.getcwd(), [""], "lrt_inventory")) … … 54 54 #collectionName = os.path.relpath(root) 55 55 collectionFile = "collection_%s.cmdi" % collectionName 56 56 57 57 dirs.sort() 58 collectionList = [] 58 collectionList = [] 59 59 for d in dirs: 60 60 fullpath = os.path.join(root, d) … … 63 63 if ".cmdi" in file: 64 64 newFile = os.path.relpath(os.path.join(fullpath,file)) 65 collectionList.append(newFile) 65 collectionList.append(newFile) 66 66 collid = writeCollection(collectionList, collectionFile, collectionName) 67 67 print "genbranch:" + collid … … 74 74 75 75 outstring = Template("""<?xml version="1.0" encoding="UTF-8"?> 76 <CMD xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"76 <CMD xmlns="http://www.clarin.eu/cmd/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" CMDVersion="1.1" 77 77 xsi:schemaLocation="http://www.clarin.eu/cmd http://catalog.clarin.eu/ds/ComponentRegistry/rest/registry/profiles/clarin.eu:cr1:p_1284723009187/xsd"> 78 78 <Header> … … 87 87 <JournalFileProxyList/> 88 88 <ResourceRelationList/> 89 </Resources> 89 </Resources> 90 90 <Components> 91 91 <collection> … … 99 99 100 100 resourceProxies = "" 101 collectionList.sort() 101 collectionList.sort() 102 102 if os.path.isfile(collectionName + "/ProviderURL"): 103 103 urlf = open(collectionName + "/ProviderURL", 'r') 104 104 url = urlf.readline() 105 else: 105 else: 106 106 url ="?" 107 107 name = "OLAC: " + collectionName.replace("_", " ") … … 114 114 if "<MdSelfLink>" in line: 115 115 # WARNING! rocket science employed here ! 116 idx = line.replace("<MdSelfLink>","").replace("</MdSelfLink>","").strip() 116 idx = line.replace("<MdSelfLink>","").replace("</MdSelfLink>","").strip() 117 117 break 118 118 else: 119 119 idx = item 120 120 121 121 if ref_flag=="path": 122 122 # need to add '../' for the number of dirs in the target_dir, so paths match again. 
… … 124 124 else: 125 125 path = idx; 126 126 127 127 #idx = item.replace(".xml.cmdi","").replace("_", ":",1)[::-1].replace("_", ":",1)[::-1].replace("_", "-") 128 128 resourceProxies += "\n" + resourceTemplate.substitute(idname = idx.replace(".","_").replace("/","_").replace("\\","_").replace(":","_"), idx = path) … … 135 135 else: 136 136 collidx = collectionFile 137 print collidx 137 print collidx 138 138 outfile = outstring.substitute(date= datetime.datetime.now().strftime("%Y-%m-%d"), selflink=collidx, rp=resourceProxies,url=url, name=name) 139 139 collectionFile = target_dir + collectionFile 140 f = open(collectionFile, 'w') 140 f = open(collectionFile, 'w') 141 141 f.write(outfile) 142 142 f.close() 143 143 144 144 print collectionFile 145 145 if ref_flag=="path": -
metadata/trunk/toolkit/scripts/lrt2cmdi.py
r1617 r1619 40 40 41 41 def removeEmptyNodes(self): 42 removeList = ["ResourceType", "BeginYearResourceCreation", "FinalizationYearResourceCreation", "Institute"] 42 # we added some elements so need to recalculate the parentmap 43 self.parentmap = dict((c, p) for p in self.xmlTree.getiterator() for c in p) 44 45 removeList = ["ResourceType", "BeginYearResourceCreation", "FinalizationYearResourceCreation", "Institute", \ 46 "DistributionType", "NonCommercialUsageOnly", "UsageReportRequired", "ModificationsRequireRedeposition", "WorkingLanguages", "Date"] 43 47 for r in removeList: 44 48 results = self.xmlTree.findall("//%s" % r) … … 161 165 <Size /> 162 166 <Access /> 163 <WorkingLanguages />167 <WorkingLanguages /> 164 168 </LrtLexiconDetails>''' 165 169 partTree = ElementTree.fromstring(template) … … 175 179 self.fillElement("//LrtLexiconDetails/Access", record["field_access_1"]) 176 180 self.addLanguages(isoList, record["field_working_languages_0"], 1, "//LrtLexiconDetails/WorkingLanguages") 177 178 181 179 182 def addServiceDetails(self, record):
Note: See TracChangeset
for help on using the changeset viewer.