Changeset 1619


Ignore:
Timestamp: 11/10/11 17:13:49 (13 years ago)
Author: dietuyt
Message:

Several fixes to ensure validation against the relevant XSDs

Location: metadata/trunk/toolkit/scripts
Files: 2 edited

Legend:

Unmodified
Added
Removed
  • metadata/trunk/toolkit/scripts/dir2cmdicollection.py

    r990 r1619  
    33# generates CMDI collection file hierarchy for collections of CMDI records
    44# support and questions: Dieter Van Uytvanck <dietuyt@mpi.nl>
    5 # rework by Matej id@vronk.net : 
     5# rework by Matej id@vronk.net :
    66#       - already filling ResourceRef with handles read from the MdSelfLink of the mdrecords
    77#   - also reading ProviderURL-file and filling as ID in the basic collection-profile
     
    99
    1010# params:
    11 #  ref_flag := what to put into ResourceRef: 
    12 #                       'handle' := PID (read from MdSelfLink of the resources), 
     11#  ref_flag := what to put into ResourceRef:
     12#                       'handle' := PID (read from MdSelfLink of the resources),
    1313#                       'path' := the relative path of the record
    1414
     
    2929        if not os.path.isdir(target_dir):
    3030                os.mkdir(target_dir)
    31        
     31
    3232        if not simple_dir_structure:
    33                 root = os.getcwd()             
    34                 dirs = filter (os.path.isdir, os.listdir(root))         
     33                root = os.getcwd()
     34                dirs = filter (os.path.isdir, os.listdir(root))
    3535                dirs.remove(target_dir.replace("/",""))
    3636                #for root, dirs, files in os.walk(os.getcwd()):
    37                 print dirs             
    38                 for d in dirs:         
     37                print dirs
     38                for d in dirs:
    3939                        rootList.append(generate_branch(root, [d], d))
    40                         #startpath = os.getcwd()               
     40                        #startpath = os.getcwd()
    4141                        #for d in dirs:
    4242                                #if d == "0":
    43                                
     43
    4444        else:
    4545                rootList.append(generate_branch(os.getcwd(), [""], "lrt_inventory"))
     
    5454        #collectionName = os.path.relpath(root)
    5555        collectionFile = "collection_%s.cmdi" % collectionName
    56        
     56
    5757        dirs.sort()
    58         collectionList = []     
     58        collectionList = []
    5959        for d in dirs:
    6060                fullpath = os.path.join(root, d)
     
    6363                        if ".cmdi" in file:
    6464                                newFile = os.path.relpath(os.path.join(fullpath,file))
    65                                 collectionList.append(newFile)         
     65                                collectionList.append(newFile)
    6666        collid = writeCollection(collectionList, collectionFile, collectionName)
    6767        print "genbranch:" + collid
     
    7474
    7575        outstring = Template("""<?xml version="1.0" encoding="UTF-8"?>
    76 <CMD xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
     76<CMD xmlns="http://www.clarin.eu/cmd/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" CMDVersion="1.1"
    7777    xsi:schemaLocation="http://www.clarin.eu/cmd http://catalog.clarin.eu/ds/ComponentRegistry/rest/registry/profiles/clarin.eu:cr1:p_1284723009187/xsd">
    7878    <Header>
     
    8787        <JournalFileProxyList/>
    8888        <ResourceRelationList/>
    89     </Resources>   
     89    </Resources>
    9090    <Components>
    9191        <collection>
     
    9999
    100100        resourceProxies = ""
    101         collectionList.sort()           
     101        collectionList.sort()
    102102        if os.path.isfile(collectionName + "/ProviderURL"):
    103103                urlf = open(collectionName + "/ProviderURL", 'r')
    104104                url = urlf.readline()
    105         else: 
     105        else:
    106106          url ="?"
    107107        name = "OLAC: " + collectionName.replace("_", " ")
     
    114114                                        if "<MdSelfLink>" in line:
    115115                                                #  WARNING! rocket science employed here !
    116                                                 idx = line.replace("<MdSelfLink>","").replace("</MdSelfLink>","").strip()                                               
     116                                                idx = line.replace("<MdSelfLink>","").replace("</MdSelfLink>","").strip()
    117117                                                break
    118118                else:
    119119                         idx = item
    120                          
     120
    121121                if ref_flag=="path":
    122122                        # need to add '../' for the number of dirs in the target_dir, so paths match again.
     
    124124                else:
    125125                        path = idx;
    126                        
     126
    127127                #idx = item.replace(".xml.cmdi","").replace("_", ":",1)[::-1].replace("_", ":",1)[::-1].replace("_", "-")
    128128                resourceProxies += "\n" + resourceTemplate.substitute(idname = idx.replace(".","_").replace("/","_").replace("\\","_").replace(":","_"), idx = path)
     
    135135                else:
    136136                        collidx = collectionFile
    137         print collidx 
     137        print collidx
    138138        outfile = outstring.substitute(date= datetime.datetime.now().strftime("%Y-%m-%d"), selflink=collidx, rp=resourceProxies,url=url, name=name)
    139139        collectionFile = target_dir + collectionFile
    140         f = open(collectionFile, 'w')   
     140        f = open(collectionFile, 'w')
    141141        f.write(outfile)
    142142        f.close()
    143        
     143
    144144        print collectionFile
    145145        if ref_flag=="path":
  • metadata/trunk/toolkit/scripts/lrt2cmdi.py

    r1617 r1619  
    4040
    4141    def removeEmptyNodes(self):
    42         removeList = ["ResourceType", "BeginYearResourceCreation", "FinalizationYearResourceCreation", "Institute"]
     42        # we added some elements so need to recalculate the parentmap
     43        self.parentmap = dict((c, p) for p in self.xmlTree.getiterator() for c in p)
     44
     45        removeList = ["ResourceType", "BeginYearResourceCreation", "FinalizationYearResourceCreation", "Institute", \
     46                      "DistributionType", "NonCommercialUsageOnly", "UsageReportRequired", "ModificationsRequireRedeposition", "WorkingLanguages", "Date"]
    4347        for r in removeList:
    4448            results = self.xmlTree.findall("//%s" % r)
     
    161165                <Size />
    162166                <Access />
    163                 <WorkingLanguages/>
     167                <WorkingLanguages />
    164168            </LrtLexiconDetails>'''
    165169        partTree = ElementTree.fromstring(template)
     
    175179        self.fillElement("//LrtLexiconDetails/Access", record["field_access_1"])
    176180        self.addLanguages(isoList, record["field_working_languages_0"], 1, "//LrtLexiconDetails/WorkingLanguages")
    177 
    178181
    179182    def addServiceDetails(self, record):
Note: See TracChangeset for help on using the changeset viewer.