diff --git a/README.md b/README.md index cacd8be..8da7cdd 100644 --- a/README.md +++ b/README.md @@ -10,29 +10,12 @@ All of these scripts require a secrets.py file in the same directory that must c filePath = '/Users/dspace_user/dspace-data-collection/data/' handlePrefix = 'http://dspace.myuni.edu/handle/' verify = True or False (no quotes). Use False if using an SSH tunnel to connect to the DSpace API + skippedCollections = A list of the 'uuid' values of any collections that you wish the scripts to skip (e.g. ['45794375-6640-4efe-848e-082e60bae375']) ``` The 'filePath' is directory into which output files will be written and 'handlePrefix' may or may not vary from your DSpace URL depending on your configuration. This secrets.py file will be ignored according to the repository's .gitignore file so that DSpace login details will not be inadvertently exposed through GitHub. If you are using both a development server and a production server, you can create a separate secrets.py file with a different name (e.g. secretsProd.py) and containing the production server information. When running each of these scripts, you will be prompted to enter the file name (e.g 'secretsProd' without '.py') of an alternate secrets file. If you skip the prompt or incorrectly type the file name, the scripts will default to the information in the secrets.py file. This ensures that you will only edit the production server if you really intend to. -**Note**: All of these scripts skip collection '45794375-6640-4efe-848e-082e60bae375' for local reasons. 
To change this, edit the following portion of the script (typically between line 27-39) - - -Skips collection 45794375-6640-4efe-848e-082e60bae375: - - for j in range (0, len (collections)): - collectionID = collections[j]['uuid'] - if collectionID != '45794375-6640-4efe-848e-082e60bae375': - offset = 0 - - -No collections skipped: - - for j in range (0, len (collections)): - collectionID = collections[j]['uuid'] - if collectionID != 0: - offset = 0 - #### [addKeyValuePairOnHandleCSV.py](addKeyValuePairOnHandleCSV.py) Based on user input, adds key-value pairs from a specified CSV file of DSpace item handles and the value to be added to that item using the specified key. A CSV log is written with all of the changes made and a 'dc.description.provenance' note describing the change is added to the metadata of each item that is updated. diff --git a/addKeyValuePairOnHandleCSV.py b/addKeyValuePairOnHandleCSV.py index 706e653..6a58969 100644 --- a/addKeyValuePairOnHandleCSV.py +++ b/addKeyValuePairOnHandleCSV.py @@ -23,6 +23,7 @@ password = secrets.password filePath = secrets.filePath verify = secrets.verify +skippedCollections = secrets.skippedCollections startTime = time.time() data = {'email':email,'password':password} diff --git a/addKeyValuePairToCollection.py b/addKeyValuePairToCollection.py index 68dd719..bfcb522 100644 --- a/addKeyValuePairToCollection.py +++ b/addKeyValuePairToCollection.py @@ -48,6 +48,7 @@ password = secrets.password filePath = secrets.filePath verify = secrets.verify +skippedCollections = secrets.skippedCollections startTime = time.time() data = {'email':email,'password':password} diff --git a/addKeyValuePairToCommunity.py b/addKeyValuePairToCommunity.py index e3f4421..9018038 100644 --- a/addKeyValuePairToCommunity.py +++ b/addKeyValuePairToCommunity.py @@ -48,6 +48,7 @@ password = secrets.password filePath = secrets.filePath verify = secrets.verify +skippedCollections = secrets.skippedCollections startTime = time.time() data = 
{'email':email,'password':password} @@ -67,7 +68,7 @@ collections = requests.get(baseURL+'/rest/communities/'+str(communityID)+'/collections', headers=header, cookies=cookies, verify=verify).json() for j in range (0, len (collections)): collectionID = collections[j]['uuid'] - if collectionID != '45794375-6640-4efe-848e-082e60bae375': + if collectionID not in skippedCollections: offset = 0 items = '' while items != []: diff --git a/addNewItemsToCollection.py b/addNewItemsToCollection.py index 66e5b40..aaa50cc 100644 --- a/addNewItemsToCollection.py +++ b/addNewItemsToCollection.py @@ -45,6 +45,7 @@ password = secrets.password filePath = secrets.filePath verify = secrets.verify +skippedCollections = secrets.skippedCollections startTime = time.time() diff --git a/deleteBitstreamsFromItem.py b/deleteBitstreamsFromItem.py index 5c640b2..3a4de74 100644 --- a/deleteBitstreamsFromItem.py +++ b/deleteBitstreamsFromItem.py @@ -23,6 +23,7 @@ password = secrets.password filePath = secrets.filePath verify = secrets.verify +skippedCollections = secrets.skippedCollections itemHandle = raw_input('Enter item handle: ') diff --git a/deleteKeyFromCollection.py b/deleteKeyFromCollection.py index 86f9eda..d43e9f4 100644 --- a/deleteKeyFromCollection.py +++ b/deleteKeyFromCollection.py @@ -16,7 +16,7 @@ print 'Editing Stage' else: print 'Editing Stage' - + parser = argparse.ArgumentParser() parser.add_argument('-k', '--deletedKey', help='the key to be deleted. optional - if not provided, the script will ask for input') parser.add_argument('-i', '--handle', help='handle of the collection to retreive. 
optional - if not provided, the script will ask for input') @@ -39,6 +39,7 @@ password = secrets.password filePath = secrets.filePath verify = secrets.verify +skippedCollections = secrets.skippedCollections startTime = time.time() data = {'email':email,'password':password} @@ -61,6 +62,7 @@ offset = 0 recordsEdited = 0 items = '' +itemLinks = [] while items != []: endpoint = baseURL+'/rest/filtered-items?query_field[]='+deletedKey+'&query_op[]=exists&query_val[]='+collSels+'&limit=200&offset='+str(offset) print endpoint @@ -69,32 +71,36 @@ for item in items: itemMetadataProcessed = [] itemLink = item['link'] - print itemLink - metadata = requests.get(baseURL + itemLink + '/metadata', headers=header, cookies=cookies, verify=verify).json() - for l in range (0, len (metadata)): - metadata[l].pop('schema', None) - metadata[l].pop('element', None) - metadata[l].pop('qualifier', None) - languageValue = metadata[l]['language'] - if metadata[l]['key'] == deletedKey: - provNote = '\''+deletedKey+'\' was deleted through a batch process on '+datetime.now().strftime('%Y-%m-%d %H:%M:%S')+'.' 
- provNoteElement = {} - provNoteElement['key'] = 'dc.description.provenance' - provNoteElement['value'] = unicode(provNote) - provNoteElement['language'] = 'en_US' - itemMetadataProcessed.append(provNoteElement) - else: - itemMetadataProcessed.append(metadata[l]) - recordsEdited = recordsEdited + 1 - itemMetadataProcessed = json.dumps(itemMetadataProcessed) - print 'updated', itemLink, recordsEdited - delete = requests.delete(baseURL+itemLink+'/metadata', headers=header, cookies=cookies, verify=verify) - print delete - post = requests.put(baseURL+itemLink+'/metadata', headers=header, cookies=cookies, verify=verify, data=itemMetadataProcessed) - print post - f.writerow([itemLink]+[deletedKey]+[delete]+[post]) + itemLinks.append(itemLink) offset = offset + 200 print offset +for itemLink in itemLinks: + itemMetadataProcessed = [] + print itemLink + metadata = requests.get(baseURL + itemLink + '/metadata', headers=header, cookies=cookies, verify=verify).json() + for l in range (0, len (metadata)): + metadata[l].pop('schema', None) + metadata[l].pop('element', None) + metadata[l].pop('qualifier', None) + languageValue = metadata[l]['language'] + if metadata[l]['key'] == deletedKey: + provNote = '\''+deletedKey+'\' was deleted through a batch process on '+datetime.now().strftime('%Y-%m-%d %H:%M:%S')+'.' 
+ provNoteElement = {} + provNoteElement['key'] = 'dc.description.provenance' + provNoteElement['value'] = unicode(provNote) + provNoteElement['language'] = 'en_US' + itemMetadataProcessed.append(provNoteElement) + else: + itemMetadataProcessed.append(metadata[l]) + recordsEdited = recordsEdited + 1 + itemMetadataProcessed = json.dumps(itemMetadataProcessed) + print 'updated', itemLink, recordsEdited + delete = requests.delete(baseURL+itemLink+'/metadata', headers=header, cookies=cookies, verify=verify) + print delete + post = requests.put(baseURL+itemLink+'/metadata', headers=header, cookies=cookies, verify=verify, data=itemMetadataProcessed) + print post + f.writerow([itemLink]+[deletedKey]+[delete]+[post]) + logout = requests.post(baseURL+'/rest/logout', headers=header, cookies=cookies, verify=verify) diff --git a/deleteKeyFromCommunity.py b/deleteKeyFromCommunity.py index fa30118..e06ee22 100644 --- a/deleteKeyFromCommunity.py +++ b/deleteKeyFromCommunity.py @@ -16,7 +16,7 @@ print 'Editing Stage' else: print 'Editing Stage' - + parser = argparse.ArgumentParser() parser.add_argument('-k', '--deletedKey', help='the key to be deleted. optional - if not provided, the script will ask for input') parser.add_argument('-i', '--handle', help='handle of the community to retreive. 
optional - if not provided, the script will ask for input') @@ -39,6 +39,7 @@ password = secrets.password filePath = secrets.filePath verify = secrets.verify +skippedCollections = secrets.skippedCollections startTime = time.time() data = {'email':email,'password':password} @@ -66,6 +67,7 @@ offset = 0 recordsEdited = 0 items = '' +itemLinks = [] while items != []: endpoint = baseURL+'/rest/filtered-items?query_field[]='+deletedKey+'&query_op[]=exists&query_val[]='+collSels+'&limit=200&offset='+str(offset) print endpoint @@ -74,32 +76,35 @@ for item in items: itemMetadataProcessed = [] itemLink = item['link'] - print itemLink - metadata = requests.get(baseURL + itemLink + '/metadata', headers=header, cookies=cookies, verify=verify).json() - for l in range (0, len (metadata)): - metadata[l].pop('schema', None) - metadata[l].pop('element', None) - metadata[l].pop('qualifier', None) - languageValue = metadata[l]['language'] - if metadata[l]['key'] == deletedKey: - provNote = '\''+deletedKey+'\' was deleted through a batch process on '+datetime.now().strftime('%Y-%m-%d %H:%M:%S')+'.' 
- provNoteElement = {} - provNoteElement['key'] = 'dc.description.provenance' - provNoteElement['value'] = unicode(provNote) - provNoteElement['language'] = 'en_US' - itemMetadataProcessed.append(provNoteElement) - else: - itemMetadataProcessed.append(metadata[l]) - recordsEdited = recordsEdited + 1 - itemMetadataProcessed = json.dumps(itemMetadataProcessed) - print 'updated', itemLink, recordsEdited - delete = requests.delete(baseURL+itemLink+'/metadata', headers=header, cookies=cookies, verify=verify) - print delete - post = requests.put(baseURL+itemLink+'/metadata', headers=header, cookies=cookies, verify=verify, data=itemMetadataProcessed) - print post - f.writerow([itemLink]+[deletedKey]+[delete]+[post]) + itemLinks.append(itemLink) offset = offset + 200 print offset +for itemLink in itemLinks: + itemMetadataProcessed = [] + print itemLink + metadata = requests.get(baseURL + itemLink + '/metadata', headers=header, cookies=cookies, verify=verify).json() + for l in range (0, len (metadata)): + metadata[l].pop('schema', None) + metadata[l].pop('element', None) + metadata[l].pop('qualifier', None) + languageValue = metadata[l]['language'] + if metadata[l]['key'] == deletedKey: + provNote = '\''+deletedKey+'\' was deleted through a batch process on '+datetime.now().strftime('%Y-%m-%d %H:%M:%S')+'.' 
+ provNoteElement = {} + provNoteElement['key'] = 'dc.description.provenance' + provNoteElement['value'] = unicode(provNote) + provNoteElement['language'] = 'en_US' + itemMetadataProcessed.append(provNoteElement) + else: + itemMetadataProcessed.append(metadata[l]) + recordsEdited = recordsEdited + 1 + itemMetadataProcessed = json.dumps(itemMetadataProcessed) + print 'updated', itemLink, recordsEdited + delete = requests.delete(baseURL+itemLink+'/metadata', headers=header, cookies=cookies, verify=verify) + print delete + post = requests.put(baseURL+itemLink+'/metadata', headers=header, cookies=cookies, verify=verify, data=itemMetadataProcessed) + print post + f.writerow([itemLink]+[deletedKey]+[delete]+[post]) logout = requests.post(baseURL+'/rest/logout', headers=header, cookies=cookies, verify=verify) diff --git a/deleteKeyValuePairFromCollection.py b/deleteKeyValuePairFromCollection.py index 679ecfe..d1acdfb 100644 --- a/deleteKeyValuePairFromCollection.py +++ b/deleteKeyValuePairFromCollection.py @@ -5,8 +5,7 @@ import csv from datetime import datetime import urllib3 - -urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) +import argparse secretsVersion = raw_input('To edit production server, enter the name of the secrets file: ') if secretsVersion != '': @@ -18,15 +17,33 @@ else: print 'Editing Stage' +parser = argparse.ArgumentParser() +parser.add_argument('-k', '--deletedKey', help='the key to be deleted. optional - if not provided, the script will ask for input') +parser.add_argument('-v', '--deletedValue', help='the value to be deleted. optional - if not provided, the script will ask for input') +parser.add_argument('-i', '--handle', help='handle of the community to retreive. 
optional - if not provided, the script will ask for input') +args = parser.parse_args() + +if args.deletedKey: + deletedKey = args.deletedKey +else: + deletedKey = raw_input('Enter the key to be deleted: ') +if args.deletedValue: + deletedValue = args.deletedValue +else: + deletedValue = raw_input('Enter the value to be deleted: ') +if args.handle: + handle = args.handle +else: + handle = raw_input('Enter collection handle: ') + +urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) + baseURL = secrets.baseURL email = secrets.email password = secrets.password filePath = secrets.filePath verify = secrets.verify - -collectionHandle = raw_input('Enter collection handle: ') -deletedKey = raw_input('Enter key to be deleted: ') -deletedValue = raw_input('Enter value to be deleted: ') +skippedCollections = secrets.skippedCollections startTime = time.time() data = {'email':email,'password':password} @@ -38,36 +55,36 @@ status = requests.get(baseURL+'/rest/status', headers=header, cookies=cookies, verify=verify).json() print 'authenticated' -itemList = [] -endpoint = baseURL+'/rest/handle/'+collectionHandle +endpoint = baseURL+'/rest/handle/'+handle collection = requests.get(endpoint, headers=header, cookies=cookies, verify=verify).json() collectionID = collection['uuid'] +collSels = '&collSel[]=' + collectionID + +f=csv.writer(open(filePath+'deletedKey'+datetime.now().strftime('%Y-%m-%d %H.%M.%S')+'.csv', 'wb')) +f.writerow(['itemID']+['deletedKey']+['deletedValue']+['delete']+['post']) +recordsEdited = 0 offset = 0 items = '' +itemLinks = [] while items != []: - items = requests.get(baseURL+'/rest/collections/'+str(collectionID)+'/items?limit=200&offset='+str(offset), headers=header, cookies=cookies, verify=verify) - while items.status_code != 200: - time.sleep(5) - items = requests.get(baseURL+'/rest/collections/'+str(collectionID)+'/items?limit=200&offset='+str(offset), headers=header, cookies=cookies, verify=verify) - items = items.json() - for k in range 
(0, len (items)): - itemID = items[k]['uuid'] - itemList.append(itemID) + endpoint = baseURL+'/rest/filtered-items?query_field[]='+deletedKey+'&query_op[]=exists&query_val[]='+collSels+'&limit=200&offset='+str(offset) + print endpoint + response = requests.get(endpoint, headers=header, cookies=cookies, verify=verify).json() + items = response['items'] + for item in items: + itemMetadataProcessed = [] + itemLink = item['link'] + itemLinks.append(itemLink) offset = offset + 200 -elapsedTime = time.time() - startTime -m, s = divmod(elapsedTime, 60) -h, m = divmod(m, 60) -print 'Item list creation time: ','%d:%02d:%02d' % (h, m, s) - -recordsEdited = 0 -f=csv.writer(open(filePath+'deletedKey'+datetime.now().strftime('%Y-%m-%d %H.%M.%S')+'.csv', 'wb')) -f.writerow(['itemID']+['deletedKey']+['deletedValue']+['delete']+['post']) -for number, itemID in enumerate(itemList): - itemsRemaining = len(itemList) - number - print 'Items remaining: ', itemsRemaining, 'ItemID: ', itemID - metadata = requests.get(baseURL+'/rest/items/'+str(itemID)+'/metadata', headers=header, cookies=cookies, verify=verify).json() + print offset +for itemLink in itemLinks: itemMetadataProcessed = [] + print itemLink + metadata = requests.get(baseURL + itemLink + '/metadata', headers=header, cookies=cookies, verify=verify).json() for l in range (0, len (metadata)): + metadata[l].pop('schema', None) + metadata[l].pop('element', None) + metadata[l].pop('qualifier', None) if metadata[l]['key'] == deletedKey and metadata[l]['value'] == deletedValue: provNote = '\''+deletedKey+':'+deletedValue+'\' was deleted through a batch process on '+datetime.now().strftime('%Y-%m-%d %H:%M:%S')+'.' 
provNoteElement = {} @@ -77,16 +94,15 @@ itemMetadataProcessed.append(provNoteElement) else: itemMetadataProcessed.append(metadata[l]) - if itemMetadataProcessed != metadata: recordsEdited = recordsEdited + 1 itemMetadataProcessed = json.dumps(itemMetadataProcessed) - print 'updated', itemID, recordsEdited - delete = requests.delete(baseURL+'/rest/items/'+str(itemID)+'/metadata', headers=header, cookies=cookies, verify=verify) + print 'updated', itemLink, recordsEdited + delete = requests.delete(baseURL+itemLink+'/metadata', headers=header, cookies=cookies, verify=verify) print delete - post = requests.put(baseURL+'/rest/items/'+str(itemID)+'/metadata', headers=header, cookies=cookies, verify=verify, data=itemMetadataProcessed) + post = requests.put(baseURL+itemLink+'/metadata', headers=header, cookies=cookies, verify=verify, data=itemMetadataProcessed) print post - f.writerow([itemID]+[deletedKey]+[deletedValue]+[delete]+[post]) + f.writerow([itemLink]+[deletedKey]+[deletedValue]+[delete]+[post]) logout = requests.post(baseURL+'/rest/logout', headers=header, cookies=cookies, verify=verify) diff --git a/editBitstreamsNames.py b/editBitstreamsNames.py index 8f3156c..802c812 100644 --- a/editBitstreamsNames.py +++ b/editBitstreamsNames.py @@ -32,6 +32,7 @@ password = secrets.password filePath = secrets.filePath verify = secrets.verify +skippedCollections = secrets.skippedCollections startTime = time.time() data = {'email':email,'password':password} diff --git a/generateCollectionLevelAbstract.py b/generateCollectionLevelAbstract.py index dee934a..2fdd364 100644 --- a/generateCollectionLevelAbstract.py +++ b/generateCollectionLevelAbstract.py @@ -36,6 +36,7 @@ password = secrets.password filePath = secrets.filePath verify = secrets.verify +skippedCollections = secrets.skippedCollections data = {'email':email,'password':password} header = {'content-type':'application/json','accept':'application/json'} diff --git a/overwriteExistingMetadata.py 
b/overwriteExistingMetadata.py index 81247a4..b64f2b7 100644 --- a/overwriteExistingMetadata.py +++ b/overwriteExistingMetadata.py @@ -32,6 +32,7 @@ password = secrets.password filePath = secrets.filePath verify = secrets.verify +skippedCollections = secrets.skippedCollections handleIdDict = {} with open(fileName) as csvfile: diff --git a/postCollection.py b/postCollection.py index 9a21b5a..969c930 100644 --- a/postCollection.py +++ b/postCollection.py @@ -49,6 +49,7 @@ password = secrets.password filePath = secrets.filePath verify = secrets.verify +skippedCollections = secrets.skippedCollections startTime = time.time() data = {'email':email,'password':password} diff --git a/removeDuplicateKeyValuePairsFromItems.py b/removeDuplicateKeyValuePairsFromItems.py index eeaa2d0..eb57221 100644 --- a/removeDuplicateKeyValuePairsFromItems.py +++ b/removeDuplicateKeyValuePairsFromItems.py @@ -23,6 +23,7 @@ password = secrets.password filePath = secrets.filePath verify = secrets.verify +skippedCollections = secrets.skippedCollections startTime = time.time() data = {'email':email,'password':password} @@ -43,7 +44,7 @@ collections = requests.get(baseURL+'/rest/communities/'+str(communityID)+'/collections', headers=header, cookies=cookies, verify=verify).json() for j in range (0, len (collections)): collectionID = collections[j]['uuid'] - if collectionID != '45794375-6640-4efe-848e-082e60bae375': + if collectionID not in skippedCollections: offset = 0 items = '' while items != []: diff --git a/replaceKey.py b/replaceKey.py index 972f401..f27c656 100644 --- a/replaceKey.py +++ b/replaceKey.py @@ -16,7 +16,7 @@ print 'Editing Stage' else: print 'Editing Stage' - + parser = argparse.ArgumentParser() parser.add_argument('-1', '--replacedKey', help='the key to be replaced. optional - if not provided, the script will ask for input') parser.add_argument('-2', '--replacementKey', help='the replacement key. 
optional - if not provided, the script will ask for input') @@ -38,6 +38,7 @@ password = secrets.password filePath = secrets.filePath verify = secrets.verify +skippedCollections = secrets.skippedCollections startTime = time.time() data = {'email':email,'password':password} @@ -54,6 +55,7 @@ offset = 0 recordsEdited = 0 items = '' +itemLinks = [] while items != []: endpoint = baseURL+'/rest/filtered-items?query_field[]='+replacedKey+'&query_op[]=exists&query_val[]=&limit=200&offset='+str(offset) print endpoint @@ -62,37 +64,40 @@ for item in items: itemMetadataProcessed = [] itemLink = item['link'] - metadata = requests.get(baseURL + itemLink + '/metadata', headers=header, cookies=cookies, verify=verify).json() - for l in range (0, len (metadata)): - metadata[l].pop('schema', None) - metadata[l].pop('element', None) - metadata[l].pop('qualifier', None) - if metadata[l]['key'] == replacedKey: - replacedElement = metadata[l] - updatedMetadataElement = {} - updatedMetadataElement['key'] = replacementKey - updatedMetadataElement['value'] = unicode(replacedElement['value']) - updatedMetadataElement['language'] = unicode(replacedElement['language']) - print updatedMetadataElement - itemMetadataProcessed.append(updatedMetadataElement) - provNote = '\''+replacedKey+'\' was replaced by \''+replacementKey+'\' through a batch process on '+datetime.now().strftime('%Y-%m-%d %H:%M:%S')+'.' 
- provNoteElement = {} - provNoteElement['key'] = 'dc.description.provenance' - provNoteElement['value'] = unicode(provNote) - provNoteElement['language'] = 'en_US' - itemMetadataProcessed.append(provNoteElement) - else: - if metadata[l] not in itemMetadataProcessed: - itemMetadataProcessed.append(metadata[l]) - itemMetadataProcessed = json.dumps(itemMetadataProcessed) - print itemMetadataProcessed - delete = requests.delete(baseURL+itemLink+'/metadata', headers=header, cookies=cookies, verify=verify) - print delete - post = requests.put(baseURL+itemLink+'/metadata', headers=header, cookies=cookies, verify=verify, data=itemMetadataProcessed) - print post - f.writerow([itemID]+[replacedElement['key']]+[replacedElement['value'].encode('utf-8')]+[delete]+[post]) + itemLinks.append(itemLink) offset = offset + 200 print offset +for itemLink in itemLinks: + itemMetadataProcessed = [] + print itemLink + metadata = requests.get(baseURL + itemLink + '/metadata', headers=header, cookies=cookies, verify=verify).json() + for l in range (0, len (metadata)): + metadata[l].pop('schema', None) + metadata[l].pop('element', None) + metadata[l].pop('qualifier', None) + if metadata[l]['key'] == replacedKey: + replacedElement = metadata[l] + updatedMetadataElement = {} + updatedMetadataElement['key'] = replacementKey + updatedMetadataElement['value'] = unicode(replacedElement['value']) + updatedMetadataElement['language'] = unicode(replacedElement['language']) + print updatedMetadataElement + itemMetadataProcessed.append(updatedMetadataElement) + provNote = '\''+replacedKey+'\' was replaced by \''+replacementKey+'\' through a batch process on '+datetime.now().strftime('%Y-%m-%d %H:%M:%S')+'.' 
+ provNoteElement = {} + provNoteElement['key'] = 'dc.description.provenance' + provNoteElement['value'] = unicode(provNote) + provNoteElement['language'] = 'en_US' + itemMetadataProcessed.append(provNoteElement) + else: + if metadata[l] not in itemMetadataProcessed: + itemMetadataProcessed.append(metadata[l]) + itemMetadataProcessed = json.dumps(itemMetadataProcessed) + delete = requests.delete(baseURL+itemLink+'/metadata', headers=header, cookies=cookies, verify=verify) + print delete + post = requests.put(baseURL+itemLink+'/metadata', headers=header, cookies=cookies, verify=verify, data=itemMetadataProcessed) + print post + f.writerow([itemLink]+[replacedElement['key']]+[replacedElement['value'].encode('utf-8')]+[delete]+[post]) logout = requests.post(baseURL+'/rest/logout', headers=header, cookies=cookies, verify=verify) diff --git a/replaceKeyForCollection.py b/replaceKeyForCollection.py index 576bb7d..a1bff7d 100644 --- a/replaceKeyForCollection.py +++ b/replaceKeyForCollection.py @@ -43,6 +43,7 @@ password = secrets.password filePath = secrets.filePath verify = secrets.verify +skippedCollections = secrets.skippedCollections startTime = time.time() data = {'email':email,'password':password} @@ -64,6 +65,7 @@ offset = 0 recordsEdited = 0 items = '' +itemLinks = [] while items != []: endpoint = baseURL+'/rest/filtered-items?query_field[]='+replacedKey+'&query_op[]=exists&query_val[]='+collSels+'&limit=200&offset='+str(offset) print endpoint @@ -72,37 +74,40 @@ for item in items: itemMetadataProcessed = [] itemLink = item['link'] - metadata = requests.get(baseURL + itemLink + '/metadata', headers=header, cookies=cookies, verify=verify).json() - for l in range (0, len (metadata)): - metadata[l].pop('schema', None) - metadata[l].pop('element', None) - metadata[l].pop('qualifier', None) - if metadata[l]['key'] == replacedKey: - replacedElement = metadata[l] - updatedMetadataElement = {} - updatedMetadataElement['key'] = replacementKey - 
updatedMetadataElement['value'] = unicode(replacedElement['value']) - updatedMetadataElement['language'] = unicode(replacedElement['language']) - print updatedMetadataElement - itemMetadataProcessed.append(updatedMetadataElement) - provNote = '\''+replacedKey+'\' was replaced by \''+replacementKey+'\' through a batch process on '+datetime.now().strftime('%Y-%m-%d %H:%M:%S')+'.' - provNoteElement = {} - provNoteElement['key'] = 'dc.description.provenance' - provNoteElement['value'] = unicode(provNote) - provNoteElement['language'] = 'en_US' - itemMetadataProcessed.append(provNoteElement) - else: - if metadata[l] not in itemMetadataProcessed: - itemMetadataProcessed.append(metadata[l]) - itemMetadataProcessed = json.dumps(itemMetadataProcessed) - print itemMetadataProcessed - delete = requests.delete(baseURL+itemLink+'/metadata', headers=header, cookies=cookies, verify=verify) - print delete - post = requests.put(baseURL+itemLink+'/metadata', headers=header, cookies=cookies, verify=verify, data=itemMetadataProcessed) - print post - f.writerow([itemID]+[replacedElement['key']]+[replacedElement['value'].encode('utf-8')]+[delete]+[post]) + itemLinks.append(itemLink) offset = offset + 200 print offset +for itemLink in itemLinks: + itemMetadataProcessed = [] + print itemLink + metadata = requests.get(baseURL + itemLink + '/metadata', headers=header, cookies=cookies, verify=verify).json() + for l in range (0, len (metadata)): + metadata[l].pop('schema', None) + metadata[l].pop('element', None) + metadata[l].pop('qualifier', None) + if metadata[l]['key'] == replacedKey: + replacedElement = metadata[l] + updatedMetadataElement = {} + updatedMetadataElement['key'] = replacementKey + updatedMetadataElement['value'] = unicode(replacedElement['value']) + updatedMetadataElement['language'] = unicode(replacedElement['language']) + print updatedMetadataElement + itemMetadataProcessed.append(updatedMetadataElement) + provNote = '\''+replacedKey+'\' was replaced by 
\''+replacementKey+'\' through a batch process on '+datetime.now().strftime('%Y-%m-%d %H:%M:%S')+'.' + provNoteElement = {} + provNoteElement['key'] = 'dc.description.provenance' + provNoteElement['value'] = unicode(provNote) + provNoteElement['language'] = 'en_US' + itemMetadataProcessed.append(provNoteElement) + else: + if metadata[l] not in itemMetadataProcessed: + itemMetadataProcessed.append(metadata[l]) + itemMetadataProcessed = json.dumps(itemMetadataProcessed) + delete = requests.delete(baseURL+itemLink+'/metadata', headers=header, cookies=cookies, verify=verify) + print delete + post = requests.put(baseURL+itemLink+'/metadata', headers=header, cookies=cookies, verify=verify, data=itemMetadataProcessed) + print post + f.writerow([itemLink]+[replacedElement['key']]+[replacedElement['value'].encode('utf-8')]+[delete]+[post]) logout = requests.post(baseURL+'/rest/logout', headers=header, cookies=cookies, verify=verify) diff --git a/replaceKeyForCommunity.py b/replaceKeyForCommunity.py index 2cd3f64..1288f50 100644 --- a/replaceKeyForCommunity.py +++ b/replaceKeyForCommunity.py @@ -5,6 +5,7 @@ import csv from datetime import datetime import urllib3 +import argparse secretsVersion = raw_input('To edit production server, enter the name of the secrets file: ') if secretsVersion != '': @@ -42,6 +43,7 @@ password = secrets.password filePath = secrets.filePath verify = secrets.verify +skippedCollections = secrets.skippedCollections startTime = time.time() data = {'email':email,'password':password} @@ -68,6 +70,7 @@ offset = 0 recordsEdited = 0 items = '' +itemLinks = [] while items != []: endpoint = baseURL+'/rest/filtered-items?query_field[]='+replacedKey+'&query_op[]=exists&query_val[]='+collSels+'&limit=200&offset='+str(offset) print endpoint @@ -76,37 +79,40 @@ for item in items: itemMetadataProcessed = [] itemLink = item['link'] - metadata = requests.get(baseURL + itemLink + '/metadata', headers=header, cookies=cookies, verify=verify).json() - for l in range (0, 
len (metadata)): - metadata[l].pop('schema', None) - metadata[l].pop('element', None) - metadata[l].pop('qualifier', None) - if metadata[l]['key'] == replacedKey: - replacedElement = metadata[l] - updatedMetadataElement = {} - updatedMetadataElement['key'] = replacementKey - updatedMetadataElement['value'] = unicode(replacedElement['value']) - updatedMetadataElement['language'] = unicode(replacedElement['language']) - print updatedMetadataElement - itemMetadataProcessed.append(updatedMetadataElement) - provNote = '\''+replacedKey+'\' was replaced by \''+replacementKey+'\' through a batch process on '+datetime.now().strftime('%Y-%m-%d %H:%M:%S')+'.' - provNoteElement = {} - provNoteElement['key'] = 'dc.description.provenance' - provNoteElement['value'] = unicode(provNote) - provNoteElement['language'] = 'en_US' - itemMetadataProcessed.append(provNoteElement) - else: - if metadata[l] not in itemMetadataProcessed: - itemMetadataProcessed.append(metadata[l]) - itemMetadataProcessed = json.dumps(itemMetadataProcessed) - print itemMetadataProcessed - delete = requests.delete(baseURL+itemLink+'/metadata', headers=header, cookies=cookies, verify=verify) - print delete - post = requests.put(baseURL+itemLink+'/metadata', headers=header, cookies=cookies, verify=verify, data=itemMetadataProcessed) - print post - f.writerow([itemLink]+[replacedElement['key']]+[replacedElement['value'].encode('utf-8')]+[delete]+[post]) + itemLinks.append(itemLink) offset = offset + 200 print offset +for itemLink in itemLinks: + itemMetadataProcessed = [] + print itemLink + metadata = requests.get(baseURL + itemLink + '/metadata', headers=header, cookies=cookies, verify=verify).json() + for l in range (0, len (metadata)): + metadata[l].pop('schema', None) + metadata[l].pop('element', None) + metadata[l].pop('qualifier', None) + if metadata[l]['key'] == replacedKey: + replacedElement = metadata[l] + updatedMetadataElement = {} + updatedMetadataElement['key'] = replacementKey + 
updatedMetadataElement['value'] = unicode(replacedElement['value']) + updatedMetadataElement['language'] = unicode(replacedElement['language']) + print updatedMetadataElement + itemMetadataProcessed.append(updatedMetadataElement) + provNote = '\''+replacedKey+'\' was replaced by \''+replacementKey+'\' through a batch process on '+datetime.now().strftime('%Y-%m-%d %H:%M:%S')+'.' + provNoteElement = {} + provNoteElement['key'] = 'dc.description.provenance' + provNoteElement['value'] = unicode(provNote) + provNoteElement['language'] = 'en_US' + itemMetadataProcessed.append(provNoteElement) + else: + if metadata[l] not in itemMetadataProcessed: + itemMetadataProcessed.append(metadata[l]) + itemMetadataProcessed = json.dumps(itemMetadataProcessed) + delete = requests.delete(baseURL+itemLink+'/metadata', headers=header, cookies=cookies, verify=verify) + print delete + post = requests.put(baseURL+itemLink+'/metadata', headers=header, cookies=cookies, verify=verify, data=itemMetadataProcessed) + print post + f.writerow([itemLink]+[replacedElement['key']]+[replacedElement['value'].encode('utf-8')]+[delete]+[post]) logout = requests.post(baseURL+'/rest/logout', headers=header, cookies=cookies, verify=verify) diff --git a/replaceKeyValuePairOnItemIdCSV.py b/replaceKeyValuePairOnItemIdCSV.py index e253fb6..1bce655 100644 --- a/replaceKeyValuePairOnItemIdCSV.py +++ b/replaceKeyValuePairOnItemIdCSV.py @@ -23,6 +23,7 @@ password = secrets.password filePath = secrets.filePath verify = secrets.verify +skippedCollections = secrets.skippedCollections startTime = time.time() data = {'email':email,'password':password} diff --git a/replaceKeyValuePairsFromCSV.py b/replaceKeyValuePairsFromCSV.py index bb5a76d..1bf1ecc 100644 --- a/replaceKeyValuePairsFromCSV.py +++ b/replaceKeyValuePairsFromCSV.py @@ -22,6 +22,7 @@ password = secrets.password filePath = secrets.filePath verify = secrets.verify +skippedCollections = secrets.skippedCollections parser = argparse.ArgumentParser() 
parser.add_argument('-f', '--fileName', help='the CSV file of changes. optional - if not provided, the script will ask for input') @@ -56,6 +57,7 @@ offset = 0 recordsEdited = 0 items = '' + itemLinks = [] while items != []: endpoint = baseURL+'/rest/filtered-items?query_field[]='+replacedKey+'&query_op[]=equals&query_val[]='+replacedValue+'&limit=200&offset='+str(offset) print endpoint @@ -64,37 +66,40 @@ for item in items: itemMetadataProcessed = [] itemLink = item['link'] - print itemLink - metadata = requests.get(baseURL+itemLink+'/metadata', headers=header, cookies=cookies, verify=verify).json() - for l in range (0, len (metadata)): - metadata[l].pop('schema', None) - metadata[l].pop('element', None) - metadata[l].pop('qualifier', None) - languageValue = metadata[l]['language'] - if metadata[l]['key'] == replacedKey and metadata[l]['value'].encode('utf-8') == replacedValue: - replacedElement = metadata[l] - updatedMetadataElement = {} - updatedMetadataElement['key'] = replacementKey - updatedMetadataElement['value'] = unicode(replacementValue) - updatedMetadataElement['language'] = languageValue - itemMetadataProcessed.append(updatedMetadataElement) - provNote = '\''+replacedKey+': '+replacedValue+'\' was replaced by \''+replacementKey+': '+replacementValue+'\' through a batch process on '+datetime.now().strftime('%Y-%m-%d %H:%M:%S')+'.' 
- provNoteElement = {} - provNoteElement['key'] = 'dc.description.provenance' - provNoteElement['value'] = unicode(provNote) - provNoteElement['language'] = 'en_US' - itemMetadataProcessed.append(provNoteElement) - else: - if metadata[l] not in itemMetadataProcessed: - itemMetadataProcessed.append(metadata[l]) - itemMetadataProcessed = json.dumps(itemMetadataProcessed) - delete = requests.delete(baseURL+itemLink+'/metadata', headers=header, cookies=cookies, verify=verify) - print delete - post = requests.put(baseURL+itemLink+'/metadata', headers=header, cookies=cookies, verify=verify, data=itemMetadataProcessed) - print post - f.writerow([itemLink]+[replacedElement['key']]+[replacedElement['value']]+[delete]+[post]) + itemLinks.append(itemLink) offset = offset + 200 print offset + for itemLink in itemLinks: + itemMetadataProcessed = [] + print itemLink + metadata = requests.get(baseURL+itemLink+'/metadata', headers=header, cookies=cookies, verify=verify).json() + for l in range (0, len (metadata)): + metadata[l].pop('schema', None) + metadata[l].pop('element', None) + metadata[l].pop('qualifier', None) + languageValue = metadata[l]['language'] + if metadata[l]['key'] == replacedKey and metadata[l]['value'].encode('utf-8') == replacedValue: + replacedElement = metadata[l] + updatedMetadataElement = {} + updatedMetadataElement['key'] = replacementKey + updatedMetadataElement['value'] = unicode(replacementValue) + updatedMetadataElement['language'] = languageValue + itemMetadataProcessed.append(updatedMetadataElement) + provNote = '\''+replacedKey+': '+replacedValue+'\' was replaced by \''+replacementKey+': '+replacementValue+'\' through a batch process on '+datetime.now().strftime('%Y-%m-%d %H:%M:%S')+'.' 
+ provNoteElement = {} + provNoteElement['key'] = 'dc.description.provenance' + provNoteElement['value'] = unicode(provNote) + provNoteElement['language'] = 'en_US' + itemMetadataProcessed.append(provNoteElement) + else: + if metadata[l] not in itemMetadataProcessed: + itemMetadataProcessed.append(metadata[l]) + itemMetadataProcessed = json.dumps(itemMetadataProcessed) + delete = requests.delete(baseURL+itemLink+'/metadata', headers=header, cookies=cookies, verify=verify) + print delete + post = requests.put(baseURL+itemLink+'/metadata', headers=header, cookies=cookies, verify=verify, data=itemMetadataProcessed) + print post + f.writerow([itemLink]+[replacedElement['key']]+[replacedElement['value']]+[delete]+[post]) logout = requests.post(baseURL+'/rest/logout', headers=header, cookies=cookies, verify=verify) diff --git a/replaceUnnecessarySpaces.py b/replaceUnnecessarySpaces.py index f1cb3a9..055c5c5 100644 --- a/replaceUnnecessarySpaces.py +++ b/replaceUnnecessarySpaces.py @@ -23,6 +23,7 @@ password = secrets.password filePath = secrets.filePath verify = secrets.verify +skippedCollections = secrets.skippedCollections communityHandle = raw_input('Enter community handle: ') key = raw_input('Enter key: ') diff --git a/replaceValueInCollection.py b/replaceValueInCollection.py index b2caeed..995a401 100644 --- a/replaceValueInCollection.py +++ b/replaceValueInCollection.py @@ -48,6 +48,7 @@ password = secrets.password filePath = secrets.filePath verify = secrets.verify +skippedCollections = secrets.skippedCollections startTime = time.time() data = {'email':email,'password':password} @@ -70,6 +71,7 @@ offset = 0 recordsEdited = 0 items = '' +itemLinks = [] while items != []: endpoint = baseURL+'/rest/filtered-items?query_field[]='+key+'&query_op[]=equals&query_val[]='+replacedValue+collSels+'&limit=200&offset='+str(offset) print endpoint @@ -80,38 +82,42 @@ for item in items: itemMetadataProcessed = [] itemLink = item['link'] - metadata = requests.get(baseURL + 
itemLink + '/metadata', headers=header, cookies=cookies, verify=verify).json() - for l in range (0, len (metadata)): - metadata[l].pop('schema', None) - metadata[l].pop('element', None) - metadata[l].pop('qualifier', None) - languageValue = metadata[l]['language'] - if metadata[l]['key'] == replacedKey and metadata[l]['value'].encode('utf-8') == replacedValue: - replacedElement = metadata[l] - updatedMetadataElement = {} - updatedMetadataElement['key'] = replacementKey - updatedMetadataElement['value'] = unicode(replacementValue) - updatedMetadataElement['language'] = languageValue - itemMetadataProcessed.append(updatedMetadataElement) - provNote = '\''+replacedKey+': '+replacedValue+'\' was replaced by \''+replacementKey+': '+replacementValue+'\' through a batch process on '+datetime.now().strftime('%Y-%m-%d %H:%M:%S')+'.' - provNoteElement = {} - provNoteElement['key'] = 'dc.description.provenance' - provNoteElement['value'] = unicode(provNote) - provNoteElement['language'] = 'en_US' - itemMetadataProcessed.append(provNoteElement) - recordsEdited = recordsEdited + 1 - else: - if metadata[l] not in itemMetadataProcessed: - itemMetadataProcessed.append(metadata[l]) - itemMetadataProcessed = json.dumps(itemMetadataProcessed) - print 'updated', itemLink, recordsEdited - delete = requests.delete(baseURL+itemLink+'/metadata', headers=header, cookies=cookies, verify=verify) - print delete - post = requests.put(baseURL+itemLink+'/metadata', headers=header, cookies=cookies, verify=verify, data=itemMetadataProcessed) - print post - f.writerow([itemLink]+[updatedMetadataElement['key']]+[updatedMetadataElement['value']]+[delete]+[post]) + itemLinks.append(itemLink) offset = offset + 200 print offset +for itemLink in itemLinks: + itemMetadataProcessed = [] + print itemLink + metadata = requests.get(baseURL + itemLink + '/metadata', headers=header, cookies=cookies, verify=verify).json() + for l in range (0, len (metadata)): + metadata[l].pop('schema', None) + 
metadata[l].pop('element', None) + metadata[l].pop('qualifier', None) + languageValue = metadata[l]['language'] + if metadata[l]['key'] == replacedKey and metadata[l]['value'].encode('utf-8') == replacedValue: + replacedElement = metadata[l] + updatedMetadataElement = {} + updatedMetadataElement['key'] = replacementKey + updatedMetadataElement['value'] = unicode(replacementValue) + updatedMetadataElement['language'] = languageValue + itemMetadataProcessed.append(updatedMetadataElement) + provNote = '\''+replacedKey+': '+replacedValue+'\' was replaced by \''+replacementKey+': '+replacementValue+'\' through a batch process on '+datetime.now().strftime('%Y-%m-%d %H:%M:%S')+'.' + provNoteElement = {} + provNoteElement['key'] = 'dc.description.provenance' + provNoteElement['value'] = unicode(provNote) + provNoteElement['language'] = 'en_US' + itemMetadataProcessed.append(provNoteElement) + recordsEdited = recordsEdited + 1 + else: + if metadata[l] not in itemMetadataProcessed: + itemMetadataProcessed.append(metadata[l]) + itemMetadataProcessed = json.dumps(itemMetadataProcessed) + print 'updated', itemLink, recordsEdited + delete = requests.delete(baseURL+itemLink+'/metadata', headers=header, cookies=cookies, verify=verify) + print delete + post = requests.put(baseURL+itemLink+'/metadata', headers=header, cookies=cookies, verify=verify, data=itemMetadataProcessed) + print post + f.writerow([itemLink]+[updatedMetadataElement['key']]+[updatedMetadataElement['value']]+[delete]+[post]) logout = requests.post(baseURL+'/rest/logout', headers=header, cookies=cookies, verify=verify) diff --git a/replaceValueInCommunityFromCSV.py b/replaceValueInCommunityFromCSV.py index 60b96e4..2f5fe3f 100644 --- a/replaceValueInCommunityFromCSV.py +++ b/replaceValueInCommunityFromCSV.py @@ -39,6 +39,7 @@ password = secrets.password filePath = secrets.filePath verify = secrets.verify +skippedCollections = secrets.skippedCollections startTime = time.time() data = 
{'email':email,'password':password} @@ -80,6 +81,7 @@ offset = 0 recordsEdited = 0 items = '' + itemLinks = [] while items != []: endpoint = baseURL+'/rest/filtered-items?query_field[]=*&query_op[]=equals&query_val[]='+replacedValue+collSels+'&limit=200&offset='+str(offset) print endpoint @@ -89,42 +91,45 @@ items = response['items'] print len(items), ' search results' for item in items: - itemMetadataProcessed = [] itemLink = item['link'] - metadata = requests.get(baseURL + itemLink + '/metadata', headers=header, cookies=cookies, verify=verify).json() - counter += 1 - print counter - for l in range (0, len (metadata)): - metadata[l].pop('schema', None) - metadata[l].pop('element', None) - metadata[l].pop('qualifier', None) - languageValue = metadata[l]['language'] - if metadata[l]['value'].encode('utf-8') == replacedValue: - key = metadata[l]['key'] - replacedElement = metadata[l] - updatedMetadataElement = {} - updatedMetadataElement['key'] = metadata[l]['key'] - updatedMetadataElement['value'] = unicode(replacementValue) - updatedMetadataElement['language'] = languageValue - itemMetadataProcessed.append(updatedMetadataElement) - provNote = '\''+key+': '+replacedValue+'\' was replaced by \''+key+': '+replacementValue+'\' through a batch process on '+datetime.now().strftime('%Y-%m-%d %H:%M:%S')+'.' 
- provNoteElement = {} - provNoteElement['key'] = 'dc.description.provenance' - provNoteElement['value'] = unicode(provNote) - provNoteElement['language'] = 'en_US' - itemMetadataProcessed.append(provNoteElement) - recordsEdited = recordsEdited + 1 - else: - if metadata[l] not in itemMetadataProcessed: - itemMetadataProcessed.append(metadata[l]) - itemMetadataProcessed = json.dumps(itemMetadataProcessed) - print 'updated', itemLink, recordsEdited - delete = requests.delete(baseURL+itemLink+'/metadata', headers=header, cookies=cookies, verify=verify) - print delete - post = requests.put(baseURL+itemLink+'/metadata', headers=header, cookies=cookies, verify=verify, data=itemMetadataProcessed) - print post - f.writerow([itemLink]+[replacedValue.encode('utf-8')]+[replacementValue.encode('utf-8')]+[delete]+[post]) + itemLinks.append(itemLink) offset = offset + 200 + print offset + for itemLink in itemLinks: + itemMetadataProcessed = [] + metadata = requests.get(baseURL + itemLink + '/metadata', headers=header, cookies=cookies, verify=verify).json() + counter += 1 + print counter + for l in range (0, len (metadata)): + metadata[l].pop('schema', None) + metadata[l].pop('element', None) + metadata[l].pop('qualifier', None) + languageValue = metadata[l]['language'] + if metadata[l]['value'].encode('utf-8') == replacedValue: + key = metadata[l]['key'] + replacedElement = metadata[l] + updatedMetadataElement = {} + updatedMetadataElement['key'] = metadata[l]['key'] + updatedMetadataElement['value'] = unicode(replacementValue) + updatedMetadataElement['language'] = languageValue + itemMetadataProcessed.append(updatedMetadataElement) + provNote = '\''+key+': '+replacedValue+'\' was replaced by \''+key+': '+replacementValue+'\' through a batch process on '+datetime.now().strftime('%Y-%m-%d %H:%M:%S')+'.' 
+ provNoteElement = {} + provNoteElement['key'] = 'dc.description.provenance' + provNoteElement['value'] = unicode(provNote) + provNoteElement['language'] = 'en_US' + itemMetadataProcessed.append(provNoteElement) + recordsEdited = recordsEdited + 1 + else: + if metadata[l] not in itemMetadataProcessed: + itemMetadataProcessed.append(metadata[l]) + itemMetadataProcessed = json.dumps(itemMetadataProcessed) + print 'updated', itemLink, recordsEdited + delete = requests.delete(baseURL+itemLink+'/metadata', headers=header, cookies=cookies, verify=verify) + print delete + post = requests.put(baseURL+itemLink+'/metadata', headers=header, cookies=cookies, verify=verify, data=itemMetadataProcessed) + print post + f.writerow([itemLink]+[replacedValue.encode('utf-8')]+[replacementValue.encode('utf-8')]+[delete]+[post]) logout = requests.post(baseURL+'/rest/logout', headers=header, cookies=cookies, verify=verify) diff --git a/repositoryMetadataBackup.py b/repositoryMetadataBackup.py index ac5913a..680bf14 100644 --- a/repositoryMetadataBackup.py +++ b/repositoryMetadataBackup.py @@ -21,6 +21,7 @@ filePath = secrets.filePath handlePrefix = secrets.handlePrefix verify = secrets.verify +skippedCollections = secrets.skippedCollections requests.packages.urllib3.disable_warnings() @@ -43,7 +44,7 @@ collections = requests.get(baseURL+'/rest/communities/'+str(communityID)+'/collections', headers=header, cookies=cookies, verify=verify).json() for j in range (0, len (collections)): collectionID = collections[j]['uuid'] - if collectionID != '45794375-6640-4efe-848e-082e60bae375': + if collectionID not in skippedCollections: collectionHandle = collections[j]['handle'].replace(handlePrefix,'').replace('/','-') print 'collectionID: ', collectionID itemList = [] diff --git a/repositoryMetadataRestore.py b/repositoryMetadataRestore.py index 4860433..a96cde5 100644 --- a/repositoryMetadataRestore.py +++ b/repositoryMetadataRestore.py @@ -20,6 +20,7 @@ filePath = secrets.filePath handlePrefix = 
secrets.handlePrefix verify = secrets.verify +skippedCollections = secrets.skippedCollections requests.packages.urllib3.disable_warnings() diff --git a/splitFieldIntoMultipleFields.py b/splitFieldIntoMultipleFields.py index cc37935..6df4872 100644 --- a/splitFieldIntoMultipleFields.py +++ b/splitFieldIntoMultipleFields.py @@ -22,6 +22,7 @@ password = secrets.password filePath = secrets.filePath verify = secrets.verify +skippedCollections = secrets.skippedCollections parser = argparse.ArgumentParser() parser.add_argument('-r', '--replacedKey', help='the key to be replaced. optional - if not provided, the script will ask for input') @@ -63,49 +64,54 @@ replacementValueList = ast.literal_eval(row['structuredList']) offset = 0 items = '' + itemLinks = [] while items != []: endpoint = baseURL+'/rest/filtered-items?query_field[]='+replacedKey+'&query_op[]=equals&query_val[]='+replacedValue+'&limit=200&offset='+str(offset) response = requests.get(endpoint, headers=header, cookies=cookies, verify=verify).json() items = response['items'] for item in items: itemLink = item['link'] - itemMetadataProcessed = [] - metadata = requests.get(baseURL + itemLink + '/metadata', headers=header, cookies=cookies, verify=verify).json() - for l in range (0, len (metadata)): - metadata[l].pop('schema', None) - metadata[l].pop('element', None) - metadata[l].pop('qualifier', None) - languageValue = metadata[l]['language'] - if metadata[l]['key'] == replacedKey and metadata[l]['value'].encode('utf-8') == replacedValue: - print 'match' - replacedElement = metadata[l] - for replacementValue in replacementValueList: - updatedMetadataElement = {} - updatedMetadataElement['key'] = replacedKey - updatedMetadataElement['value'] = replacementValue - updatedMetadataElement['language'] = languageValue - itemMetadataProcessed.append(updatedMetadataElement) - provNote = '\''+replacedKey+': '+replacedValue+'\' split into \''+replacedKey+': '+replacementValue+'\' through a batch process on 
'+datetime.now().strftime('%Y-%m-%d %H:%M:%S')+'.' - provNoteElement = {} - provNoteElement['key'] = 'dc.description.provenance' - provNoteElement['value'] = provNote - provNoteElement['language'] = 'en_US' - itemMetadataProcessed.append(provNoteElement) - elementsEdited = elementsEdited + 1 - else: - if metadata[l] not in itemMetadataProcessed: - itemMetadataProcessed.append(metadata[l]) - recordsEdited = recordsEdited + 1 - itemMetadataProcessed = json.dumps(itemMetadataProcessed) - #print itemMetadataProcessed - print 'updated', itemLink, recordsEdited, elementsEdited - delete = requests.delete(baseURL + itemLink + '/metadata', headers=header, cookies=cookies, verify=verify) - print delete - post = requests.put(baseURL + itemLink + '/metadata', headers=header, cookies=cookies, verify=verify, data=itemMetadataProcessed) - print post - f.writerow([itemLink]+[replacedKey]+[replacementValueList]+[delete]+[post]) + itemLinks.append(itemLink) offset = offset + 200 print offset + for itemLink in itemLinks: + itemMetadataProcessed = [] + print itemLink + metadata = requests.get(baseURL + itemLink + '/metadata', headers=header, cookies=cookies, verify=verify).json() + for l in range (0, len (metadata)): + metadata[l].pop('schema', None) + metadata[l].pop('element', None) + metadata[l].pop('qualifier', None) + languageValue = metadata[l]['language'] + if metadata[l]['key'] == replacedKey and metadata[l]['value'].encode('utf-8') == replacedValue: + print 'match' + replacedElement = metadata[l] + for replacementValue in replacementValueList: + updatedMetadataElement = {} + updatedMetadataElement['key'] = replacedKey + updatedMetadataElement['value'] = replacementValue + updatedMetadataElement['language'] = languageValue + itemMetadataProcessed.append(updatedMetadataElement) + provNote = '\''+replacedKey+': '+replacedValue+'\' split into \''+replacedKey+': '+replacementValue+'\' through a batch process on '+datetime.now().strftime('%Y-%m-%d %H:%M:%S')+'.' 
+ provNoteElement = {} + provNoteElement['key'] = 'dc.description.provenance' + provNoteElement['value'] = provNote + provNoteElement['language'] = 'en_US' + itemMetadataProcessed.append(provNoteElement) + elementsEdited = elementsEdited + 1 + else: + if metadata[l] not in itemMetadataProcessed: + itemMetadataProcessed.append(metadata[l]) + recordsEdited = recordsEdited + 1 + itemMetadataProcessed = json.dumps(itemMetadataProcessed) + #print itemMetadataProcessed + print 'updated', itemLink, recordsEdited, elementsEdited + delete = requests.delete(baseURL + itemLink + '/metadata', headers=header, cookies=cookies, verify=verify) + print delete + post = requests.put(baseURL + itemLink + '/metadata', headers=header, cookies=cookies, verify=verify, data=itemMetadataProcessed) + print post + f.writerow([itemLink]+[replacedKey]+[replacementValueList]+[delete]+[post]) + logout = requests.post(baseURL+'/rest/logout', headers=header, cookies=cookies, verify=verify) elapsedTime = time.time() - startTime diff --git a/updateLanguageTagsForKey.py b/updateLanguageTagsForKey.py index ac2ec03..adb9eb7 100644 --- a/updateLanguageTagsForKey.py +++ b/updateLanguageTagsForKey.py @@ -33,6 +33,7 @@ password = secrets.password filePath = secrets.filePath verify = secrets.verify +skippedCollections = secrets.skippedCollections startTime = time.time() data = {'email':email,'password':password} @@ -49,6 +50,7 @@ offset = 0 recordsEdited = 0 items = '' +itemLinks = [] while items != []: endpoint = baseURL+'/rest/filtered-items?query_field[]='+key+'&query_op[]=exists&query_val[]=&limit=200&offset='+str(offset) print endpoint @@ -57,30 +59,37 @@ for item in items: itemMetadataProcessed = [] itemLink = item['link'] - print itemLink - metadata = requests.get(baseURL + itemLink + '/metadata', headers=header, cookies=cookies, verify=verify).json() - for l in range (0, len (metadata)): - if metadata[l]['key'] == key and metadata[l]['language'] == None: - updatedMetadataElement = {} - 
updatedMetadataElement['key'] = metadata[l]['key'] - updatedMetadataElement['value'] = metadata[l]['value'] - updatedMetadataElement['language'] = 'en_US' - itemMetadataProcessed.append(updatedMetadataElement) - provNote = 'The language tag for \''+metadata[l]['key']+': '+metadata[l]['value']+'\' was changed from \'null\' to \'en_US\' through a batch process on '+datetime.now().strftime('%Y-%m-%d %H:%M:%S')+'.' - provNoteElement = {} - provNoteElement['key'] = 'dc.description.provenance' - provNoteElement['value'] = unicode(provNote) - provNoteElement['language'] = 'en_US' - itemMetadataProcessed.append(provNoteElement) - else: - itemMetadataProcessed.append(metadata[l]) - delete = requests.delete(baseURL + itemLink + '/metadata', headers=header, cookies=cookies, verify=verify) - print delete - post = requests.put(baseURL + itemLink + '/metadata', headers=header, cookies=cookies, verify=verify, data=itemMetadataProcessed) - print post - f.writerow([itemLink]+[key]) - offset = offset + 200 - print offset + itemLinks.append(itemLink) + offset = offset + 200 + print offset +for itemLink in itemLinks: + itemMetadataProcessed = [] + print itemLink + metadata = requests.get(baseURL + itemLink + '/metadata', headers=header, cookies=cookies, verify=verify).json() + for l in range (0, len (metadata)): + metadata[l].pop('schema', None) + metadata[l].pop('element', None) + metadata[l].pop('qualifier', None) + if metadata[l]['key'] == key and metadata[l]['language'] == None: + updatedMetadataElement = {} + updatedMetadataElement['key'] = metadata[l]['key'] + updatedMetadataElement['value'] = metadata[l]['value'] + updatedMetadataElement['language'] = 'en_US' + itemMetadataProcessed.append(updatedMetadataElement) + provNote = 'The language tag for \''+metadata[l]['key']+': '+metadata[l]['value']+'\' was changed from \'null\' to \'en_US\' through a batch process on '+datetime.now().strftime('%Y-%m-%d %H:%M:%S')+'.' 
+ provNoteElement = {} + provNoteElement['key'] = 'dc.description.provenance' + provNoteElement['value'] = unicode(provNote) + provNoteElement['language'] = 'en_US' + itemMetadataProcessed.append(provNoteElement) + else: + itemMetadataProcessed.append(metadata[l]) + itemMetadataProcessed = json.dumps(itemMetadataProcessed) + delete = requests.delete(baseURL + itemLink + '/metadata', headers=header, cookies=cookies, verify=verify) + print delete + post = requests.put(baseURL + itemLink + '/metadata', headers=header, cookies=cookies, verify=verify, data=itemMetadataProcessed) + print post + f.writerow([itemLink]+[key]) logout = requests.post(baseURL+'/rest/logout', headers=header, cookies=cookies, verify=verify) diff --git a/updateLanguageTagsForKeyInCollection.py b/updateLanguageTagsForKeyInCollection.py index 2f40c4a..a34760a 100644 --- a/updateLanguageTagsForKeyInCollection.py +++ b/updateLanguageTagsForKeyInCollection.py @@ -23,6 +23,7 @@ password = secrets.password filePath = secrets.filePath verify = secrets.verify +skippedCollections = secrets.skippedCollections key = raw_input('Enter key: ') collectionHandle = raw_input('Enter collection handle: ')