diff --git a/ami_scripts/config.json b/ami_scripts/config.json
index 9af5a1b9..4b95a691 100644
--- a/ami_scripts/config.json
+++ b/ami_scripts/config.json
@@ -1,30 +1,30 @@
 {
-    "replacements": [
-        {
-            "find": ";",
-            "replace": "-"
-        },
-        {
-            "find": "\\\\",
-            "replace": "-"
-        },
-        {
-            "find": "\\n",
-            "replace": "-"
-        },
-        {
-            "find": "\\r",
-            "replace": "-"
-        },
-        {
-            "find": "\\t",
-            "replace": "-"
-        },
-        {
-            "find": "\"",
-            "replace": "''"
-        }
-    ],
+  "replacements": [
+    {
+      "find": ";",
+      "replace": "-"
+    },
+    {
+      "find": "\\\\",
+      "replace": "-"
+    },
+    {
+      "find": "\\n",
+      "replace": "-"
+    },
+    {
+      "find": "\\r",
+      "replace": "-"
+    },
+    {
+      "find": "\\t",
+      "replace": "-"
+    },
+    {
+      "find": "\"",
+      "replace": "''"
+    }
+  ],
   "format_fixes": {
     "video cassette analog": [
       "Betacam",
@@ -97,5 +97,51 @@
       "VHS/PCM",
       "Hi8/PCM"
     ]
+  },
+  "digitizers": {
+    "Media Preserve": {
+      "organization": {
+        "address": {
+          "city": "Cranberry Township",
+          "postalCode": "16066",
+          "state": "PA",
+          "street1": "111 Thomson Park Drive"
+        },
+        "name": "Preservation Technologies, L.P."
+      }
+    },
+    "Colorlab": {
+      "organization": {
+        "address": {
+          "city": "Rockville",
+          "postalCode": "20852",
+          "state": "MD",
+          "street1": "5708 Arundel Ave"
+        },
+        "name": "Colorlab"
+      }
+    },
+    "NYPL": {
+      "organization": {
+        "address": {
+          "city": "New York",
+          "postalCode": "10023",
+          "state": "NY",
+          "street1": "40 Lincoln Center Plaza"
+        },
+        "name": "New York Public Library"
+      }
+    },
+    "Memnon": {
+      "organization": {
+        "address": {
+          "city": "Bloomington",
+          "postalCode": "47408",
+          "state": "IN",
+          "street1": "2719 E 10th St"
+        },
+        "name": "Memnon Archiving Services"
+      }
+    }
   }
 }
diff --git a/ami_scripts/create_media_json.py b/ami_scripts/create_media_json.py
index 4bed0fd6..a414f953 100755
--- a/ami_scripts/create_media_json.py
+++ b/ami_scripts/create_media_json.py
@@ -24,11 +24,13 @@ def get_args():
     parser = argparse.ArgumentParser(description="Create NYPL JSON Files from SPEC Export and user-supplied directory of media files")
     parser.add_argument('-c', '--config', required=True, help='Path to config file')
     parser.add_argument('-s', '--source', help='path to SPEC CSV Export', required=False)
-    parser.add_argument('-d', '--directory', help='path to directory of media files', required=False)
+    parser.add_argument('-m', '--media', help='path to directory of media files', required=False)
+    parser.add_argument('-d', '--digitizer', choices=['Media Preserve', 'NYPL', 'Memnon'], required=False, help='Name of the digitizer')
     parser.add_argument('-o', '--output', help='path to destination for JSON files', required=True)
     return parser.parse_args()
 
 
+
 def load_config(config_file):
     with open(config_file) as f:
         config = json.load(f)
@@ -55,15 +57,17 @@ def load_csv(args):
 
 def get_media_files(args):
     media_list = []
-    if args.directory:
+    if args.media:
         try:
-            media_dir = os.scandir(args.directory)
+            media_dir = os.scandir(args.media)
             for entry in media_dir:
                 if entry.is_file() and entry.name.lower().endswith(tuple(valid_extensions)):
                     media_list.append(entry.path)
             media_list.sort()
         except OSError as e:
             logger.error(f"Error getting media files: {e}")
+    if media_list:
+        logger.info(f"Found these files: {', '.join(media_list)}")
     return media_list
 
 
@@ -155,6 +159,8 @@ def create_new_json(args, media_data, config):
             'filesize': {'measure': media_data['file_size'], 'unit': 'B'}
         }
     }
+    if args.digitizer:
+        nested_json['digitizer'] = config['digitizers'][args.digitizer]
 
     # Remove any keys in the 'technical' dictionary that have a value of None
     nested_json['technical'] = {k: v for k, v in nested_json['technical'].items() if v is not None}
@@ -174,7 +180,10 @@ def process_media_files(args, data_dict, media_list, config):
         cms_id = media_data['cms_id']
         if cms_id in data_dict:
             media_data['bibliographic'] = data_dict[cms_id]
-            create_new_json(args, media_data, config)
+            logger.info(f"Now making JSON for {media_data['filename']}")
+            create_new_json(args, media_data, config)
+        else:
+            logger.warning(f"{media_data['filename']} not found in SPEC CSV Export (data dict)")
 
 
 def main():
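
Usage note (illustrative, not part of the patch): with these changes applied, the new -m/--media and -d/--digitizer flags might be combined as below; the CSV, media, and output paths are hypothetical.

    python3 ami_scripts/create_media_json.py \
        -c ami_scripts/config.json \
        -s spec_export.csv \
        -m /path/to/media_files \
        -d 'Media Preserve' \
        -o /path/to/json_output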