1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39 """
40 Synchronizes a local directory with an Amazon S3 bucket.
41
42 No configuration is required; all necessary information is taken from the
43 command-line. The only thing configuration would help with is the path
44 resolver interface, and it doesn't seem worth it to require configuration just
45 to get that.
46
47 @author: Kenneth J. Pronovici <pronovic@ieee.org>
48 """
49
50
51
52
53
54
55 import sys
56 import os
57 import logging
58 import getopt
59 import json
60 import chardet
61 import warnings
62
63
64 from CedarBackup2.release import AUTHOR, EMAIL, VERSION, DATE, COPYRIGHT
65 from CedarBackup2.filesystem import FilesystemList
66 from CedarBackup2.cli import setupLogging, DEFAULT_LOGFILE, DEFAULT_OWNERSHIP, DEFAULT_MODE
67 from CedarBackup2.util import Diagnostics, splitCommandLine, encodePath
68 from CedarBackup2.util import executeCommand
69
70
71
72
73
74
75 logger = logging.getLogger("CedarBackup2.log.tools.amazons3")
76
77 AWS_COMMAND = [ "aws" ]
78
79 SHORT_SWITCHES = "hVbql:o:m:OdsDvw"
80 LONG_SWITCHES = [ 'help', 'version', 'verbose', 'quiet',
81 'logfile=', 'owner=', 'mode=',
82 'output', 'debug', 'stack', 'diagnostics',
83 'verifyOnly', 'ignoreWarnings', ]
84
85
86
87
88
89
91
92
93
94
95
96 """
97 Class representing command-line options for the cback-amazons3-sync script.
98
99 The C{Options} class is a Python object representation of the command-line
100 options of the cback script.
101
102 The object representation is two-way: a command line string or a list of
103 command line arguments can be used to create an C{Options} object, and then
104 changes to the object can be propagated back to a list of command-line
105 arguments or to a command-line string. An C{Options} object can even be
106 created from scratch programmatically (if you have a need for that).
107
108 There are two main levels of validation in the C{Options} class. The first
109 is field-level validation. Field-level validation comes into play when a
110 given field in an object is assigned to or updated. We use Python's
111 C{property} functionality to enforce specific validations on field values,
112 and in some places we even use customized list classes to enforce
113 validations on list members. You should expect to catch a C{ValueError}
114 exception when making assignments to fields if you are programmatically
115 filling an object.
116
117 The second level of validation is post-completion validation. Certain
118 validations don't make sense until an object representation of options is
119 fully "complete". We don't want these validations to apply all of the time,
120 because it would make building up a valid object from scratch a real pain.
121 For instance, we might have to do things in the right order to keep from
122 throwing exceptions, etc.
123
124 All of these post-completion validations are encapsulated in the
125 L{Options.validate} method. This method can be called at any time by a
126 client, and will always be called immediately after creating a C{Options}
127 object from a command line and before exporting a C{Options} object back to
128 a command line. This way, we get acceptable ease-of-use but we also don't
129 accept or emit invalid command lines.
130
131 @note: Lists within this class are "unordered" for equality comparisons.
132
133 @sort: __init__, __repr__, __str__, __cmp__
134 """
135
136
137
138
139
def __init__(self, argumentList=None, argumentString=None, validate=True):
    """
    Builds an options object, optionally from a command line.

    When neither C{argumentList} nor C{argumentString} is supplied, every
    field is left at its default, nothing is parsed or validated, and the
    object is not valid until it has been filled in.

    A command line may be given either as a list (C{argumentList}) or as a
    single string (C{argumentString}), but never both.  Either form must
    leave out the program name, i.e. it should look like C{sys.argv[1:]}
    rather than C{sys.argv}.  A string is first broken into a list by
    L{util.splitCommandLine}; see that function for notes about its
    limitations.  The original arguments are not kept once parsing is done.

    After a command line has been parsed, L{Options.validate} is called
    (with its default arguments) unless C{validate} is C{False}.  Parsing
    itself can still raise an exception even when validation is disabled.

    @note: The command line format is specified by the L{_usage} function.

    @note: Leave C{validate} at its default of C{True} unless there is a
    specific need to read in an invalid command line.

    @param argumentList: Command line for a program.
    @type argumentList: List of arguments, i.e. C{sys.argv[1:]}

    @param argumentString: Command line for a program.
    @type argumentString: String, i.e. "cback --verbose stage store"

    @param validate: Validate the command line after parsing it.
    @type validate: Boolean true/false.

    @raise getopt.GetoptError: If the command-line arguments could not be parsed.
    @raise ValueError: If the command-line arguments are invalid.
    """
    # Every on/off switch starts out cleared ...
    for flag in ("_help", "_version", "_verbose", "_quiet", "_output",
                 "_debug", "_stacktrace", "_diagnostics", "_verifyOnly",
                 "_ignoreWarnings"):
        setattr(self, flag, False)
    # ... and every value-carrying field starts out unset.
    for parameter in ("_logfile", "_owner", "_mode", "_sourceDir", "_s3BucketUrl"):
        setattr(self, parameter, None)

    haveList = argumentList is not None
    haveString = argumentString is not None
    if haveList and haveString:
        raise ValueError("Use either argumentList or argumentString, but not both.")
    if haveString:
        argumentList = splitCommandLine(argumentString)

    # Nothing to parse means nothing to validate either: the caller is
    # expected to fill the object in by hand.
    if argumentList is None:
        return

    self._parseArgumentList(argumentList)
    if validate:
        self.validate()
211
212
213
214
215
216
222
224 """
225 Informal string representation for class instance.
226 """
227 return self.__repr__()
228
229
230
231
232
233
319
320
321
322
323
324
326 """
327 Property target used to set the help flag.
328 No validations, but we normalize the value to C{True} or C{False}.
329 """
330 if value:
331 self._help = True
332 else:
333 self._help = False
334
336 """
337 Property target used to get the help flag.
338 """
339 return self._help
340
342 """
343 Property target used to set the version flag.
344 No validations, but we normalize the value to C{True} or C{False}.
345 """
346 if value:
347 self._version = True
348 else:
349 self._version = False
350
352 """
353 Property target used to get the version flag.
354 """
355 return self._version
356
358 """
359 Property target used to set the verbose flag.
360 No validations, but we normalize the value to C{True} or C{False}.
361 """
362 if value:
363 self._verbose = True
364 else:
365 self._verbose = False
366
368 """
369 Property target used to get the verbose flag.
370 """
371 return self._verbose
372
374 """
375 Property target used to set the quiet flag.
376 No validations, but we normalize the value to C{True} or C{False}.
377 """
378 if value:
379 self._quiet = True
380 else:
381 self._quiet = False
382
384 """
385 Property target used to get the quiet flag.
386 """
387 return self._quiet
388
390 """
391 Property target used to set the logfile parameter.
392 @raise ValueError: If the value cannot be encoded properly.
393 """
394 if value is not None:
395 if len(value) < 1:
396 raise ValueError("The logfile parameter must be a non-empty string.")
397 self._logfile = encodePath(value)
398
400 """
401 Property target used to get the logfile parameter.
402 """
403 return self._logfile
404
406 """
407 Property target used to set the owner parameter.
408 If not C{None}, the owner must be a C{(user,group)} tuple or list.
409 Strings (and inherited children of strings) are explicitly disallowed.
410 The value will be normalized to a tuple.
411 @raise ValueError: If the value is not valid.
412 """
413 if value is None:
414 self._owner = None
415 else:
416 if isinstance(value, str):
417 raise ValueError("Must specify user and group tuple for owner parameter.")
418 if len(value) != 2:
419 raise ValueError("Must specify user and group tuple for owner parameter.")
420 if len(value[0]) < 1 or len(value[1]) < 1:
421 raise ValueError("User and group tuple values must be non-empty strings.")
422 self._owner = (value[0], value[1])
423
425 """
426 Property target used to get the owner parameter.
427 The parameter is a tuple of C{(user, group)}.
428 """
429 return self._owner
430
432 """
433 Property target used to set the mode parameter.
434 """
435 if value is None:
436 self._mode = None
437 else:
438 try:
439 if isinstance(value, str):
440 value = int(value, 8)
441 else:
442 value = int(value)
443 except TypeError:
444 raise ValueError("Mode must be an octal integer >= 0, i.e. 644.")
445 if value < 0:
446 raise ValueError("Mode must be an octal integer >= 0. i.e. 644.")
447 self._mode = value
448
450 """
451 Property target used to get the mode parameter.
452 """
453 return self._mode
454
456 """
457 Property target used to set the output flag.
458 No validations, but we normalize the value to C{True} or C{False}.
459 """
460 if value:
461 self._output = True
462 else:
463 self._output = False
464
466 """
467 Property target used to get the output flag.
468 """
469 return self._output
470
472 """
473 Property target used to set the debug flag.
474 No validations, but we normalize the value to C{True} or C{False}.
475 """
476 if value:
477 self._debug = True
478 else:
479 self._debug = False
480
482 """
483 Property target used to get the debug flag.
484 """
485 return self._debug
486
488 """
489 Property target used to set the stacktrace flag.
490 No validations, but we normalize the value to C{True} or C{False}.
491 """
492 if value:
493 self._stacktrace = True
494 else:
495 self._stacktrace = False
496
498 """
499 Property target used to get the stacktrace flag.
500 """
501 return self._stacktrace
502
504 """
505 Property target used to set the diagnostics flag.
506 No validations, but we normalize the value to C{True} or C{False}.
507 """
508 if value:
509 self._diagnostics = True
510 else:
511 self._diagnostics = False
512
514 """
515 Property target used to get the diagnostics flag.
516 """
517 return self._diagnostics
518
520 """
521 Property target used to set the verifyOnly flag.
522 No validations, but we normalize the value to C{True} or C{False}.
523 """
524 if value:
525 self._verifyOnly = True
526 else:
527 self._verifyOnly = False
528
530 """
531 Property target used to get the verifyOnly flag.
532 """
533 return self._verifyOnly
534
536 """
537 Property target used to set the ignoreWarnings flag.
538 No validations, but we normalize the value to C{True} or C{False}.
539 """
540 if value:
541 self._ignoreWarnings = True
542 else:
543 self._ignoreWarnings = False
544
546 """
547 Property target used to get the ignoreWarnings flag.
548 """
549 return self._ignoreWarnings
550
552 """
553 Property target used to set the sourceDir parameter.
554 """
555 if value is not None:
556 if len(value) < 1:
557 raise ValueError("The sourceDir parameter must be a non-empty string.")
558 self._sourceDir = value
559
561 """
562 Property target used to get the sourceDir parameter.
563 """
564 return self._sourceDir
565
567 """
568 Property target used to set the s3BucketUrl parameter.
569 """
570 if value is not None:
571 if len(value) < 1:
572 raise ValueError("The s3BucketUrl parameter must be a non-empty string.")
573 self._s3BucketUrl = value
574
576 """
577 Property target used to get the s3BucketUrl parameter.
578 """
579 return self._s3BucketUrl
580
581 help = property(_getHelp, _setHelp, None, "Command-line help (C{-h,--help}) flag.")
582 version = property(_getVersion, _setVersion, None, "Command-line version (C{-V,--version}) flag.")
583 verbose = property(_getVerbose, _setVerbose, None, "Command-line verbose (C{-b,--verbose}) flag.")
584 quiet = property(_getQuiet, _setQuiet, None, "Command-line quiet (C{-q,--quiet}) flag.")
585 logfile = property(_getLogfile, _setLogfile, None, "Command-line logfile (C{-l,--logfile}) parameter.")
586 owner = property(_getOwner, _setOwner, None, "Command-line owner (C{-o,--owner}) parameter, as tuple C{(user,group)}.")
587 mode = property(_getMode, _setMode, None, "Command-line mode (C{-m,--mode}) parameter.")
588 output = property(_getOutput, _setOutput, None, "Command-line output (C{-O,--output}) flag.")
589 debug = property(_getDebug, _setDebug, None, "Command-line debug (C{-d,--debug}) flag.")
590 stacktrace = property(_getStacktrace, _setStacktrace, None, "Command-line stacktrace (C{-s,--stack}) flag.")
591 diagnostics = property(_getDiagnostics, _setDiagnostics, None, "Command-line diagnostics (C{-D,--diagnostics}) flag.")
592 verifyOnly = property(_getVerifyOnly, _setVerifyOnly, None, "Command-line verifyOnly (C{-v,--verifyOnly}) flag.")
593 ignoreWarnings = property(_getIgnoreWarnings, _setIgnoreWarnings, None, "Command-line ignoreWarnings (C{-w,--ignoreWarnings}) flag.")
594 sourceDir = property(_getSourceDir, _setSourceDir, None, "Command-line sourceDir, source of sync.")
595 s3BucketUrl = property(_getS3BucketUrl, _setS3BucketUrl, None, "Command-line s3BucketUrl, target of sync.")
596
597
598
599
600
601
603 """
604 Validates command-line options represented by the object.
605
606 Unless C{--help}, C{--version} or C{--diagnostics} is supplied, both the
607 source directory and the S3 bucket URL must be specified. Other validations
608 (as for allowed values for particular options) will be taken care of at
609 assignment time by the properties functionality.
610
611 @note: The command line format is specified by the L{_usage} function.
612 Call L{_usage} to see a usage statement for the cback script.
613
614 @raise ValueError: If one of the validations fails.
615 """
616 if not self.help and not self.version and not self.diagnostics:
617 if self.sourceDir is None or self.s3BucketUrl is None:
618 raise ValueError("Source directory and S3 bucket URL are both required.")
619
621 """
622 Extracts options into a list of command line arguments.
623
624 The original order of the various arguments (if, indeed, the object was
625 initialized with a command-line) is not preserved in this generated
626 argument list. Besides that, the argument list is normalized to use the
627 long option names (i.e. --version rather than -V). The resulting list
628 will be suitable for passing back to the constructor in the
629 C{argumentList} parameter. Unlike L{buildArgumentString}, string
630 arguments are not quoted here, because there is no need for it.
631
632 Unless the C{validate} parameter is C{False}, the L{Options.validate}
633 method will be called (with its default arguments) against the
634 options before extracting the command line. If the options are not valid,
635 then an argument list will not be extracted.
636
637 @note: It is strongly suggested that the C{validate} option always be set
638 to C{True} (the default) unless there is a specific need to extract an
639 invalid command line.
640
641 @param validate: Validate the options before extracting the command line.
642 @type validate: Boolean true/false.
643
644 @return: List representation of command-line arguments.
645 @raise ValueError: If options within the object are invalid.
646 """
647 if validate:
648 self.validate()
649 argumentList = []
650 if self._help:
651 argumentList.append("--help")
652 if self.version:
653 argumentList.append("--version")
654 if self.verbose:
655 argumentList.append("--verbose")
656 if self.quiet:
657 argumentList.append("--quiet")
658 if self.logfile is not None:
659 argumentList.append("--logfile")
660 argumentList.append(self.logfile)
661 if self.owner is not None:
662 argumentList.append("--owner")
663 argumentList.append("%s:%s" % (self.owner[0], self.owner[1]))
664 if self.mode is not None:
665 argumentList.append("--mode")
666 argumentList.append("%o" % self.mode)
667 if self.output:
668 argumentList.append("--output")
669 if self.debug:
670 argumentList.append("--debug")
671 if self.stacktrace:
672 argumentList.append("--stack")
673 if self.diagnostics:
674 argumentList.append("--diagnostics")
675 if self.verifyOnly:
676 argumentList.append("--verifyOnly")
677 if self.ignoreWarnings:
678 argumentList.append("--ignoreWarnings")
679 if self.sourceDir is not None:
680 argumentList.append(self.sourceDir)
681 if self.s3BucketUrl is not None:
682 argumentList.append(self.s3BucketUrl)
683 return argumentList
684
686 """
687 Extracts options into a string of command-line arguments.
688
689 The original order of the various arguments (if, indeed, the object was
690 initialized with a command-line) is not preserved in this generated
691 argument string. Besides that, the argument string is normalized to use
692 the long option names (i.e. --version rather than -V) and to quote all
693 string arguments with double quotes (C{"}). The resulting string will be
694 suitable for passing back to the constructor in the C{argumentString}
695 parameter.
696
697 Unless the C{validate} parameter is C{False}, the L{Options.validate}
698 method will be called (with its default arguments) against the options
699 before extracting the command line. If the options are not valid, then
700 an argument string will not be extracted.
701
702 @note: It is strongly suggested that the C{validate} option always be set
703 to C{True} (the default) unless there is a specific need to extract an
704 invalid command line.
705
706 @param validate: Validate the options before extracting the command line.
707 @type validate: Boolean true/false.
708
709 @return: String representation of command-line arguments.
710 @raise ValueError: If options within the object are invalid.
711 """
712 if validate:
713 self.validate()
714 argumentString = ""
715 if self._help:
716 argumentString += "--help "
717 if self.version:
718 argumentString += "--version "
719 if self.verbose:
720 argumentString += "--verbose "
721 if self.quiet:
722 argumentString += "--quiet "
723 if self.logfile is not None:
724 argumentString += "--logfile \"%s\" " % self.logfile
725 if self.owner is not None:
726 argumentString += "--owner \"%s:%s\" " % (self.owner[0], self.owner[1])
727 if self.mode is not None:
728 argumentString += "--mode %o " % self.mode
729 if self.output:
730 argumentString += "--output "
731 if self.debug:
732 argumentString += "--debug "
733 if self.stacktrace:
734 argumentString += "--stack "
735 if self.diagnostics:
736 argumentString += "--diagnostics "
737 if self.verifyOnly:
738 argumentString += "--verifyOnly "
739 if self.ignoreWarnings:
740 argumentString += "--ignoreWarnings "
741 if self.sourceDir is not None:
742 argumentString += "\"%s\" " % self.sourceDir
743 if self.s3BucketUrl is not None:
744 argumentString += "\"%s\" " % self.s3BucketUrl
745 return argumentString
746
def _parseArgumentList(self, argumentList):
    """
    Internal method to parse a list of command-line arguments.

    Most of the validation we do here has to do with whether the arguments
    can be parsed and whether any values which exist are valid.  We don't do
    any validation as to whether required elements exist or whether elements
    exist in the proper combination (instead, that's the job of the
    L{validate} method).

    For any of the options which supply parameters, if the option is
    duplicated with long and short switches (i.e. C{-l} and a C{--logfile})
    then the long switch is used.  If the same option is duplicated with the
    same switch (long or short), then the last entry on the command line is
    used.

    The source directory and S3 bucket URL are taken from the positional
    arguments only when exactly two of them are present; any other count
    leaves both fields untouched, for L{validate} to report.

    @param argumentList: List of arguments to a command.
    @type argumentList: List of arguments to a command, i.e. C{sys.argv[1:]}

    @raise getopt.GetoptError: If the argument list cannot be parsed.
    @raise ValueError: If a parsed value is rejected by its property setter.
    """
    opts, remaining = getopt.getopt(argumentList, SHORT_SWITCHES, LONG_SWITCHES)
    # Later duplicates of the same switch overwrite earlier ones.
    switches = dict(opts)

    if "-h" in switches or "--help" in switches:
        self.help = True
    if "-V" in switches or "--version" in switches:
        self.version = True
    if "-b" in switches or "--verbose" in switches:
        self.verbose = True
    if "-q" in switches or "--quiet" in switches:
        self.quiet = True

    # For value-carrying options the long form is assigned last so it wins.
    if "-l" in switches:
        self.logfile = switches["-l"]
    if "--logfile" in switches:
        self.logfile = switches["--logfile"]
    if "-o" in switches:
        self.owner = switches["-o"].split(":", 1)
    if "--owner" in switches:
        self.owner = switches["--owner"].split(":", 1)
    if "-m" in switches:
        self.mode = switches["-m"]
    if "--mode" in switches:
        self.mode = switches["--mode"]

    if "-O" in switches or "--output" in switches:
        self.output = True
    if "-d" in switches or "--debug" in switches:
        self.debug = True
    if "-s" in switches or "--stack" in switches:
        self.stacktrace = True
    if "-D" in switches or "--diagnostics" in switches:
        self.diagnostics = True
    if "-v" in switches or "--verifyOnly" in switches:
        self.verifyOnly = True
    if "-w" in switches or "--ignoreWarnings" in switches:
        self.ignoreWarnings = True

    # Check the count explicitly rather than catching ValueError around the
    # unpacking: a blanket handler would also hide a ValueError raised by the
    # sourceDir/s3BucketUrl setters for an invalid (e.g. empty) value.
    if len(remaining) == 2:
        self.sourceDir, self.s3BucketUrl = remaining
808
809
810
811
812
813
814
815
816
817
def cli():
    """
    Implements the command-line interface for the C{cback-amazons3-sync} script.

    Essentially, this is the "main routine" for the cback-amazons3-sync script.  It does
    all of the argument processing for the script, and then also implements the
    tool functionality.

    This function looks pretty similar to C{CedarBackup2.cli.cli()}.  It's not
    easy to refactor this code to make it reusable and also readable, so I've
    decided to just live with the duplication.

    C{0} is returned on success, including after handling C{--help},
    C{--version} or C{--diagnostics}.  A different error code is returned for
    each type of failure:

       - C{1}: The Python interpreter version is < 2.5
       - C{2}: Error processing command-line arguments
       - C{3}: Error configuring logging
       - C{5}: Backup was interrupted with a CTRL-C or similar
       - C{6}: Error executing other parts of the script

    When the C{--stack} option is given, exceptions raised while executing
    the action are not converted to a return code; they propagate to the
    caller so that the stack trace is visible.

    @note: This script uses print rather than logging to the INFO level, because
    it is interactive.  Underlying Cedar Backup functionality uses the logging
    mechanism exclusively.

    @return: Error code as described above.
    """
    try:
        # Compare as tuples; this does not depend on map() returning a list.
        tooOld = tuple(sys.version_info[:2]) < (2, 5)
    except Exception:
        # An interpreter that cannot report its version is treated as too old.
        tooOld = True
    if tooOld:
        sys.stderr.write("Python version 2.5 or greater required.\n")
        return 1

    try:
        options = Options(argumentList=sys.argv[1:])
    except Exception as e:
        _usage()
        sys.stderr.write(" *** Error: %s\n" % e)
        return 2

    # Informational switches short-circuit everything else.
    if options.help:
        _usage()
        return 0
    if options.version:
        _version()
        return 0
    if options.diagnostics:
        _diagnostics()
        return 0

    try:
        logfile = setupLogging(options)
    except Exception as e:
        sys.stderr.write("Error setting up logging: %s\n" % e)
        return 3

    logger.info("Cedar Backup Amazon S3 sync run started.")
    logger.info("Options were [%s]", options)
    logger.info("Logfile is [%s]", logfile)
    Diagnostics().logDiagnostics(method=logger.info)

    if options.stacktrace:
        # Deliberately unguarded so the interpreter prints the stack trace.
        _executeAction(options)
    else:
        try:
            _executeAction(options)
        except KeyboardInterrupt:
            logger.error("Backup interrupted.")
            logger.info("Cedar Backup Amazon S3 sync run completed with status 5.")
            return 5
        except Exception as e:
            logger.error("Error executing backup: %s", e)
            logger.info("Cedar Backup Amazon S3 sync run completed with status 6.")
            return 6

    logger.info("Cedar Backup Amazon S3 sync run completed with status 0.")
    return 0
897
898
899
900
901
902
903
904
905
906
908 """
909 Prints usage information for the cback-amazons3-sync script.
910 @param fd: File descriptor used to print information.
911 @note: The C{fd} is used rather than C{print} to facilitate unit testing.
912 """
913 fd.write("\n")
914 fd.write(" Usage: cback-amazons3-sync [switches] sourceDir s3bucketUrl\n")
915 fd.write("\n")
916 fd.write(" Cedar Backup Amazon S3 sync tool.\n")
917 fd.write("\n")
918 fd.write(" This Cedar Backup utility synchronizes a local directory to an Amazon S3\n")
919 fd.write(" bucket. After the sync is complete, a validation step is taken. An\n")
920 fd.write(" error is reported if the contents of the bucket do not match the\n")
921 fd.write(" source directory, or if the indicated size for any file differs.\n")
922 fd.write(" This tool is a wrapper over the AWS CLI command-line tool.\n")
923 fd.write("\n")
924 fd.write(" The following arguments are required:\n")
925 fd.write("\n")
926 fd.write(" sourceDir The local source directory on disk (must exist)\n")
927 fd.write(" s3BucketUrl The URL to the target Amazon S3 bucket\n")
928 fd.write("\n")
929 fd.write(" The following switches are accepted:\n")
930 fd.write("\n")
931 fd.write(" -h, --help Display this usage/help listing\n")
932 fd.write(" -V, --version Display version information\n")
933 fd.write(" -b, --verbose Print verbose output as well as logging to disk\n")
934 fd.write(" -q, --quiet Run quietly (display no output to the screen)\n")
935 fd.write(" -l, --logfile Path to logfile (default: %s)\n" % DEFAULT_LOGFILE)
936 fd.write(" -o, --owner Logfile ownership, user:group (default: %s:%s)\n" % (DEFAULT_OWNERSHIP[0], DEFAULT_OWNERSHIP[1]))
937 fd.write(" -m, --mode Octal logfile permissions mode (default: %o)\n" % DEFAULT_MODE)
938 fd.write(" -O, --output Record some sub-command (i.e. aws) output to the log\n")
939 fd.write(" -d, --debug Write debugging information to the log (implies --output)\n")
940 fd.write(" -s, --stack Dump Python stack trace instead of swallowing exceptions\n")
941 fd.write(" -D, --diagnostics Print runtime diagnostics to the screen and exit\n")
942 fd.write(" -v, --verifyOnly Only verify the S3 bucket contents, do not make changes\n")
943 fd.write(" -w, --ignoreWarnings Ignore warnings about problematic filename encodings\n")
944 fd.write("\n")
945 fd.write(" Typical usage would be something like:\n")
946 fd.write("\n")
947 fd.write(" cback-amazons3-sync /home/myuser s3://example.com-backup/myuser\n")
948 fd.write("\n")
949 fd.write(" This will sync the contents of /home/myuser into the indicated bucket.\n")
950 fd.write("\n")
951
952
953
954
955
956
958 """
959 Prints version information for the cback script.
960 @param fd: File descriptor used to print information.
961 @note: The C{fd} is used rather than C{print} to facilitate unit testing.
962 """
963 fd.write("\n")
964 fd.write(" Cedar Backup Amazon S3 sync tool.\n")
965 fd.write(" Included with Cedar Backup version %s, released %s.\n" % (VERSION, DATE))
966 fd.write("\n")
967 fd.write(" Copyright (c) %s %s <%s>.\n" % (COPYRIGHT, AUTHOR, EMAIL))
968 fd.write(" See CREDITS for a list of included code and other contributors.\n")
969 fd.write(" This is free software; there is NO warranty. See the\n")
970 fd.write(" GNU General Public License version 2 for copying conditions.\n")
971 fd.write("\n")
972 fd.write(" Use the --help option for usage information.\n")
973 fd.write("\n")
974
975
976
977
978
979
981 """
982 Prints runtime diagnostics information.
983 @param fd: File descriptor used to print information.
984 @note: The C{fd} is used rather than C{print} to facilitate unit testing.
985 """
986 fd.write("\n")
987 fd.write("Diagnostics:\n")
988 fd.write("\n")
989 Diagnostics().printDiagnostics(fd=fd, prefix=" ")
990 fd.write("\n")
991
992
993
994
995
996
1012
1013
1014
1015
1016
1017
1019 """
1020 Build a list of files in a source directory
1021 @param sourceDir: Local source directory
1022 @return: FilesystemList with contents of source directory
1023 """
1024 if not os.path.isdir(sourceDir):
1025 raise ValueError("Source directory does not exist on disk.")
1026 sourceFiles = FilesystemList()
1027 sourceFiles.addDirContents(sourceDir)
1028 return sourceFiles
1029
1030
1031
1032
1033
1034
1036 """
1037 Check source files, trying to guess which ones will have encoding problems.
1038 @param sourceDir: Local source directory
1039 @param sourceFiles: Filesystem list containing contents of source directory
1040 @raises ValueError: If a problem file is found
1041 @see U{http://opensourcehacker.com/2011/09/16/fix-linux-filename-encodings-with-python/}
1042 @see U{http://serverfault.com/questions/82821/how-to-tell-the-language-encoding-of-a-filename-on-linux}
1043 @see U{http://randysofia.com/2014/06/06/aws-cli-and-your-locale/}
1044 """
1045 with warnings.catch_warnings():
1046 warnings.simplefilter("ignore")
1047
1048 encoding = Diagnostics().encoding
1049
1050 failed = False
1051 for entry in sourceFiles:
1052 result = chardet.detect(entry)
1053 source = entry.decode(result["encoding"])
1054 try:
1055 target = source.encode(encoding)
1056 if source != target:
1057 logger.error("Inconsistent encoding for [%s]: got %s, but need %s" % (entry, result["encoding"], encoding))
1058 failed = True
1059 except UnicodeEncodeError:
1060 logger.error("Inconsistent encoding for [%s]: got %s, but need %s" % (entry, result["encoding"], encoding))
1061 failed = True
1062
1063 if not failed:
1064 logger.info("Completed checking source filename encoding (no problems found).")
1065 else:
1066 logger.error("Some filenames have inconsistent encodings and will likely cause sync problems.")
1067 logger.error("You may be able to fix this by setting a more sensible locale in your environment.")
1068 logger.error("Aternately, you can rename the problem files to be valid in the indicated locale.")
1069 logger.error("To ignore this warning and proceed anyway, use --ignoreWarnings")
1070 raise ValueError("Some filenames have inconsistent encodings and will likely cause sync problems.")
1071
1072
1073
1074
1075
1076
1078 """
1079 Synchronize a local directory to an Amazon S3 bucket.
1080 @param sourceDir: Local source directory
1081 @param s3BucketUrl: Target S3 bucket URL
1082 """
1083 logger.info("Synchronizing local source directory up to Amazon S3.")
1084 args = [ "s3", "sync", sourceDir, s3BucketUrl, "--delete", "--recursive", ]
1085 result = executeCommand(AWS_COMMAND, args, returnOutput=False)[0]
1086 if result != 0:
1087 raise IOError("Error [%d] calling AWS CLI synchronize bucket." % result)
1088
1089
1090
1091
1092
1093
def _verifyBucketContents(sourceDir, sourceFiles, s3BucketUrl):
    """
    Verify that a source directory is equivalent to an Amazon S3 bucket.

    The bucket is listed through the AWS CLI (C{aws s3api list-objects}) and
    every regular file in C{sourceFiles} is looked up in that listing by its
    path relative to C{sourceDir}.  A file is reported when it is absent from
    the listing or when the listed size differs from the size on disk.
    Objects that exist only in the bucket are not reported.

    @param sourceDir: Local source directory
    @param sourceFiles: Filesystem list containing contents of source directory
    @param s3BucketUrl: Target S3 bucket URL

    @raise IOError: If the AWS CLI command returns a non-zero status.
    @raise ValueError: If any file is missing from the bucket or differs in size.
    """
    # Split "s3://bucket/some/prefix" into bucket and key prefix.  A URL with
    # no prefix ("s3://bucket") is valid and yields an empty prefix.
    location = s3BucketUrl
    if location.startswith("s3://"):
        location = location[len("s3://"):]
    (bucket, _, prefix) = location.partition("/")

    args = ["s3api", "list-objects", "--bucket", bucket]
    if prefix:
        args += ["--prefix", prefix]
    args += ["--query", "Contents[].{Key: Key, Size: Size}"]
    (result, data) = executeCommand(AWS_COMMAND, args, returnOutput=True)
    if result != 0:
        raise IOError("Error [%d] calling AWS CLI verify bucket contents." % result)

    # With nothing under the prefix the query result is JSON null (or no
    # output at all); treat both as an empty listing so that every source
    # file is reported as missing rather than failing on the iteration.
    listing = "".join(data).strip()
    entries = (json.loads(listing) if listing else None) or []

    contents = {}
    for entry in entries:
        contents[_relativeKey(entry["Key"], prefix)] = int(entry["Size"])

    failed = False
    for entry in sourceFiles:
        if not os.path.isfile(entry):
            continue  # only regular files are compared
        key = _relativeKey(entry, sourceDir)
        size = int(os.stat(entry).st_size)
        if key not in contents:
            logger.error("File was apparently not uploaded: [%s]" % entry)
            failed = True
        elif size != contents[key]:
            logger.error("File size differs [%s]: expected %s bytes but got %s bytes" % (entry, size, contents[key]))
            failed = True

    if failed:
        logger.error("There were differences between source directory and target S3 bucket.")
        raise ValueError("There were differences between source directory and target S3 bucket.")
    logger.info("Completed verifying Amazon S3 bucket contents (no problems found).")


def _relativeKey(path, root):
    """
    Returns C{path} relative to C{root}, without any leading slash.

    C{root} is removed only where it leads C{path}, so a later occurrence of
    the same text inside the path is left alone.  Stripping the leading slash
    makes local paths and bucket keys comparable whether or not the source
    directory or key prefix was written with a trailing slash.
    """
    if root and path.startswith(root):
        path = path[len(root):]
    return path.lstrip("/")
1155
1156
1157
1158
1159
1160
1161 if __name__ == "__main__":
1162 sys.exit(cli())
1163