/*
 * Copyright 2016-2018 Ping Identity Corporation
 * All Rights Reserved.
 */
/*
 * Copyright (C) 2016-2018 Ping Identity Corporation
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License (GPLv2 only)
 * or the terms of the GNU Lesser General Public License (LGPLv2.1 only)
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see <http://www.gnu.org/licenses>.
 */
package com.unboundid.ldap.sdk.unboundidds.tools;



import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.concurrent.atomic.AtomicLong;
import java.util.zip.GZIPOutputStream;

import com.unboundid.ldap.sdk.Filter;
import com.unboundid.ldap.sdk.LDAPException;
import com.unboundid.ldap.sdk.ResultCode;
import com.unboundid.ldap.sdk.Version;
import com.unboundid.ldap.sdk.schema.Schema;
import com.unboundid.ldif.LDIFException;
import com.unboundid.ldif.LDIFReader;
import com.unboundid.util.ByteStringBuffer;
import com.unboundid.util.CommandLineTool;
import com.unboundid.util.Debug;
import com.unboundid.util.ObjectPair;
import com.unboundid.util.PassphraseEncryptedOutputStream;
import com.unboundid.util.StaticUtils;
import com.unboundid.util.ThreadSafety;
import com.unboundid.util.ThreadSafetyLevel;
import com.unboundid.util.args.ArgumentException;
import com.unboundid.util.args.ArgumentParser;
import com.unboundid.util.args.BooleanArgument;
import com.unboundid.util.args.DNArgument;
import com.unboundid.util.args.FileArgument;
import com.unboundid.util.args.FilterArgument;
import com.unboundid.util.args.IntegerArgument;
import com.unboundid.util.args.StringArgument;
import com.unboundid.util.args.SubCommand;

import static com.unboundid.ldap.sdk.unboundidds.tools.ToolMessages.*;



/**
 * This class provides a command-line tool that can be used to split an LDIF
 * file below a specified base DN.  This can be used to help initialize an
 * entry-balancing deployment for use with the Directory Proxy Server.
 * <BR>
 * <BLOCKQUOTE>
 *   <B>NOTE:</B>  This class, and other classes within the
 *   {@code com.unboundid.ldap.sdk.unboundidds} package structure, are only
 *   supported for use against Ping Identity, UnboundID, and Alcatel-Lucent
 *   8661 server products.  These classes provide support for proprietary
 *   functionality or for external specifications that are not considered
 *   stable or mature enough to be guaranteed to work in an interoperable way
 *   with other types of LDAP servers.
 * </BLOCKQUOTE>
 * <BR>
 * It supports a number of algorithms for determining how to split the data,
 * including:
 * <UL>
 *   <LI>
 *     split-using-hash-on-rdn -- The tool will compute a digest of the DN
 *     component that is immediately below the split base DN, and will use a
 *     modulus to select a backend set for a given entry.  Since the split is
 *     based purely on computation involving the DN, there is no need for
 *     caching to ensure that children are placed in the same sets as their
 *     parent, which allows it to run effectively with a small memory
 *     footprint.
 *   </LI>
 *   <LI>
 *     split-using-hash-on-attribute -- The tool will compute a digest of the
 *     value(s) of a specified attribute, and will use a modulus to select a
 *     backend set for a given entry.  This hash will only be computed for
 *     entries immediately below the split base DN, and a cache will be used
 *     to ensure that entries more than one level below the split base DN are
 *     placed in the same backend set as their parent.
 *   </LI>
 *   <LI>
 *     split-using-fewest-entries -- When examining an entry immediately below
 *     the split base DN, the tool will place that entry in the set that has
 *     the fewest entries.  For flat DITs in which entries only exist one
 *     level below the split base DN, this will effectively ensure a
 *     round-robin distribution.  But for cases in which there are branches of
 *     varying sizes below the split base DN, this can help ensure that
 *     entries are more evenly distributed across backend sets.  A cache will
 *     be used to ensure that entries more than one level below the split base
 *     DN are placed in the same backend set as their parent.
 *   </LI>
 *   <LI>
 *     split-using-filter -- When examining an entry immediately below the
 *     split base DN, a series of filters will be evaluated against that
 *     entry, with each filter associated with a specific backend set.  If an
 *     entry doesn't match any of the provided filters, an RDN hash can be
 *     used to select the set.  A cache will be used to ensure that entries
 *     more than one level below the split base DN are placed in the same
 *     backend set as their parent.
 *   </LI>
 * </UL>
 */
@ThreadSafety(level=ThreadSafetyLevel.NOT_THREADSAFE)
public final class SplitLDIF
       extends CommandLineTool
{
  /**
   * The maximum length of any message to write to standard output or standard
   * error.
   */
  private static final int MAX_OUTPUT_LINE_LENGTH =
       StaticUtils.TERMINAL_WIDTH_COLUMNS - 1;



  // The global arguments used by this tool.
  private BooleanArgument addEntriesOutsideSplitBaseDNToAllSets = null;
  private BooleanArgument addEntriesOutsideSplitBaseDNToDedicatedSet = null;
  private BooleanArgument compressTarget = null;
  private BooleanArgument encryptTarget = null;
  private BooleanArgument sourceCompressed = null;
  private DNArgument splitBaseDN = null;
  private FileArgument encryptionPassphraseFile = null;
  private FileArgument schemaPath = null;
  private FileArgument sourceLDIF = null;
  private FileArgument targetLDIFBasePath = null;
  private IntegerArgument numThreads = null;

  // The arguments used to split using a hash of the RDN.
  private IntegerArgument splitUsingHashOnRDNNumSets = null;
  private SubCommand splitUsingHashOnRDN = null;

  // The arguments used to split using a hash on a specified attribute.
  private BooleanArgument splitUsingHashOnAttributeAssumeFlatDIT = null;
  private BooleanArgument splitUsingHashOnAttributeUseAllValues = null;
  private IntegerArgument splitUsingHashOnAttributeNumSets = null;
  private StringArgument splitUsingHashOnAttributeAttributeName = null;
  private SubCommand splitUsingHashOnAttribute = null;

  // The arguments used to choose the set with the fewest entries.
  private BooleanArgument splitUsingFewestEntriesAssumeFlatDIT = null;
  private IntegerArgument splitUsingFewestEntriesNumSets = null;
  private SubCommand splitUsingFewestEntries = null;

  // The arguments used to choose the set using a provided set of filters.
  private BooleanArgument splitUsingFilterAssumeFlatDIT = null;
  private FilterArgument splitUsingFilterFilter = null;
  private SubCommand splitUsingFilter = null;



  /**
   * Runs the tool with the provided set of command-line arguments.
   *
   * @param  args  The command-line arguments provided to this tool.
   */
  public static void main(final String... args)
  {
    final ResultCode resultCode = main(System.out, System.err, args);
    if (resultCode != ResultCode.SUCCESS)
    {
      System.exit(resultCode.intValue());
    }
  }



  /**
   * Runs the tool with the provided set of command-line arguments.
   *
   * @param  out   The output stream used for standard output.  It may be
   *               {@code null} if standard output should be suppressed.
   * @param  err   The output stream used for standard error.  It may be
   *               {@code null} if standard error should be suppressed.
   * @param  args  The command-line arguments provided to this tool.
   *
   * @return  A result code with information about the processing performed.
   *          Any result code other than {@link ResultCode#SUCCESS} indicates
   *          that an error occurred.
   */
  public static ResultCode main(final OutputStream out, final OutputStream err,
                                final String... args)
  {
    final SplitLDIF tool = new SplitLDIF(out, err);
    return tool.runTool(args);
  }



  /**
   * Creates a new instance of this tool with the provided information.
   *
   * @param  out  The output stream used for standard output.  It may be
   *              {@code null} if standard output should be suppressed.
   * @param  err  The output stream used for standard error.  It may be
   *              {@code null} if standard error should be suppressed.
   */
  public SplitLDIF(final OutputStream out, final OutputStream err)
  {
    super(out, err);
  }



  /**
   * {@inheritDoc}
   */
  @Override()
  public String getToolName()
  {
    return "split-ldif";
  }



  /**
   * {@inheritDoc}
   */
  @Override()
  public String getToolDescription()
  {
    return INFO_SPLIT_LDIF_TOOL_DESCRIPTION.get();
  }



  /**
   * {@inheritDoc}
   */
  @Override()
  public String getToolVersion()
  {
    return Version.NUMERIC_VERSION_STRING;
  }



  /**
   * {@inheritDoc}
   */
  @Override()
  public boolean supportsInteractiveMode()
  {
    return true;
  }



  /**
   * {@inheritDoc}
   */
  @Override()
  public boolean defaultsToInteractiveMode()
  {
    return true;
  }



  /**
   * {@inheritDoc}
   */
  @Override()
  public boolean supportsPropertiesFile()
  {
    return true;
  }



  /**
   * {@inheritDoc}
   */
  @Override()
  public void addToolArguments(final ArgumentParser parser)
         throws ArgumentException
  {
    // Add the global arguments.
    sourceLDIF = new FileArgument('l', "sourceLDIF", true, 0, null,
         INFO_SPLIT_LDIF_GLOBAL_ARG_DESC_SOURCE_LDIF.get(), true, false, true,
         false);
    sourceLDIF.addLongIdentifier("inputLDIF", true);
    sourceLDIF.addLongIdentifier("source-ldif", true);
    sourceLDIF.addLongIdentifier("input-ldif", true);
    parser.addArgument(sourceLDIF);

    sourceCompressed = new BooleanArgument('C', "sourceCompressed",
         INFO_SPLIT_LDIF_GLOBAL_ARG_DESC_SOURCE_COMPRESSED.get());
    sourceCompressed.addLongIdentifier("inputCompressed", true);
    sourceCompressed.addLongIdentifier("source-compressed", true);
    sourceCompressed.addLongIdentifier("input-compressed", true);
    parser.addArgument(sourceCompressed);

    targetLDIFBasePath = new FileArgument('o', "targetLDIFBasePath", false, 1,
         null, INFO_SPLIT_LDIF_GLOBAL_ARG_DESC_TARGET_LDIF_BASE.get(), false,
         true, true, false);
    targetLDIFBasePath.addLongIdentifier("outputLDIFBasePath", true);
    targetLDIFBasePath.addLongIdentifier("target-ldif-base-path", true);
    targetLDIFBasePath.addLongIdentifier("output-ldif-base-path", true);
    parser.addArgument(targetLDIFBasePath);

    compressTarget = new BooleanArgument('c', "compressTarget",
         INFO_SPLIT_LDIF_GLOBAL_ARG_DESC_COMPRESS_TARGET.get());
    compressTarget.addLongIdentifier("compressOutput", true);
    compressTarget.addLongIdentifier("compress", true);
    compressTarget.addLongIdentifier("compress-target", true);
    compressTarget.addLongIdentifier("compress-output", true);
    parser.addArgument(compressTarget);

    encryptTarget = new BooleanArgument(null, "encryptTarget",
         INFO_SPLIT_LDIF_GLOBAL_ARG_DESC_ENCRYPT_TARGET.get());
    encryptTarget.addLongIdentifier("encryptOutput", true);
    encryptTarget.addLongIdentifier("encrypt", true);
    encryptTarget.addLongIdentifier("encrypt-target", true);
    encryptTarget.addLongIdentifier("encrypt-output", true);
    parser.addArgument(encryptTarget);

    encryptionPassphraseFile = new FileArgument(null,
         "encryptionPassphraseFile", false, 1, null,
         INFO_SPLIT_LDIF_GLOBAL_ARG_DESC_ENCRYPT_PW_FILE.get(), true, true,
         true, false);
    encryptionPassphraseFile.addLongIdentifier("encryptionPasswordFile", true);
    encryptionPassphraseFile.addLongIdentifier("encryption-passphrase-file",
         true);
    encryptionPassphraseFile.addLongIdentifier("encryption-password-file",
         true);
    parser.addArgument(encryptionPassphraseFile);

    splitBaseDN = new DNArgument('b', "splitBaseDN", true, 1, null,
         INFO_SPLIT_LDIF_GLOBAL_ARG_DESC_SPLIT_BASE_DN.get());
    splitBaseDN.addLongIdentifier("baseDN", true);
    splitBaseDN.addLongIdentifier("split-base-dn", true);
    splitBaseDN.addLongIdentifier("base-dn", true);
    parser.addArgument(splitBaseDN);

    addEntriesOutsideSplitBaseDNToAllSets = new BooleanArgument(null,
         "addEntriesOutsideSplitBaseDNToAllSets", 1,
         INFO_SPLIT_LDIF_GLOBAL_ARG_DESC_OUTSIDE_TO_ALL_SETS.get());
    addEntriesOutsideSplitBaseDNToAllSets.addLongIdentifier(
         "add-entries-outside-split-base-dn-to-all-sets", true);
    parser.addArgument(addEntriesOutsideSplitBaseDNToAllSets);

    addEntriesOutsideSplitBaseDNToDedicatedSet = new BooleanArgument(null,
         "addEntriesOutsideSplitBaseDNToDedicatedSet", 1,
         INFO_SPLIT_LDIF_GLOBAL_ARG_DESC_OUTSIDE_TO_DEDICATED_SET.get());
    addEntriesOutsideSplitBaseDNToDedicatedSet.addLongIdentifier(
         "add-entries-outside-split-base-dn-to-dedicated-set", true);
    parser.addArgument(addEntriesOutsideSplitBaseDNToDedicatedSet);

    schemaPath = new FileArgument(null, "schemaPath", false, 0, null,
         INFO_SPLIT_LDIF_GLOBAL_ARG_DESC_SCHEMA_PATH.get(), true, false, false,
         false);
    schemaPath.addLongIdentifier("schemaFile", true);
    schemaPath.addLongIdentifier("schemaDirectory", true);
    schemaPath.addLongIdentifier("schema-path", true);
    schemaPath.addLongIdentifier("schema-file", true);
    schemaPath.addLongIdentifier("schema-directory", true);
    parser.addArgument(schemaPath);

    numThreads = new IntegerArgument('t', "numThreads", false, 1, null,
         INFO_SPLIT_LDIF_GLOBAL_ARG_DESC_NUM_THREADS.get(), 1,
         Integer.MAX_VALUE, 1);
    numThreads.addLongIdentifier("num-threads", true);
    parser.addArgument(numThreads);


    // Add the subcommand used to split entries using a hash on the RDN.
    final ArgumentParser splitUsingHashOnRDNParser = new ArgumentParser(
         "split-using-hash-on-rdn", INFO_SPLIT_LDIF_SC_HASH_ON_RDN_DESC.get());

    splitUsingHashOnRDNNumSets = new IntegerArgument(null, "numSets", true, 1,
         null, INFO_SPLIT_LDIF_SC_HASH_ON_RDN_ARG_DESC_NUM_SETS.get(), 2,
         Integer.MAX_VALUE);
    splitUsingHashOnRDNNumSets.addLongIdentifier("num-sets", true);
    splitUsingHashOnRDNParser.addArgument(splitUsingHashOnRDNNumSets);

    final LinkedHashMap<String[],String> splitUsingHashOnRDNExamples =
         new LinkedHashMap<String[],String>(1);
    splitUsingHashOnRDNExamples.put(
         new String[]
         {
           "split-using-hash-on-rdn",
           "--sourceLDIF", "whole.ldif",
           "--targetLDIFBasePath", "split.ldif",
           "--splitBaseDN", "ou=People,dc=example,dc=com",
           "--numSets", "4",
           "--schemaPath", "config/schema",
           "--addEntriesOutsideSplitBaseDNToAllSets"
         },
         INFO_SPLIT_LDIF_SC_HASH_ON_RDN_EXAMPLE.get());

    splitUsingHashOnRDN = new SubCommand("split-using-hash-on-rdn",
         INFO_SPLIT_LDIF_SC_HASH_ON_RDN_DESC.get(), splitUsingHashOnRDNParser,
         splitUsingHashOnRDNExamples);
    splitUsingHashOnRDN.addName("hash-on-rdn", true);

    parser.addSubCommand(splitUsingHashOnRDN);


    // Add the subcommand used to split entries using a hash on a specified
    // attribute.
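    // Note that this hash is only computed for entries immediately below the
    // split base DN; entries further below must be matched to their parent's
    // set (hence the assumeFlatDIT argument for DITs in which no such entries
    // are expected).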
    final ArgumentParser splitUsingHashOnAttributeParser = new ArgumentParser(
         "split-using-hash-on-attribute",
         INFO_SPLIT_LDIF_SC_HASH_ON_ATTR_DESC.get());

    splitUsingHashOnAttributeAttributeName = new StringArgument(null,
         "attributeName", true, 1, "{attr}",
         INFO_SPLIT_LDIF_SC_HASH_ON_ATTR_ARG_DESC_ATTR_NAME.get());
    splitUsingHashOnAttributeAttributeName.addLongIdentifier("attribute-name",
         true);
    splitUsingHashOnAttributeParser.addArgument(
         splitUsingHashOnAttributeAttributeName);

    splitUsingHashOnAttributeNumSets = new IntegerArgument(null, "numSets",
         true, 1, null, INFO_SPLIT_LDIF_SC_HASH_ON_ATTR_ARG_DESC_NUM_SETS.get(),
         2, Integer.MAX_VALUE);
    splitUsingHashOnAttributeNumSets.addLongIdentifier("num-sets", true);
    splitUsingHashOnAttributeParser.addArgument(
         splitUsingHashOnAttributeNumSets);

    splitUsingHashOnAttributeUseAllValues = new BooleanArgument(null,
         "useAllValues", 1,
         INFO_SPLIT_LDIF_SC_HASH_ON_ATTR_ARG_DESC_ALL_VALUES.get());
    splitUsingHashOnAttributeUseAllValues.addLongIdentifier("use-all-values",
         true);
    splitUsingHashOnAttributeParser.addArgument(
         splitUsingHashOnAttributeUseAllValues);

    splitUsingHashOnAttributeAssumeFlatDIT = new BooleanArgument(null,
         "assumeFlatDIT", 1,
         INFO_SPLIT_LDIF_SC_HASH_ON_ATTR_ARG_DESC_ASSUME_FLAT_DIT.get());
    splitUsingHashOnAttributeAssumeFlatDIT.addLongIdentifier("assume-flat-dit",
         true);
    splitUsingHashOnAttributeParser.addArgument(
         splitUsingHashOnAttributeAssumeFlatDIT);

    final LinkedHashMap<String[],String> splitUsingHashOnAttributeExamples =
         new LinkedHashMap<String[],String>(1);
    splitUsingHashOnAttributeExamples.put(
         new String[]
         {
           "split-using-hash-on-attribute",
           "--sourceLDIF", "whole.ldif",
           "--targetLDIFBasePath", "split.ldif",
           "--splitBaseDN", "ou=People,dc=example,dc=com",
           "--attributeName", "uid",
           "--numSets", "4",
           "--schemaPath", "config/schema",
           "--addEntriesOutsideSplitBaseDNToAllSets"
         },
         INFO_SPLIT_LDIF_SC_HASH_ON_ATTR_EXAMPLE.get());

    splitUsingHashOnAttribute = new SubCommand("split-using-hash-on-attribute",
         INFO_SPLIT_LDIF_SC_HASH_ON_ATTR_DESC.get(),
         splitUsingHashOnAttributeParser, splitUsingHashOnAttributeExamples);
    splitUsingHashOnAttribute.addName("hash-on-attribute", true);

    parser.addSubCommand(splitUsingHashOnAttribute);


    // Add the subcommand used to split entries by selecting the set with the
    // fewest entries.
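    // For a completely flat DIT this amounts to a round-robin distribution;
    // for DITs with branches of varying sizes, it helps keep the total number
    // of entries in each set even.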
    final ArgumentParser splitUsingFewestEntriesParser = new ArgumentParser(
         "split-using-fewest-entries",
         INFO_SPLIT_LDIF_SC_FEWEST_ENTRIES_DESC.get());

    splitUsingFewestEntriesNumSets = new IntegerArgument(null, "numSets",
         true, 1, null,
         INFO_SPLIT_LDIF_SC_FEWEST_ENTRIES_ARG_DESC_NUM_SETS.get(),
         2, Integer.MAX_VALUE);
    splitUsingFewestEntriesNumSets.addLongIdentifier("num-sets", true);
    splitUsingFewestEntriesParser.addArgument(splitUsingFewestEntriesNumSets);

    splitUsingFewestEntriesAssumeFlatDIT = new BooleanArgument(null,
         "assumeFlatDIT", 1,
         INFO_SPLIT_LDIF_SC_FEWEST_ENTRIES_ARG_DESC_ASSUME_FLAT_DIT.get());
    splitUsingFewestEntriesAssumeFlatDIT.addLongIdentifier("assume-flat-dit",
         true);
    splitUsingFewestEntriesParser.addArgument(
         splitUsingFewestEntriesAssumeFlatDIT);

    final LinkedHashMap<String[],String> splitUsingFewestEntriesExamples =
         new LinkedHashMap<String[],String>(1);
    splitUsingFewestEntriesExamples.put(
         new String[]
         {
           "split-using-fewest-entries",
           "--sourceLDIF", "whole.ldif",
           "--targetLDIFBasePath", "split.ldif",
           "--splitBaseDN", "ou=People,dc=example,dc=com",
           "--numSets", "4",
           "--schemaPath", "config/schema",
           "--addEntriesOutsideSplitBaseDNToAllSets"
         },
         INFO_SPLIT_LDIF_SC_FEWEST_ENTRIES_EXAMPLE.get());

    splitUsingFewestEntries = new SubCommand("split-using-fewest-entries",
         INFO_SPLIT_LDIF_SC_FEWEST_ENTRIES_DESC.get(),
         splitUsingFewestEntriesParser, splitUsingFewestEntriesExamples);
    splitUsingFewestEntries.addName("fewest-entries", true);

    parser.addSubCommand(splitUsingFewestEntries);


    // Add the subcommand used to split entries by selecting the set based on a
    // filter.
    final ArgumentParser splitUsingFilterParser = new ArgumentParser(
         "split-using-filter", INFO_SPLIT_LDIF_SC_FILTER_DESC.get());

    splitUsingFilterFilter = new FilterArgument(null, "filter", true, 0, null,
         INFO_SPLIT_LDIF_SC_FILTER_ARG_DESC_FILTER.get());
    splitUsingFilterParser.addArgument(splitUsingFilterFilter);

    splitUsingFilterAssumeFlatDIT = new BooleanArgument(null, "assumeFlatDIT",
         1, INFO_SPLIT_LDIF_SC_FILTER_ARG_DESC_ASSUME_FLAT_DIT.get());
    splitUsingFilterAssumeFlatDIT.addLongIdentifier("assume-flat-dit", true);
    splitUsingFilterParser.addArgument(splitUsingFilterAssumeFlatDIT);

    final LinkedHashMap<String[],String> splitUsingFilterExamples =
         new LinkedHashMap<String[],String>(1);
    splitUsingFilterExamples.put(
         new String[]
         {
           "split-using-filter",
           "--sourceLDIF", "whole.ldif",
           "--targetLDIFBasePath", "split.ldif",
           "--splitBaseDN", "ou=People,dc=example,dc=com",
           "--filter", "(timeZone=Eastern)",
           "--filter", "(timeZone=Central)",
           "--filter", "(timeZone=Mountain)",
           "--filter", "(timeZone=Pacific)",
           "--schemaPath", "config/schema",
           "--addEntriesOutsideSplitBaseDNToAllSets"
         },
         INFO_SPLIT_LDIF_SC_FILTER_EXAMPLE.get());

    splitUsingFilter = new SubCommand("split-using-filter",
         INFO_SPLIT_LDIF_SC_FILTER_DESC.get(),
         splitUsingFilterParser, splitUsingFilterExamples);
    splitUsingFilter.addName("filter", true);

    parser.addSubCommand(splitUsingFilter);
  }



  /**
   * {@inheritDoc}
   */
  @Override()
  public void doExtendedArgumentValidation()
         throws ArgumentException
  {
    // If multiple sourceLDIF values were provided, then a target LDIF base path
    // must have been given.
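    // (Without an explicit target base path, output file names are derived
    // from the source LDIF path, which is only unambiguous for a single
    // source file; see getOutputFile().)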
    final List<File> sourceLDIFValues = sourceLDIF.getValues();
    if (sourceLDIFValues.size() > 1)
    {
      if (! targetLDIFBasePath.isPresent())
      {
        throw new ArgumentException(ERR_SPLIT_LDIF_NO_TARGET_BASE_PATH.get(
             sourceLDIF.getIdentifierString(),
             targetLDIFBasePath.getIdentifierString()));
      }
    }


    // If the split-using-filter subcommand was provided, then at least two
    // filters must have been provided, and none of the filters can be
    // logically equivalent to any of the others.
    if (splitUsingFilter.isPresent())
    {
      final List<Filter> filterList = splitUsingFilterFilter.getValues();
      final Set<Filter> filterSet =
           new LinkedHashSet<Filter>(filterList.size());
      for (final Filter f : filterList)
      {
        if (filterSet.contains(f))
        {
          throw new ArgumentException(ERR_SPLIT_LDIF_NON_UNIQUE_FILTER.get(
               splitUsingFilterFilter.getIdentifierString(), f.toString()));
        }
        else
        {
          filterSet.add(f);
        }
      }

      if (filterSet.size() < 2)
      {
        throw new ArgumentException(ERR_SPLIT_LDIF_NOT_ENOUGH_FILTERS.get(
             splitUsingFilter.getPrimaryName(),
             splitUsingFilterFilter.getIdentifierString()));
      }
    }
  }



  /**
   * {@inheritDoc}
   */
  @Override()
  public ResultCode doToolProcessing()
  {
    // Get the schema to use during processing.
    final Schema schema;
    try
    {
      schema = getSchema();
    }
    catch (final LDAPException le)
    {
      wrapErr(0, MAX_OUTPUT_LINE_LENGTH, le.getMessage());
      return le.getResultCode();
    }


    // If an encryption passphrase file is provided, then get the passphrase
    // from it.
    String encryptionPassphrase = null;
    if (encryptionPassphraseFile.isPresent())
    {
      try
      {
        encryptionPassphrase = ToolUtils.readEncryptionPassphraseFromFile(
             encryptionPassphraseFile.getValue());
      }
      catch (final LDAPException e)
      {
        Debug.debugException(e);
        wrapErr(0, MAX_OUTPUT_LINE_LENGTH, e.getMessage());
        return e.getResultCode();
      }
    }


    // Figure out which subcommand was selected, and create the appropriate
    // translator to use to perform the processing.
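    // Each translator implements one of the split algorithms described in the
    // class-level javadoc, and is invoked by the LDIF reader (potentially from
    // multiple parse threads) to tag each entry with its target set(s).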
    final SplitLDIFTranslator translator;
    if (splitUsingHashOnRDN.isPresent())
    {
      translator = new SplitLDIFRDNHashTranslator(splitBaseDN.getValue(),
           splitUsingHashOnRDNNumSets.getValue(),
           addEntriesOutsideSplitBaseDNToAllSets.isPresent(),
           addEntriesOutsideSplitBaseDNToDedicatedSet.isPresent());
    }
    else if (splitUsingHashOnAttribute.isPresent())
    {
      translator = new SplitLDIFAttributeHashTranslator(splitBaseDN.getValue(),
           splitUsingHashOnAttributeNumSets.getValue(),
           splitUsingHashOnAttributeAttributeName.getValue(),
           splitUsingHashOnAttributeUseAllValues.isPresent(),
           splitUsingHashOnAttributeAssumeFlatDIT.isPresent(),
           addEntriesOutsideSplitBaseDNToAllSets.isPresent(),
           addEntriesOutsideSplitBaseDNToDedicatedSet.isPresent());
    }
    else if (splitUsingFewestEntries.isPresent())
    {
      translator = new SplitLDIFFewestEntriesTranslator(splitBaseDN.getValue(),
           splitUsingFewestEntriesNumSets.getValue(),
           splitUsingFewestEntriesAssumeFlatDIT.isPresent(),
           addEntriesOutsideSplitBaseDNToAllSets.isPresent(),
           addEntriesOutsideSplitBaseDNToDedicatedSet.isPresent());
    }
    else if (splitUsingFilter.isPresent())
    {
      final List<Filter> filterList = splitUsingFilterFilter.getValues();
      final LinkedHashSet<Filter> filterSet =
           new LinkedHashSet<Filter>(filterList.size());
      for (final Filter f : filterList)
      {
        filterSet.add(f);
      }

      translator = new SplitLDIFFilterTranslator(splitBaseDN.getValue(),
           schema, filterSet, splitUsingFilterAssumeFlatDIT.isPresent(),
           addEntriesOutsideSplitBaseDNToAllSets.isPresent(),
           addEntriesOutsideSplitBaseDNToDedicatedSet.isPresent());
    }
    else
    {
      // This should never happen.
      wrapErr(0, MAX_OUTPUT_LINE_LENGTH,
           ERR_SPLIT_LDIF_CANNOT_DETERMINE_SPLIT_ALGORITHM.get(
                splitUsingHashOnRDN.getPrimaryName() + ", " +
                splitUsingHashOnAttribute.getPrimaryName() + ", " +
                splitUsingFewestEntries.getPrimaryName() + ", " +
                splitUsingFilter.getPrimaryName()));
      return ResultCode.PARAM_ERROR;
    }


    // Create the LDIF reader.
    final LDIFReader ldifReader;
    try
    {
      final InputStream inputStream;
      if (sourceLDIF.isPresent())
      {
        final ObjectPair<InputStream,String> p =
             ToolUtils.getInputStreamForLDIFFiles(sourceLDIF.getValues(),
                  encryptionPassphrase, getOut(), getErr());
        inputStream = p.getFirst();
        if ((encryptionPassphrase == null) && (p.getSecond() != null))
        {
          encryptionPassphrase = p.getSecond();
        }
      }
      else
      {
        inputStream = System.in;
      }

      ldifReader = new LDIFReader(inputStream, numThreads.getValue(),
           translator);
      if (schema != null)
      {
        ldifReader.setSchema(schema);
      }
    }
    catch (final Exception e)
    {
      Debug.debugException(e);
      wrapErr(0, MAX_OUTPUT_LINE_LENGTH,
           ERR_SPLIT_LDIF_ERROR_CREATING_LDIF_READER.get(
                StaticUtils.getExceptionMessage(e)));
      return ResultCode.LOCAL_ERROR;
    }


    // Iterate through and process all of the entries.
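    // Output streams are opened lazily, one per set, the first time an entry
    // is routed to a given set; entries that cannot be placed in any set are
    // written to a dedicated error file instead.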
    ResultCode resultCode = ResultCode.SUCCESS;
    final LinkedHashMap<String,OutputStream> outputStreams =
         new LinkedHashMap<String,OutputStream>(10);
    try
    {
      final AtomicLong entriesRead = new AtomicLong(0L);
      final AtomicLong entriesExcluded = new AtomicLong(0L);
      final TreeMap<String,AtomicLong> fileCounts =
           new TreeMap<String,AtomicLong>();

readLoop:
      while (true)
      {
        final SplitLDIFEntry entry;
        try
        {
          entry = (SplitLDIFEntry) ldifReader.readEntry();
        }
        catch (final LDIFException le)
        {
          Debug.debugException(le);
          resultCode = ResultCode.LOCAL_ERROR;

          final File f = getOutputFile(SplitLDIFEntry.SET_NAME_ERRORS);
          OutputStream s = outputStreams.get(SplitLDIFEntry.SET_NAME_ERRORS);
          if (s == null)
          {
            try
            {
              s = new FileOutputStream(f);

              if (encryptTarget.isPresent())
              {
                if (encryptionPassphrase == null)
                {
                  try
                  {
                    encryptionPassphrase =
                         ToolUtils.promptForEncryptionPassphrase(false, true,
                              getOut(), getErr());
                  }
                  catch (final LDAPException ex)
                  {
                    Debug.debugException(ex);
                    wrapErr(0, MAX_OUTPUT_LINE_LENGTH, ex.getMessage());
                    return ex.getResultCode();
                  }
                }

                s = new PassphraseEncryptedOutputStream(encryptionPassphrase,
                     s);
              }

              if (compressTarget.isPresent())
              {
                s = new GZIPOutputStream(s);
              }

              outputStreams.put(SplitLDIFEntry.SET_NAME_ERRORS, s);
              fileCounts.put(SplitLDIFEntry.SET_NAME_ERRORS,
                   new AtomicLong(0L));
            }
            catch (final Exception e)
            {
              Debug.debugException(e);
              resultCode = ResultCode.LOCAL_ERROR;
              wrapErr(0, MAX_OUTPUT_LINE_LENGTH,
                   ERR_SPLIT_LDIF_CANNOT_OPEN_OUTPUT_FILE.get(
                        f.getAbsolutePath(),
                        StaticUtils.getExceptionMessage(e)));
              break readLoop;
            }
          }

          final ByteStringBuffer buffer = new ByteStringBuffer();
          buffer.append("# ");
          buffer.append(le.getMessage());
          buffer.append(StaticUtils.EOL_BYTES);

          final List<String> dataLines = le.getDataLines();
          if (dataLines != null)
          {
            for (final String dataLine : dataLines)
            {
              buffer.append(dataLine);
              buffer.append(StaticUtils.EOL_BYTES);
            }
          }

          buffer.append(StaticUtils.EOL_BYTES);

          try
          {
            s.write(buffer.toByteArray());
          }
          catch (final Exception e)
          {
            Debug.debugException(e);
            resultCode = ResultCode.LOCAL_ERROR;
            wrapErr(0, MAX_OUTPUT_LINE_LENGTH,
                 ERR_SPLIT_LDIF_ERROR_WRITING_ERROR_TO_FILE.get(
                      le.getMessage(), f.getAbsolutePath(),
                      StaticUtils.getExceptionMessage(e)));
            break readLoop;
          }

          if (le.mayContinueReading())
          {
            wrapErr(0, MAX_OUTPUT_LINE_LENGTH,
                 ERR_SPLIT_LDIF_INVALID_LDIF_RECORD_RECOVERABLE.get(
                      StaticUtils.getExceptionMessage(le)));
            continue;
          }
          else
          {
            wrapErr(0, MAX_OUTPUT_LINE_LENGTH,
                 ERR_SPLIT_LDIF_INVALID_LDIF_RECORD_UNRECOVERABLE.get(
                      StaticUtils.getExceptionMessage(le)));
            break;
          }
        }
        catch (final IOException ioe)
        {
          Debug.debugException(ioe);
          resultCode = ResultCode.LOCAL_ERROR;
          wrapErr(0, MAX_OUTPUT_LINE_LENGTH,
               ERR_SPLIT_LDIF_IO_READ_ERROR.get(
                    StaticUtils.getExceptionMessage(ioe)));
          break;
        }
        catch (final Exception e)
        {
          Debug.debugException(e);
          resultCode = ResultCode.LOCAL_ERROR;
          wrapErr(0, MAX_OUTPUT_LINE_LENGTH,
               ERR_SPLIT_LDIF_UNEXPECTED_READ_ERROR.get(
                    StaticUtils.getExceptionMessage(e)));
          break;
        }

        if (entry == null)
        {
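          // A null return from readEntry indicates that the end of the LDIF
          // source has been reached.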
          break;
        }

        final long readCount = entriesRead.incrementAndGet();
        if ((readCount % 1000L) == 0)
        {
          // Even though we aren't done with this entry yet, we'll go ahead and
          // log a progress message now because it's easier to do that now than
          // to ensure that it's handled properly through all possible error
          // conditions that need to be handled below.
          wrapOut(0, MAX_OUTPUT_LINE_LENGTH,
               INFO_SPLIT_LDIF_PROGRESS.get(readCount));
        }


        // Get the set(s) to which the entry should be written.  If this is
        // null (which could be the case as a result of a race condition when
        // using multiple threads where processing for a child completes before
        // processing for its parent, or as a result of a case in which a
        // child is included without or before its parent), then try to see if
        // we can get the sets by passing the entry through the translator.
        Set<String> sets = entry.getSets();
        byte[] ldifBytes = entry.getLDIFBytes();
        if (sets == null)
        {
          try
          {
            sets = translator.translate(entry, 0L).getSets();
          }
          catch (final Exception e)
          {
            Debug.debugException(e);
          }

          if (sets == null)
          {
            final SplitLDIFEntry errorEntry = translator.createEntry(entry,
                 ERR_SPLIT_LDIF_ENTRY_WITHOUT_PARENT.get(
                      entry.getDN(), splitBaseDN.getStringValue()),
                 Collections.singleton(SplitLDIFEntry.SET_NAME_ERRORS));
            ldifBytes = errorEntry.getLDIFBytes();
            sets = errorEntry.getSets();
          }
        }


        // If the entry shouldn't be written into any sets, then we don't need
        // to do anything else.
        if (sets.isEmpty())
        {
          entriesExcluded.incrementAndGet();
          continue;
        }


        // Write the entry into each of the target sets, creating the output
        // files if necessary.
        for (final String set : sets)
        {
          if (set.equals(SplitLDIFEntry.SET_NAME_ERRORS))
          {
            // This indicates that an error was encountered during processing,
            // so we'll update the result code to reflect that.
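            // Processing still continues, though, so that the remaining
            // entries are distributed and the final summary is complete.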
            resultCode = ResultCode.LOCAL_ERROR;
          }

          final File f = getOutputFile(set);
          OutputStream s = outputStreams.get(set);
          if (s == null)
          {
            try
            {
              s = new FileOutputStream(f);

              if (encryptTarget.isPresent())
              {
                if (encryptionPassphrase == null)
                {
                  try
                  {
                    encryptionPassphrase =
                         ToolUtils.promptForEncryptionPassphrase(false, true,
                              getOut(), getErr());
                  }
                  catch (final LDAPException ex)
                  {
                    Debug.debugException(ex);
                    wrapErr(0, MAX_OUTPUT_LINE_LENGTH, ex.getMessage());
                    return ex.getResultCode();
                  }
                }

                s = new PassphraseEncryptedOutputStream(encryptionPassphrase,
                     s);
              }

              if (compressTarget.isPresent())
              {
                s = new GZIPOutputStream(s);
              }

              outputStreams.put(set, s);
              fileCounts.put(set, new AtomicLong(0L));
            }
            catch (final Exception e)
            {
              Debug.debugException(e);
              resultCode = ResultCode.LOCAL_ERROR;
              wrapErr(0, MAX_OUTPUT_LINE_LENGTH,
                   ERR_SPLIT_LDIF_CANNOT_OPEN_OUTPUT_FILE.get(
                        f.getAbsolutePath(),
                        StaticUtils.getExceptionMessage(e)));
              break readLoop;
            }
          }

          try
          {
            s.write(ldifBytes);
          }
          catch (final Exception e)
          {
            Debug.debugException(e);
            resultCode = ResultCode.LOCAL_ERROR;
            wrapErr(0, MAX_OUTPUT_LINE_LENGTH,
                 ERR_SPLIT_LDIF_ERROR_WRITING_TO_FILE.get(
                      entry.getDN(), f.getAbsolutePath(),
                      StaticUtils.getExceptionMessage(e)));
            break readLoop;
          }

          fileCounts.get(set).incrementAndGet();
        }
      }


      // Processing is complete.  Summarize the processing that was performed.
      final long finalReadCount = entriesRead.get();
      if (finalReadCount > 1000L)
      {
        out();
      }

      wrapOut(0, MAX_OUTPUT_LINE_LENGTH,
           INFO_SPLIT_LDIF_PROCESSING_COMPLETE.get(finalReadCount));

      final long excludedCount = entriesExcluded.get();
      if (excludedCount > 0L)
      {
        wrapOut(0, MAX_OUTPUT_LINE_LENGTH,
             INFO_SPLIT_LDIF_EXCLUDED_COUNT.get(excludedCount));
      }

      for (final Map.Entry<String,AtomicLong> e : fileCounts.entrySet())
      {
        final File f = getOutputFile(e.getKey());
        wrapOut(0, MAX_OUTPUT_LINE_LENGTH,
             INFO_SPLIT_LDIF_COUNT_TO_FILE.get(e.getValue().get(),
                  f.getName()));
      }
    }
    finally
    {
      try
      {
        ldifReader.close();
      }
      catch (final Exception e)
      {
        Debug.debugException(e);
      }

      for (final Map.Entry<String,OutputStream> e : outputStreams.entrySet())
      {
        try
        {
          e.getValue().close();
        }
        catch (final Exception ex)
        {
          Debug.debugException(ex);
          resultCode = ResultCode.LOCAL_ERROR;
          wrapErr(0, MAX_OUTPUT_LINE_LENGTH,
               ERR_SPLIT_LDIF_ERROR_CLOSING_FILE.get(
                    getOutputFile(e.getKey()),
                    StaticUtils.getExceptionMessage(ex)));
        }
      }
    }

    return resultCode;
  }



  /**
   * Retrieves the schema that should be used for processing.
   *
   * @return  The schema that should be used for processing, or {@code null}
   *          if the tool should fall back to the default standard schema.
   *
   * @throws  LDAPException  If a problem is encountered while retrieving the
   *                         schema.
   */
  private Schema getSchema()
          throws LDAPException
  {
    // If any schema paths were specified, then load the schema only from those
    // paths.
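    // Each path may be either an individual schema file or a directory;
    // directories are expanded to the ".ldif" files they contain, in
    // lexicographic order by file name.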
    if (schemaPath.isPresent())
    {
      final ArrayList<File> schemaFiles = new ArrayList<File>(10);
      for (final File path : schemaPath.getValues())
      {
        if (path.isFile())
        {
          schemaFiles.add(path);
        }
        else
        {
          final TreeMap<String,File> fileMap = new TreeMap<String,File>();
          for (final File schemaDirFile : path.listFiles())
          {
            final String name = schemaDirFile.getName();
            if (schemaDirFile.isFile() && name.toLowerCase().endsWith(".ldif"))
            {
              fileMap.put(name, schemaDirFile);
            }
          }
          schemaFiles.addAll(fileMap.values());
        }
      }

      if (schemaFiles.isEmpty())
      {
        throw new LDAPException(ResultCode.PARAM_ERROR,
             ERR_SPLIT_LDIF_NO_SCHEMA_FILES.get(
                  schemaPath.getIdentifierString()));
      }
      else
      {
        try
        {
          return Schema.getSchema(schemaFiles);
        }
        catch (final Exception e)
        {
          Debug.debugException(e);
          throw new LDAPException(ResultCode.LOCAL_ERROR,
               ERR_SPLIT_LDIF_ERROR_LOADING_SCHEMA.get(
                    StaticUtils.getExceptionMessage(e)));
        }
      }
    }
    else
    {
      // If the INSTANCE_ROOT environment variable is set and it refers to a
      // directory that has a config/schema subdirectory that has one or more
      // schema files in it, then read the schema from that directory.
      try
      {
        final String instanceRootStr = System.getenv("INSTANCE_ROOT");
        if (instanceRootStr != null)
        {
          final File instanceRoot = new File(instanceRootStr);
          final File configDir = new File(instanceRoot, "config");
          final File schemaDir = new File(configDir, "schema");
          if (schemaDir.exists())
          {
            final TreeMap<String,File> fileMap = new TreeMap<String,File>();
            for (final File schemaDirFile : schemaDir.listFiles())
            {
              final String name = schemaDirFile.getName();
              if (schemaDirFile.isFile() &&
                  name.toLowerCase().endsWith(".ldif"))
              {
                fileMap.put(name, schemaDirFile);
              }
            }

            if (! fileMap.isEmpty())
            {
              return Schema.getSchema(new ArrayList<File>(fileMap.values()));
            }
          }
        }
      }
      catch (final Exception e)
      {
        Debug.debugException(e);
      }
    }


    // If we've gotten here, then just return null and the tool will try to use
    // the default standard schema.
    return null;
  }



  /**
   * Retrieves a file object that refers to an output file with the provided
   * extension.
   *
   * @param  extension  The extension to use for the file.
   *
   * @return  A file object that refers to an output file with the provided
   *          extension.
   */
  private File getOutputFile(final String extension)
  {
    final File baseFile;
    if (targetLDIFBasePath.isPresent())
    {
      baseFile = targetLDIFBasePath.getValue();
    }
    else
    {
      baseFile = sourceLDIF.getValue();
    }

    return new File(baseFile.getAbsolutePath() + extension);
  }



  /**
   * {@inheritDoc}
   */
  @Override()
  public LinkedHashMap<String[],String> getExampleUsages()
  {
    final LinkedHashMap<String[],String> exampleMap =
         new LinkedHashMap<String[],String>(4);

    for (final Map.Entry<String[],String> e :
         splitUsingHashOnRDN.getExampleUsages().entrySet())
    {
      exampleMap.put(e.getKey(), e.getValue());
    }

    for (final Map.Entry<String[],String> e :
         splitUsingHashOnAttribute.getExampleUsages().entrySet())
    {
      exampleMap.put(e.getKey(), e.getValue());
    }

    for (final Map.Entry<String[],String> e :
         splitUsingFewestEntries.getExampleUsages().entrySet())
    {
      exampleMap.put(e.getKey(), e.getValue());
    }

    for (final Map.Entry<String[],String> e :
         splitUsingFilter.getExampleUsages().entrySet())
    {
      exampleMap.put(e.getKey(), e.getValue());
    }

    return exampleMap;
  }
}