package org.unix4j.unix.sort;

import java.util.Collections;
import java.util.EnumSet;
import java.util.Iterator;

import org.unix4j.option.Option;
import org.unix4j.unix.Sort;

/**
 * Options for the {@link Sort sort} command.
 * <p>
 * For most applications, it may be more convenient to use {@link Sort#Options}
 * instead of the option constants defined here.
 * <p>
 * <table>
 * <tr valign="top"><td width="10px"></td><td nowrap="nowrap">{@code -c}</td><td>&nbsp;</td><td nowrap="nowrap">{@code --check}</td><td>&nbsp;</td><td>Checks that the single input file is ordered as specified by the
 *     arguments and the collating sequence of the current locale. No
 *     output is produced; only the exit code is affected.</td></tr>
 * <tr valign="top"><td width="10px"></td><td nowrap="nowrap">{@code -m}</td><td>&nbsp;</td><td nowrap="nowrap">{@code --merge}</td><td>&nbsp;</td><td>Merge only; the input files are assumed to be already sorted.</td></tr>
 * <tr valign="top"><td width="10px"></td><td nowrap="nowrap">{@code -u}</td><td>&nbsp;</td><td nowrap="nowrap">{@code --unique}</td><td>&nbsp;</td><td>Unique: suppress all but one in each set of lines having equal keys.
 *     If used with the {@code -c} option, checks that there are no lines
 *     with duplicate keys, in addition to checking that the input file is
 *     sorted.</td></tr>
 * <tr valign="top"><td width="10px"></td><td nowrap="nowrap">{@code -b}</td><td>&nbsp;</td><td nowrap="nowrap">{@code --ignoreLeadingBlanks}</td><td>&nbsp;</td><td>Ignore leading blanks.
 *     (This option is ignored if a comparator operand is present).</td></tr>
 * <tr valign="top"><td width="10px"></td><td nowrap="nowrap">{@code -d}</td><td>&nbsp;</td><td nowrap="nowrap">{@code --dictionaryOrder}</td><td>&nbsp;</td><td>Consider only blanks and alphanumeric characters.
 *     (This option is ignored if a comparator operand is present).</td></tr>
 * <tr valign="top"><td width="10px"></td><td nowrap="nowrap">{@code -f}</td><td>&nbsp;</td><td nowrap="nowrap">{@code --ignoreCase}</td><td>&nbsp;</td><td>Consider all lowercase characters that have uppercase equivalents to
 *     be the uppercase equivalent for the purposes of comparison.
 *     (This option is ignored if a comparator operand is present).</td></tr>
 * <tr valign="top"><td width="10px"></td><td nowrap="nowrap">{@code -n}</td><td>&nbsp;</td><td nowrap="nowrap">{@code --numericSort}</td><td>&nbsp;</td><td>Sort numerically; the number begins each line and consists of
 *     optional blanks, an optional minus sign, and zero or more digits
 *     possibly separated by thousands separators, optionally followed by a
 *     decimal-point character and zero or more digits. An empty number is
 *     treated as '0'. The current locale specifies the decimal-point
 *     character and thousands separator.
 *     <p>
 *     Comparison is exact; there is no rounding error.
 *     <p>
 *     Neither a leading '+' nor exponential notation is recognized. To
 *     compare such strings numerically, use the
 *     {@code --generalNumericSort (-g)} option.
 *     <p>
 *     (This option is ignored if a comparator operand is present).</td></tr>
 * <tr valign="top"><td width="10px"></td><td nowrap="nowrap">{@code -g}</td><td>&nbsp;</td><td nowrap="nowrap">{@code --generalNumericSort}</td><td>&nbsp;</td><td>Sort numerically, using the standard {@link Double#parseDouble(String)}
 *     function to convert a trimmed line to a double-precision floating
 *     point number. This allows floating point numbers to be specified in
 *     scientific notation, like 1.0e-34 and 10e100.
 *     <p>
 *     Uses the following collating sequence: Lines that cannot be parsed
 *     because they do not represent valid double values (in alpha-numeric
 *     order); "-Infinity"; finite numbers in ascending numeric order
 *     (with -0 &lt; +0); "Infinity"; "NaN".
 *     <p>
 *     This option is usually slower than {@code --numericSort (-n)} and it
 *     can lose information when converting to floating point.
 *     <p>
 *     (This option is ignored if a comparator operand is present).</td></tr>
 * <tr valign="top"><td width="10px"></td><td nowrap="nowrap">{@code -h}</td><td>&nbsp;</td><td nowrap="nowrap">{@code --humanNumericSort}</td><td>&nbsp;</td><td>Sort numerically, first by numeric sign (negative, zero, or
 *     positive); then by SI suffix (either empty, or 'k' or 'K', or one
 *     of 'MGTPEZY', in that order); and finally by numeric value. For
 *     example, '1023M' sorts before '1G' because 'M' (mega) precedes 'G'
 *     (giga) as an SI suffix.
 *     <p>
 *     This option sorts values that are consistently scaled to the nearest
 *     suffix, regardless of whether suffixes denote powers of 1000 or
 *     1024, and it therefore sorts the output of any single invocation of
 *     the {@code ls} command that is invoked with the
 *     {@code --human-readable} option.
 *     <p>
 *     The syntax for numbers is the same as for the
 *     {@code --numericSort (-n)} option; the SI suffix must immediately
 *     follow the number.
 *     <p>
 *     (This option is ignored if a comparator operand is present).</td></tr>
 * <tr valign="top"><td width="10px"></td><td nowrap="nowrap">{@code -M}</td><td>&nbsp;</td><td nowrap="nowrap">{@code --monthSort}</td><td>&nbsp;</td><td>An initial string, consisting of any amount of blanks, followed by a
 *     month name abbreviation, is folded to UPPER case and compared in the
 *     order: (unknown) &lt; 'JAN' &lt; ... &lt; 'DEC'. The current locale
 *     determines the month spellings.</td></tr>
 * <tr valign="top"><td width="10px"></td><td nowrap="nowrap">{@code -V}</td><td>&nbsp;</td><td nowrap="nowrap">{@code --versionSort}</td><td>&nbsp;</td><td>Sort by version name and number. It behaves like a standard sort,
 *     except that each sequence of decimal digits is treated numerically
 *     as an index/version number.
 *     <p>
 *     (This option is ignored if a comparator operand is present).</td></tr>
 * <tr valign="top"><td width="10px"></td><td nowrap="nowrap">{@code -r}</td><td>&nbsp;</td><td nowrap="nowrap">{@code --reverse}</td><td>&nbsp;</td><td>Reverse the sense of comparisons.</td></tr>
 * </table>
 */
public enum SortOption implements Option, SortOptions {
    /**
     * Option <b>{@code --check}</b>, <b>{@code -c}</b>:
     * Checks that the single input file is ordered as specified by the
     * arguments and the collating sequence of the current locale. No
     * output is produced; only the exit code is affected.
     */
    check('c'),
    /**
     * Option <b>{@code --merge}</b>, <b>{@code -m}</b>:
     * Merge only; the input files are assumed to be already sorted.
     */
    merge('m'),
    /**
     * Option <b>{@code --unique}</b>, <b>{@code -u}</b>:
     * Unique: suppress all but one in each set of lines having equal keys.
     * If used with the {@code -c} option, checks that there are no lines
     * with duplicate keys, in addition to checking that the input file is
     * sorted.
     */
    unique('u'),
    /**
     * Option <b>{@code --ignoreLeadingBlanks}</b>, <b>{@code -b}</b>:
     * Ignore leading blanks.
     * (This option is ignored if a comparator operand is present).
     */
    ignoreLeadingBlanks('b'),
    /**
     * Option <b>{@code --dictionaryOrder}</b>, <b>{@code -d}</b>:
     * Consider only blanks and alphanumeric characters.
     * (This option is ignored if a comparator operand is present).
     */
    dictionaryOrder('d'),
    /**
     * Option <b>{@code --ignoreCase}</b>, <b>{@code -f}</b>:
     * Consider all lowercase characters that have uppercase equivalents to
     * be the uppercase equivalent for the purposes of comparison.
     * (This option is ignored if a comparator operand is present).
     */
    ignoreCase('f'),
    /**
     * Option <b>{@code --numericSort}</b>, <b>{@code -n}</b>:
     * Sort numerically; the number begins each line and consists of
     * optional blanks, an optional minus sign, and zero or more digits
     * possibly separated by thousands separators, optionally followed by a
     * decimal-point character and zero or more digits. An empty number is
     * treated as '0'. The current locale specifies the decimal-point
     * character and thousands separator.
     * <p>
     * Comparison is exact; there is no rounding error.
     * <p>
     * Neither a leading '+' nor exponential notation is recognized. To
     * compare such strings numerically, use the
     * {@code --generalNumericSort (-g)} option.
     * <p>
     * (This option is ignored if a comparator operand is present).
     */
    numericSort('n'),
    /**
     * Option <b>{@code --generalNumericSort}</b>, <b>{@code -g}</b>:
     * Sort numerically, using the standard {@link Double#parseDouble(String)}
     * function to convert a trimmed line to a double-precision floating
     * point number. This allows floating point numbers to be specified in
     * scientific notation, like 1.0e-34 and 10e100.
     * <p>
     * Uses the following collating sequence: Lines that cannot be parsed
     * because they do not represent valid double values (in alpha-numeric
     * order); "-Infinity"; finite numbers in ascending numeric order
     * (with -0 &lt; +0); "Infinity"; "NaN".
     * <p>
     * This option is usually slower than {@code --numericSort (-n)} and it
     * can lose information when converting to floating point.
     * <p>
     * (This option is ignored if a comparator operand is present).
     */
    generalNumericSort('g'),
    /**
     * Option <b>{@code --humanNumericSort}</b>, <b>{@code -h}</b>:
     * Sort numerically, first by numeric sign (negative, zero, or
     * positive); then by SI suffix (either empty, or 'k' or 'K', or one
     * of 'MGTPEZY', in that order); and finally by numeric value. For
     * example, '1023M' sorts before '1G' because 'M' (mega) precedes 'G'
     * (giga) as an SI suffix.
     * <p>
     * This option sorts values that are consistently scaled to the nearest
     * suffix, regardless of whether suffixes denote powers of 1000 or
     * 1024, and it therefore sorts the output of any single invocation of
     * the {@code ls} command that is invoked with the
     * {@code --human-readable} option.
     * <p>
     * The syntax for numbers is the same as for the
     * {@code --numericSort (-n)} option; the SI suffix must immediately
     * follow the number.
     * <p>
     * (This option is ignored if a comparator operand is present).
     */
    humanNumericSort('h'),
    /**
     * Option <b>{@code --monthSort}</b>, <b>{@code -M}</b>:
     * An initial string, consisting of any amount of blanks, followed by a
     * month name abbreviation, is folded to UPPER case and compared in the
     * order: (unknown) &lt; 'JAN' &lt; ... &lt; 'DEC'. The current locale
     * determines the month spellings.
     */
    monthSort('M'),
    /**
     * Option <b>{@code --versionSort}</b>, <b>{@code -V}</b>:
     * Sort by version name and number. It behaves like a standard sort,
     * except that each sequence of decimal digits is treated numerically
     * as an index/version number.
     * <p>
     * (This option is ignored if a comparator operand is present).
     */
    versionSort('V'),
    /**
     * Option <b>{@code --reverse}</b>, <b>{@code -r}</b>:
     * Reverse the sense of comparisons.
     */
    reverse('r');

    /** Single-character short name of this option, e.g. {@code 'r'} for {@link #reverse}. */
    private final char acronym;

    /**
     * Creates an option constant with the given single-character acronym.
     *
     * @param acronym the short name of the option
     */
    SortOption(char acronym) {
        this.acronym = acronym;
    }

    /**
     * Returns the class of this option type.
     *
     * @return always {@code SortOption.class}
     */
    @Override
    public Class<SortOption> optionType() {
        return SortOption.class;
    }

    /**
     * Returns the option with the given {@code acronym}, or {@code null} if no
     * such option is found. The comparison is case sensitive, so {@code 'm'}
     * ({@link #merge}) and {@code 'M'} ({@link #monthSort}) are distinct.
     *
     * @param acronym the option {@link #acronym() acronym}
     * @return the option with the given {@code acronym} or {@code null} if it
     *         is not found
     */
    public static SortOption findByAcronym(char acronym) {
        for (final SortOption opt : values()) {
            if (opt.acronym() == acronym) {
                return opt;
            }
        }
        return null;
    }

    /**
     * Returns the single-character acronym of this option.
     *
     * @return the acronym passed to the constructor of this constant
     */
    @Override
    public char acronym() {
        return acronym;
    }

    /**
     * Returns true if the given {@code option} is this option. Viewed as an
     * option set, this constant contains exactly one element: itself.
     *
     * @param option the option to test
     * @return true if {@code option} equals {@code this} option
     */
    @Override
    public boolean isSet(SortOption option) {
        return equals(option);
    }

    /**
     * Returns a new set with {@code this} active option.
     *
     * @return a new set containing this option
     */
    @Override
    public EnumSet<SortOption> asSet() {
        return EnumSet.of(this);
    }

    /**
     * Returns an immutable iterator returning a single element: {@code this}
     * option.
     *
     * @return an immutable iterator with {@code this} active option.
     */
    @Override
    public Iterator<SortOption> iterator() {
        return Collections.singleton(this).iterator();
    }

    /**
     * Returns 1 as this is a set with a single element: {@code this} option.
     *
     * @return one
     */
    @Override
    public int size() {
        return 1;
    }

    /**
     * Returns true if the {@link Option#acronym() acronym} should be used for
     * the specified {@code option} in string representations.
     * <p>
     * This method always returns true for all options.
     *
     * @param option
     *            the option of interest
     * @return always true indicating that option acronyms should be used in
     *         string representations for all options
     */
    @Override
    public boolean useAcronymFor(SortOption option) {
        return true;
    }
}