/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.commons.compress.compressors;

import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.Locale;
import java.util.Map;

/**
 * File name mapping code for the compression formats.
 * <p>
 * An instance takes a snapshot of the suffix mappings handed to its
 * constructor and never modifies its own state afterwards.
 * </p>
 *
 * @ThreadSafe
 * @since 1.4
 */
public class FileNameUtil {

    /**
     * Map from common file name suffixes to the suffixes that identify compressed
     * versions of those file types. For example: from ".tar" to ".tgz".
     * Populated in the constructor only.
     */
    private final Map<String, String> compressSuffix =
        new HashMap<>();

    /**
     * Map from common file name suffixes of compressed files to the
     * corresponding suffixes of uncompressed files. For example: from
     * ".tgz" to ".tar".
     * <p>
     * This map also contains format-specific suffixes like ".gz" and "-z".
     * These suffixes are mapped to the empty string, as they should simply
     * be removed from the file name when the file is uncompressed.
     * </p>
     */
    private final Map<String, String> uncompressSuffix;

    /**
     * Length of the longest compressed suffix.
     */
    private final int longestCompressedSuffix;

    /**
     * Length of the shortest compressed suffix.
     */
    private final int shortestCompressedSuffix;

    /**
     * Length of the longest uncompressed suffix.
     */
    private final int longestUncompressedSuffix;

    /**
     * Length of the shortest uncompressed suffix longer than the
     * empty string.
     */
    private final int shortestUncompressedSuffix;

    /**
     * The format's default extension.
     */
    private final String defaultExtension;

    /**
     * Sets up the utility with a map of known compressed to
     * uncompressed suffix mappings and the default extension of the
     * format.
     * <p>
     * The given map is copied; modifying it after this constructor
     * returns has no effect on the created instance. When several
     * compressed suffixes map to the same uncompressed suffix, the one
     * the map's iteration yields first is used for compression.
     * </p>
     *
     * @param uncompressSuffix Map from common file name suffixes of
     * compressed files to the corresponding suffixes of uncompressed
     * files. For example: from ".tgz" to ".tar".  This map also
     * contains format-specific suffixes like ".gz" and "-z".  These
     * suffixes are mapped to the empty string, as they should simply
     * be removed from the file name when the file is uncompressed.
     * Keys are compared against lower-cased file names, so they must
     * be lower case to ever match.
     *
     * @param defaultExtension the format's default extension like ".gz"
     * @throws NullPointerException if the map, one of its keys or one
     * of its values is {@code null}
     */
    public FileNameUtil(final Map<String, String> uncompressSuffix,
                        final String defaultExtension) {
        // Snapshot the mappings: Collections.unmodifiableMap alone is only a
        // view, so later changes made by the caller would otherwise leak in
        // and disagree with the suffix lengths cached below. LinkedHashMap
        // keeps the caller's iteration order, which decides which compressed
        // suffix wins for a shared uncompressed suffix.
        final Map<String, String> snapshot = new LinkedHashMap<>(uncompressSuffix);
        this.uncompressSuffix = Collections.unmodifiableMap(snapshot);

        int longestCompressed = Integer.MIN_VALUE;
        int shortestCompressed = Integer.MAX_VALUE;
        int longestUncompressed = Integer.MIN_VALUE;
        int shortestUncompressed = Integer.MAX_VALUE;
        for (final Map.Entry<String, String> entry : snapshot.entrySet()) {
            final String compressed = entry.getKey();
            final int compressedLength = compressed.length();
            longestCompressed = Math.max(longestCompressed, compressedLength);
            shortestCompressed = Math.min(shortestCompressed, compressedLength);

            final String uncompressed = entry.getValue();
            final int uncompressedLength = uncompressed.length();
            // Empty values mean "just strip the suffix" and have no reverse mapping.
            if (uncompressedLength > 0) {
                // The first compressed suffix seen for an uncompressed one is kept.
                compressSuffix.putIfAbsent(uncompressed, compressed);
                longestUncompressed = Math.max(longestUncompressed, uncompressedLength);
                shortestUncompressed = Math.min(shortestUncompressed, uncompressedLength);
            }
        }
        // With no (non-empty) mappings the bounds stay at MAX_VALUE/MIN_VALUE,
        // which makes the lookup loops below run zero times.
        longestCompressedSuffix = longestCompressed;
        longestUncompressedSuffix = longestUncompressed;
        shortestCompressedSuffix = shortestCompressed;
        shortestUncompressedSuffix = shortestUncompressed;
        this.defaultExtension = defaultExtension;
    }

    /**
     * Maps the given file name to the name that the file should have after
     * compression. Common file types with custom suffixes for
     * compressed versions are automatically detected and correctly mapped.
     * For example the name "package.tar" is mapped to "package.tgz". If no
     * custom mapping is applicable, then the format's default extension is
     * appended to the file name.
     *
     * @param fileName name of a file
     * @return name of the corresponding compressed file
     * @deprecated Use {@link #getCompressedFileName(String)}.
     */
    @Deprecated
    public String getCompressedFilename(final String fileName) {
        return getCompressedFileName(fileName);
    }

    /**
     * Maps the given file name to the name that the file should have after
     * compression. Common file types with custom suffixes for
     * compressed versions are automatically detected and correctly mapped.
     * For example the name "package.tar" is mapped to "package.tgz". If no
     * custom mapping is applicable, then the format's default extension is
     * appended to the file name.
     * <p>
     * Suffixes are matched case-insensitively; the part of the name in
     * front of the suffix keeps its original case. A name that consists
     * of nothing but a known suffix is not treated as a match.
     * </p>
     *
     * @param fileName name of a file
     * @return name of the corresponding compressed file
     * @since 1.25.0
     */
    public String getCompressedFileName(final String fileName) {
        final String lower = fileName.toLowerCase(Locale.ENGLISH);
        final int n = lower.length();
        // Try candidate suffix lengths from shortest to longest; "i < n"
        // guarantees at least one character remains in front of the suffix.
        for (int i = shortestUncompressedSuffix;
             i <= longestUncompressedSuffix && i < n; i++) {
            final String suffix = compressSuffix.get(lower.substring(n - i));
            if (suffix != null) {
                return fileName.substring(0, n - i) + suffix;
            }
        }
        // No custom suffix found, just append the default
        return fileName + defaultExtension;
    }

    /**
     * Maps the given name of a compressed file to the name that the
     * file should have after uncompression. Commonly used file type specific
     * suffixes like ".tgz" or ".svgz" are automatically detected and
     * correctly mapped. For example the name "package.tgz" is mapped to
     * "package.tar". And any file name with a generic format suffix
     * like ".gz" is mapped to a name without that
     * suffix. If no format suffix is detected, then the file name is returned
     * unmapped.
     *
     * @param fileName name of a file
     * @return name of the corresponding uncompressed file
     * @deprecated Use {@link #getUncompressedFileName(String)}.
     */
    @Deprecated
    public String getUncompressedFilename(final String fileName) {
        return getUncompressedFileName(fileName);
    }

    /**
     * Maps the given name of a compressed file to the name that the
     * file should have after uncompression. Commonly used file type specific
     * suffixes like ".tgz" or ".svgz" are automatically detected and
     * correctly mapped. For example the name "package.tgz" is mapped to
     * "package.tar". And any file name with a generic format suffix
     * like ".gz" is mapped to a name without that
     * suffix. If no format suffix is detected, then the file name is returned
     * unmapped.
     * <p>
     * Suffixes are matched case-insensitively and the shortest matching
     * suffix wins. A name that consists of nothing but a known suffix is
     * returned unchanged.
     * </p>
     *
     * @param fileName name of a file
     * @return name of the corresponding uncompressed file
     * @since 1.25.0
     */
    public String getUncompressedFileName(final String fileName) {
        final String lower = fileName.toLowerCase(Locale.ENGLISH);
        final int n = lower.length();
        for (int i = shortestCompressedSuffix;
             i <= longestCompressedSuffix && i < n; i++) {
            final String suffix = uncompressSuffix.get(lower.substring(n - i));
            if (suffix != null) {
                return fileName.substring(0, n - i) + suffix;
            }
        }
        return fileName;
    }

    /**
     * Detects common format suffixes in the given file name.
     *
     * @param fileName name of a file
     * @return {@code true} if the file name has a common format suffix,
     *         {@code false} otherwise
     * @deprecated Use {@link #isCompressedFileName(String)}.
     */
    @Deprecated
    public boolean isCompressedFilename(final String fileName) {
        return isCompressedFileName(fileName);
    }

    /**
     * Detects common format suffixes in the given file name.
     * <p>
     * The check is case-insensitive. A name that consists of nothing but
     * a known suffix (for example ".gz") is not considered compressed.
     * </p>
     *
     * @param fileName name of a file
     * @return {@code true} if the file name has a common format suffix,
     *         {@code false} otherwise
     * @since 1.25.0
     */
    public boolean isCompressedFileName(final String fileName) {
        final String lower = fileName.toLowerCase(Locale.ENGLISH);
        final int n = lower.length();
        for (int i = shortestCompressedSuffix;
             i <= longestCompressedSuffix && i < n; i++) {
            if (uncompressSuffix.containsKey(lower.substring(n - i))) {
                return true;
            }
        }
        return false;
    }
}