/**
HAR - Human Archive Format

https://github.com/marler8997/har

HAR is a simple format to represent multiple files in a single block of text, i.e.
---
--- main.d
import foo;
void main()
{
    foofunc();
}
--- foo.d
module foo;
void foofunc()
{
}
---
*/
module archive.har;

import std.typecons : Flag, Yes, No;
import std.array : Appender;
import std.format : format;
import std.string : startsWith, indexOf, stripRight;
import std.utf : decode, replacementDchar;
import std.path : dirName, buildPath;
import std.file : exists, isDir, mkdirRecurse;
import std.stdio : File;

/**
Error raised while parsing or extracting a HAR archive.

The `file` and `line` passed to the constructor are forwarded to `Exception`;
`HarExtractor` fills them with the archive name and the archive line number.
*/
class HarException : Exception
{
    this(string msg, string file, size_t line)
    {
        super(msg, file, line);
    }
}

/**
Extracts the entries of a HAR archive below `outputDir`.

Set the public fields, optionally call `enableVerbose`, then call
`extractFromFile` or `extract`.
*/
struct HarExtractor
{
    /// Name used as the `file` of every `HarException` (overwritten by `extractFromFile`).
    string filenameForErrors;
    /// Directory that entry names are joined onto; `null` is replaced by "" in `extract`.
    string outputDir;

    private bool verbose;
    private File verboseFile;

    /// When true no directory is created and no file is opened or written.
    bool dryRun;

    /// 1-based number of the archive line currently being processed (used for errors).
    private size_t lineNumber;

    /**
    Makes sure `dir` exists as a directory, creating it (recursively) if needed.

    Params:
        dir = directory to create
        forEmptyDir = only selects which error message is used when `dir`
                      exists but is not a directory

    Throws: HarException if `dir` exists and is not a directory.
    */
    private void extractMkdir(string dir, Flag!"forEmptyDir" forEmptyDir)
    {
        if (exists(dir))
        {
            if (!isDir(dir))
            {
                if (forEmptyDir)
                    throw harFileException("cannot extract empty directory %s since it already exists as non-directory",
                        dir.formatDir);
                throw harFileException("cannot extract files to non-directory %s", dir.formatDir);
            }
        }
        else
        {
            if (verbose)
                verboseFile.writefln("mkdir %s", dir.formatDir);
            // A dry run reports the mkdir (above) but never touches the file system.
            if (!dryRun)
                mkdirRecurse(dir);
        }
    }

    /// Turns on progress messages ("mkdir ..." / "creating ...") written to `verboseFile`.
    void enableVerbose(File verboseFile)
    {
        this.verbose = true;
        this.verboseFile = verboseFile;
    }

    /**
    Opens `harFilename` for reading and extracts it with `extract`.

    Also stores `harFilename` in `filenameForErrors`. Lines are read with their
    terminators kept so that file contents are copied verbatim.
    */
    void extractFromFile(T)(string harFilename, T fileInfoCallback)
    {
        this.filenameForErrors = harFilename;
        auto harFile = File(harFilename, "r");
        extract(harFile.byLine(Yes.keepTerminator), fileInfoCallback);
    }

    /**
    Extracts every entry found in `lineRange`.

    The delimiter is everything on the first line up to and including its first
    space. Each line starting with the delimiter begins a new entry; all other
    lines are written unchanged to the current entry's file.

    Params:
        lineRange = input range of archive lines (`empty`/`front`/`popFront`);
                    content lines are written exactly as the range yields them
        fileInfoCallback = called as `fileInfoCallback(fullFileName, fileInfo)` for
                    every entry before anything is created for it; note that
                    `fileInfo.filename` is a slice of the current line

    Throws: HarException on malformed input or when a target path is unusable.
    */
    void extract(T, U)(T lineRange, U fileInfoCallback)
    {
        if (outputDir is null)
            outputDir = "";

        lineNumber = 1;
        if (lineRange.empty)
            throw harFileException("file is empty");

        auto line = lineRange.front;
        auto firstLineSpaceIndex = line.indexOf(' ');
        if (firstLineSpaceIndex <= 0)
            throw harFileException("first line does not start with a delimiter ending with a space");

        // idup: `line` is reassigned for every archive line, the delimiter must outlive it.
        auto delimiter = line[0 .. firstLineSpaceIndex + 1].idup;

    LfileLoop:
        for (;;)
        {
            // Invariant: `line` starts with `delimiter` here.
            auto fileInfo = parseFileLine(line[delimiter.length .. $], delimiter[0]);
            auto fullFileName = buildPath(outputDir, fileInfo.filename);
            fileInfoCallback(fullFileName, fileInfo);

            // A name ending in '/' is an empty-directory entry: it has no content lines.
            if (fullFileName[$-1] == '/')
            {
                // extractMkdir honours dryRun itself, so it is called unconditionally:
                // a dry run then still validates the target and prints the verbose
                // "mkdir" message, just like it does for parent directories of files.
                extractMkdir(fullFileName, Yes.forEmptyDir);
                lineRange.popFront();
                if (lineRange.empty)
                    break;
                lineNumber++;
                line = lineRange.front;
                if (!line.startsWith(delimiter))
                    throw harFileException("expected delimiter after empty directory");
                continue;
            }

            {
                auto dir = dirName(fileInfo.filename);
                if (dir.length > 0)
                {
                    auto fullDir = buildPath(outputDir, dir);
                    extractMkdir(fullDir, No.forEmptyDir);
                }
            }
            if (verbose)
                verboseFile.writefln("creating %s", fullFileName.formatFile);
            {
                File currentOutputFile;
                if (!dryRun)
                    currentOutputFile = File(fullFileName, "w");
                scope(exit)
                {
                    if (!dryRun)
                        currentOutputFile.close();
                }
                // Copy content lines until the next delimiter line or the end of input.
                for (;;)
                {
                    lineRange.popFront();
                    if (lineRange.empty)
                        break LfileLoop;
                    lineNumber++;
                    line = lineRange.front;
                    if (line.startsWith(delimiter))
                        break;
                    if (!dryRun)
                        currentOutputFile.write(line);
                }
            }
        }
    }

    /// Builds a HarException from a format string; see the `string` overload.
    private HarException harFileException(T...)(string fmt, T args) if (T.length > 0)
    {
        return harFileException(format(fmt, args));
    }

    /// Builds (does not throw) a HarException located at `filenameForErrors`:`lineNumber`.
    private HarException harFileException(string msg)
    {
        return new HarException(msg, filenameForErrors, lineNumber);
    }

    /**
    Parses the part of an entry line that follows the delimiter.

    Params:
        line = entry line without its leading delimiter
        firstDelimiterChar = first character of the delimiter; text after the
                             filename may start with it (it is then ignored)

    Returns: the properties of the entry; `filename` is a slice of `line`.

    Throws: HarException if the filename is missing or invalid, or if anything
            other than spaces, a line terminator or `firstDelimiterChar` follows it.
    */
    FileProperties parseFileLine(const(char)[] line, char firstDelimiterChar)
    {
        if (line.length == 0)
            throw harFileException("missing filename");

        const(char)[] filename;
        const(char)[] rest;
        if (line[0] == '"')
        {
            size_t afterFileIndex;
            filename = parseQuotedFilename(line[1 .. $], &afterFileIndex);
            rest = line[afterFileIndex .. $];
        }
        else
        {
            filename = parseFilename(line);
            rest = line[filename.length .. $];
        }

        // Entry properties are not implemented: only trailing spaces, a line
        // terminator or text starting with the delimiter character are accepted.
        rest = skipSpaces(rest);
        if (!(rest.length == 0 || rest == "\n" || rest == "\r" || rest == "\r\n" || rest[0] == firstDelimiterChar))
            throw harFileException("properties not implemented '%s'", rest);

        return FileProperties(filename);
    }

    /// Rejects an empty path component ("//") and the parent directory component "..".
    void checkComponent(const(char)[] component)
    {
        if (component.length == 0)
            throw harFileException("invalid filename, contains double slash '//'");
        if (component == "..")
            throw harFileException("invalid filename, contains double dot '..' parent directory");
    }

    /**
    Parses an unquoted filename from the start of `line`.

    The name ends at the first space, '\n' or '\r', or at the end of `line`.
    A single trailing '/' is allowed and marks a directory entry.

    Returns: the slice of `line` holding the filename.

    Throws: HarException if the name is missing, absolute, not valid UTF-8, or
            contains an empty or ".." component.
    */
    inout(char)[] parseFilename(inout(char)[] line)
    {
        if (line.length == 0 || isEndOfFileChar(line[0]))
            throw harFileException("missing filename");

        if (line[0] == '/')
            throw harFileException("absolute filenames are invalid");

        size_t start = 0; // index where the current path component begins
        size_t next = 0;  // index of the next undecoded character
        while (true)
        {
            auto cIndex = next;
            auto c = decode!(Yes.useReplacementDchar)(line, next);
            if (c == replacementDchar)
                throw harFileException("invalid utf8 sequence");

            if (c == '/')
            {
                checkComponent(line[start .. cIndex]);
                if (next >= line.length)
                    return line[0 .. next];
                start = next;
            }
            else if (isEndOfFileChar(c))
            {
                // cIndex == start can only happen directly after a '/', i.e. for a
                // directory entry such as "dir/" followed by a space or newline.
                // That trailing slash is valid and must not be reported as "//".
                if (cIndex > start)
                    checkComponent(line[start .. cIndex]);
                return line[0 .. cIndex];
            }

            if (next >= line.length)
            {
                checkComponent(line[start .. next]);
                return line[0 ..next];
            }
        }
    }

    /**
    Parses a quoted filename.

    Params:
        line = text that follows the opening quote
        afterFileIndex = receives the index just past the closing quote, counted
                         in the string that still includes the opening quote
                         (hence the `+ 1`)

    Returns: the slice of `line` between the quotes. A single trailing '/' is
             allowed and marks a directory entry.

    Throws: HarException if the closing quote is missing, or the name is empty,
            absolute, not valid UTF-8, or contains an empty or ".." component.
    */
    inout(char)[] parseQuotedFilename(inout(char)[] line, size_t* afterFileIndex)
    {
        if (line.length == 0)
            throw harFileException("filename missing end-quote");
        if (line[0] == '"')
            throw harFileException("empty filename");
        if (line[0] == '/')
            throw harFileException("absolute filenames are invalid");

        size_t start = 0; // index where the current path component begins
        size_t next = 0;  // index of the next undecoded character
        while(true)
        {
            auto cIndex = next;
            auto c = decode!(Yes.useReplacementDchar)(line, next);
            if (c == replacementDchar)
                throw harFileException("invalid utf8 sequence");

            if (c == '/')
            {
                checkComponent(line[start .. cIndex]);
                start = next;
            }
            else if (c == '"')
            {
                // cIndex == start can only happen directly after a '/', i.e. for a
                // quoted directory entry such as "my dir/"; accept that trailing slash.
                if (cIndex > start)
                    checkComponent(line[start .. cIndex]);
                *afterFileIndex = next + 1;
                return line[0 .. cIndex];
            }
            if (next >= line.length)
                throw harFileException("filename missing end-quote");
        }
    }
}

/// Returns `str` without its leading ' ' characters (only the space character is skipped).
private inout(char)[] skipSpaces(inout(char)[] str)
{
    size_t i = 0;
    for (; i < str.length; i++)
    {
        if (str[i] != ' ')
            break;
    }
    return str[i .. $];
}

/// True for the characters that end an unquoted filename: '\n', ' ' and '\r'.
private bool isEndOfFileChar(C)(const(C) c)
{
    return c == '\n' || c == ' ' || c == '\r';
}

/// Properties parsed from an entry line; currently only the filename.
struct FileProperties
{
    const(char)[] filename;
}

/// Formatter for a directory name; an empty name is shown as ".".
auto formatDir(const(char)[] dir)
{
    if (dir.length == 0)
        dir = ".";

    return formatQuotedIfSpaces(dir);
}

/// Formatter for a file name, which must not be empty.
auto formatFile(const(char)[] file)
    in { assert(file.length > 0); } do
{
    return formatQuotedIfSpaces(file);
}

// returns a formatter that will print the given string.  it will print
// it surrounded with quotes if the string contains any spaces.
/// Returns a value whose `toString` writes all `args` back to back, wrapping
/// the whole output in double quotes when at least one argument contains a space.
auto formatQuotedIfSpaces(T...)(T args)
if (T.length > 0)
{
    struct Formatter
    {
        T args;

        void toString(scope void delegate(const(char)[]) sink) const
        {
            import std.string : indexOf;

            // One space anywhere is enough to quote the complete output.
            bool needsQuotes = false;
            foreach (piece; args)
            {
                needsQuotes = needsQuotes || piece.indexOf(' ') != -1;
            }

            // Empty when no quoting is needed, so the sink is not called for it.
            const(char)[] quote = needsQuotes ? `"` : "";

            if (quote.length != 0)
                sink(quote);
            foreach (piece; args)
            {
                sink(piece);
            }
            if (quote.length != 0)
                sink(quote);
        }
    }

    return Formatter(args);
}