1 /**
2 HAR - Human Archive Format
3 
4 https://github.com/marler8997/har
5 
6 HAR is a simple format to represent multiple files in a single block of text, i.e.
7 ---
8 --- main.d
9 import foo;
10 void main()
11 {
12     foofunc();
13 }
14 --- foo.d
15 module foo;
16 void foofunc()
17 {
18 }
19 ---
20 */
21 module archive.har;
22 
23 import std.typecons : Flag, Yes, No;
24 import std.array : Appender;
25 import std.format : format;
26 import std.string : startsWith, indexOf, stripRight;
27 import std.utf : decode, replacementDchar;
28 import std.path : dirName, buildPath;
29 import std.file : exists, isDir, mkdirRecurse;
30 import std.stdio : File;
31 
/// Exception type thrown by this module for errors found while parsing or
/// extracting a HAR archive.
class HarException : Exception
{
    /// Forwards the message, file name and line number unchanged to the
    /// base `Exception` constructor.
    this(string msg, string file, size_t line)
    {
        super(msg, file, line);
    }
}
39 
/**
Extracts the entries of a HAR archive underneath `outputDir`.

The archive is consumed line by line.  The first line defines the delimiter
(everything up to and including its first space); every later line that
starts with that delimiter begins a new entry, and all other lines are
written to the file of the current entry.  An entry whose name ends in '/'
is an empty directory.

Errors are reported as `HarException`, carrying `filenameForErrors` and the
current archive line number.
*/
struct HarExtractor
{
    /// File name attached to every `HarException` created by this extractor.
    /// Overwritten by `extractFromFile`.
    string filenameForErrors;
    /// Directory that entry names are joined onto.  `extract` replaces a
    /// null value with the empty string.
    string outputDir;

    private bool verbose;
    private File verboseFile;

    /// When true, no directory is created and no file is opened or written;
    /// the archive is still parsed and verbose messages are still printed.
    bool dryRun;

    // 1-based number of the archive line currently being processed; used
    // as the `line` of thrown exceptions.
    private size_t lineNumber;

    /**
    Ensures that `dir` exists as a directory, creating it (recursively)
    unless `dryRun` is set.

    Params:
        dir = directory path to check or create
        forEmptyDir = selects the error message used when `dir` already
                      exists but is not a directory
    Throws: HarException if `dir` exists and is not a directory.
    */
    private void extractMkdir(string dir, Flag!"forEmptyDir" forEmptyDir)
    {
        if (exists(dir))
        {
            if (!isDir(dir))
            {
                if (forEmptyDir)
                    throw harFileException("cannot extract empty directory %s since it already exists as non-directory",
                        dir.formatDir);
                throw harFileException("cannot extract files to non-directory %s", dir.formatDir);
            }
        }
        else
        {
            if (verbose)
                verboseFile.writefln("mkdir %s", dir.formatDir);
            if (!dryRun)
                mkdirRecurse(dir);
        }
    }

    /// Turns on progress messages ("mkdir ...", "creating ...") and sends
    /// them to `verboseFile`.
    void enableVerbose(File verboseFile)
    {
        this.verbose = true;
        this.verboseFile = verboseFile;
    }

    /**
    Opens `harFilename` for reading and extracts it with `extract`.

    Sets `filenameForErrors` to `harFilename`.  Lines are read with their
    terminators kept, so file contents are written back unchanged.

    Params:
        harFilename = path of the archive to read
        fileInfoCallback = forwarded to `extract`
    */
    void extractFromFile(T)(string harFilename, T fileInfoCallback)
    {
        this.filenameForErrors = harFilename;
        auto harFile = File(harFilename, "r");
        extract(harFile.byLine(Yes.keepTerminator), fileInfoCallback);
    }

    /**
    Extracts every entry found in `lineRange`.

    Params:
        lineRange = input range of archive lines (terminators included if
                    they are to be preserved in the output files)
        fileInfoCallback = called once per entry as
                    `fileInfoCallback(fullFileName, fileInfo)` before
                    anything is created for that entry
    Throws: HarException if the input is empty, the first line has no
            delimiter, an entry header is invalid, or a target directory
            path exists as a non-directory.
    */
    void extract(T, U)(T lineRange, U fileInfoCallback)
    {
        if (outputDir is null)
            outputDir = "";

        lineNumber = 1;
        if (lineRange.empty)
            throw harFileException("file is empty");

        auto line = lineRange.front;
        auto firstLineSpaceIndex = line.indexOf(' ');
        if (firstLineSpaceIndex <= 0)
            throw harFileException("first line does not start with a delimiter ending with a space");

        // The delimiter includes its trailing space.  It is copied because
        // `line` may refer to a buffer the range reuses.
        auto delimiter = line[0 .. firstLineSpaceIndex + 1].idup;

    LfileLoop:
        for (;;)
        {
            // Invariant: `line` is an entry header starting with `delimiter`.
            auto fileInfo = parseFileLine(line[delimiter.length .. $], delimiter[0]);
            auto fullFileName = buildPath(outputDir, fileInfo.filename);
            fileInfoCallback(fullFileName, fileInfo);

            if (fullFileName[$-1] == '/')
            {
                // Empty directory entry.  extractMkdir honours `dryRun`
                // itself, so it is called unconditionally: a dry run then
                // reports and validates directories exactly like the file
                // path below does.
                extractMkdir(fullFileName, Yes.forEmptyDir);
                lineRange.popFront();
                if (lineRange.empty)
                    break;
                lineNumber++;
                line = lineRange.front;
                if (!line.startsWith(delimiter))
                    throw harFileException("expected delimiter after empty directory");
                continue;
            }

            {
                auto dir = dirName(fileInfo.filename);
                if (dir.length > 0)
                {
                    auto fullDir = buildPath(outputDir, dir);
                    extractMkdir(fullDir, No.forEmptyDir);
                }
            }
            if (verbose)
                verboseFile.writefln("creating %s", fullFileName.formatFile);
            {
                File currentOutputFile;
                if (!dryRun)
                    currentOutputFile = File(fullFileName, "w");
                scope(exit)
                {
                    if (!dryRun)
                        currentOutputFile.close();
                }
                // Copy lines until the next entry header or end of input.
                for (;;)
                {
                    lineRange.popFront();
                    if (lineRange.empty)
                        break LfileLoop;
                    lineNumber++;
                    line = lineRange.front;
                    if (line.startsWith(delimiter))
                        break;
                    if (!dryRun)
                        currentOutputFile.write(line);
                }
            }
        }
    }

    /// Builds a HarException from a format string and arguments, tagged
    /// with `filenameForErrors` and the current line number.
    private HarException harFileException(T...)(string fmt, T args) if (T.length > 0)
    {
        return harFileException(format(fmt, args));
    }

    /// Builds a HarException for `msg`, tagged with `filenameForErrors` and
    /// the current line number.
    private HarException harFileException(string msg)
    {
        return new HarException(msg, filenameForErrors, lineNumber);
    }

    /**
    Parses the part of an entry header that follows the delimiter.

    Params:
        line = header text after the delimiter
        firstDelimiterChar = first character of the delimiter; text after
                    the filename that starts with it is accepted as a
                    trailing delimiter
    Returns: the properties of the entry; `filename` is a slice of `line`.
    Throws: HarException if the filename is missing or invalid, or if
            anything other than spaces, a line terminator or a trailing
            delimiter follows it.
    */
    FileProperties parseFileLine(const(char)[] line, char firstDelimiterChar)
    {
        if (line.length == 0)
            throw harFileException("missing filename");

        const(char)[] filename;
        const(char)[] rest;
        if (line[0] == '"')
        {
            size_t afterFileIndex;
            filename = parseQuotedFilename(line[1 .. $], &afterFileIndex);
            rest = line[afterFileIndex .. $];
        }
        else
        {
            filename = parseFilename(line);
            rest = line[filename.length .. $];
        }
        for (;;)
        {
            rest = skipSpaces(rest);
            if (rest.length == 0 || rest == "\n" || rest == "\r" || rest == "\r\n" || rest[0] == firstDelimiterChar)
                break;
            throw harFileException("properties not implemented '%s'", rest);
        }
        return FileProperties(filename);
    }

    /// Validates one '/'-separated path component.
    /// Throws: HarException if the component is empty or is "..".
    void checkComponent(const(char)[] component)
    {
        if (component.length == 0)
            throw harFileException("invalid filename, contains double slash '//'");
        if (component == "..")
            throw harFileException("invalid filename, contains double dot '..' parent directory");
    }

    /**
    Parses an unquoted filename from the start of `line`.

    The name ends at the first space, '\n' or '\r', or at the end of
    `line`.  A name may end in '/' to denote a directory.

    Returns: the slice of `line` holding the filename.
    Throws: HarException if the name is missing, absolute, not valid
            UTF-8, or contains an empty or ".." component.
    */
    inout(char)[] parseFilename(inout(char)[] line)
    {
        if (line.length == 0 || isEndOfFileChar(line[0]))
            throw harFileException("missing filename");

        if (line[0] == '/')
            throw harFileException("absolute filenames are invalid");

        size_t start = 0;   // index where the current component begins
        size_t next = 0;    // index of the next undecoded code unit
        while (true)
        {
            auto cIndex = next;
            auto c = decode!(Yes.useReplacementDchar)(line, next);
            if (c == replacementDchar)
                throw harFileException("invalid utf8 sequence");

            if (c == '/')
            {
                checkComponent(line[start .. cIndex]);
                if (next >= line.length)
                    return line[0 .. next];
                start = next;
            }
            else if (isEndOfFileChar(c))
            {
                // start == cIndex can only happen directly after a '/'
                // (cIndex is never 0 here), i.e. the name is a directory
                // with a trailing slash; that is not a '//' error.
                if (cIndex > start)
                    checkComponent(line[start .. cIndex]);
                return line[0 .. cIndex];
            }

            if (next >= line.length)
            {
                checkComponent(line[start .. next]);
                return line[0 ..next];
            }
        }
    }

    /**
    Parses a quoted filename.

    Params:
        line = header text starting right after the opening quote
        afterFileIndex = receives the index just past the closing quote,
                    relative to the text that still includes the opening
                    quote (hence the `+ 1`)
    Returns: the slice of `line` between the quotes.  A name may end in
             '/' to denote a directory.
    Throws: HarException if the name is empty, absolute, not valid UTF-8,
            lacks a closing quote, or contains an empty or ".." component.
    */
    inout(char)[] parseQuotedFilename(inout(char)[] line, size_t* afterFileIndex)
    {
        if (line.length == 0)
            throw harFileException("filename missing end-quote");
        if (line[0] == '"')
            throw harFileException("empty filename");
        if (line[0] == '/')
            throw harFileException("absolute filenames are invalid");

        size_t start = 0;   // index where the current component begins
        size_t next = 0;    // index of the next undecoded code unit
        while(true)
        {
            auto cIndex = next;
            auto c = decode!(Yes.useReplacementDchar)(line, next);
            if (c == replacementDchar)
                throw harFileException("invalid utf8 sequence");

            if (c == '/')
            {
                checkComponent(line[start .. cIndex]);
                start = next;
            }
            else if (c == '"')
            {
                // As in parseFilename: an empty final component here means
                // the name ends in '/', which marks a directory.
                if (cIndex > start)
                    checkComponent(line[start .. cIndex]);
                *afterFileIndex = next + 1;
                return line[0 .. cIndex];
            }
            if (next >= line.length)
                throw harFileException("filename missing end-quote");
        }
    }
}
273 
/// Returns `str` with all leading ' ' characters removed.  Only the space
/// character is skipped; tabs and line terminators are left in place.
private inout(char)[] skipSpaces(inout(char)[] str)
{
    size_t offset = 0;
    while (offset < str.length && str[offset] == ' ')
        offset++;
    return str[offset .. $];
}
284 
/// Tells whether `c` terminates an unquoted filename: a space, a line feed
/// or a carriage return.
private bool isEndOfFileChar(C)(const(C) c)
{
    if (c == ' ')
        return true;
    return c == '\r' || c == '\n';
}
289 
/// Information parsed from one archive entry header line.
struct FileProperties
{
    /// Entry name as written in the archive (relative path; ends in '/'
    /// for a directory entry).  This is a slice of the parsed header line,
    /// not a copy.
    const(char)[] filename;
}
294 
/// Returns a formatter that prints a directory name, quoted if it contains
/// spaces.  An empty name is printed as ".".
auto formatDir(const(char)[] dir)
{
    return formatQuotedIfSpaces(dir.length == 0 ? "." : dir);
}
/// Returns a formatter that prints a file name, quoted if it contains
/// spaces.  The name must not be empty (checked by the `in` contract).
auto formatFile(const(char)[] file)
in
{
    assert(file.length > 0);
}
do
{
    return file.formatQuotedIfSpaces;
}
307 
// Builds a lazily-printed value out of one or more strings.  When printed,
// the strings are emitted back to back; if any one of them contains a space
// the whole output is wrapped in a single pair of double quotes.
auto formatQuotedIfSpaces(T...)(T args)
if (T.length > 0)
{
    struct Formatter
    {
        T args;
        void toString(scope void delegate(const(char)[]) sink) const
        {
            import std.string : indexOf;

            // Stop at the first piece that contains a space.
            bool quoted = false;
            foreach (piece; args)
            {
                quoted = piece.indexOf(' ') >= 0;
                if (quoted)
                    break;
            }

            if (quoted)
                sink(`"`);
            foreach (piece; args)
            {
                sink(piece);
            }
            if (quoted)
                sink(`"`);
        }
    }
    return Formatter(args);
}