ZipFile.Read.cs 44 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114
  1. // ZipFile.Read.cs
  2. // ------------------------------------------------------------------
  3. //
  4. // Copyright (c) 2009-2011 Dino Chiesa.
  5. // All rights reserved.
  6. //
  7. // This code module is part of DotNetZip, a zipfile class library.
  8. //
  9. // ------------------------------------------------------------------
  10. //
  11. // This code is licensed under the Microsoft Public License.
  12. // See the file License.txt for the license details.
  13. // More info on: http://dotnetzip.codeplex.com
  14. //
  15. // ------------------------------------------------------------------
  16. //
  17. // last saved (in emacs):
  18. // Time-stamp: <2011-August-05 11:38:59>
  19. //
  20. // ------------------------------------------------------------------
  21. //
  22. // This module defines the methods for Reading zip files.
  23. //
  24. // ------------------------------------------------------------------
  25. //
  26. using System;
  27. using System.IO;
  28. using System.Collections.Generic;
  29. namespace Ionic.Zip
  30. {
  31. /// <summary>
  32. /// A class for collecting the various options that can be used when
  33. /// Reading zip files for extraction or update.
  34. /// </summary>
  35. ///
  36. /// <remarks>
  37. /// <para>
  38. /// When reading a zip file, there are several options an
  39. /// application can set, to modify how the file is read, or what
  40. /// the library does while reading. This class collects those
  41. /// options into one container.
  42. /// </para>
  43. ///
  44. /// <para>
  45. /// Pass an instance of the <c>ReadOptions</c> class into the
  46. /// <c>ZipFile.Read()</c> method.
  47. /// </para>
  48. ///
  49. /// <seealso cref="ZipFile.Read(String, ReadOptions)"/>.
  50. /// <seealso cref="ZipFile.Read(Stream, ReadOptions)"/>.
  51. /// </remarks>
  public class ReadOptions
  {
      /// <summary>
      ///   Handler to attach to the <c>ReadProgress</c> event of the
      ///   <c>ZipFile</c> being read. Useful for driving a progress bar or
      ///   similar indicator while a large archive is opened. Leave it
      ///   <c>null</c> when no progress notification is wanted.
      /// </summary>
      public EventHandler<ReadProgressEventArgs> ReadProgress { get; set; }

      /// <summary>
      ///   Destination for verbose status messages emitted while the archive
      ///   is read. A console application can pass <c>System.Console.Out</c>;
      ///   a graphical or headless application can capture the text with
      ///   another <c>TextWriter</c> such as a <c>System.IO.StringWriter</c>.
      /// </summary>
      public System.IO.TextWriter StatusMessageWriter { get; set; }

      /// <summary>
      ///   The <c>System.Text.Encoding</c> to apply when reading the archive.
      ///   When left <c>null</c>, <c>ZipFile.Read()</c> substitutes the
      ///   library's default encoding. Choose carefully: if this differs from
      ///   the encoding used when the archive was created (possibly by a
      ///   different archiver), expect unexpected results and possibly
      ///   exceptions.
      /// </summary>
      ///
      /// <seealso cref="ZipFile.ProvisionalAlternateEncoding"/>
      ///
      public System.Text.Encoding Encoding { get; set; }
  }
  82. public partial class ZipFile
  83. {
  84. /// <summary>
  85. /// Reads a zip file archive and returns the instance.
  86. /// </summary>
  87. ///
  88. /// <remarks>
  89. /// <para>
  90. /// The stream is read using the default <c>System.Text.Encoding</c>, which is the
  91. /// <c>IBM437</c> codepage.
  92. /// </para>
  93. /// </remarks>
  94. ///
  95. /// <exception cref="System.Exception">
  96. /// Thrown if the <c>ZipFile</c> cannot be read. The implementation of this method
  97. /// relies on <c>System.IO.File.OpenRead</c>, which can throw a variety of exceptions,
  98. /// including specific exceptions if a file is not found, an unauthorized access
  99. /// exception, exceptions for poorly formatted filenames, and so on.
  100. /// </exception>
  101. ///
  102. /// <param name="fileName">
  103. /// The name of the zip archive to open. This can be a fully-qualified or relative
  104. /// pathname.
  105. /// </param>
  106. ///
  107. /// <seealso cref="ZipFile.Read(String, ReadOptions)"/>.
  108. ///
  109. /// <returns>The instance read from the zip archive.</returns>
  110. ///
  public static ZipFile Read(string fileName)
  {
      // No status writer, no explicit encoding and no progress handler: the
      // four-argument overload substitutes DefaultEncoding for a null encoding.
      TextWriter noStatusWriter = null;
      System.Text.Encoding noEncoding = null;
      EventHandler<ReadProgressEventArgs> noProgressHandler = null;

      return Read(fileName, noStatusWriter, noEncoding, noProgressHandler);
  }
  115. /// <summary>
  116. /// Reads a zip file archive from the named filesystem file using the
  117. /// specified options.
  118. /// </summary>
  119. ///
  120. /// <remarks>
  121. /// <para>
  122. /// This version of the <c>Read()</c> method allows the caller to pass
  123. /// in a <c>TextWriter</c> and an <c>Encoding</c>, via an instance of the
  124. /// <c>ReadOptions</c> class. The <c>ZipFile</c> is read in using the
  125. /// specified encoding for entries where UTF-8 encoding is not
  126. /// explicitly specified.
  127. /// </para>
  128. /// </remarks>
  129. ///
  130. /// <example>
  131. ///
  132. /// <para>
  133. /// This example shows how to read a zip file using the Big-5 Chinese
  134. /// code page (950), and extract each entry in the zip file, while
  135. /// sending status messages out to the Console.
  136. /// </para>
  137. ///
  138. /// <para>
  139. /// For this code to work as intended, the zipfile must have been
  140. /// created using the big5 code page (CP950). This is typical, for
  141. /// example, when using WinRar on a machine with CP950 set as the
  142. /// default code page. In that case, the names of entries within the
  143. /// Zip archive will be stored in that code page, and reading the zip
  144. /// archive must be done using that code page. If the application did
  145. /// not use the correct code page in ZipFile.Read(), then names of
  146. /// entries within the zip archive would not be correctly retrieved.
  147. /// </para>
  148. ///
  149. /// <code lang="C#">
  150. /// string zipToExtract = "MyArchive.zip";
  151. /// string extractDirectory = "extract";
  152. /// var options = new ReadOptions
  153. /// {
  154. /// StatusMessageWriter = System.Console.Out,
  155. /// Encoding = System.Text.Encoding.GetEncoding(950)
  156. /// };
  157. /// using (ZipFile zip = ZipFile.Read(zipToExtract, options))
  158. /// {
  159. /// foreach (ZipEntry e in zip)
  160. /// {
  161. /// e.Extract(extractDirectory);
  162. /// }
  163. /// }
  164. /// </code>
  165. ///
  166. ///
  167. /// <code lang="VB">
  168. /// Dim zipToExtract as String = "MyArchive.zip"
  169. /// Dim extractDirectory as String = "extract"
  170. /// Dim options as New ReadOptions
  171. /// options.Encoding = System.Text.Encoding.GetEncoding(950)
  172. /// options.StatusMessageWriter = System.Console.Out
  173. /// Using zip As ZipFile = ZipFile.Read(zipToExtract, options)
  174. /// Dim e As ZipEntry
  175. /// For Each e In zip
  176. /// e.Extract(extractDirectory)
  177. /// Next
  178. /// End Using
  179. /// </code>
  180. /// </example>
  181. ///
  182. ///
  183. /// <example>
  184. ///
  185. /// <para>
  186. /// This example shows how to read a zip file using the default
  187. /// code page, to remove entries that have a modified date before a given threshold,
  188. /// sending status messages out to a <c>StringWriter</c>.
  189. /// </para>
  190. ///
  191. /// <code lang="C#">
  192. /// var options = new ReadOptions
  193. /// {
  194. /// StatusMessageWriter = new System.IO.StringWriter()
  195. /// };
  196. /// using (ZipFile zip = ZipFile.Read("PackedDocuments.zip", options))
  197. /// {
  198. /// var Threshold = new DateTime(2007,7,4);
  199. /// // We cannot remove the entry from the list, within the context of
  200. /// // an enumeration of said list.
  201. /// // So we add the doomed entry to a list to be removed later.
  202. /// // pass 1: mark the entries for removal
  203. /// var MarkedEntries = new System.Collections.Generic.List&lt;ZipEntry&gt;();
  204. /// foreach (ZipEntry e in zip)
  205. /// {
  206. /// if (e.LastModified &lt; Threshold)
  207. /// MarkedEntries.Add(e);
  208. /// }
  209. /// // pass 2: actually remove the entry.
  210. /// foreach (ZipEntry zombie in MarkedEntries)
  211. /// zip.RemoveEntry(zombie);
  212. /// zip.Comment = "This archive has been updated.";
  213. /// zip.Save();
  214. /// }
  215. /// // can now use contents of sw, eg store in an audit log
  216. /// </code>
  217. ///
  218. /// <code lang="VB">
  219. /// Dim options as New ReadOptions
  220. /// options.StatusMessageWriter = New System.IO.StringWriter
  221. /// Using zip As ZipFile = ZipFile.Read("PackedDocuments.zip", options)
  222. /// Dim Threshold As New DateTime(2007, 7, 4)
  223. /// ' We cannot remove the entry from the list, within the context of
  224. /// ' an enumeration of said list.
  225. /// ' So we add the doomed entry to a list to be removed later.
  226. /// ' pass 1: mark the entries for removal
  227. /// Dim MarkedEntries As New System.Collections.Generic.List(Of ZipEntry)
  228. /// Dim e As ZipEntry
  229. /// For Each e In zip
  230. /// If (e.LastModified &lt; Threshold) Then
  231. /// MarkedEntries.Add(e)
  232. /// End If
  233. /// Next
  234. /// ' pass 2: actually remove the entry.
  235. /// Dim zombie As ZipEntry
  236. /// For Each zombie In MarkedEntries
  237. /// zip.RemoveEntry(zombie)
  238. /// Next
  239. /// zip.Comment = "This archive has been updated."
  240. /// zip.Save
  241. /// End Using
  242. /// ' can now use contents of sw, eg store in an audit log
  243. /// </code>
  244. /// </example>
  245. ///
  246. /// <exception cref="System.Exception">
  247. /// Thrown if the zipfile cannot be read. The implementation of
  248. /// this method relies on <c>System.IO.File.OpenRead</c>, which
  249. /// can throw a variety of exceptions, including specific
  250. /// exceptions if a file is not found, an unauthorized access
  251. /// exception, exceptions for poorly formatted filenames, and so
  252. /// on.
  253. /// </exception>
  254. ///
  255. /// <param name="fileName">
  256. /// The name of the zip archive to open.
  257. /// This can be a fully-qualified or relative pathname.
  258. /// </param>
  259. ///
  260. /// <param name="options">
  261. /// The set of options to use when reading the zip file.
  262. /// </param>
  263. ///
  264. /// <returns>The ZipFile instance read from the zip archive.</returns>
  265. ///
  266. /// <seealso cref="ZipFile.Read(Stream, ReadOptions)"/>
  267. ///
  public static ZipFile Read(string fileName,
                             ReadOptions options)
  {
      if (options != null)
      {
          // Unpack the option bag and delegate to the worker overload.
          TextWriter statusWriter = options.StatusMessageWriter;
          System.Text.Encoding textEncoding = options.Encoding;
          EventHandler<ReadProgressEventArgs> progressHandler = options.ReadProgress;

          return Read(fileName, statusWriter, textEncoding, progressHandler);
      }

      throw new ArgumentNullException("options");
  }
  278. /// <summary>
  279. /// Reads a zip file archive using the specified text encoding, the specified
  280. /// TextWriter for status messages, and the specified ReadProgress event handler,
  281. /// and returns the instance.
  282. /// </summary>
  283. ///
  284. /// <param name="fileName">
  285. /// The name of the zip archive to open.
  286. /// This can be a fully-qualified or relative pathname.
  287. /// </param>
  288. ///
  289. /// <param name="readProgress">
  290. /// An event handler for Read operations.
  291. /// </param>
  292. ///
  293. /// <param name="statusMessageWriter">
  294. /// The <c>System.IO.TextWriter</c> to use for writing verbose status messages
  295. /// during operations on the zip archive. A console application may wish to
  296. /// pass <c>System.Console.Out</c> to get messages on the Console. A graphical
  297. /// or headless application may wish to capture the messages in a different
  298. /// <c>TextWriter</c>, such as a <c>System.IO.StringWriter</c>.
  299. /// </param>
  300. ///
  301. /// <param name="encoding">
  302. /// The <c>System.Text.Encoding</c> to use when reading in the zip archive. Be
  303. /// careful specifying the encoding. If the value you use here is not the same
  304. /// as the Encoding used when the zip archive was created (possibly by a
  305. /// different archiver) you will get unexpected results and possibly exceptions.
  306. /// </param>
  307. ///
  308. /// <returns>The instance read from the zip archive.</returns>
  309. ///
  private static ZipFile Read(string fileName,
                              TextWriter statusMessageWriter,
                              System.Text.Encoding encoding,
                              EventHandler<ReadProgressEventArgs> readProgress)
  {
      var archive = new ZipFile();

      // A null encoding means "use the library default".
      archive.AlternateEncoding = (encoding != null) ? encoding : DefaultEncoding;
      archive.AlternateEncodingUsage = ZipOption.Always;
      archive._StatusMessageTextWriter = statusMessageWriter;
      archive._name = fileName;

      if (readProgress != null)
      {
          archive.ReadProgress = readProgress;
      }

      if (archive.Verbose)
      {
          archive._StatusMessageTextWriter.WriteLine("reading from {0}...", fileName);
      }

      ReadIntoInstance(archive);

      // The archive was loaded from a file that is already on disk.
      archive._fileAlreadyExists = true;
      return archive;
  }
  327. /// <summary>
  328. /// Reads a zip archive from a stream.
  329. /// </summary>
  330. ///
  331. /// <remarks>
  332. ///
  333. /// <para>
  334. /// When reading from a file, it's probably easier to just use
  335. /// <see cref="ZipFile.Read(String,
  336. /// ReadOptions)">ZipFile.Read(String, ReadOptions)</see>. This
  337. /// overload is useful when the zip archive content is
  338. /// available from an already-open stream. The stream must be
  339. /// open and readable and seekable when calling this method. The
  340. /// stream is left open when the reading is completed.
  341. /// </para>
  342. ///
  343. /// <para>
  344. /// Using this overload, the stream is read using the default
  345. /// <c>System.Text.Encoding</c>, which is the <c>IBM437</c>
  346. /// codepage. If you want to specify the encoding to use when
  347. /// reading the zipfile content, see
  348. /// <see cref="ZipFile.Read(Stream,
  349. /// ReadOptions)">ZipFile.Read(Stream, ReadOptions)</see>.
  350. /// </para>
  351. ///
  352. /// <para>
  353. /// Reading of zip content begins at the current position in the
  354. /// stream. This means if you have a stream that concatenates
  355. /// regular data and zip data, if you position the open, readable
  356. /// stream at the start of the zip data, you will be able to read
  357. /// the zip archive using this constructor, or any of the ZipFile
  358. /// constructors that accept a <see cref="System.IO.Stream" /> as
  359. /// input. Some examples of where this might be useful: the zip
  360. /// content is concatenated at the end of a regular EXE file, as
  361. /// some self-extracting archives do. (Note: SFX files produced
  362. /// by DotNetZip do not work this way; they can be read as normal
  363. /// ZIP files). Another example might be a stream being read from
  364. /// a database, where the zip content is embedded within an
  365. /// aggregate stream of data.
  366. /// </para>
  367. ///
  368. /// </remarks>
  369. ///
  370. /// <example>
  371. /// <para>
  372. /// This example shows how to Read zip content from a stream, and
  373. /// extract one entry into a different stream. In this example,
  374. /// the filename "NameOfEntryInArchive.doc", refers only to the
  375. /// name of the entry within the zip archive. A file by that
  376. /// name is not created in the filesystem. The I/O is done
  377. /// strictly with the given streams.
  378. /// </para>
  379. ///
  380. /// <code>
  381. /// using (ZipFile zip = ZipFile.Read(InputStream))
  382. /// {
  383. /// zip.Extract("NameOfEntryInArchive.doc", OutputStream);
  384. /// }
  385. /// </code>
  386. ///
  387. /// <code lang="VB">
  388. /// Using zip as ZipFile = ZipFile.Read(InputStream)
  389. /// zip.Extract("NameOfEntryInArchive.doc", OutputStream)
  390. /// End Using
  391. /// </code>
  392. /// </example>
  393. ///
  394. /// <param name="zipStream">the stream containing the zip data.</param>
  395. ///
  396. /// <returns>The ZipFile instance read from the stream</returns>
  397. ///
  public static ZipFile Read(Stream zipStream)
  {
      // No status writer, no explicit encoding and no progress handler: the
      // four-argument overload substitutes DefaultEncoding for a null encoding.
      TextWriter noStatusWriter = null;
      System.Text.Encoding noEncoding = null;
      EventHandler<ReadProgressEventArgs> noProgressHandler = null;

      return Read(zipStream, noStatusWriter, noEncoding, noProgressHandler);
  }
  402. /// <summary>
  403. /// Reads a zip file archive from the given stream using the
  404. /// specified options.
  405. /// </summary>
  406. ///
  407. /// <remarks>
  408. ///
  409. /// <para>
  410. /// When reading from a file, it's probably easier to just use
  411. /// <see cref="ZipFile.Read(String,
  412. /// ReadOptions)">ZipFile.Read(String, ReadOptions)</see>. This
  413. /// overload is useful when the zip archive content is
  414. /// available from an already-open stream. The stream must be
  415. /// open and readable and seekable when calling this method. The
  416. /// stream is left open when the reading is completed.
  417. /// </para>
  418. ///
  419. /// <para>
  420. /// Reading of zip content begins at the current position in the
  421. /// stream. This means if you have a stream that concatenates
  422. /// regular data and zip data, if you position the open, readable
  423. /// stream at the start of the zip data, you will be able to read
  424. /// the zip archive using this constructor, or any of the ZipFile
  425. /// constructors that accept a <see cref="System.IO.Stream" /> as
  426. /// input. Some examples of where this might be useful: the zip
  427. /// content is concatenated at the end of a regular EXE file, as
  428. /// some self-extracting archives do. (Note: SFX files produced
  429. /// by DotNetZip do not work this way; they can be read as normal
  430. /// ZIP files). Another example might be a stream being read from
  431. /// a database, where the zip content is embedded within an
  432. /// aggregate stream of data.
  433. /// </para>
  434. /// </remarks>
  435. ///
  436. /// <param name="zipStream">the stream containing the zip data.</param>
  437. ///
  438. /// <param name="options">
  439. /// The set of options to use when reading the zip file.
  440. /// </param>
  441. ///
  442. /// <exception cref="System.Exception">
  443. /// Thrown if the zip archive cannot be read.
  444. /// </exception>
  445. ///
  446. /// <returns>The ZipFile instance read from the stream.</returns>
  447. ///
  448. /// <seealso cref="ZipFile.Read(String, ReadOptions)"/>
  449. ///
  public static ZipFile Read(Stream zipStream, ReadOptions options)
  {
      if (options != null)
      {
          // Unpack the option bag and delegate to the worker overload.
          TextWriter statusWriter = options.StatusMessageWriter;
          System.Text.Encoding textEncoding = options.Encoding;
          EventHandler<ReadProgressEventArgs> progressHandler = options.ReadProgress;

          return Read(zipStream, statusWriter, textEncoding, progressHandler);
      }

      throw new ArgumentNullException("options");
  }
  459. /// <summary>
  460. /// Reads a zip archive from a stream, using the specified text Encoding, the
  461. /// specified TextWriter for status messages,
  462. /// and the specified ReadProgress event handler.
  463. /// </summary>
  464. ///
  465. /// <remarks>
  466. /// <para>
  467. /// Reading of zip content begins at the current position in the stream. This
  468. /// means if you have a stream that concatenates regular data and zip data, if
  469. /// you position the open, readable stream at the start of the zip data, you
  470. /// will be able to read the zip archive using this constructor, or any of the
  471. /// ZipFile constructors that accept a <see cref="System.IO.Stream" /> as
  472. /// input. Some examples of where this might be useful: the zip content is
  473. /// concatenated at the end of a regular EXE file, as some self-extracting
  474. /// archives do. (Note: SFX files produced by DotNetZip do not work this
  475. /// way). Another example might be a stream being read from a database, where
  476. /// the zip content is embedded within an aggregate stream of data.
  477. /// </para>
  478. /// </remarks>
  479. ///
  480. /// <param name="zipStream">the stream containing the zip data.</param>
  481. ///
  482. /// <param name="statusMessageWriter">
  483. /// The <c>System.IO.TextWriter</c> to which verbose status messages are written
  484. /// during operations on the <c>ZipFile</c>. For example, in a console
  485. /// application, System.Console.Out works, and will get a message for each entry
  486. /// added to the ZipFile. If the TextWriter is <c>null</c>, no verbose messages
  487. /// are written.
  488. /// </param>
  489. ///
  490. /// <param name="encoding">
  491. /// The text encoding to use when reading entries that do not have the UTF-8
  492. /// encoding bit set. Be careful specifying the encoding. If the value you use
  493. /// here is not the same as the Encoding used when the zip archive was created
  494. /// (possibly by a different archiver) you will get unexpected results and
  495. /// possibly exceptions. See the <see cref="ProvisionalAlternateEncoding"/>
  496. /// property for more information.
  497. /// </param>
  498. ///
  499. /// <param name="readProgress">
  500. /// An event handler for Read operations.
  501. /// </param>
  502. ///
  503. /// <returns>an instance of ZipFile</returns>
  private static ZipFile Read(Stream zipStream,
                              TextWriter statusMessageWriter,
                              System.Text.Encoding encoding,
                              EventHandler<ReadProgressEventArgs> readProgress)
  {
      if (zipStream == null)
      {
          throw new ArgumentNullException("zipStream");
      }

      var archive = new ZipFile();
      archive._StatusMessageTextWriter = statusMessageWriter;

      // A null encoding means "use the library default".
      archive._alternateEncoding = (encoding != null) ? encoding : ZipFile.DefaultEncoding;
      archive._alternateEncodingUsage = ZipOption.Always;

      if (readProgress != null)
      {
          archive.ReadProgress += readProgress;
      }

      // A stream that is not positioned at its start is wrapped in an
      // OffsetStream; a stream at position zero is used directly.
      if (zipStream.Position != 0L)
      {
          archive._readstream = new OffsetStream(zipStream);
      }
      else
      {
          archive._readstream = zipStream;
      }

      // The caller supplied the stream, so it is not flagged as ours; the
      // error path in ReadIntoInstance only closes streams flagged as ours.
      archive._ReadStreamIsOurs = false;

      if (archive.Verbose)
      {
          archive._StatusMessageTextWriter.WriteLine("reading from stream...");
      }

      ReadIntoInstance(archive);
      return archive;
  }
  /// <summary>
  ///   Fills <paramref name="zf"/> with the entries found on its read stream.
  /// </summary>
  /// <remarks>
  ///   For a seekable stream, the method looks for the end-of-central-directory
  ///   (EoCD) signature near the end of the stream and, when it is found,
  ///   reads the central directory. If the signature is not found, or the
  ///   stream is not seekable, it falls back to
  ///   <c>ReadIntoInstance_Orig</c>.
  /// </remarks>
  /// <param name="zf">The instance to populate.</param>
  /// <exception cref="ZipException">
  ///   Wraps any exception raised while reading. When the read stream is
  ///   flagged as owned by the instance, it is closed before the exception
  ///   is thrown.
  /// </exception>
  private static void ReadIntoInstance(ZipFile zf)
  {
      // Obtained outside the try block, so an exception raised while getting
      // the stream is NOT wrapped in a ZipException.
      Stream s = zf.ReadStream;
      try
      {
          zf._readName = zf._name; // workitem 13915
          if (!s.CanSeek)
          {
              // No way to look for the central directory from the end.
              ReadIntoInstance_Orig(zf);
              return;
          }

          zf.OnReadStarted();

          // workitem 8098: every seek below is absolute (SeekOrigin.Begin).

          // Try reading the central directory, rather than scanning the file.
          uint datum = ReadFirstFourBytes(s);
          if (datum == ZipConstants.EndOfCentralDirectorySignature)
              return; // the stream starts with the EoCD record; nothing else is read

          // start at the end of the file...
          // seek backwards a bit, then look for the EoCD signature.
          int nTries = 0;
          bool success = false;

          // The size of the end-of-central-directory-footer plus 2 bytes is 18.
          // This implies an archive comment length of 0. We'll add a margin of
          // safety and start "in front" of that, when looking for the
          // EndOfCentralDirectorySignature
          long posn = s.Length - 64;
          long maxSeekback = Math.Max(s.Length - 0x4000, 10);
          do
          {
              if (posn < 0) posn = 0; // BOF
              s.Seek(posn, SeekOrigin.Begin);
              long bytesRead = SharedUtilities.FindSignature(s, (int)ZipConstants.EndOfCentralDirectorySignature);
              if (bytesRead != -1)
                  success = true;
              else
              {
                  if (posn == 0) break; // started at the BOF and found nothing
                  nTries++;
                  // Weird: with NETCF, negative offsets from SeekOrigin.End DO
                  // NOT WORK. So rather than seek a negative offset, we seek
                  // from SeekOrigin.Begin using a smaller number.
                  // The step back grows with every failed attempt.
                  posn -= (32 * (nTries + 1) * nTries);
              }
          }
          while (!success && posn > maxSeekback);

          if (success)
          {
              // workitem 8299
              // The stream is positioned just past the 4-byte signature.
              zf._locEndOfCDS = s.Position - 4;

              // NOTE(review): the count returned by Read is not checked; a
              // stream truncated inside the EoCD record leaves zeros in block.
              byte[] block = new byte[16];
              s.Read(block, 0, block.Length);

              zf._diskNumberWithCd = BitConverter.ToUInt16(block, 2);
              if (zf._diskNumberWithCd == 0xFFFF)
                  throw new ZipException("Spanned archives with more than 65534 segments are not supported at this time.");
              zf._diskNumberWithCd++; // I think the number in the file differs from reality by 1

              // Bytes 12..15 of the block hold the 32-bit offset that is
              // seeked to below; 0xFFFFFFFF redirects to the ZIP64 structures.
              uint offset32 = BitConverter.ToUInt32(block, 12);
              if (offset32 == 0xFFFFFFFF)
              {
                  Zip64SeekToCentralDirectory(zf);
              }
              else
              {
                  zf._OffsetOfCentralDirectory = offset32;
                  // change for workitem 8098
                  s.Seek(offset32, SeekOrigin.Begin);
              }

              ReadCentralDirectory(zf);
          }
          else
          {
              // Could not find the central directory.
              // Fallback to the old method.
              // workitem 8098: ok
              s.Seek(0L, SeekOrigin.Begin);
              ReadIntoInstance_Orig(zf);
          }
      }
      catch (Exception ex1)
      {
          if (zf._ReadStreamIsOurs && zf._readstream != null)
          {
              try
              {
#if NETCF
                  zf._readstream.Close();
#else
                  zf._readstream.Dispose();
#endif
              }
              catch (Exception)
              {
                  // Best-effort cleanup. A failure while closing the stream
                  // must not replace the original error, which is reported
                  // through the ZipException thrown below.
              }
              finally
              {
                  zf._readstream = null;
              }
          }
          throw new ZipException("Cannot read that as a ZipFile", ex1);
      }

      // the instance has been read in
      zf._contentsChanged = false;
  }
  /// <summary>
  ///   Positions the read stream of <paramref name="zf"/> at the start of the
  ///   central directory of a ZIP64 archive.
  /// </summary>
  /// <remarks>
  ///   Called by <c>ReadIntoInstance</c> when the 32-bit central directory
  ///   offset in the end-of-central-directory record is 0xFFFFFFFF. On entry
  ///   the stream must be positioned where that caller leaves it: 20 bytes
  ///   past the start of the EoCD signature.
  /// </remarks>
  /// <param name="zf">The instance whose read stream is repositioned.</param>
  /// <exception cref="BadReadException">
  ///   The ZIP64 EoCD record signature is not found at the recorded offset,
  ///   the record declares a size too small to contain the central directory
  ///   offset, or the stream ends inside the record.
  /// </exception>
  private static void Zip64SeekToCentralDirectory(ZipFile zf)
  {
      // Fixed-size portion of the ZIP64 EoCD record that follows its 8-byte
      // size field; the central directory offset occupies bytes 36..43.
      const int FixedRecordBytes = 44;

      Stream s = zf.ReadStream;
      byte[] block = new byte[16];

      // seek back to find the ZIP64 EoCD.
      // I think this might not work for .NET CF ?
      // Moving back 40 bytes lands 20 bytes before the EoCD signature. The
      // first 16 bytes there are read, and the 64-bit value at offset 8 is
      // the position of the ZIP64 EoCD record.
      s.Seek(-40, SeekOrigin.Current);
      s.Read(block, 0, 16);
      Int64 offset64 = BitConverter.ToInt64(block, 8);

      zf._OffsetOfCentralDirectory = 0xFFFFFFFF;
      // NOTE(review): this stores the position of the ZIP64 EoCD *record*,
      // not the central directory offset read from that record further
      // down. Confirm that this is what users of the field expect.
      zf._OffsetOfCentralDirectory64 = offset64;

      // change for workitem 8098
      s.Seek(offset64, SeekOrigin.Begin);

      uint datum = (uint)Ionic.Zip.SharedUtilities.ReadInt(s);
      if (datum != ZipConstants.Zip64EndOfCentralDirectoryRecordSignature)
          throw new BadReadException(String.Format(" Bad signature (0x{0:X8}) looking for ZIP64 EoCD Record at position 0x{1:X8}", datum, s.Position));

      s.Read(block, 0, 8);
      Int64 recordSize = BitConverter.ToInt64(block, 0);

      // The size comes from the archive and cannot be trusted. It is used
      // only as a sanity check and never as an allocation size, so a corrupt
      // or hostile value cannot trigger a huge allocation or an overflow.
      if (recordSize < FixedRecordBytes)
          throw new BadReadException(String.Format(" Bad size ({0}) in ZIP64 EoCD Record at position 0x{1:X8}", recordSize, s.Position));

      // Only the fixed fields are needed. Stream.Read may return fewer bytes
      // than requested, so loop until the buffer is full or the stream ends.
      block = new byte[FixedRecordBytes];
      int filled = 0;
      while (filled < block.Length)
      {
          int n = s.Read(block, filled, block.Length - filled);
          if (n <= 0) break;
          filled += n;
      }
      if (filled < block.Length)
          throw new BadReadException(String.Format(" Unexpected end of stream reading ZIP64 EoCD Record at position 0x{0:X8}", s.Position));

      offset64 = BitConverter.ToInt64(block, 36);

      // change for workitem 8098
      // The seek is absolute, so how much of the record was consumed above
      // does not affect where the stream ends up.
      s.Seek(offset64, SeekOrigin.Begin);
  }
  private static uint ReadFirstFourBytes(Stream s)
  {
      // Reads one int from the stream's current position via
      // SharedUtilities.ReadInt and returns it converted to unsigned.
      return (uint)SharedUtilities.ReadInt(s);
  }
  private static void ReadCentralDirectory(ZipFile zf)
  {
      // The central directory footer record is required in order to read
      // directory entries properly: the entry-reading logic decides when to
      // open a spanned file by comparing the volume number of the central
      // directory with the volume number of the entry. _diskNumberWithCd was
      // set when the offset of the central directory was located.

      // A dictionary stands in for a hash set of the names already read.
      var seenNames = new Dictionary<String, object>(StringComparer.Ordinal);

      // workitem 9214
      bool sawZip64Entry = false;

      for (ZipEntry dirEntry = ZipEntry.ReadDirEntry(zf, seenNames);
           dirEntry != null;
           dirEntry = ZipEntry.ReadDirEntry(zf, seenNames))
      {
          dirEntry.ResetDirEntry();
          zf.OnReadEntry(true, null);

          if (zf.Verbose)
          {
              zf.StatusMessageTextWriter.WriteLine("entry {0}", dirEntry.FileName);
          }

          zf._entries.Add(dirEntry.FileName, dirEntry);
          if (!zf._entriesInsensitive.ContainsKey(dirEntry.FileName))
          {
              zf._entriesInsensitive.Add(dirEntry.FileName, dirEntry);
          }

          // workitem 9214
          sawZip64Entry |= dirEntry._InputUsesZip64;

          // Remember the name so a duplicate can be detected.
          seenNames.Add(dirEntry.FileName, null);
      }

      // workitem 9214: auto-set the zip64 flag
      if (sawZip64Entry)
      {
          zf.UseZip64WhenSaving = Zip64Option.Always;
      }

      // workitem 8299
      if (zf._locEndOfCDS > 0)
      {
          zf.ReadStream.Seek(zf._locEndOfCDS, SeekOrigin.Begin);
      }

      ReadCentralDirectoryFooter(zf);

      if (zf.Verbose)
      {
          if (!String.IsNullOrEmpty(zf.Comment))
          {
              zf.StatusMessageTextWriter.WriteLine("Zip file Comment: {0}", zf.Comment);
          }

          // The read stream is kept open after reading.
          zf.StatusMessageTextWriter.WriteLine("read in {0} entries.", zf._entries.Count);
      }

      zf.OnReadCompleted();
  }
  // Builds the table of contents by reading each entry in the file, one after
  // another, and then makes a best-effort pass over the central directory to
  // pick up the per-entry comment and directory flag, followed by the central
  // directory footer.  ZipException and IOException raised during that second
  // pass are deliberately ignored, because the central directory may be
  // corrupted (workitem 9912).
  private static void ReadIntoInstance_Orig(ZipFile zf)
  {
      zf.OnReadStarted();
      zf._entries.Clear();
      zf._entriesInsensitive.Clear();

      if (zf.Verbose)
      {
          if (zf.Name == null)
              zf.StatusMessageTextWriter.WriteLine("Reading zip from stream...");
          else
              zf.StatusMessageTextWriter.WriteLine("Reading zip {0}...", zf.Name);
      }

      // work item 6647: PK00 (packed to removable disk)
      bool firstEntry = true;
      ZipContainer zc = new ZipContainer(zf);
      ZipEntry e;
      while ((e = ZipEntry.ReadEntry(zc, firstEntry)) != null)
      {
          if (zf.Verbose)
              zf.StatusMessageTextWriter.WriteLine(" {0}", e.FileName);

          zf._entries.Add(e.FileName, e);

          // The secondary table keeps only the first entry registered under a key.
          if (!zf._entriesInsensitive.ContainsKey(e.FileName))
              zf._entriesInsensitive.Add(e.FileName, e);

          firstEntry = false;
      }

      // Read the zipfile's central directory structure here.
      // workitem 9912: because it may be corrupted, ignore errors.
      try
      {
          ZipEntry de;
          // in lieu of hashset, use a dictionary
          var previouslySeen = new Dictionary<String, Object>(StringComparer.Ordinal);
          while ((de = ZipEntry.ReadDirEntry(zf, previouslySeen)) != null)
          {
              // Housekeeping: since ZipFile exposes ZipEntry elements in the
              // enumerator, copy the comment grabbed from the directory entry
              // into the ZipEntry so the application can access it.  Also,
              // since ZipEntry is used to write zip files, carry over the
              // directory attribute as appropriate.
              //
              // Use TryGetValue rather than the indexer: a directory record
              // that names an entry which was not found in the first pass must
              // be skipped, not surface as a KeyNotFoundException that the
              // handlers below do not catch.
              ZipEntry e1;
              if (zf._entries.TryGetValue(de.FileName, out e1) && e1 != null)
              {
                  e1._Comment = de.Comment;
                  if (de.IsDirectory) e1.MarkAsDirectory();
              }

              previouslySeen.Add(de.FileName, null); // to prevent dupes
          }

          // workitem 8299
          if (zf._locEndOfCDS > 0)
              zf.ReadStream.Seek(zf._locEndOfCDS, SeekOrigin.Begin);

          ReadCentralDirectoryFooter(zf);

          if (zf.Verbose && !String.IsNullOrEmpty(zf.Comment))
              zf.StatusMessageTextWriter.WriteLine("Zip file Comment: {0}", zf.Comment);
      }
      catch (ZipException) { }
      catch (IOException) { }

      zf.OnReadCompleted();
  }
  // Reads the records that close the central directory, starting at the
  // current position of zf.ReadStream:
  //
  //   - an optional ZIP64 end-of-central-directory record followed by the
  //     ZIP64 end-of-central-directory locator, then
  //   - the classic end-of-central-directory record, then
  //   - the archive comment (via ReadZipFileComment).
  //
  // Sets zf._versionMadeBy, zf._versionNeededToExtract and zf._diskNumberWithCd
  // from the ZIP64 record when one is present; zf._diskNumberWithCd falls back
  // to the 16-bit value in the classic record when it is still zero.
  //
  // Throws ZipException when the ZIP64 metadata is inconsistent, and
  // BadReadException when the classic end-of-central-directory signature is
  // not found where expected.
  //
  // NOTE(review): the return value of Stream.Read is not checked here, so a
  // stream that returns short reads would leave part of a block zero-filled.
  private static void ReadCentralDirectoryFooter(ZipFile zf)
  {
      Stream s = zf.ReadStream;
      int signature = Ionic.Zip.SharedUtilities.ReadSignature(s);
      byte[] block = null;

      if (signature == ZipConstants.Zip64EndOfCentralDirectoryRecordSignature)
      {
          // We have a ZIP64 EOCD.
          // This data block is 4 bytes sig, 8 bytes size, 44 bytes fixed data,
          // followed by a variable-sized extension block.  The signature has
          // already been consumed, so offsets below are relative to the size field.
          //
          // off  sz  data
          // -------------------------------------------------------
          //  0   8   datasize (64 bits)
          //  8   2   version made by
          // 10   2   version needed to extract
          // 12   4   number of this disk
          // 16   4   number of the disk with the start of the CD
          // 20   8   total number of entries in the CD on this disk
          // 28   8   total number of entries in the CD
          // 36   8   size of the CD
          // 44   8   offset of the CD
          // -----------------------
          // 52 bytes
          block = new byte[8 + 44];
          s.Read(block, 0, block.Length);

          Int64 DataSize = BitConverter.ToInt64(block, 0); // == 44 + the variable length
          if (DataSize < 44)
              throw new ZipException("Bad size in the ZIP64 Central Directory.");

          // Start the field cursor after the 8-byte size field; starting at 0
          // would decode the version fields out of the size itself.
          int j = 8;
          zf._versionMadeBy = BitConverter.ToUInt16(block, j);
          j += 2;
          zf._versionNeededToExtract = BitConverter.ToUInt16(block, j);
          j += 2;
          j += 4; // skip "number of this disk"; it is not used here
          zf._diskNumberWithCd = BitConverter.ToUInt32(block, j);
          //zf._diskNumberWithCd++; // hack!!

          // Read the extended block and discard the result.
          block = new byte[DataSize - 44];
          s.Read(block, 0, block.Length);

          signature = Ionic.Zip.SharedUtilities.ReadSignature(s);
          if (signature != ZipConstants.Zip64EndOfCentralDirectoryLocatorSignature)
              throw new ZipException("Inconsistent metadata in the ZIP64 Central Directory.");

          // Read the 16 bytes that follow the locator signature and discard the result.
          block = new byte[16];
          s.Read(block, 0, block.Length);

          signature = Ionic.Zip.SharedUtilities.ReadSignature(s);
      }

      // Throw if this is not a signature for "end of central directory record".
      // This is a sanity check.
      if (signature != ZipConstants.EndOfCentralDirectorySignature)
      {
          // Step back over the signature so the reported position is where it began.
          s.Seek(-4, SeekOrigin.Current);
          throw new BadReadException(String.Format("Bad signature ({0:X8}) at position 0x{1:X8}",
                                                   signature, s.Position));
      }

      // Read the End-of-Central-Directory-Record.  The signature has already
      // been consumed, so block[n] corresponds to offset n + 4 in this table.
      //
      // off sz  data
      // -------------------------------------------------------
      //  0   4  end of central dir signature (0x06054b50)
      //  4   2  number of this disk
      //  6   2  number of the disk with start of the central directory
      //  8   2  total number of entries in the central directory on this disk
      // 10   2  total number of entries in the central directory
      // 12   4  size of the central directory
      // 16   4  offset of start of central directory with respect to the starting disk number
      // 20   2  ZIP file comment length
      // 22  ??  ZIP file comment
      block = new byte[16];
      zf.ReadStream.Read(block, 0, block.Length);

      if (zf._diskNumberWithCd == 0)
      {
          zf._diskNumberWithCd = BitConverter.ToUInt16(block, 2);
          //zf._diskNumberWithCd++; // hack!!
      }

      // The comment length and comment text follow immediately.
      ReadZipFileComment(zf);
  }
  // Reads the archive comment from the current position of zf.ReadStream: a
  // 2-byte little-endian length followed by that many bytes of comment text.
  // When the length is non-zero, the bytes are decoded with
  // zf.AlternateEncoding and stored in zf.Comment; otherwise zf.Comment is
  // left untouched.
  private static void ReadZipFileComment(ZipFile zf)
  {
      byte[] block = new byte[2];
      zf.ReadStream.Read(block, 0, block.Length);

      // The length field is an unsigned 16-bit value.  Decode it into an int so
      // that comments of 32768..65535 bytes are not mistaken for a negative
      // length and silently dropped.
      int commentLength = block[0] | (block[1] << 8);

      if (commentLength > 0)
      {
          block = new byte[commentLength];
          zf.ReadStream.Read(block, 0, block.Length);

          // workitem 10392 - prefer ProvisionalAlternateEncoding,
          // first.  The fix for workitem 6513 tried to use UTF8
          // only as necessary, but that is impossible to test
          // for, in this direction.  There's no way to know what
          // characters the already-encoded bytes refer
          // to.  Therefore, must do what the user tells us.
          string s1 = zf.AlternateEncoding.GetString(block, 0, block.Length);
          zf.Comment = s1;
      }
  }
  849. // private static bool BlocksAreEqual(byte[] a, byte[] b)
  850. // {
  851. // if (a.Length != b.Length) return false;
  852. // for (int i = 0; i < a.Length; i++)
  853. // {
  854. // if (a[i] != b[i]) return false;
  855. // }
  856. // return true;
  857. // }
  /// <summary>
  ///   Determines whether the named file appears to be a valid zip file.
  /// </summary>
  ///
  /// <remarks>
  /// <para>
  ///   This overload simply forwards to <see cref="IsZipFile(string, bool)"/>,
  ///   passing false for the testExtract parameter.
  /// </para>
  /// </remarks>
  ///
  /// <param name="fileName">The file to check.</param>
  /// <returns>true if the file appears to be a zip file.</returns>
  public static bool IsZipFile(string fileName)
  {
      const bool testExtract = false;
      return IsZipFile(fileName, testExtract);
  }
  /// <summary>
  ///   Determines whether a file is a valid zip file.
  /// </summary>
  ///
  /// <remarks>
  /// <para>
  ///   The file is opened for reading and handed to
  ///   <see cref="IsZipFile(Stream, bool)"/>, which reads the zip archive and
  ///   verifies the ZIP metadata as it goes.
  /// </para>
  ///
  /// <para>
  ///   The method returns true when everything succeeds. It returns false when
  ///   the check fails - for example when an incorrect signature or CRC
  ///   indicates a corrupt file - and also when the file does not exist.
  /// </para>
  ///
  /// <para>
  ///   When <paramref name="testExtract"/> is true, the check additionally
  ///   reads the content of each entry, expands it, and verifies CRCs, which
  ///   goes beyond validating the zip header and directory data.
  /// </para>
  ///
  /// <para>
  ///   When <paramref name="testExtract"/> is true and any entry is protected
  ///   with a password, this method returns false. Verifying a
  ///   <c>ZipFile</c> that contains password-protected entries has to be done
  ///   manually.
  /// </para>
  /// </remarks>
  ///
  /// <param name="fileName">The zip file to check.</param>
  /// <param name="testExtract">true if the caller wants to extract each entry.</param>
  /// <returns>true if the file contains a valid zip file.</returns>
  public static bool IsZipFile(string fileName, bool testExtract)
  {
      bool looksValid = false;

      try
      {
          if (File.Exists(fileName))
          {
              using (FileStream input = File.Open(fileName, FileMode.Open, FileAccess.Read, FileShare.ReadWrite))
              {
                  looksValid = IsZipFile(input, testExtract);
              }
          }
      }
      catch (IOException)
      {
          // An I/O failure is reported through the return value.
      }
      catch (ZipException)
      {
          // A zip format failure is reported through the return value.
      }

      return looksValid;
  }
  /// <summary>
  ///   Determines whether a stream contains a valid zip archive.
  /// </summary>
  ///
  /// <remarks>
  /// <para>
  ///   The zip archive in the given stream is read with <c>ZipFile.Read</c>,
  ///   which verifies the ZIP metadata as it reads. When testExtract is true,
  ///   every non-directory entry is also extracted, with the output sent to
  ///   <see cref="Stream.Null"/>.
  /// </para>
  ///
  /// <para>
  ///   The method returns true when everything succeeds. It returns false when
  ///   the stream is not readable, or when an <c>IOException</c> or
  ///   <c>ZipException</c> is raised along the way - for example because an
  ///   incorrect signature or CRC indicates a corrupt archive.
  /// </para>
  ///
  /// <para>
  ///   When <c>testExtract</c> is true, reading the content of each entry,
  ///   expanding it and checking CRCs provides an additional check beyond
  ///   verifying the zip header data.
  /// </para>
  ///
  /// <para>
  ///   When <c>testExtract</c> is true and any entry is protected with a
  ///   password, this method returns false. Verifying a ZipFile that contains
  ///   password-protected entries has to be done manually.
  /// </para>
  /// </remarks>
  ///
  /// <seealso cref="IsZipFile(string, bool)"/>
  ///
  /// <param name="stream">The stream to check.</param>
  /// <param name="testExtract">true if the caller wants to extract each entry.</param>
  /// <returns>true if the stream contains a valid zip archive.</returns>
  /// <exception cref="ArgumentNullException">
  ///   <paramref name="stream"/> is null.
  /// </exception>
  public static bool IsZipFile(Stream stream, bool testExtract)
  {
      if (stream == null)
          throw new ArgumentNullException("stream");

      try
      {
          if (!stream.CanRead)
          {
              return false;
          }

          using (ZipFile archive = ZipFile.Read(stream, null, null, null))
          {
              if (testExtract)
              {
                  Stream sink = Stream.Null;
                  foreach (var entry in archive)
                  {
                      if (entry.IsDirectory)
                      {
                          continue;
                      }

                      entry.Extract(sink);
                  }
              }
          }

          return true;
      }
      catch (IOException)
      {
          return false;
      }
      catch (ZipException)
      {
          return false;
      }
  }
  991. }
  992. }