/* Copyright 2010 Aaron J. Radke Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */
package cc.drx

// import net.jpountz.lz4
import org.apache.commons.compress.archivers
import org.apache.commons.compress.compressors
import archivers.ArchiveEntry //TODO can this be a generic one that is part of drx?????
import archivers.ArchiveInputStream
import archivers.ArchiveOutputStream
import java.util.zip.GZIPInputStream
import java.util.zip.GZIPOutputStream
import java.io.{InputStream => JIS}
import java.io.{OutputStream => JOS}

/* TODO generalize the ArchiveReader and ArchiveWriter classes so it will work on zip.ZipInputStream files without depending on apache commons
   TODO unify the File and Archive api's using / and glob like formats; this will also offer a more general entry point from File directly instead of Archive
   TODO use commons compress lz4 instead of jpountz ??
   TODO add brotli br file reading
   TODO add snappy read/write format
   TODO add 7z in memory/file walking */

/*
 * Extract an archive to the current directory:
 *   Archive("large.tgz").extract
 *
 * Extract an archive to a specified directory:
 *   Archive("large.tgz").extract(File("large"))
 *
 * Extract a tar ball but stream the files out as GZ:
 *   Archive("large.tgz").extract(File("."),Archive.GZ)
 *   for(f <- Archive("large.tgz")) {print(s"%-50s " format f.file.path); Console.flush; println(IO.lines(f.is,autoClose=false).size)}
 *
 * Make and write to an archive:
 *   val entries = Seq(FileEntry("test.txt", "simple text"), FileEntry("test2.txt", "simple 2 test"))
 *   Archive.write("test2.tar"){a => entries foreach {a write _}}
 */
object Archive{

  /** Build a PathEntry (DirEntry or FileEntry) from a commons-compress ArchiveEntry.
    * The FileEntry is backed by the shared archive stream supplied as `input`. */
  private def mkEntryFromStream(entry:ArchiveEntry, input:Input):PathEntry = {
    //some formats carry no timestamp; fall back to "now" //TODO what date should be the failed time?
    val modDate = Try{Date(entry.getLastModifiedDate)} getOrElse Date.now
    if(entry.isDirectory) DirEntry(File(entry.getName), modDate)
    else new FileEntry(File(entry.getName), ()=>input, Bytes(entry.getSize), modDate)
  }

  /** Shared tar entry builder (tar entries must carry an explicit size up front). */
  private def tarEntry(e:PathEntry):ArchiveEntry = {
    val t = new archivers.tar.TarArchiveEntry(e.file.path)
    t.setSize(e.size.byteCount)
    t
  }

  /** Iterator view over the entries of an ArchiveInputStream.
    * The underlying stream is closed automatically once exhausted unless keepOpen is called. */
  class ArchiveReader(ais:ArchiveInputStream) extends Iterator[PathEntry] {
    private var opened = true
    private var autoClose = true
    private var entry:ArchiveEntry = null
    //FIX: look-ahead cache so repeated hasNext calls do not silently skip entries,
    //and next without a prior hasNext still advances (Iterator contract)
    private var fetched = false

    /** Disable the automatic close-on-exhaustion (needed when nesting readers on a shared stream). */
    def keepOpen:ArchiveReader = {autoClose = false; this}

    def hasNext:Boolean = {
      if(!fetched){
        entry = ais.getNextEntry //null if no more entries, java.io.EOFException if it cannot be read
        fetched = true
      }
      if(entry != null) true
      else {
        if(opened && autoClose) close
        false
      }
    }

    def next:PathEntry = {
      if(!fetched) hasNext //advance if the caller skipped hasNext
      fetched = false      //consume the cached look-ahead
      mkEntryFromStream(entry, Input(ais).keepOpen) //TODO is this keepOpen needed or best placed here?
    }

    def close = {opened = false; ais.close}

    /** Extract every entry under outputDir; each file is optionally re-encoded through streamGenerator
      * (e.g. extract a tar but stream the members back out as .gz files). */
    def extract(outputDir:File=File("."), streamGenerator:StreamGenerator=StreamGenerator.Normal):Unit = foreach{
      case e:FileEntry =>
        val ext = streamGenerator.exts.headOption
        //when re-encoding, append the generator's extension to the extracted name
        val f = if(ext.isDefined) File(outputDir.path + "/" + e.file.path + "." + ext.get)
                else outputDir / e.file
        println(s"Extracting to $f")
        //f.out.as(streamGenerator).write{os => IO.copy(e.is, os, false)}
        e.in.keepOpen copy f.out.as(streamGenerator)
      case d:DirEntry => d.file.file.mkdirs //java mkdirs
    }

    def files:Iterator[FileEntry] = collect{case f:FileEntry => f} //filter by File entry only

    /** Recursively walk this archive, descending into any nested archives found inside it. */
    def walk(parent:FileEntry)(implicit lib:ArchiveLib):Iterator[FileEntry] = files.flatMap{ f =>
      Archive.tryWalk(Try(lib reader f), new DeepFileEntry(parent,f))
    }
  }

  /** Push-style writer over an ArchiveOutputStream; entries are added with += / ++= / add. */
  class ArchiveWriter(val aos:ArchiveOutputStream, ag:ArchiveGenerator){
    /** Run the given write function and then close the archive. */
    def write(writeFunc:ArchiveWriter => Unit) = { writeFunc(this); close }
    def write(entries:Traversable[PathEntry]):Unit = write{_ ++= entries}
    final def +=(entry:PathEntry) = add(entry)
    final def ++=(entries:Traversable[PathEntry]) = entries foreach (add _)
    final def add(entry:PathEntry) = {
      aos.putArchiveEntry(ag.entry(entry))
      entry match{
        case e:FileEntry => e.in.keepOpen copy Output(aos).keepOpen //only files carry content bytes
        case _ =>
      }
      aos.closeArchiveEntry
    }
    def close:Unit = aos.close
    def finish:Unit = aos.finish
  }

  /** One supported archive format: knows its extensions, how to wrap raw java streams,
    * and how to build a format-specific ArchiveEntry. */
  trait ArchiveGenerator extends IOGenerator[ArchiveReader,ArchiveWriter]{
    def entry(e:PathEntry):ArchiveEntry
    /**forward the FileEntry to the default cc.drx.File test for canRead*/
    def canRead(f:FileEntry):Boolean = canRead(f.file)
    def reader( f:FileEntry):ArchiveReader = apply(f.in)
    // def canWrite(f:FileEntry) = canWrite(f.file)
    // def writer( f:FileEntry) = apply(f)  //for now writing has only been tested to files, but IO stream has been tested both directions
    def reader(f:File):ArchiveReader = apply(f.in.is)
    def writer(f:File):ArchiveWriter = apply(f.out.os)
  }

  case object TBZ2 extends ArchiveGenerator {
    val exts = List("tbz2","tar.bz2")
    def apply(jis:JIS) = new ArchiveReader(new archivers.tar.TarArchiveInputStream(new compressors.bzip2.BZip2CompressorInputStream(jis)))
    def apply(jos:JOS) = new ArchiveWriter(new archivers.tar.TarArchiveOutputStream(new compressors.bzip2.BZip2CompressorOutputStream(jos)),this)
    def entry(e:PathEntry):ArchiveEntry = tarEntry(e)
  }
  case object TLZ4 extends ArchiveGenerator {
    val exts = List("tlz4","tar.lz4")
    def apply(jis:JIS) = new ArchiveReader(new archivers.tar.TarArchiveInputStream(new compressors.lz4.BlockLZ4CompressorInputStream(jis)))
    def apply(jos:JOS) = new ArchiveWriter(new archivers.tar.TarArchiveOutputStream(new compressors.lz4.BlockLZ4CompressorOutputStream(jos)),this)
    def entry(e:PathEntry):ArchiveEntry = tarEntry(e)
  }
  case object TGZ extends ArchiveGenerator {
    val exts = List("tgz","tar.gz")
    def apply(jis:JIS) = new ArchiveReader(new archivers.tar.TarArchiveInputStream(new GZIPInputStream(jis)))
    def apply(jos:JOS) = new ArchiveWriter(new archivers.tar.TarArchiveOutputStream(new GZIPOutputStream(jos)),this)
    def entry(e:PathEntry):ArchiveEntry = tarEntry(e)
  }
  case object TAR extends ArchiveGenerator {
    val exts = List("tar")
    def apply(jis:JIS) = new ArchiveReader(new archivers.tar.TarArchiveInputStream(jis))
    def apply(jos:JOS) = new ArchiveWriter(new archivers.tar.TarArchiveOutputStream(jos),this)
    def entry(e:PathEntry):ArchiveEntry = tarEntry(e)
  }
  case object ZIP extends ArchiveGenerator {
    val exts = List("zip")
    def apply(jis:JIS) = new ArchiveReader(new archivers.zip.ZipArchiveInputStream(jis))
    def apply(jos:JOS) = new ArchiveWriter(new archivers.zip.ZipArchiveOutputStream(jos),this)
    def entry(e:PathEntry):ArchiveEntry = new archivers.zip.ZipArchiveEntry(e.file.path)
  }
  case object JAR extends ArchiveGenerator {
    val exts = List("jar")
    def apply(jis:JIS) = new ArchiveReader(new archivers.jar.JarArchiveInputStream(jis))
    def apply(jos:JOS) = new ArchiveWriter(new archivers.jar.JarArchiveOutputStream(jos),this)
    def entry(e:PathEntry):ArchiveEntry = new archivers.jar.JarArchiveEntry(e.file.path)
  }
  case object AR extends ArchiveGenerator {
    val exts = List("ar")
    def apply(jis:JIS) = new ArchiveReader(new archivers.ar.ArArchiveInputStream(jis))
    def apply(jos:JOS) = new ArchiveWriter(new archivers.ar.ArArchiveOutputStream(jos),this)
    def entry(e:PathEntry):ArchiveEntry = new archivers.ar.ArArchiveEntry(e.file.path, e.size.byteCount)
  }
  case object ARJ extends ArchiveGenerator {
    val exts = List("arj")
    def apply(jis:JIS) = new ArchiveReader(new archivers.arj.ArjArchiveInputStream(jis))
    override def canWrite = false //commons-compress only provides arj reading
    def apply(jos:JOS) = ???
    def entry(e:PathEntry):ArchiveEntry = ???
  }
  case object CPIO extends ArchiveGenerator {
    val exts = List("cpio") //FIX: was List("arj"), a copy-paste error from ARJ above
    def apply(jis:JIS) = new ArchiveReader(new archivers.cpio.CpioArchiveInputStream(jis))
    override def canWrite = false //FIXME error writing with past end of STORED entry
    def apply(jos:JOS) = new ArchiveWriter(new archivers.cpio.CpioArchiveOutputStream(jos),this)
    def entry(e:PathEntry):ArchiveEntry = new archivers.cpio.CpioArchiveEntry(e.file.path)
  }
  case object DUMP extends ArchiveGenerator {
    val exts = List("dump")
    def apply(jis:JIS) = new ArchiveReader(new archivers.dump.DumpArchiveInputStream(jis))
    override def canWrite = false
    def apply(jos:JOS) = ??? //read-only support
    def entry(e:PathEntry):ArchiveEntry = ???
  }
  case object SEVENZ extends ArchiveGenerator {
    import archivers.sevenz._ //{SevenZFile, SevenZMethod}
    val exts = List("7z")
    //7z needs random access, so only real files (not pure streams) can be read
    override def canRead(f:File):Boolean = canGenerate(f) && f.isFile //the file exists and is not a stream..
/**these classes are required to simulate 7z file streaming like the rest of the commons-compress api*/ class SevenZArchiveInputStream(f:File) extends ArchiveInputStream { private val zf = new SevenZFile(f.file) override def close() = zf.close() def getNextEntry() = zf.getNextEntry() override def read(b:Array[Byte], off:Int, len:Int) = zf.read(b,off,len) } /**these classes are required to simulate 7z file streaming like the rest of the commons-compress api*/ class SevenZArchiveOutputStream(f:File) extends ArchiveOutputStream { private val zf = new SevenZOutputFile(f.file) override def close() = zf.close() def putArchiveEntry(e:ArchiveEntry) = zf.putArchiveEntry(e) def closeArchiveEntry() = zf.closeArchiveEntry() def createArchiveEntry(jf:java.io.File,name:String) = zf.createArchiveEntry(jf,name) def finish() = zf.finish() override def write(b:Array[Byte], off:Int, len:Int) = zf.write(b,off,len) def setContentCompression(m:SevenZMethod) = zf.setContentCompression(m) } override def reader(f:File) = new ArchiveReader(new SevenZArchiveInputStream(f)) override def writer(f:File) = new ArchiveWriter(new SevenZArchiveOutputStream(f),this) //def apply(is:JIS) = new ArchiveReader(new archivers.sevenz.Seven7ArchiveInputStream(is)) import archivers.ArchiveStreamFactory def apply(is:JIS) = new ArchiveReader(new ArchiveStreamFactory().createArchiveInputStream(ArchiveStreamFactory.SEVEN_Z, is)) //TODO this does not support streaming... try copy to a tmp dir for sevenz file access def apply(os:JOS) = new ArchiveWriter(new ArchiveStreamFactory().createArchiveOutputStream(ArchiveStreamFactory.SEVEN_Z, os),this) //TODO this does not support streaming.. 
def entry(e:PathEntry):ArchiveEntry = {val t = new archivers.sevenz.SevenZArchiveEntry(); t.setName(e.file.path); t.setSize(e.size.byteCount); t} } trait ArchiveLib{ val libs:Set[ArchiveGenerator] def reader(f:FileEntry):Option[ArchiveReader] = libs.find(_ canRead f).map{_.reader(f)} // def writer(f:FileEntry):Option[ArchiveWriter] = libs.find(_ canWrite f).map{_.writer(f)} def reader(f:File):Option[ArchiveReader] = libs.find(_ canRead f).map{_.reader(f)} //this is a special version that prevents streaming and uses direct file access def writer(f:File):Option[ArchiveWriter] = libs.find(_ canWrite f).map{_.writer(f)} } object ArchiveLib{ //implicits are first searched for within an object of the same name (so no imports are necessary) implicit object DefaultArchiveLib extends ArchiveLib{ val libs:Set[ArchiveGenerator] = Set(TBZ2,TLZ4,TGZ,TAR,ZIP,JAR,AR,ARJ,CPIO,DUMP,SEVENZ) } } /**function to provide shared code with deepFileEntry and FileEntry, so it requires the strange fe geneerator argument*/ protected def tryWalk(reader:Try[Option[ArchiveReader]], fe: =>FileEntry):Iterator[FileEntry] = { reader match { //-- If the file is a known archive format try walking it case Success(Some(r)) => Try{r.keepOpen.walk(fe)} match { case Success(it) => it //if a failure occurred during the walk then skip and let us know case Failure(e) => Console.err.println(s"Warning: Archive.walk skipping $fe during walk because: $e") Iterator() } //--When not an archive file, just return the single deep file entry case Success(None) => Iterator(fe) //--On failure to open an archive case Failure(e) => Console.err.println(s"Warning: Archive.walk skipping $fe during open because: $e") Iterator(fe) } } /** loop through directories looking for all files including archive files to unzip and stream */ def walk(startingFile:File)(implicit lib:ArchiveLib):Iterator[FileEntry] = { if(!startingFile.isDir && !startingFile.isFile){ Console.err.println(s"Warning: in Archive.walk start file does not exist: 
$startingFile") } startingFile.walk.flatMap{f => if(f.isDir) Archive.walk(f) //recursive dir and archive search else tryWalk( Try(lib reader f), FileEntry(f)) } } /** loop through directories like walk, but first dive through all the files counting the number of files for a progress*/ def walkWithProgress(startingFile:File)(implicit lib:ArchiveLib):Iterator[(Ratio,FileEntry)] = { //to string = s"[$progress] ${file.fullPath}" def files = Archive.walk(startingFile) var total = 0 val counting = for((f,i) <- files.zipWithIndex) yield {total += 1; (Ratio(0,i),f)} val listing = for((f,i) <- files.zipWithIndex) yield (Ratio(i+1, total), f) counting ++ listing } } //TODO maybe add implicit from File ??? case class Archive(f:File)(implicit lib:Archive.ArchiveLib){ import Archive._ private def in = lib.reader(f) getOrElse ZIP(f.in.is) private def out = lib.writer(f) getOrElse ZIP(f.out.os) def write(writeFunc:ArchiveWriter => Unit) = out.write(writeFunc) def walk:Iterator[FileEntry] = Archive.walk(f) //deep walk open enclosing zip files def walkWithProgress = Archive.walkWithProgress(f) def list:Iterator[PathEntry] = in.toIterator //list all the entries def files:Iterator[FileEntry] = in.files //list all the files entries // def iterator = in.toIterator }