How can I create a zip archive of a directory structure in Python?
27 Answers
As others have pointed out, you should use zipfile. The documentation tells you what functions are available, but doesn't really explain how you can use them to zip an entire directory. I think it's easiest to explain with some example code:
import os
import zipfile
def zipdir(path, ziph):
    """Add every file found under *path* to the open archive *ziph*.

    Args:
        path: Directory to walk recursively.
        ziph: Writable ``zipfile.ZipFile`` handle.

    Entry names are computed relative to the parent of *path*, so the
    archive contains *path*'s own folder name as its top-level element.
    Only files are written; directories get no entries of their own.
    """
    # Names inside the archive are made relative to this directory.
    parent = os.path.join(path, '..')
    for folder, _subdirs, names in os.walk(path):
        for name in names:
            source = os.path.join(folder, name)
            ziph.write(source, os.path.relpath(source, parent))
# The context manager closes (and thereby finalizes) the archive even if
# zipdir() raises part-way through.
with zipfile.ZipFile('Python.zip', 'w', zipfile.ZIP_DEFLATED) as zipf:
    zipdir('tmp/', zipf)
The easiest way is to use shutil.make_archive
. It supports both zip and tar formats.
import shutil
shutil.make_archive(output_filename, 'zip', dir_name)
If you need to do something more complicated than zipping the whole directory (such as skipping certain files), then you'll need to dig into the zipfile
module as others have suggested.
To add the contents of mydirectory
to a new zip file, including all files and subdirectories:
import os
import zipfile
# Using the archive as a context manager guarantees it is closed, and its
# central directory written, even when one of the writes fails.
with zipfile.ZipFile("myzipfile.zip", "w") as zf:
    for dirname, subdirs, files in os.walk("mydirectory"):
        # Write the directory itself so that empty directories are kept.
        zf.write(dirname)
        for filename in files:
            zf.write(os.path.join(dirname, filename))
How can I create a zip archive of a directory structure in Python?
In a Python script
In Python 2.7+, shutil
has a make_archive
function.
from shutil import make_archive
make_archive(
'zipfile_name',
'zip', # the archive format - or tar, bztar, gztar
root_dir=None, # root for archive - current working dir if None
base_dir=None) # start archiving from here - cwd if None too
Here the zipped archive will be named zipfile_name.zip
. If base_dir
is farther down from root_dir
it will exclude files not in the base_dir
, but still archive the files in the parent dirs up to the root_dir
.
I did have an issue testing this on Cygwin with 2.7 - it wants a root_dir argument, for cwd:
make_archive('zipfile_name', 'zip', root_dir='.')
Using Python from the shell
You can do this with Python from the shell also using the zipfile
module:
$ python -m zipfile -c zipname sourcedir
Where zipname
is the name of the destination file you want (add .zip
if you want it, it won't do it automatically) and sourcedir is the path to the directory.
Zipping up Python (or just don't want parent dir):
If you're trying to zip up a python package with a __init__.py
and __main__.py
, and you don't want the parent dir, it's
$ python -m zipfile -c zipname sourcedir/*
And
$ python zipname
would run the package. (Note that you can't run subpackages as the entry point from a zipped archive.)
Zipping a Python app:
If you have python3.5+, and specifically want to zip up a Python package, use zipapp:
$ python -m zipapp myapp
$ python myapp.pyz
This function will recursively zip up a directory tree, compressing the files, and recording the correct relative filenames in the archive. The archive entries are the same as those generated by zip -r output.zip source_dir
.
import os
import zipfile
def make_zipfile(output_filename, source_dir):
    """Write *source_dir* recursively into a deflated zip at *output_filename*.

    Entry names are relative to the parent of *source_dir*, so the archive
    contains the directory's own name as its top-level element. Every
    directory gets an explicit entry, which keeps empty directories.
    """
    parent = os.path.abspath(os.path.join(source_dir, os.pardir))
    with zipfile.ZipFile(output_filename, "w", zipfile.ZIP_DEFLATED) as archive:
        for folder, _subdirs, names in os.walk(source_dir):
            rel_folder = os.path.relpath(folder, parent)
            # Explicit directory entry (needed for empty directories).
            archive.write(folder, rel_folder)
            for name in names:
                source = os.path.join(folder, name)
                if not os.path.isfile(source):
                    continue  # regular files only
                archive.write(source, os.path.join(rel_folder, name))
Modern Python (3.6+) using the pathlib
module for concise OOP-like handling of paths, and pathlib.Path.rglob()
for recursive globbing. As far as I can tell, this is equivalent to George V. Reilly's answer: zips with compression, the topmost element is a directory, keeps empty dirs, uses relative paths.
from pathlib import Path
from zipfile import ZIP_DEFLATED, ZipFile
from os import PathLike
from typing import Union
def zip_dir(zip_name: str, source_dir: Union[str, PathLike]) -> None:
    """Zip *source_dir* recursively into the deflated archive *zip_name*.

    Args:
        zip_name: Path of the archive to create.
        source_dir: Existing directory to archive; ``~`` is expanded.

    Raises:
        FileNotFoundError: If *source_dir* does not exist.

    Entry names are relative to the parent of *source_dir*, so the
    directory's own name is the top-level element of the archive.
    """
    src_path = Path(source_dir).expanduser().resolve(strict=True)
    # The archive exists on disk as soon as ZipFile opens it; if it lives
    # inside the source tree it must not be added to itself.
    zip_path = Path(zip_name).resolve()
    with ZipFile(zip_name, 'w', ZIP_DEFLATED) as zf:
        for file in src_path.rglob('*'):
            if file == zip_path:
                continue
            zf.write(file, file.relative_to(src_path.parent))
Note: as optional type hints indicate, zip_name
can't be a Path object on Python 3.6.0 and 3.6.1 (this is fixed in 3.6.2+).
For adding compression to the resulting zip file, check out this link.
You need to change:
zip = zipfile.ZipFile('Python.zip', 'w')
to
zip = zipfile.ZipFile('Python.zip', 'w', zipfile.ZIP_DEFLATED)
I've made some changes to the code given by Mark Byers. The function below also adds empty directories if you have them. The examples should make it clearer which path is added to the zip.
#!/usr/bin/env python
import os
import zipfile
def _strip_base(fs_path, base):
    """Return *fs_path* without the leading *base* prefix and separators."""
    # Strip only a true path prefix; a match in the middle of the path
    # must be left alone.
    if base and (fs_path == base
                 or fs_path.startswith((base + "/", base + "\\"))):
        fs_path = fs_path[len(base):]
    return fs_path.lstrip("\\/")


def addDirToZip(zipHandle, path, basePath=""):
    """Add the directory *path* to the opened zip file *zipHandle*.

    Args:
        zipHandle: Writable ``zipfile.ZipFile``.
        path: Directory to add recursively (empty directories included).
        basePath: Leading part of *path* to drop from the names stored in
            the archive. It is applied to file and directory entries alike.

    Examples::

        # add whole "dir" (the archive shows only "dir")
        addDirToZip(zipHandle, 'dir')

        # add the contents of "dir" (the archive shows only its contents)
        addDirToZip(zipHandle, 'dir', 'dir')

        # add the contents of "dir/subdir"
        addDirToZip(zipHandle, 'dir/subdir', 'dir/subdir')

        # add whole "dir/subdir" (the archive shows only "subdir")
        addDirToZip(zipHandle, 'dir/subdir', 'dir')

        # add "dir/subdir" with its full path ("dir" containing "subdir")
        addDirToZip(zipHandle, 'dir/subdir')

        # add "dir" and "otherDir" side by side
        addDirToZip(zipHandle, 'dir')
        addDirToZip(zipHandle, 'otherDir')
    """
    basePath = basePath.rstrip("\\/")
    for root, dirs, files in os.walk(path):
        # Add the directory itself (needed for empty dirs). The base
        # directory maps to an empty name and gets no entry.
        dirInZipPath = _strip_base(root, basePath)
        if dirInZipPath:
            zipHandle.write(root, dirInZipPath)
        # Add files.
        for file in files:
            filePath = os.path.join(root, file)
            zipHandle.write(filePath, _strip_base(filePath, basePath))
Above is a simple function that should work for simple cases. You can find more elegant class in my Gist: https://gist.github.com/Eccenux/17526123107ca0ac28e6
I have another code example that may help, using python3, pathlib and zipfile. It should work in any OS.
from pathlib import Path
import zipfile
from datetime import datetime
# strftime pattern: two-digit year, month and day, with no separators.
DATE_FORMAT = '%y%m%d'


def date_str():
    """Return today's local date formatted with DATE_FORMAT."""
    today = datetime.now()
    return today.strftime(DATE_FORMAT)
def zip_name(path):
    """Return an unused zip filename (as a string) for archiving *path*.

    The name is ``<parent>/<dirname>_<date>.zip``, placed next to the
    resolved *path*. If that file already exists, ``_1``, ``_2``, ... is
    appended until a free name is found.
    """
    target = Path(path).resolve()
    folder = target.parents[0]
    candidate = '{}/{}_{}.zip'.format(folder, target.name, date_str())
    counter = 1
    while Path(candidate).exists():
        candidate = '{}/{}_{}_{}.zip'.format(
            folder, target.name, date_str(), counter)
        counter += 1
    return candidate
def all_files(path):
    """Yield every file and folder below *path*, depth-first, as strings.

    Each directory is yielded before its contents. Paths are built from
    *path* as given, so they are absolute only if *path* is absolute.
    """
    for entry in Path(path).iterdir():
        yield str(entry)
        if entry.is_dir():
            yield from all_files(str(entry))
def zip_dir(path):
    """Create a zip of everything below *path*, named by zip_name(path).

    Progress is printed for the archive and for each entry added. The
    archive is opened as a context manager so it is closed even when one
    of the writes fails.
    """
    zip_filename = zip_name(path)
    with zipfile.ZipFile(zip_filename, 'w') as zip_file:
        print('create:', zip_filename)
        for file in all_files(path):
            print('adding... ', file)
            zip_file.write(file)
if __name__ == '__main__':
zip_dir('.')
print('end!')
For a concise way to retain the folder hierarchy under the parent directory to be archived:
import glob
import os
import zipfile

with zipfile.ZipFile(fp_zip, "w", zipfile.ZIP_DEFLATED) as zipf:
    # "**" only descends into subdirectories when recursive=True is passed;
    # the function is glob.glob (the module itself is not callable).
    for fp in glob.glob(os.path.join(parent, "**", "*"), recursive=True):
        # Store each entry under its path relative to the parent directory.
        zipf.write(fp, arcname=os.path.relpath(fp, parent))
If you want, you could change this to use pathlib
for file globbing.
You probably want to look at the zipfile
module; there's documentation at http://docs.python.org/library/zipfile.html.
You may also want os.walk()
to index the directory structure.
Here is a variation on the answer given by Nux that works for me:
def WriteDirectoryToZipFile(zipHandle, srcPath, zipLocalPath="", zipOperation=zipfile.ZIP_DEFLATED):
    """Write the directory *srcPath* recursively into *zipHandle*.

    Args:
        zipHandle: Writable ``zipfile.ZipFile``.
        srcPath: Directory to add; its own name becomes the top folder.
        zipLocalPath: Optional folder inside the archive under which the
            directory is placed.
        zipOperation: Compression type passed to every ``write`` call.
    """
    # Normalize first so "dir", "dir/" and "a/dir" all resolve to the same
    # parent. Slicing root by len(parent) + 1 dropped the first character
    # whenever srcPath had no parent component.
    basePath = os.path.dirname(os.path.normpath(srcPath)) or os.curdir
    for root, dirs, files in os.walk(srcPath):
        p = os.path.join(zipLocalPath, os.path.relpath(root, basePath))
        # Add the directory itself (keeps empty directories).
        zipHandle.write(root, p, zipOperation)
        # Add its files.
        for f in files:
            zipHandle.write(os.path.join(root, f), os.path.join(p, f), zipOperation)
Try the one below; it worked for me.
import zipfile, os
zipf = "compress.zip"
def main():
    """Zip the placeholder directory by handing it to toZip()."""
    toZip(r"Filepath")
def toZip(directory):
    """Zip the contents of *directory* into the archive named by ``zipf``.

    Files directly inside *directory* are written here; every other entry
    is handed to addFolderToZip(). The archive is opened as a context
    manager so it is closed even if a write fails.
    """
    with zipfile.ZipFile(zipf, "w", compression=zipfile.ZIP_DEFLATED) as zippedHelp:
        for entry in os.listdir(directory):
            file_name = os.path.join(directory, entry)
            if os.path.isfile(file_name):
                # print() with one argument behaves the same on Python 2 and 3.
                print(file_name)
                zippedHelp.write(file_name)
            else:
                addFolderToZip(zippedHelp, entry, directory)
                print("---------------Directory Found-----------------------")
def addFolderToZip(zippedHelp, folder, directory):
    """Recursively add ``directory/folder`` to the open archive *zippedHelp*.

    Args:
        zippedHelp: Writable ``zipfile.ZipFile``.
        folder: Name of the folder to add, relative to *directory*.
        directory: Path of the folder's parent.
    """
    path = os.path.join(directory, folder)
    print(path)
    for file_name in os.listdir(path):
        file_path = os.path.join(path, file_name)
        if os.path.isfile(file_path):
            zippedHelp.write(file_path)
        # Test the full path: the bare name would be resolved against the
        # current working directory, so nested folders were skipped.
        elif os.path.isdir(file_path):
            print("------------------sub directory found--------------------")
            addFolderToZip(zippedHelp, file_name, path)
if __name__=="__main__":
main()
If you want a functionality like the compress folder of any common graphical file manager you can use the following code, it uses the zipfile module. Using this code you will have the zip file with the path as its root folder.
import os
import zipfile
def zipdir(path, ziph):
    """Add the contents of *path* to *ziph* with *path* as the archive root.

    Args:
        path: Directory whose contents are archived.
        ziph: Writable ``zipfile.ZipFile`` handle.

    Files directly inside *path* land at the top level of the archive;
    files in subfolders keep their folder structure below it
    (e.g. ``folder1/folder2/file.txt``).
    """
    for root, _dirs, files in os.walk(path):
        # Folder structure below *path*; "." for *path* itself.
        rel_dir = os.path.relpath(root, path)
        for filename in files:
            if rel_dir == os.curdir:
                zipped_file_path = filename
            else:
                zipped_file_path = os.path.join(rel_dir, filename)
            # Write through the handle that was passed in, not a global.
            ziph.write(os.path.join(root, filename), zipped_file_path)
# The context manager closes the archive even if zipdir() raises.
with zipfile.ZipFile('Python.zip', 'w', zipfile.ZIP_DEFLATED) as zipf:
    zipdir('/tmp/justtest/', zipf)
To give more flexibility, e.g. select directory/file by name use:
import os
import zipfile
def zipall(ob, path, rel=""):
    """Add the file or directory *path* to the open archive *ob*.

    Args:
        ob: Writable ``zipfile.ZipFile``.
        path: File or directory to add; anything else is ignored.
        rel: Folder inside the archive to place the item in. For a
            directory, an empty *rel* means "use the directory's own name".

    Directories are added recursively, subfolders before files, and each
    directory gets its own entry so empty ones are preserved.
    """
    leaf = os.path.basename(path)
    if os.path.isfile(path):
        ob.write(path, os.path.join(rel, leaf))
        return
    if not os.path.isdir(path):
        return

    arc_dir = leaf if rel == "" else rel
    ob.write(path, arc_dir)
    # Only the first os.walk() step is needed: the immediate children.
    _top, subdirs, filenames = next(os.walk(path), (path, [], []))
    for sub in subdirs:
        zipall(ob, os.path.join(path, sub), os.path.join(arc_dir, sub))
    for fname in filenames:
        ob.write(os.path.join(path, fname), os.path.join(arc_dir, fname))
For a file tree:
.
├── dir
│ ├── dir2
│ │ └── file2.txt
│ ├── dir3
│ │ └── file3.txt
│ └── file.txt
├── dir4
│ ├── dir5
│ └── file4.txt
├── listdir.zip
├── main.py
├── root.txt
└── selective.zip
You can e.g. select only dir4
and root.txt
:
# Absolute paths of the items to archive, resolved against the current
# working directory.
cwd = os.getcwd()
files = [os.path.join(cwd, name) for name in ('dir4', 'root.txt')]

with zipfile.ZipFile("selective.zip", "w") as myzip:
    for selected in files:
        zipall(myzip, selected)
Or just listdir
in script invocation directory and add everything from there:
with zipfile.ZipFile("listdir.zip", "w") as myzip:
    for entry in os.listdir():
        # listdir.zip is created in this same directory; skip it so the
        # archive is not added to itself.
        if entry != "listdir.zip":
            zipall(myzip, entry)
Say you want to Zip all the folders(sub directories) in the current directory.
# Only the immediate sub-directories of the current directory get an
# archive. Walking recursively would give nested folders with the same
# name the same "<name>.zip" and overwrite earlier archives.
for sub_dir in sorted(next(os.walk("."))[1]):
    zip_you_want = sub_dir + ".zip"
    with zipfile.ZipFile(zip_you_want, "w", zipfile.ZIP_DEFLATED) as zip_process:
        # Add every file below the sub-directory, keeping its folder layout.
        for root, dirs, files in os.walk(sub_dir):
            for name in files:
                zip_process.write(os.path.join(root, name))
    print("Successfully zipped directory: {sub_dir}".format(sub_dir=sub_dir))
So many answers here, and I hope I might contribute with my own version, which is based on the original answer (by the way), but with a more graphical perspective, also using context for each zipfile
setup and sorting os.walk()
, in order to have a ordered output.
Given these folders and their files (among other folders), I wanted to create a .zip
for each cap_
folder:
$ tree -d
.
├── cap_01
| ├── 0101000001.json
| ├── 0101000002.json
| ├── 0101000003.json
|
├── cap_02
| ├── 0201000001.json
| ├── 0201000002.json
| ├── 0201001003.json
|
├── cap_03
| ├── 0301000001.json
| ├── 0301000002.json
| ├── 0301000003.json
|
├── docs
| ├── map.txt
| ├── main_data.xml
|
├── core_files
├── core_master
├── core_slave
Here's what I applied, with comments for better understanding of the process.
$ cat zip_cap_dirs.py
""" Zip 'cap_*' directories. """
import os
import zipfile as zf
for root, dirs, files in sorted(os.walk('.')):
    # Only directories whose path contains 'cap_' get an archive.
    if 'cap_' not in root:
        continue
    print(f"Compressing: {root}")
    # The archive is named after the directory, next to it.
    cap_dir_zip = f'{root}.zip'
    # One zipfile context per matching directory.
    with zf.ZipFile(cap_dir_zip, 'w', zf.ZIP_DEFLATED) as new_zip:
        # Add each file of the current directory by its path relative to
        # the walk root.
        for f_path in (os.path.join(root, f) for f in files):
            new_zip.write(f_path)
Basically, for each iteration over os.walk(path)
, I'm opening a context for zipfile
setup and afterwards iterating over files
, which is a list
of files from root
directory, forming the relative path for each file based on the current root
directory, appending to the zipfile
context which is running.
And the output is presented like this:
$ python3 zip_cap_dirs.py
Compressing: ./cap_01
Compressing: ./cap_02
Compressing: ./cap_03
To see the contents of each .zip
directory, you can use less
command:
$ less cap_01.zip
Archive: cap_01.zip
Length Method Size Cmpr Date Time CRC-32 Name
-------- ------ ------- ---- ---------- ----- -------- ----
22017 Defl:N 2471 89% 2019-09-05 08:05 7a3b5ec6 cap_01/0101000001.json
21998 Defl:N 2471 89% 2019-09-05 08:05 155bece7 cap_01/0101000002.json
23236 Defl:N 2573 89% 2019-09-05 08:05 55fced20 cap_01/0101000003.json
-------- ------- --- -------
67251 7515 89% 3 files
Zip a file or a tree (a directory and its sub-directories).
from pathlib import Path
from zipfile import ZipFile, ZIP_DEFLATED
def make_zip(tree_path, zip_path, mode='w', skip_empty_dir=False):
    """Write a file or a whole directory tree into a deflated zip archive.

    Args:
        tree_path: File or directory to archive.
        zip_path: Archive to create or extend.
        mode: ``ZipFile`` mode, e.g. ``'w'`` for a fresh archive or ``'a'``
            to append to an existing one.
        skip_empty_dir: When true, no directory entries are written at
            all, so only files appear in the archive.

    Entries are stored under the paths as given (no re-rooting).
    """
    pending = [Path(tree_path)]
    with ZipFile(zip_path, mode=mode, compression=ZIP_DEFLATED) as archive:
        # Depth-first traversal with an explicit stack.
        while pending:
            entry = pending.pop()
            is_directory = entry.is_dir()
            if is_directory:
                pending.extend(entry.iterdir())
            if not (is_directory and skip_empty_dir):
                archive.write(entry)
To append to an existing archive, pass mode='a'
, to create a fresh archive mode='w'
(the default in the above). So let's say you want to bundle 3 different directory trees under the same archive.
make_zip(path_to_tree1, path_to_arch, mode='w')
make_zip(path_to_tree2, path_to_arch, mode='a')
make_zip(path_to_file3, path_to_arch, mode='a')
A solution using pathlib.Path
, which is independent of the OS used:
import zipfile
from pathlib import Path
def zip_dir(path: Path, zip_file_path: Path):
    """Zip all files below *path* (recursively) into *zip_file_path*.

    Args:
        path: Directory whose contents are archived.
        zip_file_path: Archive to create.

    Entry names are relative to *path*, so the files sit directly at the
    top level of the archive and subfolders keep their structure. Each
    archived name is printed as progress output.
    """
    target = Path(zip_file_path).resolve()
    # Collect first, and leave out a pre-existing archive inside the tree,
    # so the zip never tries to contain itself.
    files_to_zip = sorted(
        file for file in path.rglob('*')
        if file.is_file() and file.resolve() != target)
    with zipfile.ZipFile(
            zip_file_path, 'w', zipfile.ZIP_DEFLATED) as zip_f:
        for file in files_to_zip:
            arcname = file.relative_to(path).as_posix()
            print(arcname)
            zip_f.write(file, arcname)
current_dir = Path.cwd()
# Use a separate name for the source directory: rebinding ``zip_dir`` would
# shadow the function defined above and make it uncallable.
source_dir = current_dir / "test"
zip_dir(
    source_dir, current_dir / 'Zipped_dir.zip')
Here's a modern approach, using pathlib, and a context manager. Puts the files directly in the zip, rather than in a subfolder.
def zip_dir(filename: str, dir_to_zip: pathlib.Path):
    """Zip the files below *dir_to_zip* directly into the archive *filename*.

    Args:
        filename: Path of the archive to create.
        dir_to_zip: Directory to archive recursively.

    Files are stored relative to *dir_to_zip*, i.e. without an enclosing
    top-level folder. Directories get no entries of their own.
    """
    with zipfile.ZipFile(filename, 'w', zipfile.ZIP_DEFLATED) as zipf:
        # rglob('*') visits every level. glob('**') also yields files on
        # newer Python versions, which breaks a nested iterdir().
        for file in dir_to_zip.rglob('*'):
            if not file.is_file():
                continue
            # Make the name relative to the directory being zipped, so no
            # unneeded enclosing folder is created. Dropping only the first
            # path component is wrong for absolute or multi-part paths.
            zip_path = file.relative_to(dir_to_zip)
            # Pass strings, with forward slashes for the archive name.
            zipf.write(str(file), zip_path.as_posix())
I prepared a function by consolidating Mark Byers' solution with Reimund and Morten Zilmer's comments (relative path and including empty directories). As a best practice, with
is used in ZipFile's file construction.
The function also prepares a default zip file name with the zipped directory name and '.zip' extension. Therefore, it works with only one argument: the source directory to be zipped.
import os
import zipfile
def zip_dir(path_dir, path_file_zip=''):
    """Zip *path_dir* recursively, including empty directories.

    Args:
        path_dir: Directory to archive.
        path_file_zip: Archive to create. Defaults to ``<path_dir>.zip``
            next to the directory.

    Entry names are relative to the parent of *path_dir*, so the archive
    contains the directory's own name as its top-level element.
    """
    if not path_file_zip:
        # Normalize so a trailing slash does not yield a bare ".zip" name.
        normalized = os.path.normpath(path_dir)
        path_file_zip = os.path.join(
            os.path.dirname(normalized), os.path.basename(normalized) + '.zip')
    parent = os.path.join(path_dir, os.path.pardir)
    # ZipFile only accepts 'r', 'w', 'x' or 'a'; 'wb' raises ValueError.
    with zipfile.ZipFile(path_file_zip, 'w', zipfile.ZIP_DEFLATED) as zip_file:
        for root, dirs, files in os.walk(path_dir):
            for file_or_dir in files + dirs:
                source = os.path.join(root, file_or_dir)
                zip_file.write(source, os.path.relpath(source, parent))
# Import the required modules. Both are part of the standard library, so
# nothing has to be installed with pip.
import os
import zipfile

# Change to the directory where the new zip file should be created.
os.chdir('Type your destination')

# Create the new zip file (called myfile here). The context manager closes
# it at the end, which is what writes the archive's index to disk.
with zipfile.ZipFile('myfile.zip', 'w') as zf:
    # os.walk yields every directory of the tree with the files it contains.
    for dirname, folders, files in os.walk('Type your directory'):
        # Add the directory currently being visited (keeps empty ones).
        zf.write(dirname)
        for file in files:
            # Join with the directory being visited, not the top directory,
            # so that files in sub-directories are found.
            zf.write(os.path.join(dirname, file))
Well, after reading the suggestions I came up with a very similar way that works with 2.7.x without creating "funny" directory names (absolute-like names), and will only create the specified folder inside the zip.
Or just in case you needed your zip to contain a folder inside with the contents of the selected directory.
def zipDir( path, ziph ) :
    """
    Inserts directory (path) into zipfile instance (ziph)

    Entry names start at the directory's own name (no absolute-like
    prefixes), and files in sub-directories keep their relative location
    instead of being flattened into a single folder.
    """
    # Names are made relative to the parent of the directory being zipped.
    parent = os.path.dirname( os.path.normpath( path ) ) or os.curdir
    for root, dirs, files in os.walk( path ) :
        for file in files :
            source = os.path.join( root, file )
            # relpath uses the platform separator; zipfile converts it to
            # "/" itself, so no hard-coded backslash is needed.
            ziph.write( source, os.path.relpath( source, parent ) )
def makeZip( pathToFolder ) :
    """
    Creates a zip file with the specified folder

    The archive is written to ``pathToFolder + 'file.zip'`` and is closed
    even if adding the directory fails.
    """
    zipPath = pathToFolder + 'file.zip'
    with zipfile.ZipFile( zipPath, 'w', zipfile.ZIP_DEFLATED ) as zipf :
        zipDir( pathToFolder, zipf )
    # Report where the archive actually is, not the folder that was zipped.
    print( "Zip file saved to: " + zipPath )
makeZip( "c:\\path\\to\\folder\\to\\insert\\into\\zipfile" )
Function to create zip file.
def CREATEZIPFILE(zipname, path):
    """Create the deflated zip file *zipname* from a folder or a file.

    Args:
        zipname: Name of the zip file to create.
        path: Folder or file to put into the zip file. A folder is added
            recursively with names relative to the folder; a single file
            is stored under *path* as given.
    """
    with zipfile.ZipFile(zipname, 'w', zipfile.ZIP_DEFLATED) as zipf:
        # NOTE: setpassword() only sets the default password used when
        # *reading* encrypted members. The zipfile module cannot create
        # encrypted archives, so the output is NOT password protected.
        zipf.setpassword(b"password")
        # Check whether the path is a directory or a file.
        if os.path.isdir(path):
            for root, dirs, files in os.walk(path):
                for name in dirs + files:
                    source = os.path.join(root, name)
                    zipf.write(source, os.path.relpath(source, path))
        elif os.path.isfile(path):
            zipf.write(path, path)
For anyone else delving into this question and trying to archive the very same directory their program is in and is getting both very deep tree structures and ending up with recursion due to the zip file zipping itself, try this.
It's a combination of Mark's answer and some extra checks to ensure that there's no recursive zipping of the zipfile itself, and no unnecessarily deep folder structures.
import os
import zipfile
def zipdir(path, ziph, ignored_directories, ignored_files):
    """Add the files below *path* to *ziph*, skipping ignored names.

    Args:
        path: Directory to walk.
        ziph: Writable ``zipfile.ZipFile`` handle.
        ignored_directories: Strings; a directory is skipped when any of
            them occurs anywhere in its path (substring match).
        ignored_files: Strings; a file is skipped when any of them occurs
            in its name (substring match).

    Files are stored under the paths produced by the walk.
    """
    for folder, _subdirs, names in os.walk(path):
        # The directory test does not depend on the file, so do it once.
        if any(marker in folder for marker in ignored_directories):
            continue
        for name in names:
            if any(marker in name for marker in ignored_files):
                continue
            ziph.write(os.path.join(folder, name))
# directory containing this script (not necessarily the current working directory)
this_dir = os.path.dirname(os.path.abspath(__file__))
# the directory within the script directory the zip will be created in (build/archives).
# leave zip_dest_dir blank (or set dest_dir = this_dir) if you want the zip file next to the script
zip_dest_dir = os.path.join('build', 'archives')
dest_dir = os.path.join(this_dir, zip_dest_dir)
# make sure the destination exists: create the same path the zip is written
# to, so this also works when the script is started from another directory
os.makedirs(dest_dir, exist_ok=True)
# name the zip file: remember the file extension
zip_filename = 'zipped_directory.zip'
# zip file's path
zip_path = os.path.join(dest_dir, zip_filename)
# ignored files and directories: e.g. "build" or "node_modules" would be listed here.
ignored_dirs = []
# ignore any specific files: the archive itself must be listed so it is never
# zipped into itself; a script such as 'deploy.py' could be added as well
ignored_files = [zip_filename]
# create the zipfile handle: you can change ZIP_STORED to any other compression
# algorithm of your choice, like ZIP_DEFLATED, if you need actual compression.
# The context manager closes the archive even if zipping fails.
with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_STORED) as zipf:
    # zip directory contents
    zipdir('.', zipf, ignored_dirs, ignored_files)
The resulting zip file should only include directories starting from the working directory: so no Users/user/Desktop/code/.../working_directory/.../etc. kind of file structure.
The obvious way to go is shutil, as the second top answer says. But if you still wish to use ZipFile for some reason and run into trouble doing so (such as ERR 13 on Windows), you can use this fix:
import os
import zipfile
def retrieve_file_paths(dirName):
    """Return the paths of all files below *dirName*, in walk order.

    Each path is the walked directory joined with the file name, so the
    paths are relative exactly when *dirName* is relative.
    """
    return [
        os.path.join(folder, name)
        for folder, _subdirs, names in os.walk(dirName)
        for name in names
    ]
def main(dir_name, output_filename):
    """Zip every file below *dir_name* into ``output_filename + '.zip'``.

    The file list is collected before the archive is opened; entries are
    stored under the collected paths.
    """
    collected = retrieve_file_paths(dir_name)
    with zipfile.ZipFile(output_filename+'.zip', 'w') as archive:
        for member in collected:
            archive.write(member)
main("my_dir", "my_dir_archived")
This one recursively iterates through every sub-folder/file in your given folder, And writes them to a zip file instead of attempting to directly zip a folder.
make_archive
from shutil
(if you want to zip a single directory recursively). – malana