0
votes

I have multiple directories dirs = [dir1, dir2, ...]

These dirs are structured like this:

dir1
  subdir1
    folder1
    file1
    file2
  subdir2
dir2
  subdir1
    folder2
      file3
    file4
  subdir2

Note that dir1 and dir2 have identically named subdirectories. What I need is to print an HTML table that combines the files and folders from dir1 and dir2, like so:

subdir1
  folder1
  folder2
    file3
  file1
  file2
  file4
subdir2

One more caveat is that I need to know the path of each file and folder, so I can link to it.

So far I create the tree for dir1 with os.walk and create an html table from that, every line of which is in a list. Then I do os.walk for all other dirs, and for every dir, go through that list, until the basename is the same, then insert the files and folders. But this is very slow. I am sure there is a very clever, five line solution that would achieve the same.

def get_table(self, teams=('test1', 'test2')):
    """Build the HTML rows of one table that merges all team trees.

    Every team directory ``config.basepath + '/' + team`` is walked once.
    Directories are merged by their path relative to the team directory,
    so sub-directories that sit at the same place in different teams
    share a single row and their files are listed together below it.

    Within a directory the files come first (sorted by name), followed by
    the sub-directories in sorted order.  A directory row carries the
    label of the first team that contained it; a file row carries the
    label of the team it was found in.

    Args:
        teams: names of the team directories below ``config.basepath``.

    Returns:
        A list of ``<tr>`` strings, indented four spaces per tree level.
    """
    row = '{0}<tr class="{2}"><td>{1}</td><td>{2}</td></tr>'

    # Relative path components -> (row label, team label, [(file, team), ...]).
    # Keying on the whole relative path (not just the basename) keeps
    # same-named folders at different places in the tree apart.
    merged = {}
    for name in teams:
        path = config.basepath + '/' + name
        team = self.get_team(path)
        for root, _dirs, files in os.walk(path):
            # os.walk roots always start with `path`; what follows is the
            # location of this directory inside the team tree.
            parts = tuple(p for p in root[len(path):].split(os.sep) if p)
            if parts not in merged:
                merged[parts] = (os.path.basename(root), team, [])
            merged[parts][2].extend((f, team) for f in files)

    table = []
    # Sorting the component tuples yields a depth-first order in which each
    # parent precedes its children and siblings are alphabetical.
    for parts in sorted(merged):
        label, owner, files = merged[parts]
        depth = len(parts)
        table.append(row.format(' ' * 4 * depth, label, owner))
        # Stable sort on the name only: equal names keep team order.
        files.sort(key=lambda item: item[0])
        subindent = ' ' * 4 * (depth + 1)
        table.extend(row.format(subindent, f, t) for f, t in files)
    return table
2

2 Answers

1
votes

I would separate the extraction part from the display part.

Extraction:

def process(path, d={}):
    """Merge the directory tree below *path* into the nested dict *d*.

    Each file is stored as ``d[name] = None`` and each sub-directory as
    ``d[name] = {...}``, filled recursively.  A directory name that is
    already a key of *d* is reused, so processing several roots with the
    same dict merges their identically named sub-directories.

    The default for *d* is one dict shared by all calls: calling
    ``process(a)`` and then ``process(b)`` without a second argument
    accumulates both trees in it.  Pass a fresh ``{}`` to start empty.

    Args:
        path: directory to scan.
        d: dict to merge into (modified in place).

    Returns:
        The same dict *d*.

    Raises:
        Exception: if a file's name is already a key of *d*.
    """
    # The context manager closes the scandir iterator even when we raise.
    with os.scandir(path) as entries:
        for entry in entries:
            if entry.is_file():
                if entry.name in d:
                    raise Exception(entry.path + " already present")
                d[entry.name] = None
            elif entry.is_dir():
                if entry.name not in d:
                    d[entry.name] = {}
                process(entry.path, d[entry.name])
    return d

Display:

# NOTE(review): this definition repeats the function shown under
# "Extraction:"; it is kept so the name still resolves at this point.
def process(path, d={}):
    """Merge the directory tree below *path* into the nested dict *d*.

    Files are stored as ``d[name] = None`` and sub-directories as nested
    dicts, filled recursively; an existing directory key is reused.  The
    default *d* is one dict shared by all calls, so successive calls
    without a second argument accumulate into it.

    Returns:
        The same dict *d*.

    Raises:
        Exception: if a file's name is already a key of *d*.
    """
    # The context manager closes the scandir iterator even when we raise.
    with os.scandir(path) as entries:
        for entry in entries:
            if entry.is_file():
                if entry.name in d:
                    raise Exception(entry.path + " already present")
                d[entry.name] = None
            elif entry.is_dir():
                if entry.name not in d:
                    d[entry.name] = {}
                process(entry.path, d[entry.name])
    return d


def display(d, level=0):
    """Print the nested dict built by ``process`` as an indented tree.

    Names are printed in sorted order, two spaces of indentation per
    level.  A value of ``None`` marks a file; any other value is treated
    as the dict of a sub-directory and printed recursively.

    Args:
        d: nested dict as returned by ``process``.
        level: current depth, used for the indentation.
    """
    for name in sorted(d):
        print('  ' * level + name)
        children = d[name]
        if children is not None:
            display(children, level + 1)

With the structure you proposed, it gives:

>>> process('dir1')
>>> d = process('dir2')
>>> print(d)
{'subdir1': {'folder2': {'file3': None}, 'file1': None, 'file2': None, 'folder1': {}, 'file4': None}, 'subdir2': {}}
>>> display(d)
subdir1
  file1
  file2
  file4
  folder1
  folder2
    file3
subdir2

That way, you only have to change the display part for the HTML formatting...

0
votes

I've got it to run with this; however, I am sure there is a better solution:

# NOTE(review): fragment of a larger method -- `paths`, `table`,
# `coretasks`, `firstrun` and `self` must already exist in the enclosing
# scope; their setup is not shown here.
row = '{0}<tr class="{2}"><td>{1}</td><td>{2}</td></tr>'
for path in paths:
    team = self.get_team(path)
    for root, dirs, files in os.walk(path):
        # Sort in place: os.walk only honours an in-place change of `dirs`
        # when deciding the order in which to descend.
        dirs.sort()
        files = sorted(files)
        level = root.replace(path, '').count(os.sep)
        indent = ' ' * 4 * level
        subindent = ' ' * 4 * (level + 1)
        basename = os.path.basename(root)
        if firstrun:
            # First tree: emit the folder row, remember where it sits,
            # then emit its files.
            table.append(row.format(indent, basename, team))
            coretasks[basename] = len(table) - 1
            for f in files:
                table.append(row.format(subindent, f, team))
        elif basename in coretasks:
            # Later trees: splice this folder's files directly below the
            # row recorded for the same-named folder, in sorted order.
            anchor = coretasks[basename]
            table[anchor + 1:anchor + 1] = [
                row.format(subindent, f, team) for f in files
            ]
            # Only folder rows located after the insertion point moved.
            for name, position in coretasks.items():
                if position > anchor:
                    coretasks[name] = position + len(files)
    firstrun = False
print('\n'.join(table))