An eye for detail
Importing your file into my editor caused several lines to change, with PEP 8 warnings popping up. I recommend switching to an editor that runs `black` on save. For instance,
```python
totalSize    = dataFromDictionary[1]
```

should really be

```python
totalSize = dataFromDictionary[1]
```
This indicates a bit of laziness, and there are several instances of it. Your imports are these:

```python
import os
import glob
import os
import collections
```
which, after running `isort` over it, become this:

```python
import collections
import glob
import os
```
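Note that `isort` also removed the duplicate `import os`. If your editor has no integration for it, running `isort` on the file from the command line gives the same result.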
Similarly,

```python
directory_name = "\\SpeicifDir\\"
```

should probably be

```python
directory_name = "\\SpecificDir\\"
```

no? And speaking of that line: here you suddenly use `snake_case`, while throughout the rest of the code `camelCase` is used.
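Whichever style you pick, use it consistently; PEP 8 recommends `snake_case` for variables, so (with names adapted from your own snippet) everything would move in this direction:

```python
total_size = data_from_dictionary[1]
```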
Comments
If you find yourself writing many comments, it is a good indication that your structure needs to change: use more functions, docstrings, etc. Comments should explain *why*, not *how*.
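As a minimal sketch (the function here is made up for illustration), a well-named function with a docstring usually replaces a pile of step-by-step comments:

```python
def average_filesize(sizes: list[int]) -> float:
    """Return the mean file size in bytes."""
    # The name and docstring state *what*; the code shows *how*.
    return sum(sizes) / len(sizes)
```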
Modern solutions
The standard way to handle files and paths today is `pathlib`. However, I think that a `pandas` DataFrame would be a more natural container for this type of information. A `pandas` solution might look like this:
```python
from pathlib import Path

import pandas as pd

directory = Path.cwd()  # current working directory
textfiles = get_files(directory, suffix=".txt")
size_by_date, count_by_date = filesizes_and_counts(textfiles)

dates = list(size_by_date.keys())
sizes = list(size_by_date.values())
counts = list(count_by_date.values())

system_info = pd.DataFrame(list(zip(dates, sizes, counts)))
system_info.columns = ["Date", "Filesize", "Files"]
```
See below for how the functions are defined.
All in all, a modern solution might look like this:
```python
from collections import defaultdict
from datetime import datetime
from pathlib import Path
from typing import Generator, Iterable, Optional

MATCH_ANY = "*"

# Type aliases to keep the signatures below readable.
Filesizes = dict[datetime, float]
Filecounts = dict[datetime, int]


def get_files(
    directory: Path, suffix: str, recursive: bool = False
) -> Generator[Path, None, None]:
    """Yield every file in directory whose name ends with suffix."""
    folder_iter = directory.rglob if recursive else directory.glob
    yield from folder_iter(f"{MATCH_ANY}{suffix}")


def filename_date(filename: str, delimiter: str = "-") -> Optional[datetime]:
    """Parse a trailing day-month-year from a filename, or return None."""
    split_file = filename.split(delimiter)
    if len(split_file) < 3:
        return None
    try:
        *_, day, month, year = split_file
        return datetime(day=int(day), month=int(month), year=int(year))
    except ValueError:
        return None


def filesizes_and_counts(files: Iterable[Path]) -> tuple[Filesizes, Filecounts]:
    """Sum file sizes and count files, grouped by the date in the filename."""
    file_sizes = defaultdict(float)
    file_counts = defaultdict(int)
    for path in files:
        # Use .stem so the extension does not end up in the year field.
        if (date := filename_date(path.stem)) is None:
            continue
        file_sizes[date] += path.stat().st_size
        file_counts[date] += 1
    return file_sizes, file_counts


if __name__ == "__main__":
    directory = Path.cwd()  # current working directory
    textfiles = get_files(directory, suffix=".txt")
    sizes, counts = filesizes_and_counts(textfiles)
    dates = sizes.keys()
    for date in dates:
        print(
            f"{date.strftime('%Y-%m-%d')}: "
            f"{sizes[date]:>20} bytes, "
            f"files: {counts[date]}"
        )
```
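Assuming the directory contains files following the `name-DD-MM-YYYY.txt` pattern that `filename_date` expects, the output would look something like this (sizes and counts made up):

```
2020-12-31:               2048.0 bytes, files: 2
2020-11-30:               1024.0 bytes, files: 1
```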
EDIT: Here is a full-blown `pandas` solution which I think works very nicely.

Most of the code is the same, but we let `pandas` filter our data.
```python
from datetime import datetime
from pathlib import Path
from typing import Generator, Iterable, Optional

import pandas as pd

MATCH_ANY = "*"


def get_files(
    directory: Path, suffix: str, recursive: bool = False
) -> Generator[Path, None, None]:
    """Yield every file in directory whose name ends with suffix."""
    folder_iter = directory.rglob if recursive else directory.glob
    yield from folder_iter(f"{MATCH_ANY}{suffix}")


def filename_date(
    filename: str, delimiter: str = "-"
) -> Optional[tuple[str, datetime]]:
    """Split a filename into (base name, trailing date), or return None."""
    split_file = filename.split(delimiter)
    if len(split_file) < 3:
        return None
    try:
        *name, day, month, year = split_file
        dt = datetime(day=int(day), month=int(month), year=int(year))
        return delimiter.join(name), dt
    except ValueError:
        return None


def info(files: Iterable[Path]) -> Optional[pd.DataFrame]:
    """Collect date, name, size and path per file into a sorted DataFrame."""
    info_list = []
    for path in files:
        if (m := filename_date(path.stem)) is None:
            continue
        name, date = m
        file_info = (date, name, path.stat().st_size, str(path))
        info_list.append(file_info)
    if not info_list:
        return None
    data = pd.DataFrame(info_list)
    data.columns = ["Date", "Name", "Filesize", "Path"]
    data = data.sort_values(by=["Date", "Filesize"], ascending=False)
    return data.reset_index(drop=True)


def group_info_by_date(data: pd.DataFrame) -> pd.DataFrame:
    """Aggregate total size and file count per date."""
    group_by_dates = data.groupby(["Date"], as_index=False)
    df_by_dates = group_by_dates["Filesize"].sum()
    df_by_dates["Files"] = group_by_dates.size()["size"]
    df_by_dates = df_by_dates.sort_values(by=["Filesize", "Date"], ascending=False)
    return df_by_dates.reset_index(drop=True)


if __name__ == "__main__":
    directory = Path.cwd()  # current working directory
    textfiles = get_files(directory, suffix=".txt")
    if (data := info(textfiles)) is not None:
        df_by_dates = group_info_by_date(data)
        print(df_by_dates)
        print()
        print(data[["Date", "Name", "Filesize"]])
```
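Having everything in a DataFrame also pays off when you want further filtering, which becomes a one-liner each (the thresholds here are made up):

```python
# All files larger than 1 MiB
large = data[data["Filesize"] > 2**20]

# Everything dated 2021 or later
recent = data[data["Date"] >= "2021-01-01"]
```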