Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,4 @@ __pycache__/
venv/
target/
.DS_Store
.aider*
3 changes: 2 additions & 1 deletion Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ ascii-graph = "*"
agate = "*"
termcolor = "*"
argparse = "*"
python-dateutil = "*"

[requires]
python_version = "3.7"
python_version = "3.8"
366 changes: 271 additions & 95 deletions Pipfile.lock

Large diffs are not rendered by default.

38 changes: 31 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
![screenshot](screenshots/tool.png)


This tool will analyze your gmail account to show you statics of your emails. e.g.
This tool will analyze your gmail account to show you statistics of your emails. e.g.

- Total number of emails
- First email received
Expand All @@ -28,14 +28,38 @@ $ python analyzer.py --help

```
$ python analyzer.py --help
usage: analyzer.py [-h] [--top TOP] [--user USER] [--verbose] [--version]
usage: analyzer.py [-h] [--top TOP] [--user USER] [--query QUERY]
[--inactive INACTIVE] [--max-retry-rounds MAX_RETRY_ROUNDS]
[--pull-data] [--refresh-data] [--analyze-only]
[--export-csv EXPORT_CSV] [--verbose] [--version]

Simple Gmail Analyzer

optional arguments:
-h, --help show this help message and exit
--top TOP Number of results to show
--user USER User ID to fetch data for
--verbose Verbose output, helpful for debugging
--version Display version and exit
-h, --help show this help message and exit
--top TOP Number of results to show
--user USER User ID to fetch data for
--query QUERY Gmail search query (e.g., 'label:work after:2023/01/01')
--inactive INACTIVE Show senders inactive for more than X days
--max-retry-rounds MAX_RETRY_ROUNDS
Max retry rounds for failed message fetches (0 for unlimited)
--pull-data Fetch and cache data, then exit
--refresh-data Force refresh cached data, then exit
--analyze-only Analyze using cached data only (no API calls)
--export-csv EXPORT_CSV
Export message metadata to CSV at the given path
--verbose Verbose output, helpful for debugging
--version Display version and exit
```

# Caching & Data Pulls

The analyzer caches message and metadata pickles in `cache/` for 24 hours. Queries
create separate cache files, so cached data stays scoped to each Gmail search.

Examples:

```
$ python analyzer.py --pull-data --query "label:work after:2023/01/01"
$ python analyzer.py --analyze-only --export-csv out/messages.csv
```
47 changes: 47 additions & 0 deletions analyzer.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
#!/usr/bin/env python3

import argparse
import sys
import colorama
Expand All @@ -21,6 +23,42 @@ def init_args():
parser.add_argument(
"--version", action="store_true", help="Display version and exit"
)
parser.add_argument(
"--query",
type=str,
default=None,
help="Gmail search query (e.g., 'label:work after:2023/01/01')",
)
parser.add_argument(
"--inactive", type=int, default=0, help="Show senders inactive for more than X days"
)
parser.add_argument(
"--max-retry-rounds",
type=int,
default=5,
help="Max retry rounds for failed message fetches (0 for unlimited)",
)
parser.add_argument(
"--pull-data",
action="store_true",
help="Fetch and cache data, then exit",
)
parser.add_argument(
"--refresh-data",
action="store_true",
help="Force refresh cached data, then exit",
)
parser.add_argument(
"--analyze-only",
action="store_true",
help="Analyze using cached data only (no API calls)",
)
parser.add_argument(
"--export-csv",
type=str,
default=None,
help="Export message metadata to CSV at the given path",
)

args = vars(parser.parse_args())

Expand All @@ -36,4 +74,13 @@ def init_args():
print("gmail analyzer v{}".format(VERSION))
sys.exit()

mode_flags = [
args["pull_data"],
args["refresh_data"],
args["analyze_only"],
]
if sum(1 for flag in mode_flags if flag) > 1:
print("Error: --pull-data, --refresh-data, and --analyze-only are mutually exclusive.")
sys.exit(1)

Metrics(args).start()
28 changes: 20 additions & 8 deletions src/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,19 +14,31 @@ def remove_dup_timezone(date_str):

def convert_date(date_str):
    """Parse an email ``Date:`` header value into a ``datetime``.

    Dates arrive in many formats. Try the flexible ``dateutil`` parser
    first, then fall back to a cascade of explicit ``strptime`` formats.

    Returns ``datetime.now()`` for empty input or when every parse
    attempt fails, so callers always receive a ``datetime`` object.
    """
    # dateutil is a soft dependency here: degrade gracefully to the
    # explicit strptime formats when it is not installed.
    try:
        from dateutil import parser
    except ImportError:
        parser = None

    if not date_str:
        return datetime.now()

    clean_date = remove_dup_timezone(date_str)

    if parser is not None:
        try:
            # dateutil handles the vast majority of real-world formats.
            return parser.parse(clean_date)
        except (ValueError, OverflowError):
            # Fall through to the explicit formats below.
            pass

    # Fallback: try each known explicit format in turn.
    formats = (
        "%d %b %Y %H:%M:%S %z",
        "%d %b %Y %H:%M:%S %Z",
        "%d %b %Y %H:%M:%S",
    )
    for fmt in formats:
        try:
            return datetime.strptime(clean_date, fmt)
        except ValueError:
            continue

    # If all parsing fails, return the current date rather than raising.
    return datetime.now()


def reduce_to_date(date_str):
Expand Down
176 changes: 158 additions & 18 deletions src/metrics.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,14 @@
import time
import sys
from datetime import datetime
from progress.spinner import Spinner
from ascii_graph import Pyasciigraph
from termgraph.termgraph import chart, calendar_heatmap
try:
from ascii_graph import Pyasciigraph
from termgraph.termgraph import chart, calendar_heatmap
except ImportError:
print("Error: Required visualization packages not found.")
print("Please run: pipenv install ascii-graph termgraph")
sys.exit(1)
import agate
import warnings
import concurrent.futures
Expand All @@ -17,9 +24,17 @@ def __init__(self, args):
# Ignore warnings about SSL connections
warnings.simplefilter("ignore", ResourceWarning)

self.processor = Processor()
self.processor = Processor(
query=args.get("query"),
max_retry_rounds=args.get("max_retry_rounds"),
)
self.user_id = args["user"]
self.resultsLimit = args["top"]
self.inactive_days = args["inactive"]
self.pull_data = args.get("pull_data", False)
self.refresh_data = args.get("refresh_data", False)
self.analyze_only = args.get("analyze_only", False)
self.export_csv = args.get("export_csv")
self.table = None

def _load_table(self, event):
Expand Down Expand Up @@ -47,17 +62,25 @@ def _analyze_senders(self, event):
event.set()

print(f"\n\n{helpers.h1_icn} Senders (top {self.resultsLimit})\n")
args = {
"stacked": False,
"width": 55,
"no_labels": False,
"format": "{:<,d}",
"suffix": "",
"vertical": False,
"different_scale": False,
}

chart(colors=[94], data=data_count, args=args, labels=data_keys)

try:
args = {
"stacked": False,
"width": 55,
"no_labels": False,
"format": "{:<,d}",
"suffix": "",
"vertical": False,
"different_scale": False,
}

chart(colors=[94], data=data_count, args=args, labels=data_keys)
except Exception as e:
print(f"Note: Could not display chart. Using simple output instead. Error: {str(e)}")
# Print a simple table if chart function fails
for i in range(len(data_keys)):
if i < len(data_count):
print(f"{data_keys[i]}: {data_count[i][0]:,}")

def _analyze_count(self, event):
# Average emails per day
Expand Down Expand Up @@ -115,6 +138,81 @@ def _analyze_count(self, event):
print(f"\n\n{helpers.h1_icn} Stats\n")
print(termtables.to_string(metrics))

def _analyze_inactive_senders(self, event):
    """Report senders whose most recent email is older than the
    ``--inactive`` threshold (in days).

    Sets *event* once computation finishes (before printing) so the
    caller's progress spinner can stop.
    """
    if self.inactive_days <= 0:
        event.set()
        return

    # Naive "now"; parsed dates are normalized to naive local time
    # below so the subtraction never mixes aware and naive datetimes.
    current_date = datetime.now()

    # Most recent email date seen per sender.
    sender_last_dates = {}

    # Ignore rows missing either a sender or a date.
    filtered_table = self.table.where(
        lambda row: row["fields/from"] is not None and row["fields/date"] is not None
    )

    for row in filtered_table.rows:
        sender = row["fields/from"]
        date_obj = helpers.convert_date(row["fields/date"])

        # convert_date may return tz-aware datetimes (headers with a
        # numeric offset); convert to local time and strip tzinfo so the
        # comparison with the naive current_date cannot raise TypeError.
        if date_obj.tzinfo is not None:
            date_obj = date_obj.astimezone().replace(tzinfo=None)

        known = sender_last_dates.get(sender)
        if known is None or date_obj > known["date"]:
            sender_last_dates[sender] = {
                "date": date_obj,
                "date_str": row["fields/date"],
            }

    # Collect senders past the inactivity threshold.
    inactive_senders = []
    for sender, data in sender_last_dates.items():
        days_since = (current_date - data["date"]).days
        if days_since > self.inactive_days:
            inactive_senders.append(
                {
                    "sender": sender,
                    "last_email_date": data["date_str"],
                    "days_since": days_since,
                }
            )

    # Most-stale first, limited to the configured number of results.
    inactive_senders.sort(key=lambda x: x["days_since"], reverse=True)
    inactive_senders = inactive_senders[: self.resultsLimit]

    event.set()

    if inactive_senders:
        print(f"\n\n{helpers.h1_icn} Senders inactive for more than {self.inactive_days} days\n")

        # Render as a simple three-column table.
        table_data = [
            [s["sender"], s["last_email_date"], f"{s['days_since']} days"]
            for s in inactive_senders
        ]

        headers = ["Sender", "Last Email Date", "Days Since"]
        print(termtables.to_string(table_data, header=headers))
    else:
        print(f"\n\n{helpers.h1_icn} No senders inactive for more than {self.inactive_days} days found")

def _analyze_date(self, event):
table = self.table.where(lambda row: row["fields/date"] is not None).compute(
[
Expand Down Expand Up @@ -167,7 +265,15 @@ def _analyze_date(self, event):
args = {"color": False, "custom_tick": False, "start_dt": f"{year}-01-01"}

print(f"\n{helpers.h2_icn} Year {year} ({_sum:,} emails)\n")
calendar_heatmap(data=data_count, args=args, labels=data_keys)

try:
calendar_heatmap(data=data_count, args=args, labels=data_keys)
except Exception as e:
print(f"Note: Could not display calendar heatmap. Showing simple output instead. Error: {str(e)}")
# Show top 10 dates with most emails
sorted_dates = sorted(zip(data_keys, [c[0] for c in data_count]), key=lambda x: x[1], reverse=True)
for date, count in sorted_dates[:10]:
print(f"{date}: {count:,} emails")

def analyse(self):
"""
Expand Down Expand Up @@ -226,10 +332,44 @@ def analyse(self):
time.sleep(0.1)

progress.finish()

# Only run inactive senders analysis if the threshold is set
if self.inactive_days > 0:
progress = Spinner(f"{helpers.loader_icn} Analysing inactive senders ")

event = Event()

future = executor.submit(self._analyze_inactive_senders, event)

while not event.isSet() and future.running():
progress.next()
time.sleep(0.1)

progress.finish()

# Print completion message
print("\nAnalysis complete!")

def start(self):
    """Entry point: fetch (or load cached) data, optionally export CSV,
    then run the analysis unless a pull-only mode was requested.
    """
    if self.analyze_only:
        # Offline mode: use cached metadata only — never touch the API.
        if not self.processor.load_cached_metadata():
            print("No cached metadata found. Run with --pull-data first.")
            return
        if self.export_csv:
            self.processor.export_csv(self.export_csv)
        self.analyse()
        return

    # --refresh-data forces a re-fetch even when a fresh cache exists.
    force_refresh = self.refresh_data
    messages = self.processor.get_messages(force_refresh=force_refresh)
    self.processor.get_metadata(messages, force_refresh=force_refresh)

    if self.export_csv:
        self.processor.export_csv(self.export_csv)

    if self.pull_data or self.refresh_data:
        # Pull-only modes stop after caching the data.
        print("Data pull complete.")
        return

    self.analyse()
Loading