Timesheet Analysis
Turning the CSV Files into DataFrames for Analysis
Step 1.1: Open and Read the Project Pulse CSV
In [5]:
# Import pandas
import pandas as pd
# Read the CSV file
pp_df = pd.read_csv('./data/project_pulse_tracker.csv')
# Display the output
display(pp_df.head())
| | STAGE | STARTED AT | ENDED AT | DURATION (H) | COST ($) | REMARKS |
|---|---|---|---|---|---|---|
| 0 | Analysis | 01-01-1970, 12:00AM | 45752.79167 | 0.500000 | 13.000000 | Planning |
| 1 | Analysis | 01-01-1970, 12:00AM | 45752.98403 | 0.066667 | 1.733333 | Setup environment |
| 2 | Implementation | 01-01-1970, 12:00AM | 45753.35139 | 0.933333 | 24.266667 | Setup project |
| 3 | Implementation | 01-01-1970, 12:00AM | 45753.41667 | 1.566667 | 40.733333 | Add roles to User module |
| 4 | Implementation | 01-01-1970, 12:00AM | 45753.48333 | 0.883333 | 22.966667 | Backend of Project module completed |
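One thing to note before analyzing: in the preview above, the STARTED AT column appears to hold a constant placeholder (01-01-1970, 12:00AM) while ENDED AT holds Excel serial day numbers, so the start timestamps in this file look corrupted. A quick check, as a sketch:

In [ ]:
# Count distinct STARTED AT values and inspect ENDED AT's dtype to
# confirm the suspected corruption in the start timestamps
print(pp_df['STARTED AT'].nunique())
print(pp_df['ENDED AT'].dtype)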
Step 1.2: Open and Read the Praise Story CSV
In [4]:
# Import pandas
import pandas as pd
# Read the CSV file
ps_df = pd.read_csv('./data/praise_story_tracker.csv')
# Display the output
display(ps_df.head())
| | STAGE | STARTED AT | ENDED AT | DURATION (H) | COST ($) | REMARKS |
|---|---|---|---|---|---|---|
| 0 | Analysis | 45783.85278 | 45783.87500 | 0.533333 | 13.866667 | Logo |
| 1 | Analysis | 45784.49097 | 45784.51736 | 0.633333 | 16.466667 | Screen Plan |
| 2 | Analysis | 45784.51736 | 45784.52292 | 0.133333 | 3.466667 | Project Plan |
| 3 | Implementation | 45789.42361 | 45789.47569 | 1.250000 | 32.500000 | Testimony migration/Status helper |
| 4 | Implementation | 45789.82292 | 45789.91944 | 2.316667 | 60.233333 | Testimony backend module |
Using GenAI to Suggest Analyses
Prompt: Given these two CSV files, what kind of analyses can I perform on them?
The two CSVs were also uploaded to ChatGPT.
Response
The two CSV files, `praise_story_tracker.csv` and `project_pulse_tracker.csv`, share the same structure, tracking the progress of tasks across the stages of two projects. Each file includes:

- `STAGE`: Project stage (e.g., Analysis, Implementation)
- `STARTED AT` and `ENDED AT`: Timestamps (Excel serial date format; see the conversion sketch below)
- `DURATION (H)`: Duration in hours
- `COST ($)`: Cost associated with the task
- `REMARKS`: Descriptive notes
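Since the timestamps are stored as Excel serial day numbers, it helps to convert them to real datetimes before any time-based analysis. A minimal sketch, assuming the serials count days from the Excel epoch of 1899-12-30 (the `ENDED AT (dt)` column name is just for illustration):

In [ ]:
# Convert Excel serial day numbers into pandas datetimes
import pandas as pd
ps_df['ENDED AT (dt)'] = pd.to_datetime(ps_df['ENDED AT'], unit='D', origin='1899-12-30')
display(ps_df[['ENDED AT', 'ENDED AT (dt)']].head())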
Here are several analyses you can perform on these datasets:
1. Time & Cost Summary
- Total hours and total cost per project.
- Average hourly rate per stage.
In [12]:
# Import pandas library
import pandas as pd
# Get the total cost of the 2 dfs
total_cost_pp = pp_df['COST ($)'].sum()
total_cost_ps = ps_df['COST ($)'].sum()
# Get the total duration of the 2 dfs
total_duration_pp = pp_df['DURATION (H)'].sum()
total_duration_ps = ps_df['DURATION (H)'].sum()
# Transform the total cost and durations into a df
data = {
"Project Pulse": {
"Total Cost ($)": total_cost_pp,
"Total Duration (H)": total_duration_pp,
},
"Praise Story": {
"Total Cost ($)": total_cost_ps,
"Total Duration (H)": total_duration_ps,
}
}
print('-- Summary --')
summary_df = pd.DataFrame(data).T
summary_df
-- Summary --
Out[12]:
| | Total Cost ($) | Total Duration (H) |
|---|---|---|
| Project Pulse | 2259.833333 | 86.916667 |
| Praise Story | 1620.666667 | 62.333333 |
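As a quick cross-check on the summary, dividing total cost by total hours recovers each project's blended hourly rate:

In [ ]:
# Blended hourly rate = total cost / total hours; both projects work out
# to about 26 $/h, which matches the per-stage rates computed next
print(summary_df['Total Cost ($)'] / summary_df['Total Duration (H)'])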
In [13]:
# Import pandas
import pandas as pd
# Get average hourly rate per stage
pp_df['Hourly Rate'] = pp_df['COST ($)'] / pp_df['DURATION (H)']
ps_df['Hourly Rate'] = ps_df['COST ($)'] / ps_df['DURATION (H)']
# Group by Stage and Calculate the Average
avg_pp = pp_df.groupby('STAGE')['Hourly Rate'].mean().rename('Project Pulse')
avg_ps = ps_df.groupby('STAGE')['Hourly Rate'].mean().rename('Praise Story')
comparison_df = pd.concat([avg_pp, avg_ps], axis=1)
print('-- Average Hourly Rate --')
display(comparison_df)
-- Average Hourly Rate --
| STAGE | Project Pulse | Praise Story |
|---|---|---|
| Analysis | 26.0 | 26.0 |
| Implementation | 26.0 | 26.0 |
| Testing | 26.0 | 26.0 |
| Design | NaN | 26.0 |
2. Stage Analysis
- Total time and cost spent on each stage (e.g., Analysis vs. Implementation).
- Compare how much time and money each project allocates to each stage.
In [14]:
# Import libraries
import pandas as pd
# Step 1: Group by Stage
# For Praise Story
praise_stage_summary = ps_df.groupby('STAGE')[['DURATION (H)', 'COST ($)']].sum().rename(
columns={'DURATION (H)': 'Total Hours (Praise)', 'COST ($)': 'Total Cost (Praise)'})
# For Project Pulse
project_stage_summary = pp_df.groupby('STAGE')[['DURATION (H)', 'COST ($)']].sum().rename(
columns={'DURATION (H)': 'Total Hours (Pulse)', 'COST ($)': 'Total Cost (Pulse)'})
# Step 2: Combine Results for Comparison
stage_comparison = pd.concat([praise_stage_summary, project_stage_summary], axis=1)
display(stage_comparison)
| STAGE | Total Hours (Praise) | Total Cost (Praise) | Total Hours (Pulse) | Total Cost (Pulse) |
|---|---|---|---|---|
| Analysis | 1.300000 | 33.800000 | 0.566667 | 14.733333 |
| Design | 6.650000 | 172.900000 | NaN | NaN |
| Implementation | 50.100000 | 1302.600000 | 72.133333 | 1875.466667 |
| Testing | 4.283333 | 111.366667 | 14.216667 | 369.633333 |
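To compare allocation more directly, each stage's hours can be expressed as a share of its project's total. A small sketch building on `stage_comparison` from the cell above:

In [ ]:
# Percentage of each project's total hours spent per stage
hours = stage_comparison[['Total Hours (Praise)', 'Total Hours (Pulse)']]
display((hours / hours.sum() * 100).round(1))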
3. Trend Analysis
- Plot cumulative cost and time over tasks to see workflow intensity or burnout risk.
- Identify peaks in workload or cost.
In [16]:
# Import necessary libraries
import matplotlib.pyplot as plt
# Prepare cumulative data
# (Caveat: hours and cost share one y-axis below, so the cost curves dominate
# the scale; and since the Pulse STARTED AT values appear to be placeholders,
# its x-positions are not meaningful)
praise_story_df_sorted = ps_df.sort_values(by='STARTED AT').copy()
praise_story_df_sorted['Cumulative Hours'] = praise_story_df_sorted['DURATION (H)'].cumsum()
praise_story_df_sorted['Cumulative Cost'] = praise_story_df_sorted['COST ($)'].cumsum()
project_pulse_df_sorted = pp_df.sort_values(by='STARTED AT').copy()
project_pulse_df_sorted['Cumulative Hours'] = project_pulse_df_sorted['DURATION (H)'].cumsum()
project_pulse_df_sorted['Cumulative Cost'] = project_pulse_df_sorted['COST ($)'].cumsum()
# Plotting
plt.figure(figsize=(12, 6))
# Praise Story plot
plt.plot(praise_story_df_sorted['STARTED AT'], praise_story_df_sorted['Cumulative Hours'], label='Praise Story - Hours', linestyle='-', marker='o')
plt.plot(praise_story_df_sorted['STARTED AT'], praise_story_df_sorted['Cumulative Cost'], label='Praise Story - Cost', linestyle='--', marker='x')
# Project Pulse plot
plt.plot(project_pulse_df_sorted['STARTED AT'], project_pulse_df_sorted['Cumulative Hours'], label='Project Pulse - Hours', linestyle='-', marker='o')
plt.plot(project_pulse_df_sorted['STARTED AT'], project_pulse_df_sorted['Cumulative Cost'], label='Project Pulse - Cost', linestyle='--', marker='x')
plt.title('Cumulative Cost and Hours Over Time')
plt.xlabel('Started At (Excel Date Format)')
plt.ylabel('Cumulative Value')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()
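To pin down the peaks the plot hints at, the hours can be bucketed by calendar week. A sketch for the Praise Story data only, assuming as before that STARTED AT holds Excel serial day numbers (the Pulse start timestamps appear to be placeholders, so they are skipped):

In [ ]:
# Weekly workload: convert serial dates, then total the hours per week
import pandas as pd
ps_weekly = ps_df.copy()
ps_weekly['Date'] = pd.to_datetime(ps_weekly['STARTED AT'], unit='D', origin='1899-12-30')
weekly_hours = ps_weekly.set_index('Date')['DURATION (H)'].resample('W').sum()
print(weekly_hours.idxmax(), weekly_hours.max())  # busiest week and its hours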
4. Comparative Analysis
- Compare stage-wise efficiency between the two projects.
- Determine which project phase was more cost/time-efficient.
In [17]:
# Import libraries
import pandas as pd
# Assumes the two CSV files are already loaded into the DataFrames
# ps_df and pp_df (see Steps 1.1 and 1.2 above)
# Step 1: Sum total duration and cost per stage
praise_eff = ps_df.groupby('STAGE')[['DURATION (H)', 'COST ($)']].sum()
project_eff = pp_df.groupby('STAGE')[['DURATION (H)', 'COST ($)']].sum()
# Step 2: Compute efficiency (Cost per Hour)
praise_eff['Efficiency (Praise)'] = praise_eff['COST ($)'] / praise_eff['DURATION (H)']
project_eff['Efficiency (Pulse)'] = project_eff['COST ($)'] / project_eff['DURATION (H)']
# Step 3: Combine efficiency columns into one comparison table
efficiency_comparison = pd.concat([
praise_eff['Efficiency (Praise)'],
project_eff['Efficiency (Pulse)']
], axis=1)
# Step 4: Print or export
display(efficiency_comparison)
| STAGE | Efficiency (Praise) | Efficiency (Pulse) |
|---|---|---|
| Analysis | 26.0 | 26.0 |
| Design | 26.0 | NaN |
| Implementation | 26.0 | 26.0 |
| Testing | 26.0 | 26.0 |
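Because the cost per hour comes out to a flat $26 in every stage, cost efficiency alone cannot separate the projects; the raw hours per stage are the more telling comparison. A short sketch reusing the summaries from the cell above:

In [ ]:
# Ratio of hours spent per stage (values above 1 mean Pulse took longer)
hours_ratio = (project_eff['DURATION (H)'] / praise_eff['DURATION (H)']).rename('Pulse / Praise hours')
display(hours_ratio)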
5. Remarks-Based Insights
- Group similar remarks to analyze recurring tasks or bottlenecks.
- Use keyword analysis on remarks to understand focus areas.
In [20]:
# Import libraries
from collections import Counter
import re
import pandas as pd
# Combine remarks from both projects
combined_remarks = pd.concat([ps_df['REMARKS'], pp_df['REMARKS']])
# Convert to lowercase, remove punctuation, and tokenize
def tokenize_remarks(remarks_series):
tokens = []
for remark in remarks_series.dropna():
words = re.findall(r'\b\w+\b', remark.lower()) # Extract words
tokens.extend(words)
return tokens
# Tokenize and count keyword frequency
tokens = tokenize_remarks(combined_remarks)
keyword_counts = Counter(tokens)
# Convert to DataFrame for display
keyword_df = pd.DataFrame(keyword_counts.items(), columns=['Keyword', 'Count'])
keyword_df_sorted = keyword_df.sort_values(by='Count', ascending=False).reset_index(drop=True)
keyword_df_sorted.head(10)
Out[20]:
| | Keyword | Count |
|---|---|---|
| 0 | module | 29 |
| 1 | project | 13 |
| 2 | tests | 11 |
| 3 | wip | 8 |
| 4 | testimony | 8 |
| 5 | and | 8 |
| 6 | record | 7 |
| 7 | controller | 7 |
| 8 | backend | 7 |
| 9 | activity | 6 |
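Since a filler word like "and" makes the top ten, a light stopword filter gives a cleaner picture of the real focus areas. A sketch reusing `tokens` from the cell above (the stopword set is a hand-picked assumption, not a standard list):

In [ ]:
# Drop common English filler words before recounting
from collections import Counter
import pandas as pd
stopwords = {'and', 'of', 'the', 'to', 'for', 'in', 'on', 'a', 'with'}
filtered_counts = Counter(t for t in tokens if t not in stopwords)
filtered_df = pd.DataFrame(filtered_counts.items(), columns=['Keyword', 'Count'])
filtered_df.sort_values(by='Count', ascending=False).reset_index(drop=True).head(10)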