Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

57 create library for flags for all the different python scripts #63

Merged
Show file tree
Hide file tree
Changes from 25 commits
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
fd37bae
Update tacc_stats.ini
sanga1999 Sep 19, 2024
81ec55e
Update setup.py
sanga1999 Sep 19, 2024
b15338f
Create conf_parser.py
sanga1999 Sep 19, 2024
91a68c9
Update listend.py
sanga1999 Sep 19, 2024
354d913
Update sync_acct.py
sanga1999 Sep 19, 2024
710cd3a
Update sync_acct.py
sanga1999 Sep 19, 2024
1354cab
Update sacct_gen.py
sanga1999 Sep 19, 2024
773f343
Update sync_timedb.py
sanga1999 Sep 19, 2024
e7be8dc
Update settings.py
sanga1999 Sep 19, 2024
c615d48
Update update_xalt.py
sanga1999 Sep 19, 2024
62f62f3
Update update_db.py
sanga1999 Sep 19, 2024
b4e25e8
Update update_metrics.py
sanga1999 Sep 19, 2024
0a87442
Update views.py
sanga1999 Sep 19, 2024
3b2dd39
Update jid_table.py
sanga1999 Sep 19, 2024
a8c5899
Update conf_parser.py
sanga1999 Sep 23, 2024
ba240f6
Update listend.py
sanga1999 Sep 23, 2024
7238553
Update sync_acct.py
sanga1999 Sep 23, 2024
cb861b8
Update sync_timedb.py
sanga1999 Sep 23, 2024
2f4e36b
Update sacct_gen.py
sanga1999 Sep 23, 2024
3c8046d
Update settings.py
sanga1999 Sep 23, 2024
d7e1533
Update update_xalt.py
sanga1999 Sep 23, 2024
2f54f27
Update update_db.py
sanga1999 Sep 23, 2024
a84893d
Update update_metrics.py
sanga1999 Sep 23, 2024
cfe7e3b
Update views.py
sanga1999 Sep 23, 2024
ce38589
Update jid_table.py
sanga1999 Sep 23, 2024
acb44dc
Update tacc_stats.ini
sanga1999 Oct 2, 2024
df04d18
Update jid_table.py
sanga1999 Oct 2, 2024
f8d80e5
Update conf_parser.py
sanga1999 Oct 2, 2024
39e8e41
Update settings.py
sanga1999 Oct 16, 2024
45c401b
Create conf_parser.py
sanga1999 Oct 16, 2024
f3c84a8
Delete conf_parser.py
sanga1999 Oct 16, 2024
3219366
Update listend.py
sanga1999 Oct 16, 2024
df3d340
Update sync_acct.py
sanga1999 Oct 16, 2024
fff2b4f
Update sacct_gen.py
sanga1999 Oct 16, 2024
c016690
Update sync_timedb.py
sanga1999 Oct 16, 2024
b8739a1
Update settings.py
sanga1999 Oct 16, 2024
825faab
Update update_xalt.py
sanga1999 Oct 16, 2024
a869298
Update update_db.py
sanga1999 Oct 16, 2024
ffcd09b
Update update_metrics.py
sanga1999 Oct 16, 2024
444db6b
Update views.py
sanga1999 Oct 16, 2024
d7ac010
Update jid_table.py
sanga1999 Oct 16, 2024
87dd9cc
Update conf_parser.py
sanga1999 Oct 16, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
97 changes: 97 additions & 0 deletions conf_parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
import configparser
import os
import sys
import time

# Append your local repository path here:
# sys.path.append("/home/sg99/tacc_stats")

# Shared parser instance that every getter in this module reads from.
cfg = configparser.ConfigParser()

# Resolve tacc_stats.ini relative to this module instead of one developer's
# home directory, so the same code works from any checkout location.
# NOTE(review): assumes tacc_stats.ini sits beside conf_parser.py, as it does
# in this change set -- confirm once the module moves under the package.
CONFIG_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'tacc_stats.ini')

# ConfigParser.read() silently skips files it cannot open; a missing file
# therefore surfaces later as NoSectionError/NoOptionError from the getters.
cfg.read(CONFIG_FILE)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Make this path non-absolute; we can talk about ways to do this.


def get_db_connection_string():
    """Build the database connection string from the ``[PORTAL]`` section.

    Returns:
        str: Space-separated ``keyword=value`` pairs of the form
        ``"dbname=<dbname> user=<username> password=<password> port=<port>"``.

    Raises:
        configparser.NoSectionError: If the ``[PORTAL]`` section is missing.
        configparser.NoOptionError: If one of the required options is missing.
    """
    # (connection keyword, option name in tacc_stats.ini)
    keyword_to_option = (
        ('dbname', 'dbname'),
        ('user', 'username'),
        ('password', 'password'),
        ('port', 'port'),
    )
    # Every value needs its keyword: a bare "taccstats taccstats 5432" tail is
    # not a valid connection string. Formatting each pair separately also keeps
    # '{' / '}' inside credentials from being interpreted as format fields.
    return " ".join(
        "{0}={1}".format(keyword, cfg.get('PORTAL', option))
        for keyword, option in keyword_to_option
    )

def get_db_name():
    """Return the ``dbname`` option of the ``[PORTAL]`` section."""
    return cfg.get('PORTAL', 'dbname')

def get_archive_dir_path():
    """Return the ``archive_dir`` option of the ``[PORTAL]`` section."""
    return cfg.get('PORTAL', 'archive_dir')

def get_host_name_ext():
    """Return the ``host_name_ext`` option of the ``[PORTAL]`` section."""
    return cfg.get('PORTAL', 'host_name_ext')

def get_accounting_path():
    """Return the ``acct_path`` option of the ``[PORTAL]`` section."""
    return cfg.get('PORTAL', 'acct_path')

def get_daily_archive_dir_path():
    """Return the ``daily_archive_dir`` option of the ``[PORTAL]`` section."""
    return cfg.get('PORTAL', 'daily_archive_dir')

def get_rmq_server():
    """Return the ``rmq_server`` option of the ``[RMQ]`` section."""
    return cfg.get('RMQ', 'rmq_server')

def get_rmq_queue():
    """Return the ``rmq_queue`` option of the ``[RMQ]`` section."""
    return cfg.get('RMQ', 'rmq_queue')

def get_machine_name():
    """Return the ``machine`` option of the ``[DEFAULT]`` section."""
    return cfg.get('DEFAULT', 'machine')

def get_server_name():
    """Return the ``server`` option of the ``[DEFAULT]`` section."""
    return cfg.get('DEFAULT', 'server')

def get_data_dir_path():
    """Return the ``data_dir`` option of the ``[DEFAULT]`` section."""
    return cfg.get('DEFAULT', 'data_dir')

def get_engine_name():
    """Return the ``engine_name`` option of the ``[PORTAL]`` section."""
    return cfg.get('PORTAL', 'engine_name')

def get_username():
    """Return the ``username`` option of the ``[PORTAL]`` section."""
    return cfg.get('PORTAL', 'username')

def get_password():
    """Return the ``password`` option of the ``[PORTAL]`` section."""
    return cfg.get('PORTAL', 'password')

def get_host():
    """Return the ``host`` option of the ``[PORTAL]`` section."""
    return cfg.get('PORTAL', 'host')

def get_port():
    """Return the ``port`` option of the ``[PORTAL]`` section as a string."""
    return cfg.get('PORTAL', 'port')

def get_xalt_engine():
    """Return the ``xalt_engine`` option of the ``[XALT]`` section."""
    return cfg.get('XALT', 'xalt_engine')

def get_xalt_name():
    """Return the ``xalt_name`` option of the ``[XALT]`` section."""
    return cfg.get('XALT', 'xalt_name')

def get_xalt_user():
    """Return the ``xalt_user`` option of the ``[XALT]`` section."""
    return cfg.get('XALT', 'xalt_user')

def get_xalt_password():
    """Return the ``xalt_password`` option of the ``[XALT]`` section."""
    return cfg.get('XALT', 'xalt_password')

def get_xalt_host():
    """Return the ``xalt_host`` option of the ``[XALT]`` section."""
    return cfg.get('XALT', 'xalt_host')
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@
include_package_data = True,
scripts = scripts,
install_requires = ['argparse','numpy', 'psycopg2-binary', 'pandas', 'pgcopy',
'bokeh', 'django', 'python-hostlist', 'PyMySQL', 'mod_wsgi',
'bokeh', 'django==3.2.25', 'python-hostlist', 'PyMySQL', 'mod_wsgi',
'mysql-connector-python', 'python-memcached', 'pika', 'mysqlclient'],
platforms = 'any',
classifiers = [
Expand Down
27 changes: 16 additions & 11 deletions tacc_stats.ini
Original file line number Diff line number Diff line change
@@ -1,22 +1,27 @@
## Basic configuration options - modify these
# machine = unique name of machine/queue
# server = database and rmq server hostname
# data_dir = where data is stored
[DEFAULT]
machine = stampede2
machine = tacc-stats-sanga
data_dir = /tacc_stats_site/%(machine)s
server = tacc-stats02.tacc.utexas.edu
server = tacc-stats04.tacc.utexas.edu
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Make this generic in the main tree


## RabbitMQ Configuration
# RMQ_SERVER = RMQ server
# RMQ_QUEUE = RMQ queue name
[RMQ]
rmq_server = %(server)s
rmq_queue = %(machine)s

## Configuration for Web Portal Support
[PORTAL]
acct_path = %(data_dir)s/accounting
archive_dir = %(data_dir)s/archive
host_name_ext = %(machine)s.tacc.utexas.edu
dbname = %(machine)s_db
dbname = test_db
daily_archive_dir = %(data_dir)s/daily_archive
engine_name = django.db.backends.postgresql_psycopg2
username = taccstats
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Change the taccstats username/password values to generic placeholders such as "user" and "pass".

password = taccstats
host = localhost
port = 5432

[XALT]
xalt_engine = none
xalt_name = none
xalt_user = none
xalt_password = none
xalt_host = none
12 changes: 8 additions & 4 deletions tacc_stats/analysis/gen/jid_table.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,17 @@
import time
import os,sys,time
# Append your local repository path here:
# sys.path.append("/home/sg99/tacc_stats")
import psycopg2
import tacc_stats.cfg as cfg
import conf_parser as cfg
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

conf_parser needs to be imported by its full package path under tacc_stats, like the line below: from tacc_stats.analysis.gen.utils import read_sql

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a comment for every file

from tacc_stats.analysis.gen.utils import read_sql

class jid_table:

def __init__(self, jid):
CONNECTION = "dbname=ls6_db1 host=localhost user=postgres port=5432"

CONNECTION = cfg.get_db_connection_string()
print("SOMETHING SHOULD PRINT\n")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Remove debug statements

print(CONNECTION)
print("Initializing table for job {0}".format(jid))

self.jid = jid
Expand All @@ -18,7 +22,7 @@ def __init__(self, jid):
# Get job accounting data
acct_data = read_sql("""select * from job_data where jid = '{0}'""".format(jid), self.conj)
# job_data accounting host names must be converted to fqdn
self.acct_host_list = [h + '.' + cfg.host_name_ext for h in acct_data["host_list"].values[0]]
self.acct_host_list = [h + '.' + cfg.get_host_name_ext() for h in acct_data["host_list"].values[0]]

self.start_time = acct_data["start_time"].dt.tz_convert('US/Central').dt.tz_localize(None).values[0]
self.end_time = acct_data["end_time"].dt.tz_convert('US/Central').dt.tz_localize(None).values[0]
Expand Down
7 changes: 4 additions & 3 deletions tacc_stats/analysis/metrics/update_metrics.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
#!/usr/bin/env python
import os,sys, pwd
# Append your local repository path here:
# sys.path.append("/home/sg99/tacc_stats")
from datetime import timedelta, datetime
import psycopg2

Expand All @@ -10,11 +12,11 @@
from tacc_stats.site.machine.models import job_data, metrics_data
from tacc_stats.analysis.metrics import metrics

import tacc_stats.cfg as cfg
import conf_parser as cfg
from tacc_stats.progress import progress

CONNECTION = cfg.get_db_connection_string()

CONNECTION = "dbname={0} user=postgres port=5432".format(cfg.dbname)
query_create_metric_table = """CREATE TABLE IF NOT EXISTS metrics_data (
id SERIAL PRIMARY KEY,
jid VARCHAR(32),
Expand Down Expand Up @@ -79,4 +81,3 @@ def update_metrics(date, rerun = False):
while date <= enddate:
update_metrics(date, rerun = False)
date += timedelta(days=1)

7 changes: 4 additions & 3 deletions tacc_stats/dbload/sacct_gen.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
import os, sys
# Append your local repository path here:
# sys.path.append("/home/sg99/tacc_stats")
from datetime import timedelta, date, datetime
from dateutil.parser import parse
from tacc_stats import cfg
import conf_parser as cfg

acct_path = cfg.acct_path
acct_path = cfg.get_accounting_path()

def daterange(start_date, end_date):
for n in range(int ((end_date - start_date).days)):
Expand All @@ -24,4 +26,3 @@ def daterange(start_date, end_date):
sacct_command = "/bin/sacct -a -s CA,CD,F,NF,TO -P -X -S " + single_date.strftime("%Y-%m-%d") + " -E " + (single_date + timedelta(1)).strftime("%Y-%m-%d") +" -o JobID,User,Account,Start,End,Submit,Partition,TimeLimit,JobName,State,NNodes,ReqCPUS,NodeList > " + file_name
print(sacct_command)
os.system(sacct_command)

11 changes: 8 additions & 3 deletions tacc_stats/dbload/sync_acct.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
#!/usr/bin/env python3
import os,sys,time

# Append your local repository path here:
# sys.path.append("/home/sg99/tacc_stats")

from datetime import timedelta, datetime

import psycopg2
Expand All @@ -11,9 +15,10 @@
import hostlist

from tacc_stats.analysis.gen.utils import read_sql
from tacc_stats import cfg

CONNECTION = "dbname={0} user=postgres port=5432".format(cfg.dbname)
import conf_parser as cfg

CONNECTION = cfg.get_db_connection_string()

query_create_jobdata_table = """CREATE TABLE IF NOT EXISTS job_data (
jid VARCHAR(32) NOT NULL,
Expand Down Expand Up @@ -113,7 +118,7 @@ def sync_acct(acct_file, date_str):

# Parse and convert raw stats files to pandas dataframe
start = time.time()
directory = cfg.acct_path
directory = cfg.get_accounting_path()

while startdate <= enddate:
for entry in os.scandir(directory):
Expand Down
19 changes: 11 additions & 8 deletions tacc_stats/dbload/sync_timedb.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
import psycopg2
from pgcopy import CopyManager
import os, sys, stat
# Append your local repository path here:
# sys.path.append("/home/sg99/tacc_stats")
import multiprocessing
import itertools
from multiprocessing import Pool, get_context, Lock, set_start_method
Expand All @@ -17,7 +19,8 @@
from pandas import DataFrame, to_datetime, Timedelta, Timestamp, concat

from tacc_stats.analysis.gen.utils import read_sql
from tacc_stats import cfg

import conf_parser as cfg

# archive toggle
should_archive = True
Expand All @@ -28,11 +31,10 @@
# Thread count for database loading and archival
thread_count = 8

tgz_archive_dir = "/tacc_stats_site/ls6/tgz_archives/"

tgz_archive_dir = cfg.get_daily_archive_dir_path()


CONNECTION = "dbname={0} user=postgres port=5432".format(cfg.dbname)
CONNECTION = cfg.get_db_connection_string()

amd64_pmc_eventmap = { 0x43ff03 : "FLOPS,W=48", 0x4300c2 : "BRANCH_INST_RETIRED,W=48", 0x4300c3: "BRANCH_INST_RETIRED_MISS,W=48",
0x4308af : "DISPATCH_STALL_CYCLES1,W=48", 0x43ffae :"DISPATCH_STALL_CYCLES0,W=48" }
Expand Down Expand Up @@ -373,6 +375,7 @@ def database_startup():
host VARCHAR(64),
jid VARCHAR(32),
type VARCHAR(32),
dev VARCHAR(64),
event VARCHAR(64),
unit VARCHAR(16),
value real,
Expand Down Expand Up @@ -415,9 +418,9 @@ def database_startup():
#cur.execute(query_create_hostdata_hypertable)
#cur.execute(query_create_compression)

# cur.execute(query_create_process_table)
# cur.execute(query_create_process_index)
cur.execute("SELECT pg_size_pretty(pg_database_size('{0}'));".format(cfg.dbname))
#cur.execute(query_create_process_table)
#cur.execute(query_create_process_index)
cur.execute("SELECT pg_size_pretty(pg_database_size('{0}'));".format(cfg.get_db_name()))
for x in cur.fetchall():
print("Database Size:", x[0])
if debug:
Expand Down Expand Up @@ -464,7 +467,7 @@ def database_startup():

# Parse and convert raw stats files to pandas dataframe
start = time.time()
directory = cfg.archive_dir
directory = cfg.get_archive_dir_path()

stats_files = []
ar_file_mapping = {}
Expand Down
13 changes: 9 additions & 4 deletions tacc_stats/listend.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,14 @@
import pika
import os, sys
import time
import tacc_stats.cfg as cfg

from fcntl import flock, LOCK_EX, LOCK_NB

# Append your local repository path here:
# sys.path.append("/home/sg99/tacc_stats")

import conf_parser as cfg

def on_message(channel, method_frame, header_frame, body):

try:
Expand All @@ -20,7 +25,7 @@ def on_message(channel, method_frame, header_frame, body):
host = message.split()[2]

#if host == "localhost.localdomain": return
host_dir = os.path.join(cfg.archive_dir, host)
host_dir = os.path.join(cfg.get_archive_dir_path(), host)
if not os.path.exists(host_dir):
os.makedirs(host_dir)

Expand All @@ -47,10 +52,10 @@ def on_message(channel, method_frame, header_frame, body):
print("listend is already running")
sys.exit()

parameters = pika.ConnectionParameters(cfg.rmq_server)
parameters = pika.ConnectionParameters(cfg.get_rmq_server())
connection = pika.BlockingConnection(parameters)
channel = connection.channel()
channel.basic_consume(cfg.rmq_queue, on_message)
channel.basic_consume(cfg.get_rmq_queue(), on_message)
try:
channel.start_consuming()
except KeyboardInterrupt:
Expand Down
6 changes: 4 additions & 2 deletions tacc_stats/site/machine/update_db.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
#!/usr/bin/env python
import os,sys, pwd
# Append your local repository path here:
# sys.path.append("/home/sg99/tacc_stats")
from datetime import timedelta, datetime
from dateutil.parser import parse
from fcntl import flock, LOCK_EX, LOCK_NB
Expand All @@ -9,7 +11,7 @@
from tacc_stats.site.machine.models import Job, Host, Libraries
from tacc_stats.site.xalt.models import run, join_run_object, lib
from tacc_stats.analysis.metrics import metrics
import tacc_stats.cfg as cfg
import conf_parser as cfg
from tacc_stats.progress import progress
from tacc_stats.daterange import daterange
import pytz, calendar
Expand All @@ -23,7 +25,7 @@ def update_acct(date, rerun = False):
tz = pytz.timezone('US/Central')
ctr = 0

with open(os.path.join(cfg.acct_path, date.strftime("%Y-%m-%d") + '.txt'), encoding = "latin1") as fd:
with open(os.path.join(cfg.get_accounting_path(), date.strftime("%Y-%m-%d") + '.txt'), encoding = "latin1") as fd:
nrecords = sum(1 for record in csv.DictReader(fd))
fd.seek(0)

Expand Down
Loading