Understandメトリクスの簡単な可視化・分析¶

1. 初期設定¶

1.1 初期設定¶

  • 【共通】SCM(git/svn)から取得するか、csvから取得するかをフラグで設定(以後【】で示す)
    ※SCMはSource Code Managementの略
  • 【SCMからの場合】環境変数を設定
  • 【SCMからの場合】git/svn/undコマンド実行可否の確認
    ※ コマンドラインからの実行ができるよう、システムコマンド実行はsubprocessパッケージをインポートし、subprocess.run()で実行する方法により実行しています。
In [1]:
#不足しているパッケージがあればインストール
#!pip install plotly
In [2]:
import numpy as np
import pandas as pd
from pandas import Series, DataFrame
from numpy.random import normal
import shutil
import os
import platform
import sys
import plotly
import plotly.graph_objects as go
plotly.offline.init_notebook_mode(connected=False)
import plotly.express as px
import subprocess

pd.set_option("display.max_colwidth", 0)
print(platform.system())

# いずれかをTrueにする
is_git = True
is_svn = False
is_read_csv = False

# check
flag_sum=is_git+is_svn+is_read_csv
if flag_sum !=1:
    try:
        raise Exception("Please set at least one and at most one flag")
    except ValueError as e:
        traceback.print_exc()

# csv出力設定
is_to_csv = True

# html出力設定
is_to_html = True

# 現在のディレクトリ取得
cwd = os.getcwd()
print(cwd)

# プレフィックスの削除設定
isRemovePrefix = True
#

if platform.system() == "Linux":
    #prefix="/opt/jupyter/repo/"
    prefix=cwd+"/repo/"
elif platform.system() == "Windows":
    prefix=cwd+"\\repo\\"
else:
    print("OS:not clear")




if is_git or is_svn:
    # undコマンドにPATHを通す
    if "scitools" or "SciTools" not in os.environ["PATH"]:
        if platform.system() == "Linux":
            #os.environ["PATH"] = os.environ["PATH"]+":/opt/jupyter/app/scitools/bin/linux64/"
            os.environ["PATH"] = os.environ["PATH"]+":/opt/jupyter/app/scitools1139/bin/linux64/"
        elif platform.system() == "Windows":
            print("Windows")
            # 適宜環境に応じて追加する
            os.environ["PATH"] = os.environ["PATH"]+":C:\\Program Files\\SciTools1139\\bin\\pc-win64\\"
        else:
            print("OS:not clear")
        
    #print(os.getenv("PATH"))

my_env = os.environ.copy()
    
Windows
C:\work\work_rad\showUndMetrics
Windows
In [3]:
def executeSubprocess(exeArr):
    res = subprocess.run(exeArr, encoding='utf-8',capture_output=True, env = my_env).stdout
    return res
def executeSubprocessTofile(exeArr,logFileName):
    with open(logFileName, 'w') as fp:
        subprocess.run(exeArr, encoding='utf-8', stdout = fp,env = my_env)
    return readLogFile(logFileName)
def executeSubprocessTofileAdd(exeArr,logFileName):
    with open(logFileName, 'a') as fp:
        subprocess.run(exeArr, encoding='utf-8', stdout = fp,env = my_env)
    return readLogFile(logFileName)

def executeSubprocessWithPipeTofile(exeArr1, exeArr2, logFileName):
    with open(logFileName, 'w') as fp:
        p1 = subprocess.Popen(exeArr1, encoding='utf-8', stdout=subprocess.PIPE,env = my_env)
        p2 = subprocess.Popen(exeArr2, encoding='utf-8', stdin=p1.stdout, stdout=fp,env = my_env)
        p1.stdout.close()
        output = p2.communicate()[0]
        
    return readLogFile(logFileName)

def readLogFile(logFileName):
    with open(logFileName,"r", encoding="utf-8") as f:
        s = f.read()
        return s
In [4]:
if is_git or is_svn:
    cmd = ["und", "version"]
    res = executeSubprocess(cmd)
    print(res)
if is_git:
    cmd = ["git", "--version"]
    res = executeSubprocess(cmd)
    print(res)
if is_svn:
    cmd = ["svn", "--version"]
    res = executeSubprocess(cmd)
    print(res)
(Build 1139)

git version 2.40.0.windows.1

1.2 言語設定¶

  • 【共通】言語設定実施
In [5]:
# 言語設定
#language = "C++"
language = "C"


isShowPackage = -1

if language == "C":
    isShowPackage = 0
    und_lang = "C++"
elif language == "C++":
    isShowPackage = 0
    und_lang = "C++"
elif language == "JAVA":
    isShowPackage = 1
    und_lang = "JAVA"
elif language == "C#":
    isShowPackage = 1
    und_lang = "C#"
#elif language == "Python":
#    isShowPackage = 1
#    und_lang = "Python"
else:
    isShowPackage = -1

1.2 共通使用ライブラリ関数の定義¶

  • 【共通】Prefix削除関数(グラフ表示用にファイルパス名の短縮の実施)
In [6]:
def removePrefix(df, removeTxt):
    for i in range(len(df)):
        df.at[i, "Name"] = str(df.at[i, "Name"]).replace(removeTxt, "")
        df.at[i, "File"] = str(df.at[i, "File"]).replace(removeTxt, "")
    return df

2. SCMから読み込む場合の処理¶

2.1 SCM利用時用ライブラリ関数の定義¶

  • 【SCMからの場合】リポジトリ削除関数
  • 【SCMからの場合】リポジトリ初期化関数
  • 【SCMからの場合】ソースコードからUnderstandメトリクスを取得する関数
In [7]:
# SCM(git/svn)用
def removeRepo(repo):
       
    # アクセス権を取れない場合にエラーになる。その場合はカーネルクリア後にdirPathRepoの下のcloneしたフォルダを毎回削除。
    if (os.path.exists(repo)):
        print("remove repository start")
        print(str(repo)+"を削除します。※アクセス権がない場合は削除に失敗します。手動で削除してください。")
        #shutil.rmtree(repo) #---repoのパスが削除対象にしてよいか確認後、ここを有効行としてください。---
        print("remove repository end")
 

# SCM(git/svn)用 ディレクトリ初期化関数
def initDirectory(path,url):
    
    # Gitの場合は、checkoutURLの最後の文字列はプロジェクト名のことが多いため、lastnameをプロジェクト名として自動抽出
    # Svnの場合は、svnProjectPathString配下のコードのみ部分的にチェックアウトされるが、この場合もlastnameをプロジェクト名と見做して自動抽出
    lastname=url.split('/')[-1]


    # ディレクトリ移動用
    #dirPathRepo = "/opt/jupyter/repo/"
    dirPathRepo = path
    os.makedirs(dirPathRepo,exist_ok=True)
    os.chdir(dirPathRepo)

    # プロジェクト解析用
    dirPathRepo = path+lastname
    print(dirPathRepo)
    dirPathRepoResult = dirPathRepo+"/Result"
    # 初期化のためディレクトリを削除
    removeRepo(dirPathRepo)
    
    os.makedirs(dirPathRepo,exist_ok=False)
    
    return lastname, dirPathRepo,dirPathRepoResult


def undCreateProject(dirPath, dirPathRepoResult, und_proj_filename, und_proj_name,und_lang):
    os.makedirs(dirPath+"/"+und_proj_filename+"/local",exist_ok=True)
    print("und create")
    cmd = ["und", "create", "-db", und_proj_filename, "-languages", und_lang]
    res = executeSubprocess(cmd)
    print(res)
def undAddProject(dirPath,und_proj_filename):
    print("und add")
    cmd = ["und", "add", dirPath, und_proj_filename]
    res = executeSubprocess(cmd)
    print(res)   
def undAnalyzeProject(und_proj_filename):
    print("und analyze")
    #!und analyze -db $und_proj_filename
    cmd = ["und", "analyze", und_proj_filename]
    res = executeSubprocess(cmd)

def undMetricsProject(und_proj_filename):
    print("und metrics")
    #!und settings -MetricMetrics all $und_proj_filename
    cmd = ["und", "settings", "-MetricMetrics", "all", und_proj_filename]
    res = executeSubprocess(cmd)
    #!und metrics $und_proj_filename
    cmd = ["und", "metrics", und_proj_filename]
    res = executeSubprocess(cmd)
    
def undSettingsProject(und_proj_filename):
    print("und settings")
    #!und settings -MetricFileNameDisplayMode FullPath $und_proj_filename
    cmd = ["und", "settings", "-MetricFileNameDisplayMode", "FullPath", und_proj_filename]
    res = executeSubprocess(cmd)

    #!und settings -MetricDeclaredInFileDisplayMode FullPath $und_proj_filename
    cmd = ["und", "settings", "-MetricDeclaredInFileDisplayMode", "FullPath", und_proj_filename]
    res = executeSubprocess(cmd)

    #!und settings -MetricShowDeclaredInFile on $und_proj_filename
    cmd = ["und", "settings", "-MetricShowDeclaredInFile", "on", und_proj_filename]
    res = executeSubprocess(cmd)
  
    #!und settings -C++UseStrict off $und_proj_filename
    if und_lang == "C++":
        cmd = ["und", "settings", "-C++UseStrict", "off", und_proj_filename]
        res = executeSubprocess(cmd)
        print("UseFuzzy")

def undListProject(settings_txt_path,und_proj_filename):
    print("und list")
    #!und list -lang settings $und_proj_filename >$settings_txt_path
    cmd = ["und", "list","-lang","settings", und_proj_filename]
    res = executeSubprocessTofile(cmd,settings_txt_path)
    
    
    #!und list -metrics settings $und_proj_filename >>$settings_txt_path
    cmd = ["und", "list", "-metrics", "settings",und_proj_filename]
    res = executeSubprocessTofileAdd(cmd,settings_txt_path)
    
    
# ソースコードからUnderstandメトリクス取得する関数
def getMetricsFromCode(dirPath, dirPathRepoResult, und_proj_filename, und_proj_name):
    
    #!und create -db $und_proj_filename -languages $und_lang
    undCreateProject(dirPath, dirPathRepoResult, und_proj_filename, und_proj_name,und_lang)
    #!und add $dirPath $und_proj_filename
    undAddProject(dirPath,und_proj_filename)
    undSettingsProject(und_proj_filename)
    undAnalyzeProject(und_proj_filename)
    #分析時のプロジェクト設定内容を記録としてテキスト出力
    settings_txt_path = dirPathRepoResult+"/"+"und_settings_lang.txt"
    
    undMetricsProject(und_proj_filename)
    undListProject(settings_txt_path,und_proj_filename)

    #csvの読み込み
    df_org = pd.read_csv('./'+und_proj_name+'.csv', sep=',', encoding="shift-jis")
    print(len(df_org))
    new_csv_path = shutil.move('./'+und_proj_name+'.csv', dirPathRepoResult+"/"+und_proj_name+'.csv')
    zip_path = dirPath+"/"+und_proj_filename
    shutil.make_archive(zip_path, 'zip', root_dir=zip_path)
    new_zip_path = shutil.move(zip_path+'.zip', dirPathRepoResult+"/"+und_proj_name+'.zip')
    
    # 解決できていない関数のメトリクスは除去
    # Function/Method
    df_org_FM=df_org[~df_org['Kind'].str.contains('Unknown') & df_org['Kind'].str.contains('Method')|df_org['Kind'].str.contains('Function')].copy().reset_index(drop=True)
    # File
    df_org_File=df_org[df_org['Kind'].str.contains('File')].copy().reset_index(drop=True)
    default_culumn = ['Kind','Name','File']
    metrics = ["CountInput","CountLine","CountLineBlank","CountLineCode","CountLineCodeDecl","CountLineCodeExe","CountLineComment","CountOutput","CountSemicolon","CountStmt","CountStmtDecl","CountStmtExe","Cyclomatic","Essential","MaxCyclomatic", "MaxNesting","RatioCommentToCode"]

    culumn_use = default_culumn + metrics
    df_org_FM=df_org_FM.loc[:,culumn_use]
    df_org_File=df_org_File.loc[:,culumn_use]
    return df_org_FM, df_org_File

2.2 SCMチェックアウト¶

  • 【SCMからの場合】リポジトリをチェックアウトするディレクトリの初期化
  • 【SCMからの場合】Git, Svnからチェックアウト(クローン)
In [8]:
und_proj_name = "project"
und_proj_filename = und_proj_name+".und"

# Svn用処理
if is_svn == True:
    checkoutURLRoot = "http://svn.code.sf.net/p/tortoisesvn/code"
    #checkoutURLRoot = "http://svn.code.sf.net/p/culfw/code"
    #checkoutURLRoot = "http://svn.code.sf.net/p/jamon/code"
    
    
    svnProjectPathString = "/trunk/src/Utils"
    #svnProjectPathString = "/trunk/src"
    #svnProjectPathString = "/trunk/jamon"

    checkoutURL = checkoutURLRoot+svnProjectPathString
    if platform.system() == "Linux":
        #dirPathRepo = "/opt/jupyter/repo/"
        dirPathRepo =cwd+"/repo/"
    elif platform.system() == "Windows":
        dirPathRepo = cwd+"\\repo\\"
    else:
        print("OS:not clear")

    
    lastname, dirPathRepo,dirPathRepoResult = initDirectory(dirPathRepo, checkoutURL)
    !svn checkout -r HEAD $checkoutURL
    os.chdir(dirPathRepo)

# Git用処理
if is_git == True:
    checkoutURL = "https://git.zx2c4.com/cgit"
    #checkoutURL = "https://github.com/apache/nuttx"
    #checkoutURL = "https://github.com/apache/maven"
    #checkoutURL = "https://github.com/numpy/numpy"
    #checkoutURL = "https://github.com/apache/httpd"

    if platform.system() == "Linux":
        #dirPathRepo = "/opt/jupyter/repo/"
        dirPathRepo =cwd+"/repo/"
    elif platform.system() == "Windows":
        dirPathRepo = cwd+"\\repo\\"
    else:
        print("OS:not clear")
    lastname, dirPathRepo,dirPathRepoResult = initDirectory(dirPathRepo, checkoutURL)
    cmd = ["git", "config", "--global","http.postBuffer", "524288000"]
    res = executeSubprocess(cmd)
    print(res)
    # 1世代分だけclone
    cmd = ["git", "clone", "--depth","1", checkoutURL]
    res = executeSubprocess(cmd)
    print(res)
    #!git clone --depth 1 $checkoutURL
    os.chdir(dirPathRepo)

    # sha,branch, remote URL取得
    #rData = !git remote -v
    logFileName = "1-1_git_remote.log"
    
    cmd = ["git", "remote", "-v"]
    res = executeSubprocessTofile(cmd,logFileName)
    print(res)
    #with open(logFileName, 'w') as fp:
    #    subprocess.run(["git", "remote", "-v", ">", logFileName], encoding='utf-8', stdout = fp)
    #!git remote -v >$logFileName
    rData = res
    rBranch = str(rData).split("\n")[0].split("\t")[0].replace("['","")
    rURL = str(rData).split("\n")[0].split("\t")[1].replace("'","").replace(" (fetch)","")
    
    logFileName = "1-1_git_rev-parse.log"
    cmd1 = ["git", "rev-parse", "HEAD"]
    #res = executeSubprocessWithPipeTofile(cmd1, cmd2, logFileName)
    res = executeSubprocessTofile(cmd1,logFileName)
    print(res)
    
    #logFileName = "1-1_git_show.log"
    #res = subprocess.run(["git", "show", "|", "head", ">", logFileName], encoding='utf-8', capture_output=True)
    #print(res.stdout)
    #!git show |head >$logFileName
    sha = res
    sha = str(sha)
    #sha = str(sha).split(" ")[1].replace("']","")
    print(rBranch)
    print(rURL)
    print(sha)
C:\work\work_rad\showUndMetrics\repo\cgit


origin	https://git.zx2c4.com/cgit (fetch)
origin	https://git.zx2c4.com/cgit (push)

00ecfaadea2c40cc62b7a43e246384329e6ddb98

origin
https://git.zx2c4.com/cgit
00ecfaadea2c40cc62b7a43e246384329e6ddb98

2.3 SCM用メトリクス取得処理実行¶

  • 【SCMからの場合】 SCM用メトリクス取得処理実行
In [9]:
if is_svn==True or is_git==True:
    # git, svn共通
    os.makedirs(dirPathRepoResult,exist_ok=False)

    df_understand_FM, df_understand_File = getMetricsFromCode(dirPathRepo, dirPathRepoResult, und_proj_filename, und_proj_name)
und create

und add
Files added: 51

und settings
UseFuzzy
und analyze
und metrics
und list
464

3. csvを読み込む場合の処理¶

3.1 CSV読み込み時用ライブラリ関数の定義¶

  • Resultフォルダ削除関数
  • Resultフォルダ初期化関数
In [10]:
def removeResult(resultDir):
       
    # アクセス権取れない場合にエラーになる。その場合はカーネルクリア後にresultDirを毎回削除。
    if (os.path.exists(resultDir)):
        print("remove result start")
        print(str(resultDir)+"を削除します。※アクセス権がない場合は削除に失敗します。手動で削除してください。")
        #shutil.rmtree(resultDir)#---resultDirのパスが削除対象にしてよいか確認後、ここを有効行としてください。---
        print("remove result end")



#csv用
def initResultDirectory(path):
    
    dirPathRepo = path
    dirPathRepoResult = dirPathRepo+"/Result"
    # 初期化のためディレクトリを削除
    removeResult(dirPathRepoResult)
    
    os.makedirs(dirPathRepoResult,exist_ok=False)
    
    return dirPathRepoResult

3.2 CSV読み込み用メトリクス取得処理実行¶

  • 【SCMからの場合】 SCM用メトリクス取得処理実行
In [11]:
if is_read_csv == True:

    if platform.system() == "Linux":
        #dirPathRepo = "/opt/jupyter/repo/csv"
        dirPathRepo =cwd+"/repo/csv"
    elif platform.system() == "Windows":
        dirPathRepo = cwd+"\\repo\\csv"
    else:
        print("OS:not clear")
    projectName = "project"
    lastname = projectName
    # 結果があったら削除が必要なので、exists_okはFalse
    dirPathRepoResult = initResultDirectory(dirPathRepo)
    os.chdir(dirPathRepo)
    csvFileName = projectName+".csv"
    #df_org = pd.read_csv(r'project.csv', sep=',', encoding="shift-jis")

    is_file = os.path.isfile(csvFileName)
    if is_file:
        df_org = pd.read_csv(csvFileName, sep=',', encoding="shift-jis")
        df_understand_FM=df_org[~df_org['Kind'].str.contains('Unknown') & df_org['Kind'].str.contains('Method')|df_org['Kind'].str.contains('Function')].copy().reset_index(drop=True)
        df_understand_File=df_org[df_org['Kind'].str.contains('File')].copy().reset_index(drop=True)
    else:
        try:
            print(dirPathRepo+"に"+csvFileName+"を配置してください。")
            raise Exception(dirPathRepo+"に"+csvFileName+"を配置してください。")
        except ValueError as e:
            traceback.print_exc()
        

4.取得したデータの整形(ファイル、関数メトリクスそれぞれに実行)¶

  • 【共通】メトリクスデータ内ファイルパスからプレフィックス削除実行
  • 【共通】関数別、ファイル別のコメント率の計算(メトリクスの追加)
    ※ CountLineが0の関数(宣言のみなど)の場合はコメント率のメトリクスがNaNになりますが、本スクリプトの目的から無視します。
    ※ コメント率はUnderstandが用意しているRatioCommentToCodeメトリクス(コード行数に対するコメント率)を使えば良いと考えられますが、
    関数処理全体の俯瞰性の観点から1画面に表示可能な(0スクロールで閲覧可能な)行数を有効に活用する、と考えた場合はCountLine(全行数)で割ったコメント率も重要と考えられます。
  • 【共通】使用するカラムを絞ります(メモリ節約のため使用するメトリクスを絞ります)
In [12]:
if isRemovePrefix == True:
    df_understand_FM = removePrefix(df_understand_FM.reset_index(drop=True), prefix)
    df_understand_File = removePrefix(df_understand_File.reset_index(drop=True), prefix)
df_understand_FM["CountLineCommentRatio"]=df_understand_FM["CountLineComment"]/df_understand_FM["CountLine"]
df_understand_File["CountLineCommentRatio"]=df_understand_File["CountLineComment"]/df_understand_File["CountLine"]

df_understand_FM=df_understand_FM.loc[:,['Kind','Name','File','CountLine','Cyclomatic','Essential','MaxNesting', 'CountLineComment', 'CountLineCommentRatio']].copy().reset_index(drop=True)
df_understand_File=df_understand_File.loc[:,['Kind','Name','File','CountLine','MaxCyclomatic','MaxNesting','CountLineComment', 'CountLineCommentRatio']].copy().reset_index(drop=True)

df_understand_FM
Out[12]:
Kind Name File CountLine Cyclomatic Essential MaxNesting CountLineComment CountLineCommentRatio
0 Static Function HEAD_fn cgit\cmd.c 4 1.0 1.0 0 0 0.000000
1 Function __attribute__ cgit\cache.h 28 1.0 1.0 0 6 0.214286
2 Function __attribute__ cgit\cache.h 0 6.0 4.0 1 0 NaN
3 Function __attribute__ cgit\cache.h 0 1.0 1.0 0 0 NaN
4 Static Function about_fn cgit\cmd.c 26 6.0 1.0 2 0 0.000000
... ... ... ... ... ... ... ... ... ...
380 Static Function write_tar_xz_archive cgit\ui-snapshot.c 5 1.0 1.0 0 0 0.000000
381 Static Function write_tar_zstd_archive cgit\ui-snapshot.c 5 1.0 1.0 0 0 0.000000
382 Static Function write_tree_link cgit\ui-tree.c 38 4.0 4.0 2 0 0.000000
383 Static Function write_zip_archive cgit\ui-snapshot.c 4 1.0 1.0 0 0 0.000000
384 Static Function xstrrchr cgit\scan-tree.c 6 3.0 1.0 1 0 0.000000

385 rows × 9 columns

In [13]:
if isRemovePrefix == True:
    df_understand_FM = removePrefix(df_understand_FM.reset_index(drop=True), prefix)
    df_understand_File = removePrefix(df_understand_File.reset_index(drop=True), prefix)
df_understand_FM["CountLineCommentRatio"]=df_understand_FM["CountLineComment"]/df_understand_FM["CountLine"]
df_understand_File["CountLineCommentRatio"]=df_understand_File["CountLineComment"]/df_understand_File["CountLine"]

df_understand_FM=df_understand_FM.loc[:,['Kind','Name','File','CountLine','Cyclomatic','Essential','MaxNesting', 'CountLineComment', 'CountLineCommentRatio']].copy().reset_index(drop=True)
df_understand_File=df_understand_File.loc[:,['Kind','Name','File','CountLine','MaxCyclomatic','MaxNesting','CountLineComment', 'CountLineCommentRatio']].copy().reset_index(drop=True)

df_understand_FM
Out[13]:
Kind Name File CountLine Cyclomatic Essential MaxNesting CountLineComment CountLineCommentRatio
0 Static Function HEAD_fn cgit\cmd.c 4 1.0 1.0 0 0 0.000000
1 Function __attribute__ cgit\cache.h 28 1.0 1.0 0 6 0.214286
2 Function __attribute__ cgit\cache.h 0 6.0 4.0 1 0 NaN
3 Function __attribute__ cgit\cache.h 0 1.0 1.0 0 0 NaN
4 Static Function about_fn cgit\cmd.c 26 6.0 1.0 2 0 0.000000
... ... ... ... ... ... ... ... ... ...
380 Static Function write_tar_xz_archive cgit\ui-snapshot.c 5 1.0 1.0 0 0 0.000000
381 Static Function write_tar_zstd_archive cgit\ui-snapshot.c 5 1.0 1.0 0 0 0.000000
382 Static Function write_tree_link cgit\ui-tree.c 38 4.0 4.0 2 0 0.000000
383 Static Function write_zip_archive cgit\ui-snapshot.c 4 1.0 1.0 0 0 0.000000
384 Static Function xstrrchr cgit\scan-tree.c 6 3.0 1.0 1 0 0.000000

385 rows × 9 columns

5 結果の出力¶

5.1 結果出力用関数の定義¶

  • 【共通】散布図描画要関数
  • 【共通】棒グラフ描画用関数
  • 【共通】閾値毎の領域別個数集計関数
  • 【共通】html出力用関数
  • 【共通】csv出力用関数
In [14]:
def plotScatterGraph(metrics1, metrics2, focus, df, showLabel=False, showTOPAnnotation = False):
    df = df.reset_index(drop=True)
    X = df[metrics1]
    Y = df[metrics2]

    fig = go.Figure()
    if focus == "File":
        LABEL=df["File"]
    elif focus == "Function":
        if  (isShowPackage == 0):
            LABEL=df["File"]+"\r\n"+"["+df["Name"]+"()]"
        elif (isShowPackage == 1):
            LABEL=df["Name"]+"()"
        else:
            print("unknown language!!")
            return -1
    else:
        LABEL=""
    if showLabel==True:
        trace = go.Scatter(x = X, y = Y, mode='markers+text', text =LABEL, textposition='top left') # mode='markers+text' でラベルも表示
    else:
        trace = go.Scatter(x = X, y = Y, mode='markers', text =LABEL, textposition='top left') # mode='markers+text' でラベルも表示

    layout = go.Layout(
        xaxis = dict(title=metrics1, range = [0,X.max()+10], dtick=10),
        yaxis = dict(title=metrics2, range = [0,Y.max()+10], dtick=10),
        font = dict(size = 15),
        title_text=metrics1+"("+str(focus)+")"+" vs "+metrics2+"("+str(focus)+")",
        title_x=0.5,
        xaxis_title_text=metrics1,    #x軸ラベル
        yaxis_title_text=metrics2,    #y軸ラベル
        title_font_size=30,
        title_font_family='HGMinchoE',
        title_font_color='Black'
    )
    if showTOPAnnotation==True:
        fig.add_annotation(
            x=X.iloc[np.argsort((X+Y).to_numpy())[-1]],
            y=Y.iloc[np.argsort((X+Y).to_numpy())[-1]],
            xref="x",
            yref="y",
            text=str(LABEL.iloc[np.argsort((X+Y).to_numpy())[-1]]),
            showarrow=True,
            font=dict(
                family="Courier New, monospace",
                size=12,
                color="#ffffff"
                ),
            align="center",
            arrowhead=2,
            arrowsize=1,
            arrowwidth=2,
            arrowcolor="#636363",
            ax=-20,
            ay=-30,
            bordercolor="#c7c7c7",
            borderwidth=2,
            borderpad=4,
            bgcolor="#ff7f0e",
            opacity=0.8
            )
        fig.add_annotation(
            x=X.iloc[np.argsort((X+Y).to_numpy())[-2]],
            y=Y.iloc[np.argsort((X+Y).to_numpy())[-2]],
            xref="x",
            yref="y",
            text=str(LABEL.iloc[np.argsort((X+Y).to_numpy())[-2]]),
            showarrow=True,
            font=dict(
                family="Courier New, monospace",
                size=12,
                color="#ffffff"
                ),
            align="center",
            arrowhead=2,
            arrowsize=1,
            arrowwidth=2,
            arrowcolor="#636363",
            ax=-20,
            ay=-30,
            bordercolor="#c7c7c7",
            borderwidth=2,
            borderpad=4,
            bgcolor="#ff7f0e",
            opacity=0.8
            )
        fig.add_annotation(
            x=X.iloc[np.argsort((X+Y).to_numpy())[-3]],
            y=Y.iloc[np.argsort((X+Y).to_numpy())[-3]],
            xref="x",
            yref="y",
            text=str(LABEL.iloc[np.argsort((X+Y).to_numpy())[-3]]),
            showarrow=True,
            font=dict(
                family="Courier New, monospace",
                size=12,
                color="#ffffff"
                ),
            align="center",
            arrowhead=2,
            arrowsize=1,
            arrowwidth=2,
            arrowcolor="#636363",
            ax=-20,
            ay=-30,
            bordercolor="#c7c7c7",
            borderwidth=2,
            borderpad=4,
            bgcolor="#ff7f0e",
            opacity=0.8
            )
        
    fig.add_trace(trace)
    fig.update_layout(layout)
    plotly.offline.iplot(fig)
    return fig

    
In [15]:
def plotBarGraph(metrics1, focus, numWorst, df, X, showTOPAnnotation = False):
    if focus == "File":
        LABEL=df["File"]
    elif focus == "Function":
        if  (isShowPackage == 0):
            LABEL=df["File"]+"\r\n"+"["+df["Name"]+"()]"
        elif (isShowPackage == 1):
            LABEL=df["Name"]+"()"
        else:
            print("unknown language!!")
            return -1
    else:
        LABEL=""

    fig_go = go.Figure(go.Bar(
            x=X,
            y=LABEL,
            orientation='h'))
    fig_go.update_layout(
        title_text=metrics1+"ワースト"+str(numWorst)+"("+str(focus)+")",
        title_x=0.5,
        xaxis_title_text=metrics1,
        yaxis_title_text=focus,
        title_font_size=30,
        title_font_family='HGMinchoE',
        title_font_color='Black'
    )
    #print(X.iloc[np.argsort((X).to_numpy())[-1]])
    #print(len(X)-1)
    if showTOPAnnotation==True:
        fig_go.add_annotation(
            x=X.iloc[np.argsort((X).to_numpy())[-1]],
            y=len(X)-1,
            xref="x",
            yref="y",
            text=str(X.iloc[np.argsort((X).to_numpy())[-1]]),
            showarrow=True,
            font=dict(
                family="Courier New, monospace",
                size=11,
                color="#ffffff"
                ),
            align="center",
            arrowhead=2,
            arrowsize=1,
            arrowwidth=2,
            arrowcolor="#636363",
            ax=20,
            ay=-30,
            bordercolor="#c7c7c7",
            borderwidth=2,
            borderpad=4,
            bgcolor="#ff7f0e",
            opacity=0.8
            )
        fig_go.add_annotation(
            x=X.iloc[np.argsort((X).to_numpy())[-2]],
            y=len(X)-2,
            xref="x",
            yref="y",
            text=str(X.iloc[np.argsort((X).to_numpy())[-2]]),
            showarrow=True,
            font=dict(
                family="Courier New, monospace",
                size=11,
                color="#ffffff"
                ),
            align="center",
            arrowhead=2,
            arrowsize=1,
            arrowwidth=2,
            arrowcolor="#636363",
            ax=20,
            ay=-30,
            bordercolor="#c7c7c7",
            borderwidth=2,
            borderpad=4,
            bgcolor="#ff7f0e",
            opacity=0.8
            )
        fig_go.add_annotation(
            x=X.iloc[np.argsort((X).to_numpy())[-3]],
            y=len(X)-3,
            xref="x",
            yref="y",
            text=str(X.iloc[np.argsort((X).to_numpy())[-3]]),
            showarrow=True,
            font=dict(
                family="Courier New, monospace",
                size=11,
                color="#ffffff"
                ),
            align="center",
            arrowhead=2,
            arrowsize=1,
            arrowwidth=2,
            arrowcolor="#636363",
            ax=20,
            ay=-30,
            bordercolor="#c7c7c7",
            borderwidth=2,
            borderpad=4,
            bgcolor="#ff7f0e",
            opacity=0.8
            )
    fig_go.show()
    to_html(fig_go,metrics1+"("+focus+")"+"_bar")
In [16]:
def countNumMultiMetricsRange(metrics1,metrics1Threshhold, metrics2,metrics2Threshhold,df):
    if (len(metrics1Threshhold)<1):
        return 0
    if (len(metrics2Threshhold)<1):
        return 0
    
    #絶対収まる仮の最大値を追加
    inf = 10000000
    metrics1Threshhold.append(max(metrics1Threshhold)*inf)
    metrics2Threshhold.append(max(metrics2Threshhold)*inf)
    
    metrics1Threshhold = sorted(metrics1Threshhold)
    metrics2Threshhold = sorted(metrics2Threshhold)
    l1 = len(metrics1Threshhold)
    l2 = len(metrics2Threshhold)

    rangeNum = np.zeros((l1, l2)).astype(np.int64)
    temp_min_i = 0
    temp_min_j = 0
    temp_max_i = 0
    temp_max_j = 0
    rangeNumColumns = []
    rangeNumIndex = []
    for i, threshhold_i in enumerate(metrics1Threshhold):
        temp_max_i = threshhold_i
        text_min_i = temp_min_i
        text_max_i = temp_max_i-1
        if temp_max_i>inf:
            text_max_i = ""
        rangeNumIndex.append(metrics1+":"+str(text_min_i)+"-"+str(text_max_i))
        
        for j, threshhold_j in enumerate(metrics2Threshhold):
            temp_max_j = threshhold_j
            if i ==0:
                text_min_j = temp_min_j
                text_max_j = temp_max_j-1
                if temp_max_j>inf:
                    text_max_j = ""
                rangeNumColumns.append(metrics2+":"+str(text_min_j)+"-"+str(text_max_j))
            rangeNum[i][j] = (
                (df[metrics1] >=temp_min_i)
                &(temp_max_i>df[metrics1])
                &(df[metrics2] >=temp_min_j)
                &(temp_max_j>df[metrics2])).sum()
            temp_min_j = threshhold_j
        # 区間最小値を更新
        temp_min_i = threshhold_i
    
    df_rangeNum = pd.DataFrame(data=rangeNum, columns=rangeNumColumns, index=rangeNumIndex)
    return df_rangeNum

def to_csv(df, filename):
    if is_to_csv == True:
        df.to_csv(dirPathRepoResult+"/"+str(filename)+'.csv',sep=',',encoding='cp932')

def to_html(fig, filename):
    if is_to_html == True:
        fig.write_html(dirPathRepoResult+"/"+str(filename)+".html")

5.2 結果の出力¶

  • 各メトリクス毎にワースト10関数やワースト10ファイルを出力
    ※ numWorstが大きい場合、出力数が多くなります。このとき、 JupyterNotebook上でグラフ表示した場合は、Y軸ラベルの関数名/ファイル名が間引き表示されてしまいます。
    html/csvへの出力した場合、出力数が多くともデータは正しく出力されますが、やはりグラフ描画時には間引かれるため注意が必要です。
    ※ 間引き表示された場合も、Plotlyのグラフ上でインタラクティブに拡大やマウスオーバーするとそれぞれ正しく表示されます。
  • メトリクス同士の散布図を出力
  • 閾値毎の領域別個数集計実施
  • アーカイブ作成

CountLine(Function)¶

In [17]:
numWorst=10
metrics1="CountLine"
focus="Function"
df = df_understand_FM.sort_values([metrics1],ascending=False)[:numWorst].iloc[::-1]
X = df[metrics1]
plotBarGraph(metrics1=metrics1, focus=focus, numWorst=numWorst, df=df, X=X, showTOPAnnotation = True)
to_csv(df = df.iloc[::-1].reset_index(drop=True), filename = str(focus)+"_"+metrics1+"ワースト"+str(numWorst))

Cyclomatic(Function)¶

In [18]:
numWorst=10
metrics1="Cyclomatic"
focus="Function"
df = df_understand_FM.sort_values([metrics1],ascending=False)[:numWorst].iloc[::-1]
X = df[metrics1]
plotBarGraph(metrics1=metrics1, focus=focus, numWorst=numWorst, df=df, X=X, showTOPAnnotation = True)
to_csv(df = df.iloc[::-1].reset_index(drop=True), filename = str(focus)+"_"+metrics1+"ワースト"+str(numWorst))

Essential(Function)¶

In [19]:
numWorst=10
metrics1="Essential"
focus="Function"
df = df_understand_FM.sort_values([metrics1],ascending=False)[:numWorst].iloc[::-1]
X = df[metrics1]
plotBarGraph(metrics1=metrics1, focus=focus, numWorst=numWorst, df=df, X=X, showTOPAnnotation = True)
to_csv(df = df.iloc[::-1].reset_index(drop=True), filename = str(focus)+"_"+metrics1+"ワースト"+str(numWorst))

MaxNesting(Function)¶

In [20]:
numWorst=10
metrics1="MaxNesting"
focus="Function"
df = df_understand_FM.sort_values([metrics1],ascending=False)[:numWorst].iloc[::-1]
X = df[metrics1]
plotBarGraph(metrics1=metrics1, focus=focus, numWorst=numWorst, df=df, X=X, showTOPAnnotation = True)
to_csv(df = df.iloc[::-1].reset_index(drop=True), filename = str(focus)+"_"+metrics1+"ワースト"+str(numWorst))

CountLineComment(Function)¶

In [21]:
numWorst=10
metrics1="CountLineComment"
focus="Function"
df = df_understand_FM.sort_values([metrics1],ascending=False)[:numWorst].iloc[::-1]
X = df[metrics1]
plotBarGraph(metrics1=metrics1, focus=focus, numWorst=numWorst, df=df, X=X, showTOPAnnotation = True)
to_csv(df = df.iloc[::-1].reset_index(drop=True), filename = str(focus)+"_"+metrics1+"ワースト"+str(numWorst))

CountLineCommentRatio(Function)¶

In [22]:
numWorst=10
metrics1="CountLineCommentRatio"
focus="Function"
df = df_understand_FM.sort_values([metrics1],ascending=False)[:numWorst].iloc[::-1]
X = df[metrics1]
plotBarGraph(metrics1=metrics1, focus=focus, numWorst=numWorst, df=df, X=X, showTOPAnnotation = True)
to_csv(df = df.iloc[::-1].reset_index(drop=True), filename = str(focus)+"_"+metrics1+"ワースト"+str(numWorst))

CountLineCommentRatio(File)¶

In [23]:
numWorst=10
metrics1="CountLineCommentRatio"
focus="File"
df = df_understand_File.sort_values([metrics1],ascending=False)[:numWorst].iloc[::-1]
X = df[metrics1]
plotBarGraph(metrics1=metrics1, focus=focus, numWorst=numWorst, df=df, X=X, showTOPAnnotation = True)
to_csv(df = df.iloc[::-1].reset_index(drop=True), filename = str(focus)+"_"+metrics1+"ワースト"+str(numWorst))

CountLine(File)¶

In [24]:
numWorst=10
metrics1="CountLine"
focus="File"
df = df_understand_File.sort_values([metrics1],ascending=False)[:numWorst].iloc[::-1]
X = df[metrics1]
plotBarGraph(metrics1=metrics1, focus=focus, numWorst=numWorst, df=df, X=X, showTOPAnnotation = True)
to_csv(df = df.iloc[::-1].reset_index(drop=True), filename = str(focus)+"_"+metrics1+"ワースト"+str(numWorst))

MaxCyclomatic(File)¶

In [25]:
numWorst=10
metrics1="MaxCyclomatic"
focus="File"
df = df_understand_File.sort_values([metrics1],ascending=False)[:numWorst].iloc[::-1]
X = df[metrics1]
plotBarGraph(metrics1=metrics1, focus=focus, numWorst=numWorst, df=df, X=X, showTOPAnnotation = True)
to_csv(df = df.iloc[::-1].reset_index(drop=True), filename = str(focus)+"_"+metrics1+"ワースト"+str(numWorst))

CountLine(Function) vs Cyclomatic(Function)¶

In [26]:
metrics1="CountLine"
metrics2="Cyclomatic"
focus="Function"
df = df_understand_FM.sort_values([metrics1],ascending=False)
fig = plotScatterGraph(metrics1=metrics1, metrics2=metrics2, focus=focus, df=df, showLabel = False, showTOPAnnotation=True)
to_html(fig,metrics1+"-"+metrics2+"("+focus+")"+"_scatter")

CountLine(Function) vs Cyclomatic(Function)(ワースト20)¶

In [27]:
numWorst=20
metrics1="CountLine"
metrics2="Cyclomatic"
focus="Function"
df = df_understand_FM.sort_values([metrics1],ascending=False)[:numWorst].iloc[::-1]
fig = plotScatterGraph(metrics1=metrics1, metrics2=metrics2, focus=focus, df=df, showTOPAnnotation=True)
to_html(fig,metrics1+"-"+metrics2+"("+focus+")"+"_scatter(ワースト"+str(numWorst)+")")

CountLine-Cyclomatic関数の数の集計¶

In [28]:
focus="Function"
metrics1="CountLine"
metrics2="Cyclomatic"

metrics1Threshhold=[]
metrics1Threshhold.append(20)
metrics1Threshhold.append(50)
metrics2Threshhold=[]
metrics2Threshhold.append(5)
metrics2Threshhold.append(100)
metrics2Threshhold.append(200)


df = df_understand_FM.sort_values([metrics1],ascending=False)
df_rangeNum = countNumMultiMetricsRange(metrics1=metrics1, metrics1Threshhold = metrics1Threshhold, metrics2=metrics2, metrics2Threshhold = metrics2Threshhold,df=df)
to_csv(df_rangeNum, metrics1+"-"+metrics2+"("+focus+")"+"関数の数")

各領域における関数の数¶

In [29]:
df_rangeNum
Out[29]:
Cyclomatic:0-4 Cyclomatic:5-99 Cyclomatic:100-199 Cyclomatic:200-
CountLine:0-19 237 9 0 0
CountLine:20-49 0 65 0 0
CountLine:50- 0 40 0 0

各領域における関数の割合¶

In [30]:
num_all = df_rangeNum.sum().sum()
df_rangeNum_per = df_rangeNum.copy(deep=True)
df_rangeNum_per = (df_rangeNum_per/num_all).applymap("{0:.2%}".format)
to_csv(df_rangeNum_per, metrics1+"-"+metrics2+"("+focus+")"+"関数の数(割合)")
df_rangeNum_per
Out[30]:
Cyclomatic:0-4 Cyclomatic:5-99 Cyclomatic:100-199 Cyclomatic:200-
CountLine:0-19 67.52% 2.56% 0.00% 0.00%
CountLine:20-49 0.00% 18.52% 0.00% 0.00%
CountLine:50- 0.00% 11.40% 0.00% 0.00%

CountLine-MaxNesting関数の数の集計¶

In [31]:
focus="Function"
metrics1="CountLine"
metrics2="MaxNesting"

metrics1Threshhold=[]
metrics1Threshhold.append(5)
metrics1Threshhold.append(100)
metrics1Threshhold.append(200)

metrics2Threshhold=[]
metrics2Threshhold.append(2)
metrics2Threshhold.append(4)
metrics2Threshhold.append(6)
metrics2Threshhold.append(8)

df = df_understand_FM.sort_values([metrics1],ascending=False)
df_rangeNum = countNumMultiMetricsRange(metrics1=metrics1, metrics1Threshhold = metrics1Threshhold, metrics2=metrics2, metrics2Threshhold = metrics2Threshhold,df=df)
to_csv(df_rangeNum, metrics1+"-"+metrics2+"("+focus+")"+"関数の数")

各領域における関数の数¶

In [32]:
df_rangeNum
Out[32]:
MaxNesting:0-1 MaxNesting:2-3 MaxNesting:4-5 MaxNesting:6-7 MaxNesting:8-
CountLine:0-4 45 0 0 0 0
CountLine:5-99 0 101 3 0 0
CountLine:100-199 0 5 3 0 0
CountLine:200- 0 0 0 0 0

各領域における関数の割合¶

In [33]:
num_all = df_rangeNum.sum().sum()
df_rangeNum_per = df_rangeNum.copy(deep=True)
df_rangeNum_per = (df_rangeNum_per/num_all).applymap("{0:.2%}".format)
to_csv(df_rangeNum_per, metrics1+"-"+metrics2+"("+focus+")"+"関数の数(割合)")
df_rangeNum_per
Out[33]:
MaxNesting:0-1 MaxNesting:2-3 MaxNesting:4-5 MaxNesting:6-7 MaxNesting:8-
CountLine:0-4 28.66% 0.00% 0.00% 0.00% 0.00%
CountLine:5-99 0.00% 64.33% 1.91% 0.00% 0.00%
CountLine:100-199 0.00% 3.18% 1.91% 0.00% 0.00%
CountLine:200- 0.00% 0.00% 0.00% 0.00% 0.00%

アーカイブ作成¶

In [34]:
shutil.make_archive(dirPathRepoResult, 'zip', root_dir=dirPathRepoResult)
Out[34]:
'C:\\work\\work_rad\\showUndMetrics\\repo\\cgit\\Result.zip'
In [35]:
before_path = dirPathRepoResult+".zip"
after_path = dirPathRepoResult+"_"+lastname+".zip"
new_zip_path = shutil.move(before_path, after_path)
In [36]:
new_zip_path
Out[36]:
'C:\\work\\work_rad\\showUndMetrics\\repo\\cgit/Result_cgit.zip'