# -*- coding: utf-8 -*-
# Provenance: extracted from git patch 10b896c111845f47ed490b3f014f7e6b832d16da
# ("Upload New File", Fays Jérémie <j.fays@uliege.be>, Mon, 27 May 2019),
# which created Simian-filtered.py.
#
# Script analysing the XML output of Simian (http://www.harukizaemon.com/simian/)
# in order to detect plagiarism between projects. It filters the Simian results
# and removes every duplicated block that concerns only one project.
#
# The file structure needs to be the following:
#
# simianBaseDir
#     bin/
#         simianApp
#     CODE/
#         (this script)
#         projectName/
#             project1/   (contains source code of project 1)
#             project2/   (contains source code of project 2)
#             ...         (could contain other projects)
#
# ----------------------------------------------------------------------
# Result: writes a file named projectName + '-simian-filtered.xml' in the CODE/
# directory. This file contains only code duplicated from one project to
# another (all duplicated code within a single project is removed).
#

import subprocess
import xml.etree.ElementTree as ET


simianBaseDir = '/simianDir'
simianApp = "simian-2.4.0.jar"
projectName = 'projectNameDir'

# Both reports live in CODE/. The raw report path is handed to Simian as an
# absolute path so that the file Simian writes is the very file parsed below,
# whatever the current working directory is.
codeDir = simianBaseDir + '/CODE'
rawReportPath = codeDir + '/' + projectName + '-simian.xml'
filteredReportPath = codeDir + '/' + projectName + '-simian-filtered.xml'

# Argument list (no shell involved). The '**/*.*' pattern is passed verbatim to
# Simian, exactly as the quoted pattern was in the former shell command line.
# The scanned directory is CODE/<projectName>/ so that it always agrees with
# the directory name getSubProject() splits on.
callArgument = [
    'java', '-jar', simianBaseDir + '/bin/' + simianApp,
    '-formatter=xml:' + rawReportPath,
    codeDir + '/' + projectName + '/**/*.*',
]


def getSubProject(filePath):
    """Return the first path component found below the 'projectName' directory.

    filePath is split at the first occurrence of projectName; the component
    that follows the next '/' is returned (e.g. 'project1' for
    '.../<projectName>/project1/src/A.java').

    Raises IndexError when projectName does not occur in filePath, or when
    nothing follows it.
    """
    belowProject = filePath.split(projectName, 1)[1]
    # belowProject starts with '/', so index 0 of the split is the empty string
    # and index 1 is the sub-project directory name.
    return belowProject.split('/', 2)[1]


def removeSingleProjectSets(check):
    """Remove from 'check' every <set> whose blocks all share one sub-project.

    check is the element whose direct children are the <set> elements. A set is
    kept only when its <block> descendants come from at least two different
    sub-projects (as computed by getSubProject on their 'sourceFile'
    attribute); a set without any block is removed as well.

    Returns the number of <set> elements removed.
    """
    removed = 0
    # findall() returns a list, so removing children while looping is safe.
    for duplicateSet in check.findall('set'):
        subProjects = set(
            getSubProject(block.get('sourceFile'))
            for block in duplicateSet.iter('block')
        )
        if len(subProjects) < 2:
            check.remove(duplicateSet)
            removed += 1
    return removed


def main():
    """Run Simian, then write the report filtered down to cross-project sets."""
    # call() blocks until Simian has exited, so the report is complete before
    # it is parsed. The exit status is deliberately not treated as fatal, as in
    # the original script; a missing report surfaces as an error in ET.parse.
    subprocess.call(callArgument)

    tree = ET.parse(rawReportPath)
    root = tree.getroot()
    # The <set> elements are looked up under the first child of the root.
    removeSingleProjectSets(root[0])
    tree.write(filteredReportPath)


if __name__ == '__main__':
    main()