#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Calculate the unbiased indicators using ISI Web of Knowledge reports
# Copyright (C) 2011, 2012 Märt Põder
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#########################################################################
# constants (you can modify these for command line use)
#
# How the corporate author column (CA) of an article is treated: "INCLUDE"
# adds it to the article's author count, any other value leaves it out.
authors_type = "IGNORE" # CA authors handling type: IGNORE or INCLUDE
# Only consulted with INCLUDE: "COUNT" adds ca_penalty for a CA entry that
# does not start with a number; any other value adds nothing for such an entry.
authors_count = "COUNT" # COUNT enables the penalty for CA groups of unknown size
ca_penalty = 10 # assumed number of authors in a CA group of unknown size
# Output format; the web front end additionally handles SOURCE.
results_type = "SHORT" # SHORT (totals only), HTML, TEXT (tab-delimited) or CSV
#########################################################################
import csv, sys, io, os, codecs
from StringIO import StringIO
import cgi, cgitb; cgitb.enable() # for troubleshooting
# counter variables: plain and author-weighted totals over the whole report
articles = 0
w_articles = 0
cites = 0
w_cites = 0
# internal variables
refs = None  # row reader over the report; stays None until input is found
new_table = [[]]  # output rows; the first (still empty) row is for the header
# check if running on web: a web server puts HTTP_HOST into the environment
# of the script, a plain shell normally does not
running_on_web = "HTTP_HOST" in os.environ
# html queries
# NOTE(review): leading indentation is not visible in this copy of the file;
# the comments below follow the nesting the statements appear to have -- confirm.
# Parse the fields of the CGI request, if there is one.
form = cgi.FieldStorage()
# Neither an uploaded report nor an explicit output type: this is command line
# use, or a request for the landing page.
if "isi_report" not in form and "results_type" not in form:
# command line arguments for testing/scripting
argc = len(sys.argv)
if argc < 2:
if running_on_web:
# On the web without any input: answer with the HTML landing page.
print "Content-Type: text/html"
print
# NOTE(review): the template text below contains no "%" conversion specifier,
# yet four values are supplied to the "%" operator after it; as written this
# raises TypeError ("not all arguments converted during string formatting").
# The form markup with its placeholders looks to be missing from this copy --
# confirm against the deployed file.
print """
Let's Correct That Small Mistake: Calculation of
unbiased performance indicators for individual scientists from ISI Web of
Knowledge reports
Let's Correct That Small Mistake
Calculation of unbiased performance indicators from ISI Web of Knowledge reports
What's that?
Counts of publications and citations are widely used as indicators of performance of individual scientists. However, these measures produce a considerable bias because of publications with multiple authors. For a fair comparison of peoples' productivity or research quality, we should use fractional counts: publications and citations divided by the numbers of coauthors. See Põder (2010) for more information.
This program uses a report of a scientist's publications from ISI Web of Knowledge and calculates the unbiased indicators of his/her scientific output.
How to use?
- Search for all articles by an author on ISI Web of Knowledge
- Output all the records to Other Reference Software, namely Tab-delimited (Win)
- Load
savedrecs.txt
on this page
- Choose the desired parameters
- Press Calculate and get your hands on the unbiased counts of citations and articles
Program source code in Python is available under GNU General Public License. You can conveniently use it for user scripts on command line.
For general feedback and theoretical background contact Endel Põder, for code and bugs Märt Põder.
""" % ("CHECKED" if authors_type == "IGNORE" else "", "CHECKED" if authors_type == "INCLUDE" else "", "CHECKED" if authors_count == "COUNT" else "", ca_penalty)
else:
# On the command line without a file argument: print a usage hint.
print "Usage: " + __file__ + " "
# Nothing to process in either case.
sys.exit(1)
else:
# The first command line argument names the report file.
filename = sys.argv[1]
# Read the report as UTF-16; undecodable input is replaced instead of raising.
# NOTE(review): the file object returned by io.open is never closed explicitly.
report_itself = io.open(filename, "rt", encoding="UTF-16", errors="replace").read()
# Re-encode as UTF-8 byte string for the csv module and drop leading BOM bytes.
# NOTE(review): lstrip() takes a set of characters to remove, not a prefix, so
# each call strips any leading bytes that occur in the given BOM constant.
source = report_itself.encode("utf-8").lstrip(codecs.BOM_UTF8).lstrip(codecs.BOM_UTF16_LE).lstrip(codecs.BOM_UTF16_BE).lstrip(codecs.BOM_UTF32_LE).lstrip(codecs.BOM_UTF32_BE)
# The report is parsed as tab-delimited text with double-quote quoting.
refs = csv.reader(StringIO(source), delimiter="\t", quotechar='"')
else:
# Request with form data: forget the defaults set at the top of the file so
# that only values actually present in the form take effect. results_type is
# not reset and keeps its default when the form does not send it.
report_itself = None
refs = None
authors_count = None
authors_type = None
ca_penalty = None
# get variables from html form
# Each field is optional: a missing one raises KeyError and is skipped.
try:
results_type = form["results_type"].value
except KeyError:
pass
try:
authors_type = form["authors_type"].value
except KeyError:
pass
try:
authors_count = form["authors_count"].value
except KeyError:
pass
# An empty ca_size leaves ca_penalty at None.
# NOTE(review): int() raises ValueError for non-numeric input and only
# KeyError is caught here.
try:
ca = form["ca_size"].value.strip()
if len(ca) > 0:
ca_penalty = int(ca)
except KeyError:
pass
# The uploaded report gets the same treatment as a report read from a file:
# decode as UTF-16, re-encode as UTF-8, strip leading BOM bytes, parse as
# tab-delimited text.
try:
report_itself = form["isi_report"].value
source = unicode(report_itself, 'UTF-16').encode("utf-8").lstrip(codecs.BOM_UTF8).lstrip(codecs.BOM_UTF16_LE).lstrip(codecs.BOM_UTF16_BE).lstrip(codecs.BOM_UTF32_LE).lstrip(codecs.BOM_UTF32_BE)
refs = csv.reader(StringIO(source), delimiter="\t", quotechar='"')
except KeyError:
pass
# get corporate authors count from form if exists
# NOTE(review): this repeats the ca_size handling a few lines up, without the
# strip() applied there.
if "ca_size" in form:
ca = form["ca_size"].value
if len(ca) > 0:
ca_penalty = int(ca)
# HTTP response headers per output format: (content type, download filename).
# Everything this script writes is UTF-8 (the report is re-encoded to UTF-8
# before it is parsed, and this file itself is UTF-8), so that is the charset
# declared for the downloads.
response_headers = {
    "HTML": ("text/html", None),
    "SHORT": ("text/plain", None),
    "CSV": ("text/csv; charset=UTF-8", "weighted-isi-report.csv"),
    "TEXT": ("text/csv; charset=UTF-8", "weighted-isi-report.txt"),
    "SOURCE": ("text/plain; charset=UTF-8", "unbiased-isi-report.py"),
}
# an unknown results_type gets no headers at all
if running_on_web and results_type in response_headers:
    content_type, attachment = response_headers[results_type]
    print("Content-Type: " + content_type)
    if attachment is not None:
        print("Content-Disposition: attachment; filename=" + attachment)
    # an empty line ends the header section
    print("")
    if results_type == "SOURCE":
        # serve this script itself and stop; no report is needed for that
        with open(__file__, "r") as script:
            print(script.read())
        sys.exit(0)
# neither a command line file nor an uploaded report was found
if refs is None:
    print("No input")
    sys.exit(2)
# default positions of columns (will be adjusted)
PT = 0
AU = 1
CA = 3
TC = 32
Z9 = 35
# walk through report: every input row becomes one row of new_table, prefixed
# with three computed columns (author count, weight, weighted citations)
header = True
for row in refs:
    # csv.reader yields an empty list for a blank line; there is nothing to
    # index or count in it
    if not row:
        continue
    # locate the needed columns by their field tags on the first line
    if header:
        try:
            PT, AU, CA, TC, Z9 = [
                row.index(tag) for tag in ("PT", "AU", "CA", "TC", "Z9")
            ]
        except ValueError:
            # a tag is missing, so this is not a report we can read
            print("Unintelligible file format.")
            sys.exit(3)
        header = False
    # start a new output row unless the current one is still unused
    if new_table[-1]:
        new_table.append([])
    out_row = new_table[-1]
    if row[AU] == "AU":
        # header line (also when one shows up again further down the report):
        # name the computed columns
        out_row.extend(["_Authors", "_Weight", "_WeightedCit"])
    else:
        # usual authors count: names in AU are separated by semicolons
        authors = row[AU].count(";") + 1
        group = row[CA].strip()
        if group and authors_type == "INCLUDE":
            try:
                group_size = int(group.split(" ")[0])  # first string is a nr
            except ValueError:
                # group size not stated: use the assumed penalty, if one is
                # configured (it is None when the web form left it empty)
                group_size = 0
                if authors_count == "COUNT" and ca_penalty is not None:
                    group_size = ca_penalty
            # a zero or negative size must not shrink the count, or the
            # weight below would divide by zero
            if group_size > 0:
                authors += group_size
        weight = 1.0 / authors
        # citation count comes from TC, falling back to Z9; a record with
        # neither filled in counts as uncited
        cites_str = row[TC].strip() or row[Z9].strip() or "0"
        row_cites = int(cites_str)
        w_cites_this = row_cites * weight
        out_row.append(str(authors))
        out_row.append(str(weight))
        out_row.append(str(w_cites_this))
        # update the running totals
        cites += row_cites
        w_cites += w_cites_this
        articles += 1
        w_articles += weight
    # the original columns follow the computed ones
    out_row.extend(row)
# Write the result to standard output in the requested format; any other
# results_type value prints nothing here.
if results_type == "SHORT":
# Totals only; the weighted figures are rounded to one decimal place.
print "Articles: " + str(articles)
print "Weighted articles: " + str(round(w_articles, 1))
print "Citations: " + str(cites)
print "Weighted citations: " + str(round(w_cites, 1))
elif results_type == "HTML":
# One output line group per table row, cells joined with " | ".
# NOTE(review): two of the quoted strings in this branch are broken across
# physical lines, which is not valid for a single-quoted literal, and the
# first one is empty; the table markup looks to be missing from this copy --
# confirm against the deployed file. Cell text is written without escaping.
print ""
for row in new_table:
print "
"
print " | ".join(row)
print " |
"
elif results_type == "CSV":
# Comma-separated, every field quoted.
writer = csv.writer(sys.stdout, delimiter=',', quotechar='"', quoting=csv.QUOTE_ALL)
writer.writerows(new_table)
elif results_type == "TEXT":
# Tab-separated, every field quoted.
writer = csv.writer(sys.stdout, delimiter='\t', quotechar='"', quoting=csv.QUOTE_ALL)
writer.writerows(new_table)