Motivation is an important factor in software development. However, it is a subjective concept that is hard to quantify and study empirically. In order to use the wealth of data available about real software development projects in GitHub, we represent the motivation of developers using labeling functions. These are validated heuristics that need only be better than a guess, computable on a dataset. We define four labeling functions for motivation based on behavioral cues like working in diverse hours of the day. We validated the functions by agreement with respect to a developers survey, per person behavior, and temporal changes. We then apply them to 150 thousand developers working on GitHub projects. Using the identification of motivated developers, we measure developer performance gaps. We show that motivated developers have up to 70% longer activity period, produce up to 300% more commits, and invest up to 44% more time per commit.
The data from this article is here. The data and code from the survey "A Large Scale Survey of Motivation in Software Development and Analysis of its Validity" are here.
To understand and use the code, start with the main
This is the replication package of "Motivation Research Using Labeling Functions" by Idan Amit and Dror G. Feitelson.
@inproceedings{10.1145/3661167.3661224,
  author    = {Amit, Idan and Feitelson, Dror G.},
  title     = {Motivation Research Using Labeling Functions},
  year      = {2024},
  isbn      = {9798400717017},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  url       = {https://doi.org/10.1145/3661167.3661224},
  doi       = {10.1145/3661167.3661224},
  abstract  = {Motivation is an important factor in software development. However, it is a subjective concept that is hard to quantify and study empirically. In order to use the wealth of data available about real software development projects in GitHub, we represent the motivation of developers using labeling functions. These are validated heuristics that need only be better than a guess, computable on a dataset. We define four labeling functions for motivation based on behavioral cues like working in diverse hours of the day. We validated the functions by agreement with respect to a developers survey, per person behavior, and temporal changes. We then apply them to 150 thousand developers working on GitHub projects. Using the identification of motivated developers, we measure developer performance gaps. We show that motivated developers have up to 70\% longer activity period, produce up to 300\% more commits, and invest up to 44\% more time per commit.},
  booktitle = {Proceedings of the 28th International Conference on Evaluation and Assessment in Software Engineering},
  pages     = {222--231},
  numpages  = {10},
  keywords  = {methodology, motivation, software engineering, weak supervision},
  location  = {Salerno, Italy},
  series    = {EASE '24}
}