diff --git a/README.md b/README.md index ca5a8a6..e798442 100644 --- a/README.md +++ b/README.md @@ -24,7 +24,15 @@ This library is numpy friendly. ## Get the documentation and examples -* [Documentation](http://ibmdecisionoptimization.github.io/docplex-doc/) +* [Latest documentation](http://ibmdecisionoptimization.github.io/docplex-doc/) +* Documentation archives: + * [2.23.217](http://ibmdecisionoptimization.github.io/docplex-doc/2.23.217) + * [2.22.213](http://ibmdecisionoptimization.github.io/docplex-doc/2.22.213) + * [2.21.207](http://ibmdecisionoptimization.github.io/docplex-doc/2.21.207) + * [2.20.204](http://ibmdecisionoptimization.github.io/docplex-doc/2.20.204) + * [2.19.202](http://ibmdecisionoptimization.github.io/docplex-doc/2.19.202) + * [2.18.200](http://ibmdecisionoptimization.github.io/docplex-doc/2.18.200) + * [2.16.195](http://ibmdecisionoptimization.github.io/docplex-doc/2.16.195) * [Examples](https://github.com/IBMDecisionOptimization/docplex-examples) ## Get your IBM® ILOG CPLEX Optimization Studio edition diff --git a/examples/cp/basic/data/jobshop_blackbox_default.data b/examples/cp/basic/data/jobshop_blackbox_default.data new file mode 100644 index 0000000..6f09ed7 --- /dev/null +++ b/examples/cp/basic/data/jobshop_blackbox_default.data @@ -0,0 +1,11 @@ +10 10 +0 2500 3300 1 7400 8200 2 900 900 3 3200 4000 4 4900 4900 5 1100 1100 6 5900 6500 7 5600 5600 8 3900 4900 9 2000 2200 +0 3900 4700 2 8900 9100 4 6700 8300 9 1100 1100 3 6300 7500 1 2500 3100 6 4600 4600 5 4300 4900 7 7100 7300 8 2700 3300 +1 8600 9600 0 7700 9300 3 3400 4400 2 6400 8400 8 8900 9100 5 1000 1000 7 1100 1300 6 8700 9100 9 4100 4900 4 3100 3500 +1 7900 8300 2 9400 9600 0 6200 8000 4 9200 10600 6 900 900 8 4600 5800 7 7400 9600 3 9000 10600 9 2100 2300 5 4100 4500 +2 1300 1500 0 600 600 1 2200 2200 5 5300 6900 3 2500 2700 4 6400 7400 8 2000 2200 7 4600 5200 9 7200 7200 6 4600 6000 +2 7500 9300 1 200 200 5 5200 5200 3 8900 10100 8 4300 5300 9 7000 7400 0 4400 5000 6 6200 6800 4 600 600 7 2300 2700 +1 4300 4900 0 3400 4000 3 6000 6200 2 1300 1300 6 2800 3600 5 2000 2200 9 2900 3500 8 8300 9500 7 2900 3100 4 5400 5600 +2 2900 3300 0 8600 8600 1 4600 4600 5 6500 8300 4 2900 3500 6 7900 9700 8 1700 2100 9 4500 5100 7 3500 3700 3 6900 8900 +0 7200 8000 1 6300 7500 3 6700 8500 5 5100 5100 2 7700 9300 9 1000 1200 6 3600 4400 7 8300 9500 4 2600 2600 8 7300 7500 +1 8100 8900 0 1300 1300 2 5800 6400 6 700 700 8 5900 6900 9 7000 8200 5 4600 4800 3 5000 5400 4 8200 9800 7 4100 4900 diff --git a/examples/cp/basic/kmeans.py b/examples/cp/basic/kmeans.py new file mode 100644 index 0000000..f29ecda --- /dev/null +++ b/examples/cp/basic/kmeans.py @@ -0,0 +1,111 @@ +# -------------------------------------------------------------------------- +# Source file provided under Apache License, Version 2.0, January 2004, +# http://www.apache.org/licenses/ +# (c) Copyright IBM Corp. 2021, 2022 +# -------------------------------------------------------------------------- + +""" +K-means is a way of clustering points in a multi-dimensional space +where the set of points to be clustered are partitioned into k subsets. +The idea is to minimize the inter-point distances inside a cluster in +order to produce clusters which group together close points. + +See https://en.wikipedia.org/wiki/K-means_clustering +""" + + +import numpy as np +from docplex.cp.model import CpoModel +import docplex.cp.solver.solver as solver +from docplex.cp.utils import compare_natural + +def make_model(coords, k, trust_numerics=True): + """ + Build a K-means model from a set of coordinate vectors (points), + and a given number of clusters k. + + We assign each point to a cluster and minimize the objective which + is the sum of the squares of the distances of each point to + the centre of gravity of the cluster to which it belongs. + + Here, there are two ways of building the objective function. One + uses the sum of squares of the coordinates of points in a cluster + minus the size of the cluster times the center value. This is akin + to the calculation of variance vi E[X^2] - E[X]^2. This is the most + efficient but can be numerically unstable due to massive cancellation. + + The more numerically stable (but less efficient) way to calculate the + objective is the analog of the variance calculation (sum_i(X_i - mu_i)^2)/n + """ + # Sizes and ranges + n, d = coords.shape + N, D, K = range(n), range(d), range(k) + + # Model, and decision variables. x[c] = cluster to which node c belongs + mdl = CpoModel() + x = [mdl.integer_var(0, k-1, "C_{}".format(i)) for i in N] + + # Size (number of nodes) in each cluster. If this is zero, we make + # it 1 to avoid division by zero later (if a particular cluster is + # not used). + csize = [mdl.max(1, mdl.count(x, c)) for c in K] + + # Calculate total distance squared + total_dist2 = 0 + for c in K: # For each cluster + # Boolean vector saying which points are in this cluster + included = [x[i] == c for i in N] + for dim in D: # For each dimension + # Points for each point in the given dimension (x, y, z, ...) + point = coords[:, dim] + + # Calculate the cluster centre for this dimension + centre = mdl.scal_prod(included, point) / csize[c] + + # Calculate the total distance^2 for this cluster & dimension + if trust_numerics: + sum_of_x2 = mdl.scal_prod(included, (p**2 for p in point)) + dist2 = sum_of_x2 - centre**2 * csize[c] + else: + all_dist2 = ((centre - p)**2 for p in point) + dist2 = mdl.scal_prod(included, all_dist2) + + # Keep the total distance squared in a sum + total_dist2 += dist2 + + # Minimize the total distance squared + mdl.minimize(total_dist2) + return mdl + + +if __name__ == "__main__": + import sys + # Default values + n, d, k, sd = 500, 2, 5, 1234 + + # Accept number of points, number of dimensions, number of clusters, seed + if len(sys.argv) > 1: + n = int(sys.argv[1]) + if len(sys.argv) > 2: + d = int(sys.argv[2]) + if len(sys.argv) > 3: + k = int(sys.argv[3]) + if len(sys.argv) > 4: + sd = int(sys.argv[4]) + + # Message + print("Generating with N = {}, D = {}, K = {}".format(n, d, k)) + + # Seed and generate coordinates on the unit hypercube + np.random.seed(sd) + coords = np.random.uniform(0, 1, size=(n, d)) + + # Build model + mdl = make_model(coords, k) + + # Solve using constraint programming + mdl.solve(SearchType="Restart", TimeLimit=10, LogPeriod=50000) + + if compare_natural(solver.get_solver_version(), '22.1') >= 0: + # Solve using neighborhood search + mdl.solve(SearchType="Neighborhood", TimeLimit=10, LogPeriod=50000) diff --git a/examples/cp/basic/plant_location_with_kpis.py b/examples/cp/basic/plant_location_with_kpis.py index b08d769..9862570 100644 --- a/examples/cp/basic/plant_location_with_kpis.py +++ b/examples/cp/basic/plant_location_with_kpis.py @@ -37,7 +37,7 @@ """ from docplex.cp.model import CpoModel -from docplex.cp.config import context +import docplex.cp.solver.solver as solver from docplex.cp.utils import compare_natural from collections import deque import os @@ -99,7 +99,8 @@ mdl.add(mdl.minimize(obj)) # Add KPIs -if context.model.version is None or compare_natural(context.model.version, '12.9') >= 0: +sol_version = solver.get_solver_version() +if compare_natural(sol_version, '12.9') >= 0: mdl.add_kpi(mdl.sum(demand) / mdl.scal_prod(open, capacity), "Occupancy") mdl.add_kpi(mdl.min([load[l] / capacity[l] + (1 - open[l]) for l in range(nbLocation)]), "Min occupancy") @@ -113,7 +114,7 @@ msol = mdl.solve(TimeLimit=10, trace_log=False) # Set trace_log=True to have a real-time view of the KPIs if msol: print(" Objective value: {}".format(msol.get_objective_values()[0])) - if context.model.version is None or compare_natural(context.model.version, '12.9') >= 0: + if compare_natural(sol_version, '12.9') >= 0: print(" KPIs: {}".format(msol.get_kpis())) else: print(" No solution") diff --git a/examples/cp/basic/sched_jobshop_blackbox.py b/examples/cp/basic/sched_jobshop_blackbox.py new file mode 100644 index 0000000..a255f28 --- /dev/null +++ b/examples/cp/basic/sched_jobshop_blackbox.py @@ -0,0 +1,289 @@ +# -------------------------------------------------------------------------- +# Source file provided under Apache License, Version 2.0, January 2004, +# http://www.apache.org/licenses/ +# (c) Copyright IBM Corp. 2015, 2021 +# -------------------------------------------------------------------------- + +""" +Problem Description +------------------- + +This example illustrates how to use blackbox expressions to solve +a job-shop scheduling problem with uncertain operation durations. + +It is an alternative approach to the stochastic optimization technique +illustrated in example sched_stochastic_jobshop.cpp. + +The problem is an extension of the classical job-shop scheduling +problem (see sched_jobshop.cpp). + +In the classical job-shop scheduling problem a finite set of jobs is +processed on a finite set of machines. Each job is characterized by a +fixed order of operations, each of which is to be processed on a +specific machine for a specified duration. Each machine can process +at most one operation at a time and once an operation initiates +processing on a given machine it must complete processing +uninterrupted. The objective of the problem is to find a schedule +that minimizes the makespan of the schedule. + +In the present version of the problem, the duration of a given operation +is uncertain and supposed to be a uniform random variable varying in an +operation specific range [min, max]. The objective of the problem is to +find an ordering of the operations on the machines that minimizes +the average makespan of the schedule. + +The estimation of the average makespan is computed by a blackbox expression +avg_makespan on the set of sequence variables of the model (see the use of +function mean_makespan). This blackbox function simulates the execution of the +specified sequences of operations on the machines on a number of samples and +returns an estimation of the average makespan. + +The model uses this blackbox expression as objective function to be minimized. +Note that the model uses the average duration of operations (dmin+dmax)/2 as +deterministic duration to guide the search. One can show that the makespan +of the deterministic problem using the average durations is always lesser than +the average makespan, this is why it can be used as a lower bound on the blackbox +objective function. + +The simulation simulates the execution of operations with uncertain durations under +precedence constraints in order to estimate the average makespan. Two techniques +are available to choose the durations: Monte-Carlo sampling and Descriptive Sampling +(depending on the definition of constant DESC_SAMPLING). + +For each sample, Monte-Carlo sampling simply draws a random value in the +specified range for the duration of the each operation and simulates the execution +of this precedence graph to compute a sample of the makespan. + +Descriptive sampling [1] is a more robust technique for sampling operations duration +in the context of simulating a precedence graph. It ensures that for a given operation, +the probability distibution of its duration is efficiently sampled by controling +the input set of sampled values. + +Typically, a similar precision on the average makespan is achieved with 30 samples +of Descriptive sampling instead of typically 200 samples in Monte-Carlo sampling. + +[1] E. Saliby. "Descriptive Sampling: A Better Approach to Monte Carlo Simulation". +The Journal of the Operational Research Society +Vol. 41, No. 12 (Dec., 1990), pp. 1133-1142 + +The blackbox expression avg_makespan is passed as data fields its scope +(the array of sequence variables representing the sequence of operations +on each machines), but other parameters for the durations and some housekeeping +structures are also passed using functools.partial. + +For more information on blackbox expressions, see the concept "Blackbox expressions" +in the CP Optimizer C++ API Reference Manual. +""" + +# +# Problem parameters +# + +DEFAULT_FILENAME = "../../../examples/data/jobshop_blackbox_default.data" +DEFAULT_FILENAME = "data/jobshop_blackbox_default.data" +DEFAULT_TIMELIMIT = 30 +DESC_SAMPLING = True +NUM_SAMPLES = 300 if DESC_SAMPLING else 2000 +RANDOM_SEED = 1234 + +# +# Imports +# +import sys +try: + import numpy as np + import functools + import numba +except ImportError: + print("Please ensure you have installed modules 'numpy', 'functools' and 'numba'.") + sys.exit(0) + +from docplex.cp.utils import compare_natural +import docplex.cp.solver.solver as solver +# check Solver version +sol_version = solver.get_solver_version() +if compare_natural(sol_version, '22.1') < 0: + print("Blackbox functions are not implemented in this solver version: {}".format(sol_version)) + print("This example cannot be run.") + sys.exit(0) + +from docplex.cp.model import CpoModel, CpoBlackboxFunction + +# Core of the simulation. The simulation has been pre-compiled into the +# 'cstream' variable (command stream) which is made up of a series of commands: +# [predecessors] +# This is used to build the makespan by taking the end time to be the duration +# of the operation plus the max end time of the predecessors. + +@numba.jit(nopython=True) +def simulate(cstream, durations) : + num_ops, num_samples = durations.shape + end = np.empty(num_ops, dtype=np.float64) + total_ms = 0 + for s in range(num_samples): + cit = iter(cstream) + for i in range(num_ops): + n = next(cit) + e = 0 + for k in range(n): + e = max(e, end[next(cit)]) + o = next(cit) + end[o] = e + durations[o, s] + total_ms += np.max(end) + rms = total_ms / num_samples + return rms + + +# Three parts here. First, we build the predecessor graph which +# means that for each operation we can associate the previous on +# the machine and previous in the job. So, operations can have +# from 0 to 2 predecessors. +# +# Second, we order the operations topologically so that we can +# calculate the makespan in a purely feed-forward fashion to keep +# it fast. +# +# Thirdly, we perform the actual simulation. Because we are doing +# this with numba, we will pass just the simplest of data structures +# to the simulation function as number sometimes has trouble with +# non-numeric data. + +def mean_makespan(machine_sequences, # decided by CP Optimizer + itv_to_op, # map: interval var -> operation index + durations # durations per operation and scenario +): + # They should be topologically ordered. + num_ops = sum(len(ms) for ms in machine_sequences) + + # We assume a fixed job length = number of machines + job_length = len(machine_sequences) + + # Build predecessors + direct_pred = [None] * num_ops + used = [0] * num_ops + for ms in machine_sequences: + prev = -1 + for r, itv in enumerate(ms): + op_id = itv_to_op[itv] + pred = ([op_id-1] if (op_id % job_length) != 0 else []) + ([prev] if r != 0 else []) + for p in pred: + used[p] += 1 + direct_pred[op_id] = pred + prev = op_id + + # Build topological order + active = [ i for i in range(num_ops) if used[i] == 0 ] + order = [] + i = 0 + while i < len(active): + a = active[i] + order.append((a, direct_pred[a])) + for p in direct_pred[a]: + used[p] -= 1 + if used[p] == 0: + active.append(p) + i += 1 + order.reverse() + + # If we could not go over everything, + # there is a loop in the precedence graph + assert(len(active) == num_ops) + + # Simulate using numba. So use simple data structures + commands = [] + for i in range(num_ops): + commands.append(len(order[i][1])) + for p in order[i][1]: + commands.append(p) + commands.append(order[i][0]) + value = simulate(np.array(commands), durations) + return value + +# +# Make all durations for all operations in all scenarios +# +def make_durations(drange, num_samples, desc_sampling, seed): + rng = np.random.default_rng(seed) + num_ops = len(drange) + durations = np.empty((num_ops, num_samples)) + for i in range(num_ops): + dmin, dmax = drange[i] + if desc_sampling: + width = (dmax - dmin + 1) / num_samples + for s in range(num_samples): + durations[i,s] = np.floor(dmin + width * (s + rng.random())) + rng.shuffle(durations[i,:]) + else: + for s in range(num_samples): + durations[i,s] = rng.integers(dmin, dmax + 1) + return durations + + +def main(argv) : + filename = DEFAULT_FILENAME + time_limit = DEFAULT_TIMELIMIT + if len(argv) > 1: + if argv[1] != "-": + filename = argv[1] + if len(argv) > 2: + time_limit = float(argv[2]) + + try: + with open(filename) as f: + file_data = (int(elem) for elem in f.read().split()) + except FileNotFoundError as ex: + print("Could not open {}".format(filename)) + print("Usage: {}