@article{mehlhorn_cycle_nodate,
title = {Cycle {Bases} in {Graphs} {Structure}, {Algorithms}, {Applications}, {Open} {Problems}},
language = {en},
author = {Mehlhorn, Kurt},
pages = {65},
}
@article{Berry2013,
title = {Time-{Scale} {Separation} from {Diffusion}-{Mapped} {Delay} {Coordinates}},
volume = {12},
url = {https://epubs.siam.org/doi/10.1137/12088183X},
doi = {10.1137/12088183X},
abstract = {It has long been known that the method of time-delay embedding can be used to reconstruct nonlinear dynamics from time series data. A less-appreciated fact is that the induced geometry of time-delay coordinates increasingly biases the reconstruction toward the stable directions as delays are added. This bias can be exploited, using the diffusion maps approach to dimension reduction, to extract dynamics on desired time scales from high-dimensional observed data. We demonstrate the technique on a wide range of examples, including data generated by a model of meandering spiral waves and video recordings of a liquid-crystal experiment.},
number = {2},
urldate = {2021-04-21},
journal = {SIAM Journal on Applied Dynamical Systems},
author = {Berry, T. and Cressman, J. R. and Gregurić-Ferenček, Z. and Sauer, T.},
month = jan,
year = {2013},
publisher = {Society for Industrial and Applied Mathematics},
pages = {618--649},
}
@inproceedings{Wegenkittl1997,
title = {Visualizing the behaviour of higher dimensional dynamical systems},
booktitle = {Proceedings. {Visualization} '97 ({Cat}. {No}. {97CB36155})},
author = {Wegenkittl, Rainer and L{\"o}ffelmann, Helwig and Gr{\"o}ller, Eduard},
year = {1997},
pages = {119--125},
}
@article{Bartolovic2020,
title = {Phase {Space} {Projection} of {Dynamical} {Systems}},
volume = {39},
copyright = {© 2020 The Author(s) Computer Graphics Forum © 2020 The Eurographics Association and John Wiley \& Sons Ltd. Published by John Wiley \& Sons Ltd.},
issn = {1467-8659},
url = {https://onlinelibrary.wiley.com/doi/abs/10.1111/cgf.13978},
doi = {10.1111/cgf.13978},
abstract = {Dynamical systems are commonly used to describe the state of time-dependent systems. In many engineering and control problems, the state space is high-dimensional making it difficult to analyze and visualize the behavior of the system for varying input conditions. We present a novel dimensionality reduction technique that is tailored to high-dimensional dynamical systems. In contrast to standard general purpose dimensionality reduction algorithms, we use energy minimization to preserve properties of the flow in the high-dimensional space. Once the projection operator is optimized, further high-dimensional trajectories are projected easily. Our 3D projection maintains a number of useful flow properties, such as critical points and flow maps, and is optimized to match geometric characteristics of the high-dimensional input, as well as optional user constraints. We apply our method to trajectories traced in the phase spaces of second-order dynamical systems, including finite-sized objects in fluids, the circular restricted three-body problem and a damped double pendulum. We compare the projections with standard visualization techniques, such as PCA, t-SNE and UMAP, and visualize the dynamical systems with multiple coordinated views interactively, featuring a spatial embedding, projection to subspaces, our dimensionality reduction and a seed point exploration tool.},
language = {en},
number = {3},
urldate = {2021-04-20},
journal = {Computer Graphics Forum},
author = {Bartolovic, Nemanja and Gross, Markus and Günther, Tobias},
year = {2020},
keywords = {CCS Concepts, Scientific visualization, • Human-centered computing → Visualization techniques},
pages = {253--264},
}
@article{Narendra1992,
title = {Neural networks and dynamical systems},
volume = {6},
issn = {0888-613X},
url = {https://www.sciencedirect.com/science/article/pii/0888613X9290014Q},
doi = {10.1016/0888-613X(92)90014-Q},
abstract = {Models for the identification and control of nonlinear dynamical systems using neural networks were introduced by Narendra and Parthasarathy in 1990, and methods for the adjustment of model parameters were also suggested. Simulation results of simple nonlinear systems were presented to demonstrate the feasibility of the schemes proposed. The concepts introduced at that time are investigated in this paper in greater detail. In particular, a number of questions that arise when the methods are applied to more complex systems are addressed. These include nonlinear systems of higher order as well as multivariable systems. The effect of using simpler models for both identification and control are discussed, and a new controller structure containing a linear part in addition to a multilayer neural network is introduced.},
language = {en},
number = {2},
urldate = {2021-04-14},
journal = {International Journal of Approximate Reasoning},
author = {Narendra, Kumpati S. and Parthasarathy, Kannan},
month = feb,
year = {1992},
keywords = {backpropagation, control, dynamic backpropagation, dynamical systems, identification, neural networks},
pages = {109--131},
}
@article{behrmann_invertible_nodate,
title = {Invertible {Residual} {Networks}},
language = {en},
author = {Behrmann, Jens},
note = {Preprint; appears to duplicate the published version under key Behrmann2019 — verify and merge},
pages = {30},
}
@inproceedings{Behrmann2019,
series = {Proceedings of machine learning research},
title = {Invertible residual networks},
volume = {97},
url = {http://proceedings.mlr.press/v97/behrmann19a.html},
abstract = {We show that standard ResNet architectures can be made invertible, allowing the same model to be used for classification, density estimation, and generation. Typically, enforcing invertibility requires partitioning dimensions or restricting network architectures. In contrast, our approach only requires adding a simple normalization step during training, already available in standard frameworks. Invertible ResNets define a generative model which can be trained by maximum likelihood on unlabeled data. To compute likelihoods, we introduce a tractable approximation to the Jacobian log-determinant of a residual block. Our empirical evaluation shows that invertible ResNets perform competitively with both state-of-the-art image classifiers and flow-based generative models, something that has not been previously achieved with a single architecture.},
booktitle = {Proceedings of the 36th international conference on machine learning},
publisher = {PMLR},
author = {Behrmann, Jens and Grathwohl, Will and Chen, Ricky T. Q. and Duvenaud, David and Jacobsen, Joern-Henrik},
editor = {Chaudhuri, Kamalika and Salakhutdinov, Ruslan},
month = jun,
year = {2019},
pages = {573--582},
}
@inproceedings{Ott2021,
title = {{ResNet} after all: {Neural} {ODEs} and their numerical solution},
url = {https://openreview.net/forum?id=HxzSxSxLOJZ},
booktitle = {International conference on learning representations},
author = {Ott, Katharina and Katiyar, Prateek and Hennig, Philipp and Tiemann, Michael},
year = {2021},
}
@article{Arnold2021,
title = {State–space modeling for control based on physics-informed neural networks},
volume = {101},
issn = {0952-1976},
url = {https://www.sciencedirect.com/science/article/pii/S0952197621000427},
doi = {10.1016/j.engappai.2021.104195},
abstract = {Dynamic system models, based on partial differential equations (PDEs), are often unsuitable for direct use in control or state estimation purposes, due to the high computational cost arising from the necessity to apply sophisticated numerical methods for a solution, such as semi-discretization, also known as spatial discretization. Hence, there is often an inevitable trade-off between accuracy and computational efficiency during the model reduction step to ensure real-time applicability. In this contribution, we propose a state–space model formulation, using so-called physics-informed neural networks. This modeling approach enables a highly efficient inclusion of complex physical system descriptions within the design of control or state estimation setups. The resulting state–space model does not require any numerical solution techniques during the state propagation, as each time step is based on the evaluation of a reasonably sized neural net that approximates the solution of the PDE. Thus, this approach is suitable for real-time applications of various complex dynamic systems that can be described by one or a set of PDEs. Besides the modeling approach itself, the contribution also provides an illustrative example of the state–space modeling method in the context of model predictive control, as well as state estimation with an extended Kalman filter. These methods will be applied to a system based on a numerical solution of the Burgers equation.},
language = {en},
urldate = {2021-03-18},
journal = {Engineering Applications of Artificial Intelligence},
author = {Arnold, Florian and King, Rudibert},
month = may,
year = {2021},
keywords = {Machine learning, Model predictive control, Neural networks, State estimation, State–space},
pages = {104195},
}
@incollection{Doan2020,
address = {Cham},
title = {Learning {Hidden} {States} in a {Chaotic} {System}: {A} {Physics}-{Informed} {Echo} {State} {Network} {Approach}},
volume = {12142},
isbn = {978-3-030-50432-8 978-3-030-50433-5},
shorttitle = {Learning {Hidden} {States} in a {Chaotic} {System}},
url = {http://link.springer.com/10.1007/978-3-030-50433-5_9},
language = {en},
urldate = {2020-09-30},
booktitle = {Computational {Science} – {ICCS} 2020},
publisher = {Springer International Publishing},
author = {Doan, Nguyen Anh Khoa and Polifke, Wolfgang and Magri, Luca},
editor = {Krzhizhanovskaya, Valeria V. and Závodszky, Gábor and Lees, Michael H. and Dongarra, Jack J. and Sloot, Peter M. A. and Brissos, Sérgio and Teixeira, João},
year = {2020},
doi = {10.1007/978-3-030-50433-5_9},
series = {Lecture Notes in Computer Science},
pages = {117--123},
}
@article{Praditia2020,
title = {Improving {Thermochemical} {Energy} {Storage} {Dynamics} {Forecast} with {Physics}-{Inspired} {Neural} {Network} {Architecture}},
volume = {13},
issn = {1996-1073},
url = {https://www.mdpi.com/1996-1073/13/15/3873},
doi = {10.3390/en13153873},
abstract = {Thermochemical Energy Storage (TCES), specifically the calcium oxide (CaO)/calcium hydroxide (Ca(OH)2) system is a promising energy storage technology with relatively high energy density and low cost. However, the existing models available to predict the system's internal states are computationally expensive. An accurate and real-time capable model is therefore still required to improve its operational control. In this work, we implement a Physics-Informed Neural Network (PINN) to predict the dynamics of the TCES internal state. Our proposed framework addresses three physical aspects to build the PINN: (1) we choose a Nonlinear Autoregressive Network with Exogeneous Inputs (NARX) with deeper recurrence to address the nonlinear latency; (2) we train the network in closed-loop to capture the long-term dynamics; and (3) we incorporate physical regularisation during its training, calculated based on discretized mole and energy balance equations. To train the network, we perform numerical simulations on an ensemble of system parameters to obtain synthetic data. Even though the suggested approach provides results with the error of $3.96 \times 10^{-4}$ which is in the same range as the result without physical regularisation, it is superior compared to conventional Artificial Neural Network (ANN) strategies because it ensures physical plausibility of the predictions, even in a highly dynamic and nonlinear problem. Consequently, the suggested PINN can be further developed for more complicated analysis of the TCES system.},
number = {15},
journal = {Energies},
author = {Praditia, Timothy and Walser, Thilo and Oladyshkin, Sergey and Nowak, Wolfgang},
year = {2020},
pages = {3873},
}
@article{Wang2020a,
title = {Understanding and mitigating gradient pathologies in physics-informed neural networks},
url = {http://arxiv.org/abs/2001.04536},
abstract = {The widespread use of neural networks across different scientific domains often involves constraining them to satisfy certain symmetries, conservation laws, or other domain knowledge. Such constraints are often imposed as soft penalties during model training and effectively act as domain-specific regularizers of the empirical risk loss. Physics-informed neural networks is an example of this philosophy in which the outputs of deep neural networks are constrained to approximately satisfy a given set of partial differential equations. In this work we review recent advances in scientific machine learning with a specific focus on the effectiveness of physics-informed neural networks in predicting outcomes of physical systems and discovering hidden physics from noisy data. We will also identify and analyze a fundamental mode of failure of such approaches that is related to numerical stiffness leading to unbalanced back-propagated gradients during model training. To address this limitation we present a learning rate annealing algorithm that utilizes gradient statistics during model training to balance the interplay between different terms in composite loss functions. We also propose a novel neural network architecture that is more resilient to such gradient pathologies. Taken together, our developments provide new insights into the training of constrained neural networks and consistently improve the predictive accuracy of physics-informed neural networks by a factor of 50-100x across a range of problems in computational physics. All code and data accompanying this manuscript are publicly available at {\textbackslash}url\{https://github.com/PredictiveIntelligenceLab/GradientPathologiesPINNs\}.},
urldate = {2021-03-17},
eprint = {2001.04536},
eprinttype = {arXiv},
author = {Wang, Sifan and Teng, Yujun and Perdikaris, Paris},
month = jan,
year = {2020},
keywords = {Computer Science - Machine Learning, Mathematics - Numerical Analysis, Statistics - Machine Learning},
}
@article{Wang2020,
title = {When and why {PINNs} fail to train: {A} neural tangent kernel perspective},
shorttitle = {When and why {PINNs} fail to train},
url = {http://arxiv.org/abs/2007.14527},
abstract = {Physics-informed neural networks (PINNs) have lately received great attention thanks to their flexibility in tackling a wide range of forward and inverse problems involving partial differential equations. However, despite their noticeable empirical success, little is known about how such constrained neural networks behave during their training via gradient descent. More importantly, even less is known about why such models sometimes fail to train at all. In this work, we aim to investigate these questions through the lens of the Neural Tangent Kernel (NTK); a kernel that captures the behavior of fully-connected neural networks in the infinite width limit during training via gradient descent. Specifically, we derive the NTK of PINNs and prove that, under appropriate conditions, it converges to a deterministic kernel that stays constant during training in the infinite-width limit. This allows us to analyze the training dynamics of PINNs through the lens of their limiting NTK and find a remarkable discrepancy in the convergence rate of the different loss components contributing to the total training error. To address this fundamental pathology, we propose a novel gradient descent algorithm that utilizes the eigenvalues of the NTK to adaptively calibrate the convergence rate of the total training error. Finally, we perform a series of numerical experiments to verify the correctness of our theory and the practical effectiveness of the proposed algorithms. The data and code accompanying this manuscript are publicly available at {\textbackslash}url\{https://github.com/PredictiveIntelligenceLab/PINNsNTK\}.},
urldate = {2021-03-17},
eprint = {2007.14527},
eprinttype = {arXiv},
author = {Wang, Sifan and Yu, Xinling and Perdikaris, Paris},
month = jul,
year = {2020},
keywords = {Computer Science - Machine Learning, Mathematics - Numerical Analysis, Statistics - Machine Learning},
}
@article{rahaman_spectral_2019,
title = {On the {Spectral} {Bias} of {Neural} {Networks}},
url = {http://arxiv.org/abs/1806.08734},
abstract = {Neural networks are known to be a class of highly expressive functions able to fit even random input-output mappings with \$100{\textbackslash}\%\$ accuracy. In this work, we present properties of neural networks that complement this aspect of expressivity. By using tools from Fourier analysis, we show that deep ReLU networks are biased towards low frequency functions, meaning that they cannot have local fluctuations without affecting their global behavior. Intuitively, this property is in line with the observation that over-parameterized networks find simple patterns that generalize across data samples. We also investigate how the shape of the data manifold affects expressivity by showing evidence that learning high frequencies gets {\textbackslash}emph\{easier\} with increasing manifold complexity, and present a theoretical understanding of this behavior. Finally, we study the robustness of the frequency components with respect to parameter perturbation, to develop the intuition that the parameters must be finely tuned to express high frequency functions.},
urldate = {2021-03-17},
eprint = {1806.08734},
eprinttype = {arXiv},
author = {Rahaman, Nasim and Baratin, Aristide and Arpit, Devansh and Draxler, Felix and Lin, Min and Hamprecht, Fred A. and Bengio, Yoshua and Courville, Aaron},
month = may,
year = {2019},
keywords = {Computer Science - Machine Learning, Statistics - Machine Learning},
}
@misc{noauthor_191000359_nodate,
title = {Truth or {Backpropaganda}? {An} {Empirical} {Investigation} of {Deep} {Learning} {Theory}},
url = {https://arxiv.org/abs/1910.00359},
urldate = {2021-03-16},
eprint = {1910.00359},
eprinttype = {arXiv},
}
@article{Li2018,
title = {Visualizing the {Loss} {Landscape} of {Neural} {Nets}},
url = {http://arxiv.org/abs/1712.09913},
abstract = {Neural network training relies on our ability to find "good" minimizers of highly non-convex loss functions. It is well-known that certain network architecture designs (e.g., skip connections) produce loss functions that train easier, and well-chosen training parameters (batch size, learning rate, optimizer) produce minimizers that generalize better. However, the reasons for these differences, and their effects on the underlying loss landscape, are not well understood. In this paper, we explore the structure of neural loss functions, and the effect of loss landscapes on generalization, using a range of visualization methods. First, we introduce a simple "filter normalization" method that helps us visualize loss function curvature and make meaningful side-by-side comparisons between loss functions. Then, using a variety of visualizations, we explore how network architecture affects the loss landscape, and how training parameters affect the shape of minimizers.},
urldate = {2021-03-15},
eprint = {1712.09913},
eprinttype = {arXiv},
author = {Li, Hao and Xu, Zheng and Taylor, Gavin and Studer, Christoph and Goldstein, Tom},
month = nov,
year = {2018},
keywords = {Computer Science - Computer Vision and Pattern Recognition, Computer Science - Machine Learning, Statistics - Machine Learning},
}
@article{razavi_review_2012,
title = {Review of surrogate modeling in water resources},
volume = {48},
issn = {00431397},
shorttitle = {Review of surrogate modeling in water resources},
url = {http://doi.wiley.com/10.1029/2011WR011527},
doi = {10.1029/2011WR011527},
language = {en},
number = {7},
urldate = {2021-03-12},
journal = {Water Resources Research},
author = {Razavi, Saman and Tolson, Bryan A. and Burn, Donald H.},
month = jul,
year = {2012},
}
@inproceedings{viana_making_2010,
address = {Montreal, Quebec, Canada},
title = {Making the {Most} {Out} of {Surrogate} {Models}: {Tricks} of the {Trade}},
isbn = {978-0-7918-4409-0},
shorttitle = {Making the {Most} {Out} of {Surrogate} {Models}},
url = {https://asmedigitalcollection.asme.org/IDETC-CIE/proceedings/IDETC-CIE2010/44090/587/340462},
doi = {10.1115/DETC2010-28813},
urldate = {2021-03-12},
booktitle = {Volume 1: 36th {Design} {Automation} {Conference}, {Parts} {A} and {B}},
publisher = {ASMEDC},
author = {Viana, Felipe A. C. and Gogu, Christian and Haftka, Raphael T.},
month = jan,
year = {2010},
pages = {587--598},
}
@article{gear_solving_1984,
title = {Solving {Ordinary} {Differential} {Equations} with {Discontinuities}},
volume = {10},
issn = {0098-3500},
url = {http://doi.acm.org/10.1145/356068.356071},
doi = {10.1145/356068.356071},
number = {1},
journal = {ACM Trans. Math. Softw.},
author = {Gear, C. W. and {\O}sterby, Ole},
month = jan,
year = {1984},
pages = {23--44},
}
@article{westermann_surrogate_2019,
title = {Surrogate modelling for sustainable building design – {A} review},
volume = {198},
issn = {03787788},
url = {https://linkinghub.elsevier.com/retrieve/pii/S0378778819302877},
doi = {10.1016/j.enbuild.2019.05.057},
language = {en},
urldate = {2021-03-12},
journal = {Energy and Buildings},
author = {Westermann, Paul and Evins, Ralph},
month = sep,
year = {2019},
pages = {170--186},
}
@article{green_persistence_1986,
title = {Persistence of excitation in linear systems},
volume = {7},
issn = {01676911},
url = {https://linkinghub.elsevier.com/retrieve/pii/0167691186900526},
doi = {10.1016/0167-6911(86)90052-6},
language = {en},
number = {5},
urldate = {2021-02-24},
journal = {Systems \& Control Letters},
author = {Green, Michael and Moore, John B.},
month = sep,
year = {1986},
pages = {351--360},
}
@incollection{Paszke2019,
title = {{PyTorch}: {An} imperative style, high-performance deep learning library},
url = {http://papers.nips.cc/paper/9015-pytorch-an-imperative-style-high-performance-deep-learning-library.pdf},
booktitle = {Advances in neural information processing systems 32},
publisher = {Curran Associates, Inc.},
author = {Paszke, Adam and Gross, Sam and Massa, Francisco and Lerer, Adam and Bradbury, James and Chanan, Gregory and Killeen, Trevor and Lin, Zeming and Gimelshein, Natalia and Antiga, Luca and Desmaison, Alban and Kopf, Andreas and Yang, Edward and DeVito, Zachary and Raison, Martin and Tejani, Alykhan and Chilamkurthy, Sasank and Steiner, Benoit and Fang, Lu and Bai, Junjie and Chintala, Soumith},
editor = {Wallach, H. and Larochelle, H. and Beygelzimer, A. and dAlché-Buc, F. and Fox, E. and Garnett, R.},
year = {2019},
pages = {8026--8037},
}
@article{Talathi2016,
title = {Improving performance of recurrent neural network with relu nonlinearity},
url = {http://arxiv.org/abs/1511.03771},
abstract = {In recent years significant progress has been made in successfully training recurrent neural networks (RNNs) on sequence learning problems involving long range temporal dependencies. The progress has been made on three fronts: (a) Algorithmic improvements involving sophisticated optimization techniques, (b) network design involving complex hidden layer nodes and specialized recurrent layer connections and (c) weight initialization methods. In this paper, we focus on recently proposed weight initialization with identity matrix for the recurrent weights in a RNN. This initialization is specifically proposed for hidden nodes with Rectified Linear Unit (ReLU) non linearity. We offer a simple dynamical systems perspective on weight initialization process, which allows us to propose a modified weight initialization strategy. We show that this initialization technique leads to successfully training RNNs composed of ReLUs. We demonstrate that our proposal produces comparable or better solution for three toy problems involving long range temporal structure: the addition problem, the multiplication problem and the MNIST classification problem using sequence of pixels. In addition, we present results for a benchmark action recognition problem.},
urldate = {2021-03-10},
eprint = {1511.03771},
eprinttype = {arXiv},
author = {Talathi, Sachin S. and Vartak, Aniket},
month = jun,
year = {2016},
keywords = {Computer Science - Machine Learning, Computer Science - Neural and Evolutionary Computing},
}
@article{drgona_spectral_2020,
title = {Spectral {Analysis} and {Stability} of {Deep} {Neural} {Dynamics}},
url = {http://arxiv.org/abs/2011.13492},
abstract = {Our modern history of deep learning follows the arc of famous emergent disciplines in engineering (e.g. aero- and fluid dynamics) when theory lagged behind successful practical applications. Viewing neural networks from a dynamical systems perspective, in this work, we propose a novel characterization of deep neural networks as pointwise affine maps, making them accessible to a broader range of analysis methods to help close the gap between theory and practice. We begin by showing the equivalence of neural networks with parameter-varying affine maps parameterized by the state (feature) vector. As the paper's main results, we provide necessary and sufficient conditions for the global stability of generic deep feedforward neural networks. Further, we identify links between the spectral properties of layer-wise weight parametrizations, different activation functions, and their effect on the overall network's eigenvalue spectra. We analyze a range of neural networks with varying weight initializations, activation functions, bias terms, and depths. Our view of neural networks as affine parameter varying maps allows us to "crack open the black box" of global neural network dynamical behavior through visualization of stationary points, regions of attraction, state-space partitioning, eigenvalue spectra, and stability properties. Our analysis covers neural networks both as an end-to-end function and component-wise without simplifying assumptions or approximations. The methods we develop here provide tools to establish relationships between global neural dynamical properties and their constituent components which can aid in the principled design of neural networks for dynamics modeling and optimal control.},
urldate = {2021-03-10},
eprint = {2011.13492},
eprinttype = {arXiv},
author = {Drgona, Jan and Skomski, Elliott and Vasisht, Soumya and Tuor, Aaron and Vrabie, Draguna},
month = nov,
year = {2020},
keywords = {Computer Science - Machine Learning, Computer Science - Neural and Evolutionary Computing},
}
@inproceedings{Ioffe2015,
title = {Batch {Normalization}: {Accelerating} {Deep} {Network} {Training} by {Reducing} {Internal} {Covariate} {Shift}},
shorttitle = {Batch {Normalization}},
url = {http://proceedings.mlr.press/v37/ioffe15.html},
abstract = {Training Deep Neural Networks is complicated by the fact that the distribution of each layer’s inputs changes during training, as the parameters of the previous layers change. This slows down the t...},
language = {en},
urldate = {2021-03-10},
booktitle = {International {Conference} on {Machine} {Learning}},
publisher = {PMLR},
author = {Ioffe, Sergey and Szegedy, Christian},
month = jun,
year = {2015},
issn = {1938-7228},
pages = {448--456},
}
@article{Santurkar2019,
title = {How {Does} {Batch} {Normalization} {Help} {Optimization}?},
url = {http://arxiv.org/abs/1805.11604},
abstract = {Batch Normalization (BatchNorm) is a widely adopted technique that enables faster and more stable training of deep neural networks (DNNs). Despite its pervasiveness, the exact reasons for BatchNorm's effectiveness are still poorly understood. The popular belief is that this effectiveness stems from controlling the change of the layers' input distributions during training to reduce the so-called "internal covariate shift". In this work, we demonstrate that such distributional stability of layer inputs has little to do with the success of BatchNorm. Instead, we uncover a more fundamental impact of BatchNorm on the training process: it makes the optimization landscape significantly smoother. This smoothness induces a more predictive and stable behavior of the gradients, allowing for faster training.},
urldate = {2021-03-10},
eprint = {1805.11604},
eprinttype = {arXiv},
author = {Santurkar, Shibani and Tsipras, Dimitris and Ilyas, Andrew and Madry, Aleksander},
month = apr,
year = {2019},
keywords = {Computer Science - Machine Learning, Computer Science - Neural and Evolutionary Computing, Statistics - Machine Learning},
}
@article{Kingma2017,
title = {Adam: {A} {Method} for {Stochastic} {Optimization}},
shorttitle = {Adam},
url = {http://arxiv.org/abs/1412.6980},
abstract = {We introduce Adam, an algorithm for first-order gradient-based optimization of stochastic objective functions, based on adaptive estimates of lower-order moments. The method is straightforward to implement, is computationally efficient, has little memory requirements, is invariant to diagonal rescaling of the gradients, and is well suited for problems that are large in terms of data and/or parameters. The method is also appropriate for non-stationary objectives and problems with very noisy and/or sparse gradients. The hyper-parameters have intuitive interpretations and typically require little tuning. Some connections to related algorithms, on which Adam was inspired, are discussed. We also analyze the theoretical convergence properties of the algorithm and provide a regret bound on the convergence rate that is comparable to the best known results under the online convex optimization framework. Empirical results demonstrate that Adam works well in practice and compares favorably to other stochastic optimization methods. Finally, we discuss AdaMax, a variant of Adam based on the infinity norm.},
urldate = {2021-03-10},
eprint = {1412.6980},
eprinttype = {arXiv},
author = {Kingma, Diederik P. and Ba, Jimmy},
month = jan,
year = {2017},
keywords = {Computer Science - Machine Learning},
}
@inproceedings{glorot_understanding_2010,
title = {Understanding the difficulty of training deep feedforward neural networks},
url = {http://proceedings.mlr.press/v9/glorot10a.html},
abstract = {Whereas before 2006 it appears that deep multi-layer neural networks were not successfully trained, since then several algorithms have been shown to successfully train them, with experimental resul...},
language = {en},
urldate = {2021-03-10},
booktitle = {Proceedings of the {Thirteenth} {International} {Conference} on {Artificial} {Intelligence} and {Statistics}},
publisher = {JMLR Workshop and Conference Proceedings},
author = {Glorot, Xavier and Bengio, Yoshua},
month = mar,
year = {2010},
issn = {1938-7228},
pages = {249--256},
}
@misc{esteve_large-time_2020,
title = {Large-time asymptotics in deep learning},
url = {http://arxiv.org/abs/2008.02491},
abstract = {It is by now well-known that practical deep supervised learning may roughly be cast as an optimal control problem for a specific discrete-time, nonlinear dynamical system called an artificial neural network. In this work, we consider the continuous-time formulation of the deep supervised learning problem, and study the latter's behavior when the final time horizon increases, a fact that can be interpreted as increasing the number of layers in the neural network setting. When considering the classical regularized empirical risk minimization problem, we show that, in long time, the optimal states converge to zero training error, namely approach the zero training error regime, whilst the optimal control parameters approach, on an appropriate scale, minimal norm parameters with corresponding states precisely in the zero training error regime. This result provides an alternative theoretical underpinning to the notion that neural networks learn best in the overparametrized regime, when seen from the large layer perspective. We also propose a learning problem consisting of minimizing a cost with a state tracking term, and establish the well-known turnpike property, which indicates that the solutions of the learning problem in long time intervals consist of three pieces, the first and the last of which being transient short-time arcs, and the middle piece being a long-time arc staying exponentially close to the optimal solution of an associated static learning problem. This property in fact stipulates a quantitative estimate for the number of layers required to reach the zero training error regime. Both of the aforementioned asymptotic regimes are addressed in the context of continuous-time and continuous space-time neural networks, the latter taking the form of nonlinear, integro-differential equations, hence covering residual neural networks with both fixed and possibly variable depths.},
urldate = {2021-03-09},
howpublished = {arXiv preprint arXiv:2008.02491},
eprint = {2008.02491},
archiveprefix = {arXiv},
author = {Esteve, Carlos and Geshkovski, Borjan and Pighin, Dario and Zuazua, Enrique},
month = aug,
year = {2020},
keywords = {68T07, 34H05, 34H15, 93D23, 93D20, Computer Science - Machine Learning, Mathematics - Optimization and Control},
}
@misc{He2015,
title = {Deep {Residual} {Learning} for {Image} {Recognition}},
url = {http://arxiv.org/abs/1512.03385},
abstract = {Deeper neural networks are more difficult to train. We present a residual learning framework to ease the training of networks that are substantially deeper than those used previously. We explicitly reformulate the layers as learning residual functions with reference to the layer inputs, instead of learning unreferenced functions. We provide comprehensive empirical evidence showing that these residual networks are easier to optimize, and can gain accuracy from considerably increased depth. On the ImageNet dataset we evaluate residual nets with a depth of up to 152 layers---8x deeper than VGG nets but still having lower complexity. An ensemble of these residual nets achieves 3.57\% error on the ImageNet test set. This result won the 1st place on the ILSVRC 2015 classification task. We also present analysis on CIFAR-10 with 100 and 1000 layers. The depth of representations is of central importance for many visual recognition tasks. Solely due to our extremely deep representations, we obtain a 28\% relative improvement on the COCO object detection dataset. Deep residual nets are foundations of our submissions to ILSVRC \& COCO 2015 competitions, where we also won the 1st places on the tasks of ImageNet detection, ImageNet localization, COCO detection, and COCO segmentation.},
urldate = {2021-03-02},
howpublished = {arXiv preprint arXiv:1512.03385},
eprint = {1512.03385},
archiveprefix = {arXiv},
author = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
month = dec,
year = {2015},
keywords = {Computer Science - Computer Vision and Pattern Recognition},
}
@misc{liu_hierarchical_2020,
title = {Hierarchical {Deep} {Learning} of {Multiscale} {Differential} {Equation} {Time}-{Steppers}},
url = {http://arxiv.org/abs/2008.09768},
abstract = {Nonlinear differential equations rarely admit closed-form solutions, thus requiring numerical time-stepping algorithms to approximate solutions. Further, many systems characterized by multiscale physics exhibit dynamics over a vast range of timescales, making numerical integration computationally expensive due to numerical stiffness. In this work, we develop a hierarchy of deep neural network time-steppers to approximate the flow map of the dynamical system over a disparate range of time-scales. The resulting model is purely data-driven and leverages features of the multiscale dynamics, enabling numerical integration and forecasting that is both accurate and highly efficient. Moreover, similar ideas can be used to couple neural network-based models with classical numerical time-steppers. Our multiscale hierarchical time-stepping scheme provides important advantages over current time-stepping algorithms, including (i) circumventing numerical stiffness due to disparate time-scales, (ii) improved accuracy in comparison with leading neural-network architectures, (iii) efficiency in long-time simulation/forecasting due to explicit training of slow time-scale dynamics, and (iv) a flexible framework that is parallelizable and may be integrated with standard numerical time-stepping algorithms. The method is demonstrated on a wide range of nonlinear dynamical systems, including the Van der Pol oscillator, the Lorenz system, the Kuramoto-Sivashinsky equation, and fluid flow pass a cylinder; audio and video signals are also explored. On the sequence generation examples, we benchmark our algorithm against state-of-the-art methods, such as LSTM, reservoir computing, and clockwork RNN. Despite the structural simplicity of our method, it outperforms competing methods on numerical integration.},
urldate = {2021-03-03},
howpublished = {arXiv preprint arXiv:2008.09768},
eprint = {2008.09768},
archiveprefix = {arXiv},
author = {Liu, Yuying and Kutz, J. Nathan and Brunton, Steven L.},
month = aug,
year = {2020},
keywords = {Computer Science - Machine Learning, Mathematics - Numerical Analysis, Physics - Computational Physics},
}
@misc{Gholami2019,
title = {{ANODE}: {Unconditionally} {Accurate} {Memory}-{Efficient} {Gradients} for {Neural} {ODEs}},
shorttitle = {{ANODE}},
url = {http://arxiv.org/abs/1902.10298},
abstract = {Residual neural networks can be viewed as the forward Euler discretization of an Ordinary Differential Equation (ODE) with a unit time step. This has recently motivated researchers to explore other discretization approaches and train ODE based networks. However, an important challenge of neural ODEs is their prohibitive memory cost during gradient backpropogation. Recently a method proposed in [8], claimed that this memory overhead can be reduced from O(LN\_t), where N\_t is the number of time steps, down to O(L) by solving forward ODE backwards in time, where L is the depth of the network. However, we will show that this approach may lead to several problems: (i) it may be numerically unstable for ReLU/non-ReLU activations and general convolution operators, and (ii) the proposed optimize-then-discretize approach may lead to divergent training due to inconsistent gradients for small time step sizes. We discuss the underlying problems, and to address them we propose ANODE, an Adjoint based Neural ODE framework which avoids the numerical instability related problems noted above, and provides unconditionally accurate gradients. ANODE has a memory footprint of O(L) + O(N\_t), with the same computational cost as reversing ODE solve. We furthermore, discuss a memory efficient algorithm which can further reduce this footprint with a trade-off of additional computational cost. We show results on Cifar-10/100 datasets using ResNet and SqueezeNext neural networks.},
urldate = {2021-03-02},
howpublished = {arXiv preprint arXiv:1902.10298},
eprint = {1902.10298},
archiveprefix = {arXiv},
author = {Gholami, Amir and Keutzer, Kurt and Biros, George},
month = jul,
year = {2019},
keywords = {Computer Science - Machine Learning},
}
@inproceedings{simpson_design_2008,
address = {Victoria, British Columbia, Canada},
title = {Design and {Analysis} of {Computer} {Experiments} in {Multidisciplinary} {Design} {Optimization}: {A} {Review} of {How} {Far} {We} {Have} {Come} - {Or} {Not}},
isbn = {978-1-60086-982-2},
shorttitle = {Design and {Analysis} of {Computer} {Experiments} in {Multidisciplinary} {Design} {Optimization}},
url = {http://arc.aiaa.org/doi/10.2514/6.2008-5802},
doi = {10.2514/6.2008-5802},
language = {en},
urldate = {2021-02-26},
booktitle = {12th {AIAA}/{ISSMO} {Multidisciplinary} {Analysis} and {Optimization} {Conference}},
publisher = {American Institute of Aeronautics and Astronautics},
author = {Simpson, Timothy and Toropov, Vasilli and Balabanov, Vladimir and Viana, Felipe},
month = sep,
year = {2008},
}
@book{american_institute_of_aeronautics_and_astronautics_12th_2008,
address = {Reston, VA},
title = {12th {AIAA}/{ISSMO} {Multidisciplinary} {Analysis} and {Optimization} {Conference}, {September} 10-12, 2008},
isbn = {978-1-56347-947-2},
language = {English},
publisher = {American Institute of Aeronautics and Astronautics},
author = {{American Institute of Aeronautics and Astronautics}},
year = {2008},
note = {OCLC: 949741311},
internal-note = {title was truncated in the export ("12th AIAA / September 10-12, 2008."); reconstructed from context -- verify against the OCLC record},
}
@article{swischuk_projection-based_2019,
title = {Projection-based model reduction: {Formulations} for physics-based machine learning},
volume = {179},
issn = {0045-7930},
shorttitle = {Projection-based model reduction},
url = {https://linkinghub.elsevier.com/retrieve/pii/S0045793018304250},
doi = {10.1016/j.compfluid.2018.07.021},
language = {en},
urldate = {2021-02-18},
journal = {Computers \& Fluids},
author = {Swischuk, Renee and Mainini, Laura and Peherstorfer, Benjamin and Willcox, Karen},
month = jan,
year = {2019},
pages = {704--717},
}
@incollection{ilchmann_model_2017,
address = {Cham},
title = {Model {Order} {Reduction} for {Differential}-{Algebraic} {Equations}: {A} {Survey}},
isbn = {978-3-319-46617-0, 978-3-319-46618-7},
shorttitle = {Model {Order} {Reduction} for {Differential}-{Algebraic} {Equations}},
url = {http://link.springer.com/10.1007/978-3-319-46618-7_3},
urldate = {2021-02-18},
booktitle = {Surveys in {Differential}-{Algebraic} {Equations} {IV}},
series = {Differential-{Algebraic} {Equations} {Forum}},
publisher = {Springer International Publishing},
author = {Benner, Peter and Stykel, Tatjana},
editor = {Ilchmann, Achim and Reis, Timo},
year = {2017},
doi = {10.1007/978-3-319-46618-7_3},
pages = {107--160},
}
@techreport{antoulas_survey_2000,
type = {{CAAM} {Technical} {Report}},
title = {A survey of model reduction methods for large-scale systems},
url = {https://hdl.handle.net/1911/101963},
number = {TR00-38},
institution = {Rice University},
author = {Antoulas, Athanasios C. and Sorensen, Danny C. and Gugercin, Serkan},
year = {2000},
}
@article{benner_survey_2015,
title = {A {Survey} of {Projection}-{Based} {Model} {Reduction} {Methods} for {Parametric} {Dynamical} {Systems}},
volume = {57},
issn = {0036-1445, 1095-7200},
url = {http://epubs.siam.org/doi/10.1137/130932715},
doi = {10.1137/130932715},
language = {en},
number = {4},
urldate = {2021-02-18},
journal = {SIAM Review},
author = {Benner, Peter and Gugercin, Serkan and Willcox, Karen},
month = jan,
year = {2015},
pages = {483--531},
}
@book{hairer_geometric_2006,
title = {Geometric numerical integration: structure-preserving algorithms for ordinary differential equations},
volume = {31},
publisher = {Springer Science \& Business Media},
author = {Hairer, Ernst and Lubich, Christian and Wanner, Gerhard},
year = {2006},
}
@misc{kidger_hey_2020,
title = {"{Hey}, that's not an {ODE}": {Faster} {ODE} {Adjoints} with 12 {Lines} of {Code}},
shorttitle = {"{Hey}, that's not an {ODE}"},
url = {http://arxiv.org/abs/2009.09457},
abstract = {Neural differential equations may be trained by backpropagating gradients via the adjoint method, which is another differential equation typically solved using an adaptive-step-size numerical differential equation solver. A proposed step is accepted if its error, {\textbackslash}emph\{relative to some norm\}, is sufficiently small; else it is rejected, the step is shrunk, and the process is repeated. Here, we demonstrate that the particular structure of the adjoint equations makes the usual choices of norm (such as \$L{\textasciicircum}2\$) unnecessarily stringent. By replacing it with a more appropriate (semi)norm, fewer steps are unnecessarily rejected and the backpropagation is made faster. This requires only minor code modifications. Experiments on a wide range of tasks---including time series, generative modeling, and physical control---demonstrate a median improvement of 40\% fewer function evaluations. On some problems we see as much as 62\% fewer function evaluations, so that the overall training time is roughly halved.},
urldate = {2021-01-29},
howpublished = {arXiv preprint arXiv:2009.09457},
eprint = {2009.09457},
archiveprefix = {arXiv},
author = {Kidger, Patrick and Chen, Ricky T. Q. and Lyons, Terry},
month = sep,
year = {2020},
keywords = {Computer Science - Machine Learning, Mathematics - Classical Analysis and ODEs},
}
@misc{zhong_symplectic_2020,
title = {Symplectic {ODE}-{Net}: {Learning} {Hamiltonian} {Dynamics} with {Control}},
shorttitle = {Symplectic {ODE}-{Net}},
url = {http://arxiv.org/abs/1909.12077},
abstract = {In this paper, we introduce Symplectic ODE-Net (SymODEN), a deep learning framework which can infer the dynamics of a physical system, given by an ordinary differential equation (ODE), from observed state trajectories. To achieve better generalization with fewer training samples, SymODEN incorporates appropriate inductive bias by designing the associated computation graph in a physics-informed manner. In particular, we enforce Hamiltonian dynamics with control to learn the underlying dynamics in a transparent way, which can then be leveraged to draw insight about relevant physical aspects of the system, such as mass and potential energy. In addition, we propose a parametrization which can enforce this Hamiltonian formalism even when the generalized coordinate data is embedded in a high-dimensional space or we can only access velocity data instead of generalized momentum. This framework, by offering interpretable, physically-consistent models for physical systems, opens up new possibilities for synthesizing model-based control strategies.},
urldate = {2021-01-29},
howpublished = {arXiv preprint arXiv:1909.12077},
eprint = {1909.12077},
archiveprefix = {arXiv},
author = {Zhong, Yaofeng Desmond and Dey, Biswadip and Chakraborty, Amit},
month = apr,
year = {2020},
keywords = {Computer Science - Machine Learning, Electrical Engineering and Systems Science - Systems and Control, Physics - Computational Physics, Statistics - Machine Learning},
}
@article{Wu2019,
title = {Numerical {Aspects} for {Approximating} {Governing} {Equations} {Using} {Data}},
volume = {384},
issn = {0021-9991},
url = {http://arxiv.org/abs/1809.09170},
doi = {10.1016/j.jcp.2019.01.030},
abstract = {We present effective numerical algorithms for locally recovering unknown governing differential equations from measurement data. We employ a set of standard basis functions, e.g., polynomials, to approximate the governing equation with high accuracy. Upon recasting the problem into a function approximation problem, we discuss several important aspects for accurate approximation. Most notably, we discuss the importance of using a large number of short bursts of trajectory data, rather than using data from a single long trajectory. Several options for the numerical algorithms to perform accurate approximation are then presented, along with an error estimate of the final equation approximation. We then present an extensive set of numerical examples of both linear and nonlinear systems to demonstrate the properties and effectiveness of our equation recovery algorithms.},
language = {en},
urldate = {2021-01-26},
journal = {Journal of Computational Physics},
author = {Wu, Kailiang and Xiu, Dongbin},
month = may,
year = {2019},
eprint = {1809.09170},
archiveprefix = {arXiv},
keywords = {Computer Science - Machine Learning, Mathematics - Dynamical Systems, Mathematics - Numerical Analysis, Statistics - Machine Learning},
pages = {200--221},
}
@article{Shin2020,
title = {On the convergence of physics informed neural networks for linear second-order elliptic and parabolic type {PDEs}},
volume = {28},
issn = {1815-2406, 1991-7120},
url = {http://arxiv.org/abs/2004.01806},
doi = {10.4208/cicp.OA-2020-0193},
abstract = {Physics informed neural networks (PINNs) are deep learning based techniques for solving partial differential equations (PDEs) encountered in computational science and engineering. Guided by data and physical laws, PINNs find a neural network that approximates the solution to a system of PDEs. Such a neural network is obtained by minimizing a loss function in which any prior knowledge of PDEs and data are encoded. Despite its remarkable empirical success in one, two or three dimensional problems, there is little theoretical justification for PINNs. As the number of data grows, PINNs generate a sequence of minimizers which correspond to a sequence of neural networks. We want to answer the question: Does the sequence of minimizers converge to the solution to the PDE? We consider two classes of PDEs: linear second-order elliptic and parabolic. By adapting the Schauder approach and the maximum principle, we show that the sequence of minimizers strongly converges to the PDE solution in \$C{\textasciicircum}0\$. Furthermore, we show that if each minimizer satisfies the initial/boundary conditions, the convergence mode becomes \$H{\textasciicircum}1\$. Computational examples are provided to illustrate our theoretical findings. To the best of our knowledge, this is the first theoretical work that shows the consistency of PINNs.},
number = {5},
urldate = {2021-01-26},
journal = {Communications in Computational Physics},
author = {Shin, Yeonjong and Darbon, Jerome and Karniadakis, George Em},
month = jun,
year = {2020},
eprint = {2004.01806},
archiveprefix = {arXiv},
keywords = {Computer Science - Machine Learning, Mathematics - Numerical Analysis},
pages = {2042--2074},
}
@inproceedings{thule_towards_2018,
address = {Toulouse, France},
title = {Towards {Verification} of {Hybrid} {Co}-simulation {Algorithms}},
isbn = {978-3-030-04771-9},
doi = {10.1007/978-3-030-04771-9_1},
booktitle = {Workshop on {Formal} {Co}-{Simulation} of {Cyber}-{Physical} {Systems}},
publisher = {Springer},
author = {Thule, Casper and Gomes, Cláudio and Deantoni, Julien and Larsen, Peter Gorm and Brauer, Jörg and Vangheluwe, Hans},
year = {2018},
}
@article{kofman_quantized-state_2001,
title = {Quantized-state systems: a {DEVS} {Approach} for continuous system simulation},
volume = {18},
issn = {0740-6797},
number = {3},
journal = {Transactions of The Society for Modeling and Simulation International},
author = {Kofman, Ernesto and Junco, Sergio},
year = {2001},
note = {Publisher: [La Jolla, CA]: The Society, c1984-c2001.},
pages = {123--132},
}
@article{marsden_discrete_2001,
title = {Discrete mechanics and variational integrators},
volume = {10},
issn = {0962-4929, 1474-0508},
url = {https://www.cambridge.org/core/product/identifier/S096249290100006X/type/journal_article},
doi = {10.1017/S096249290100006X},
abstract = {This paper gives a review of integration algorithms for finite dimensional
mechanical systems that are based on discrete variational principles. The
variational technique gives a unified treatment of many symplectic schemes,
including those of higher order, as well as a natural treatment of the discrete
Noether theorem. The approach also allows us to include forces, dissipation
and constraints in a natural way. Amongst the many specific schemes treated
as examples, the Verlet, SHAKE, RATTLE, Newmark, and the symplectic
partitioned Runge–Kutta schemes are presented.},
language = {en},
urldate = {2020-02-20},
journal = {Acta Numerica},
author = {Marsden, J. E. and West, M.},
month = may,
year = {2001},
pages = {357--514},
}
@incollection{koziel_basics_2020,
address = {Cham},
title = {Basics of {Data}-{Driven} {Surrogate} {Modeling}},
isbn = {978-3-030-38925-3, 978-3-030-38926-0},
url = {http://link.springer.com/10.1007/978-3-030-38926-0_2},
language = {en},
urldate = {2020-12-10},
booktitle = {Performance-{Driven} {Surrogate} {Modeling} of {High}-{Frequency} {Structures}},
publisher = {Springer International Publishing},
author = {Koziel, Slawomir and Pietrenko-Dabrowska, Anna},
year = {2020},
doi = {10.1007/978-3-030-38926-0_2},
pages = {23--58},
}
@article{forrester_recent_2009,
title = {Recent advances in surrogate-based optimization},
volume = {45},
issn = {0376-0421},
url = {https://linkinghub.elsevier.com/retrieve/pii/S0376042108000766},
doi = {10.1016/j.paerosci.2008.11.001},
language = {en},
number = {1-3},
urldate = {2020-12-10},
journal = {Progress in Aerospace Sciences},
author = {Forrester, Alexander I. J. and Keane, Andy J.},
month = jan,
year = {2009},
pages = {50--79},
}
@book{mohamed_machine_2018,
address = {Cham},
title = {Machine {Learning} for {Model} {Order} {Reduction}},
isbn = {978-3-319-75713-1, 978-3-319-75714-8},
url = {http://link.springer.com/10.1007/978-3-319-75714-8},
language = {en},
urldate = {2020-12-10},
publisher = {Springer International Publishing},
author = {Mohamed, Khaled Salah},
year = {2018},
doi = {10.1007/978-3-319-75714-8},
}
@misc{rackauckas_modern_nodate,
title = {Modern {Differential} {Equations} {Solver} {Software}: {Where} {We} {Are} and {Where} {We}'re {Headed}},
language = {en},
author = {Rackauckas, Chris},
internal-note = {export had no venue or year and a page count (44) in the pages field; entry was typed @article with no journal -- verify publication status and complete the fields},
}
@article{Plebe2019,
title = {The {Unbearable} {Shallow} {Understanding} of {Deep} {Learning}},
volume = {29},
issn = {1572-8641},
url = {https://doi.org/10.1007/s11023-019-09512-8},
doi = {10.1007/s11023-019-09512-8},
abstract = {This paper analyzes the rapid and unexpected rise of deep learning within Artificial Intelligence and its applications. It tackles the possible reasons for this remarkable success, providing candidate paths towards a satisfactory explanation of why it works so well, at least in some domains. A historical account is given for the ups and downs, which have characterized neural networks research and its evolution from “shallow” to “deep” learning architectures. A precise account of “success” is given, in order to sieve out aspects pertaining to marketing or sociology of research, and the remaining aspects seem to certify a genuine value of deep learning, calling for explanation. The alleged two main propelling factors for deep learning, namely computing hardware performance and neuroscience findings, are scrutinized, and evaluated as relevant but insufficient for a comprehensive explanation. We review various attempts that have been made to provide mathematical foundations able to justify the efficiency of deep learning, and we deem this is the most promising road to follow, even if the current achievements are too scattered and relevant for very limited classes of deep neural models. The authors’ take is that most of what can explain the very nature of why deep learning works at all and even very well across so many domains of application is still to be understood and further research, which addresses the theoretical foundation of artificial learning, is still very much needed.},
language = {en},
number = {4},
urldate = {2020-12-07},
journal = {Minds and Machines},
author = {Plebe, Alessio and Grasso, Giorgio},
month = dec,
year = {2019},
pages = {515--553},
}
@article{wang_review_2007,
title = {Review of {Metamodeling} {Techniques} in {Support} of {Engineering} {Design} {Optimization}},
volume = {129},
issn = {1050-0472, 1528-9001},
url = {https://asmedigitalcollection.asme.org/mechanicaldesign/article/129/4/370/466824/Review-of-Metamodeling-Techniques-in-Support-of},
doi = {10.1115/1.2429697},
abstract = {Computation-intensive design problems are becoming increasingly common in manufacturing industries. The computation burden is often caused by expensive analysis and simulation processes in order to reach a comparable level of accuracy as physical testing data. To address such a challenge, approximation or metamodeling techniques are often used. Metamodeling techniques have been developed from many different disciplines including statistics, mathematics, computer science, and various engineering disciplines. These metamodels are initially developed as “surrogates” of the expensive simulation process in order to improve the overall computation efficiency. They are then found to be a valuable tool to support a wide scope of activities in modern engineering design, especially design optimization. This work reviews the state-of-the-art metamodel-based techniques from a practitioner’s perspective according to the role of metamodeling in supporting design optimization, including model approximation, design space exploration, problem formulation, and solving various types of optimization problems. Challenges and future development of metamodeling in support of engineering design is also analyzed and discussed.},
language = {en},
number = {4},
urldate = {2020-11-30},
journal = {Journal of Mechanical Design},
author = {Wang, G. Gary and Shan, S.},
month = apr,
year = {2007},
pages = {370--380},
}
@book{forrester_engineering_2008,
address = {Chichester, West Sussex, England ; Hoboken, NJ},
title = {Engineering design via surrogate modelling: a practical guide},
isbn = {978-0-470-06068-1},
shorttitle = {Engineering design via surrogate modelling},
publisher = {J. Wiley},
author = {Forrester, Alexander I. J. and Sóbester, András and Keane, A. J.},
year = {2008},
keywords = {Engineering design, Mathematical models, Statistical methods},
}
@article{deshmukh_design_2017,
title = {Design of {Dynamic} {Systems} {Using} {Surrogate} {Models} of {Derivative} {Functions}},
volume = {139},
issn = {1050-0472, 1528-9001},
url = {https://asmedigitalcollection.asme.org/mechanicaldesign/article/doi/10.1115/1.4037407/367011/Design-of-Dynamic-Systems-Using-Surrogate-Models},
doi = {10.1115/1.4037407},
abstract = {Optimization of dynamic systems often requires system simulation. Several important classes of dynamic system models have computationally expensive time derivative functions, resulting in simulations that are significantly slower than real time. This makes design optimization based on these models impractical. An efficient two-loop method, based on surrogate modeling, is presented here for solving dynamic system design problems with computationally expensive derivative functions. A surrogate model is constructed for only the derivative function instead of the simulation response. Simulation is performed based on the computationally inexpensive surrogate derivative function; this strategy preserves the nature of the dynamic system, and improves computational efficiency and accuracy compared to conventional surrogate modeling. The inner-loop optimization problem is solved for a given derivative function surrogate model (DFSM), and the outer loop updates the surrogate model based on optimization results. One unique challenge of this strategy is to ensure surrogate model accuracy in two regions: near the optimal point in the design space, and near the state trajectory in the state space corresponding to the optimal design. The initial evidence of method effectiveness is demonstrated first using two simple design examples, followed by a more detailed wind turbine codesign problem that accounts for aeroelastic effects and simultaneously optimizes physical and control system design. In the last example, a linear state-dependent model is used that requires computationally expensive matrix updates when either state or design variables change. Results indicate an order-of-magnitude reduction in function evaluations when compared to conventional surrogate modeling. The DFSM method is expected to be beneficial only for problems where derivative function evaluation expense, and not large problem dimension, is the primary contributor to solution expense (a restricted but important problem class). The initial studies presented here revealed opportunities for potential further method improvement and deeper investigation.},
language = {en},
number = {10},
urldate = {2020-11-30},
journal = {Journal of Mechanical Design},
author = {Deshmukh, Anand P. and Allison, James T.},
month = oct,
year = {2017},
pages = {101402},
}
@book{astrom_feedback_2010,
title = {Feedback systems: an introduction for scientists and engineers},
isbn = {1-4008-2873-2},
publisher = {Princeton University Press},
author = {Åström, Karl Johan and Murray, Richard M.},
year = {2010},
}
@article{fridman_tutorial_2014,
title = {Tutorial on {Lyapunov}-based methods for time-delay systems},
volume = {20},
issn = {0947-3580},
url = {https://linkinghub.elsevier.com/retrieve/pii/S0947358014000764},
doi = {10.1016/j.ejcon.2014.10.001},
language = {en},
number = {6},
urldate = {2020-11-18},
journal = {European Journal of Control},
author = {Fridman, Emilia},
month = nov,
year = {2014},
pages = {271--283},
}
@misc{derek_rowell_time-domain_2002,
title = {Time-{Domain} {Solution} of {LTI} {State} {Equations}},
author = {Rowell, Derek},
year = {2002},
internal-note = {author was exported as a corporate name {{Derek Rowell}}; looks like lecture notes -- add howpublished/institution once confirmed},
}
@book{astrom_computer-controlled_2011,
title = {Computer-controlled systems: theory and design},
publisher = {Courier Corporation},
author = {Åström, Karl J. and Wittenmark, Björn},
year = {2011},
}
@inproceedings{gomes_stable_2017,
address = {Darmstadt, Germany},
title = {Stable {Adaptive} {Co}-simulation: {A} {Switched} {Systems} {Approach}},
volume = {35},
doi = {10.1007/978-3-030-14883-6_5},
booktitle = {{IUTAM} {Symposium} on {Co}-{Simulation} and {Solver} {Coupling}},
publisher = {Springer},
author = {Gomes, Cláudio and Legat, Benoît and Jungers, Raphaël M. and Vangheluwe, Hans},
year = {2017},
pages = {81--97},
}
@inproceedings{gomes_minimally_2018,
address = {Miami Beach, FL, USA},
title = {Minimally {Constrained} {Stable} {Switched} {Systems} and {Application} to {Co}-simulation},
doi = {10.1109/CDC.2018.8619223},
booktitle = {{IEEE} {Conference} on {Decision} and {Control}},
author = {Gomes, Cláudio and Legat, Benoît and Jungers, Raphaël and Vangheluwe, Hans},
year = {2018},
pages = {5676--5681},
}
@inproceedings{gomes_stabilization_2018,
address = {Toulouse, France},
title = {Stabilization {Technique} in {INTO}-{CPS}},
volume = {11176},
isbn = {978-3-030-04771-9},
doi = {10.1007/978-3-030-04771-9_4},
booktitle = {2nd {Workshop} on {Formal} {Co}-{Simulation} of {Cyber}-{Physical} {Systems}},
publisher = {Springer},
author = {Gomes, Cláudio and Thule, Casper and Lausdahl, Kenneth and Larsen, Peter Gorm and Vangheluwe, Hans},
year = {2018},
}
@techreport{gomes_co-simulation_2018,
address = {Belgium},
title = {Co-simulation of {Continuous} {Systems}: {A} {Tutorial}},
shorttitle = {Co-simulation of {Continuous} {Systems}},
url = {http://arxiv.org/abs/1809.08463},
number = {arXiv:1809.08463},
urldate = {2018-09-25},
institution = {University of Antwerp},
author = {Gomes, Cláudio and Thule, Casper and Larsen, Peter Gorm and Denil, Joachim and Vangheluwe, Hans},
year = {2018},
eprint = {1809.08463},
archiveprefix = {arXiv},
keywords = {65Y10, Computer Science - Computational Engineering, Finance, and Science, I.6.1, I.6.7, Mathematics - Classical Analysis and ODEs},
}
@book{hairer_solving_1996,
title = {Solving ordinary differential equations {II}: {Stiff} and differential-algebraic problems},
isbn = {3-540-60452-9},
number = {14},
publisher = {Springer-Verlag Berlin Heidelberg},
author = {Hairer, Ernst and Wanner, Gerhard},
year = {1996},
note = {00020},
}
@techreport{gomes_minimally_2018-1,
address = {Antwerp, Belgium},
title = {Minimally {Constrained} {Stable} {Switched} {Systems} and {Application} to {Co}-simulation},
url = {http://arxiv.org/abs/1809.02648},
number = {arXiv:1809.02648},
institution = {University of Antwerp},
author = {Gomes, Cláudio and Jungers, Raphaël and Legat, Benoît and Vangheluwe, Hans},
year = {2018},
note = {00009},
}
@book{stuart_dynamical_1998,
title = {Dynamical systems and numerical analysis},
volume = {2},
isbn = {0-521-64563-8},
publisher = {Cambridge University Press},
author = {Stuart, Andrew and Humphries, Anthony R.},
year = {1998},
note = {01100},
}
@book{wanner_solving_1991,
series = {Springer {Series} in {Computational} {Mathematics}},
title = {Solving ordinary differential equations {I}: {Nonstiff} {Problems}},
volume = {1},
publisher = {Springer-Verlag},
author = {Hairer, Ernst and Nørsett, Syvert Paul and Wanner, Gerhard},
year = {1991},
note = {00245},
}
@book{strang_introduction_1993,
title = {Introduction to linear algebra},
volume = {3},
address = {Wellesley, MA},
publisher = {Wellesley-Cambridge Press},
author = {Strang, Gilbert},
year = {1993},
note = {03099},
}
@techreport{mit_introduction_2009,
title = {Introduction to {Numerical} {Methods} for {ODEs}},
institution = {Massachusetts Institute of Technology},
author = {{MIT}},
year = {2009},
note = {00000},
pages = {1--101},
}
@book{leveque_finite_2007,
title = {Finite difference methods for ordinary and partial differential equations: steady-state and time-dependent problems},
volume = {98},
isbn = {0-89871-629-2},
publisher = {SIAM},
author = {LeVeque, Randall J.},
year = {2007},
}
@article{pan_long-time_2018,
title = {Long-{Time} {Predictive} {Modeling} of {Nonlinear} {Dynamical} {Systems} {Using} {Neural} {Networks}},
volume = {2018},
issn = {1076-2787, 1099-0526},
url = {https://www.hindawi.com/journals/complexity/2018/4801012/},
doi = {10.1155/2018/4801012},
abstract = {We study the use of feedforward neural networks (FNN) to develop models of nonlinear dynamical systems from data. Emphasis is placed on predictions at long times, with limited data availability. Inspired by global stability analysis, and the observation of strong correlation between the local error and the maximal singular value of the Jacobian of the ANN, we introduce Jacobian regularization in the loss function. This regularization suppresses the sensitivity of the prediction to the local error and is shown to improve accuracy and robustness. Comparison between the proposed approach and sparse polynomial regression is presented in numerical examples ranging from simple ODE systems to nonlinear PDE systems including vortex shedding behind a cylinder and instability-driven buoyant mixing flow. Furthermore, limitations of feedforward neural networks are highlighted, especially when the training data does not include a low dimensional attractor. Strategies of data augmentation are presented as remedies to address these issues to a certain extent.},
language = {en},
urldate = {2020-11-18},
journal = {Complexity},
author = {Pan, Shaowu and Duraisamy, Karthik},
month = dec,
year = {2018},
note = {00040},
pages = {1--26},
}
@article{li_building_2017,
title = {Building {Energy} {Consumption} {Prediction}: {An} {Extreme} {Deep} {Learning} {Approach}},
volume = {10},
issn = {1996-1073},
shorttitle = {Building {Energy} {Consumption} {Prediction}},
url = {http://www.mdpi.com/1996-1073/10/10/1525},
doi = {10.3390/en10101525},
language = {en},
number = {10},
urldate = {2020-11-18},
journal = {Energies},
author = {Li, Chengdong and Ding, Zixiang and Zhao, Dongbin and Yi, Jianqiang and Zhang, Guiqing},
month = oct,
year = {2017},
note = {00126},
pages = {1525},
}
@article{tian_deep_2018,
title = {A {Deep} {Neural} {Network} {Model} for {Short}-{Term} {Load} {Forecast} {Based} on {Long} {Short}-{Term} {Memory} {Network} and {Convolutional} {Neural} {Network}},
volume = {11},
issn = {1996-1073},
url = {http://www.mdpi.com/1996-1073/11/12/3493},
doi = {10.3390/en11123493},
abstract = {Accurate electrical load forecasting is of great significance to help power companies in better scheduling and efficient management. Since high levels of uncertainties exist in the load time series, it is a challenging task to make accurate short-term load forecast (STLF). In recent years, deep learning approaches provide better performance to predict electrical load in real world cases. The convolutional neural network (CNN) can extract the local trend and capture the same pattern, and the long short-term memory (LSTM) is proposed to learn the relationship in time steps. In this paper, a new deep neural network framework that integrates the hidden feature of the CNN model and the LSTM model is proposed to improve the forecasting accuracy. The proposed model was tested in a real-world case, and detailed experiments were conducted to validate its practicality and stability. The forecasting performance of the proposed model was compared with the LSTM model and the CNN model. The Mean Absolute Error (MAE), Mean Absolute Percentage Error (MAPE) and Root Mean Square Error (RMSE) were used as the evaluation indexes. The experimental results demonstrate that the proposed model can achieve better and stable performance in STLF.},
language = {en},
number = {12},
urldate = {2020-11-18},
journal = {Energies},
author = {Tian, Chujie and Ma, Jian and Zhang, Chunhong and Zhan, Panpan},
month = dec,
year = {2018},
note = {00066},
pages = {3493},
}
@article{ekonomou_greek_2010,
title = {Greek long-term energy consumption prediction using artificial neural networks},
volume = {35},
issn = {03605442},
url = {https://linkinghub.elsevier.com/retrieve/pii/S0360544209004514},
doi = {10.1016/j.energy.2009.10.018},
language = {en},
number = {2},
urldate = {2020-11-18},
journal = {Energy},
author = {Ekonomou, L.},
month = feb,
year = {2010},
note = {00383},
pages = {512--517},
}
@article{Verma2020,
title = {A survey on machine learning applied to dynamic physical systems},
eprint = {2009.09719},
archiveprefix = {arXiv},
url = {https://arxiv.org/abs/2009.09719},
author = {Verma, Sagar},
year = {2020},
note = {00000},
}
@article{xue_district_2019,
title = {District {Heating} {Load} {Prediction} {Algorithm} {Based} on {Feature} {Fusion} {LSTM} {Model}},
volume = {12},
issn = {1996-1073},
url = {https://www.mdpi.com/1996-1073/12/11/2122},
doi = {10.3390/en12112122},
abstract = {The smart district heating system (SDHS) is an important element of the construction of smart cities in Northern China; it plays a significant role in meeting heating requirements and green energy saving in winter. Various Internet of Things (IoT) sensors and wireless transmission technologies are applied to monitor data in real-time and to form a historical database. The accurate prediction of heating loads based on massive historical datasets is the necessary condition and key basis for formulating an optimal heating control strategy in the SDHS, which contributes to the reduction in the consumption of energy and the improvement in the energy dispatching efficiency and accuracy. In order to achieve the high prediction accuracy of SDHS and to improve the representation ability of multi-time-scale features, a novel short-term heating load prediction algorithm based on a feature fusion long short-term memory (LSTM) model (FFLSTM) is proposed. Three characteristics, namely proximity, periodicity, and trend, are found after analyzing the heating load data from the aspect of the hourly time dimension. In order to comprehensively utilize the data’s intrinsic characteristics, three LSTM models are employed to make separate predictions, and, then, the prediction results based on internal features and other external features at the corresponding moments are imported into the high-level LSTM model for fusion processing, which brings a more accurate prediction result of the heating load. Detailed comparisons between the proposed FFLSTM algorithm and the-state-of-art algorithms are conducted in this paper. The experimental results show that the proposed FFLSTM algorithm outperforms others and can obtain a higher prediction accuracy. Furthermore, the impact of selecting different parameters of the FFLSTM model is also studied thoroughly.},
language = {en},
number = {11},
urldate = {2020-11-10},
journal = {Energies},
author = {Xue, Guixiang and Pan, Yu and Lin, Tao and Song, Jiancai and Qi, Chengying and Wang, Zhipan},
month = jun,
year = {2019},
note = {00004},
pages = {2122},
}
@article{san_neural_2018,
title = {Neural network closures for nonlinear model order reduction},
volume = {44},
issn = {1019-7168, 1572-9044},
url = {http://link.springer.com/10.1007/s10444-018-9590-z},
doi = {10.1007/s10444-018-9590-z},
language = {en},
number = {6},
urldate = {2020-09-20},
journal = {Advances in Computational Mathematics},
author = {San, Omer and Maulik, Romit},
month = dec,
year = {2018},
note = {00057},
pages = {1717--1750},
}
@article{san_artificial_2019,
title = {An artificial neural network framework for reduced order modeling of transient flows},
volume = {77},
issn = {10075704},
url = {https://linkinghub.elsevier.com/retrieve/pii/S1007570419301364},
doi = {10.1016/j.cnsns.2019.04.025},
language = {en},
urldate = {2020-09-20},
journal = {Communications in Nonlinear Science and Numerical Simulation},
author = {San, Omer and Maulik, Romit and Ahmed, Mansoor},
month = oct,
year = {2019},
note = {00030},
pages = {271--287},
}
@article{everett_sub-space_2017,
title = {A sub-space artificial neural network for mold cooling in injection molding},
volume = {79},
issn = {09574174},
url = {https://linkinghub.elsevier.com/retrieve/pii/S0957417417301598},
doi = {10.1016/j.eswa.2017.03.013},
language = {en},
urldate = {2020-09-20},
journal = {Expert Systems with Applications},
author = {Everett, Scott E. and Dubay, Rickey},
month = aug,
year = {2017},
note = {00014},
pages = {358--371},
}
@article{mohan_deep_2018,
title = {A {Deep} {Learning} based {Approach} to {Reduced} {Order} {Modeling} for {Turbulent} {Flow} {Control} using {LSTM} {Neural} {Networks}},
url = {http://arxiv.org/abs/1804.09269},
abstract = {Reduced Order Modeling (ROM) for engineering applications has been a major research focus in the past few decades due to the unprecedented physical insight into turbulence offered by high-fidelity CFD. The primary goal of a ROM is to model the key physics/features of a flow-field without computing the full Navier-Stokes (NS) equations. This is accomplished by projecting the high-dimensional dynamics to a low-dimensional subspace, typically utilizing dimensionality reduction techniques like Proper Orthogonal Decomposition (POD), coupled with Galerkin projection. In this work, we demonstrate a deep learning based approach to build a ROM using the POD basis of canonical DNS datasets, for turbulent flow control applications. We find that a type of Recurrent Neural Network, the Long Short Term Memory (LSTM) which has been primarily utilized for problems like speech modeling and language translation, shows attractive potential in modeling temporal dynamics of turbulence. Additionally, we introduce the Hurst Exponent as a tool to study LSTM behavior for non-stationary data, and uncover useful characteristics that may aid ROM development for a variety of applications.},
urldate = {2020-09-20},
eprint = {1804.09269},
archiveprefix = {arXiv},
primaryclass = {physics},
author = {Mohan, Arvind T. and Gaitonde, Datta V.},
month = apr,
year = {2018},
note = {00071
arXiv: 1804.09269},
keywords = {Physics - Computational Physics, Physics - Fluid Dynamics},
}
@article{vincent_stacked_2010,
title = {Stacked denoising autoencoders: {Learning} useful representations in a deep network with a local denoising criterion},
volume = {11},
number = {12},
journal = {Journal of Machine Learning Research},
author = {Vincent, Pascal and Larochelle, Hugo and Lajoie, Isabelle and Bengio, Yoshua and Manzagol, Pierre-Antoine},
year = {2010},
note = {05331},
}
@article{sato_model_2013,
title = {Model {Reduction} of {Three}-{Dimensional} {Eddy} {Current} {Problems} {Based} on the {Method} of {Snapshots}},
volume = {49},
issn = {0018-9464, 1941-0069},
url = {http://ieeexplore.ieee.org/document/6514733/},
doi = {10.1109/TMAG.2013.2241412},
number = {5},
urldate = {2020-10-27},
journal = {IEEE Transactions on Magnetics},
author = {Sato, Yuki and Igarashi, Hajime},
month = may,
year = {2013},
note = {00044},
pages = {1697--1700},
}
@incollection{Rueden2020,
series = {Lecture {Notes} in {Computer} {Science}},
title = {Combining {Machine} {Learning} and {Simulation} to a {Hybrid} {Modelling} {Approach}: {Current} and {Future} {Directions}},
volume = {12080},
shorttitle = {Combining {Machine} {Learning} and {Simulation} to a {Hybrid} {Modelling} {Approach}},
url = {http://link.springer.com/10.1007/978-3-030-44584-3_43},
doi = {10.1007/978-3-030-44584-3_43},
language = {en},
urldate = {2020-07-24},
booktitle = {Advances in {Intelligent} {Data} {Analysis} {XVIII}},
publisher = {Springer},
author = {von Rueden, Laura and Mayer, Sebastian and Sifa, Rafet and Bauckhage, Christian and Garcke, Jochen},
editor = {Berthold, Michael R. and Feelders, Ad and Krempl, Georg},
year = {2020},
pages = {548--560},
}
@article{Rueden2020a,
title = {Informed {Machine} {Learning} -- {A} {Taxonomy} and {Survey} of {Integrating} {Knowledge} into {Learning} {Systems}},
url = {http://arxiv.org/abs/1903.12394},
abstract = {Despite its great success, machine learning can have its limits when dealing with insufficient training data. A potential solution is the additional integration of prior knowledge into the training process, which leads to the notion of informed machine learning. In this paper, we present a structured overview of various approaches in this field. First, we provide a definition and propose a concept for informed machine learning, which illustrates its building blocks and distinguishes it from conventional machine learning. Second, we introduce a taxonomy that serves as a classification framework for informed machine learning approaches. It considers the source of knowledge, its representation, and its integration into the machine learning pipeline. Third, we survey related research and describe how different knowledge representations such as algebraic equations, logic rules, or simulation results can be used in learning systems. This evaluation of numerous papers on the basis of our taxonomy uncovers key methods in the field of informed machine learning.},
urldate = {2020-08-12},
eprint = {1903.12394},
archiveprefix = {arXiv},
author = {von Rueden, Laura and Mayer, Sebastian and Beckh, Katharina and Georgiev, Bogdan and Giesselbach, Sven and Heese, Raoul and Kirsch, Birgit and Pfrommer, Julius and Pick, Annika and Ramamurthy, Rajkumar and Walczak, Michal and Garcke, Jochen and Bauckhage, Christian and Schuecker, Jannis},
month = feb,
year = {2020},
keywords = {Computer Science - Artificial Intelligence, Computer Science - Machine Learning, Statistics - Machine Learning},
}
@misc{dataset_nonlinear,
title = {Nonlinear {Benchmark}},
url = {https://sites.google.com/view/nonlinear-benchmark/benchmarks},
language = {da},
urldate = {2020-11-04},
note = {00396},
}
@misc{Moor1997,
title = {{DAISY} : {A} database for identification of systems},
shorttitle = {{DAISY}},
url = {https://www.semanticscholar.org/paper/DAISY-%3A-A-database-for-identification-of-systems-Moor-Gersem/07a85578601f5f1ab9a52375a9ead67422768050},
abstract = {We point out the existence of a disturbing deficiency in the field of system identification, namely the fact that many results, published in papers, are not reproducible. In many cases, datasets and time series, that are used to illustrate identification methods and algorithms in these publications, are not freely available. We propose to remedy this serious deficiency by setting up a publically accessible website, called DAISY, to which authors can submit datasets that are used to illustrate certain claims and algorithms in their papers. Several additional benefits are discussed as well.},
language = {en},
urldate = {2020-11-04},
author = {De Moor, Bart and De Gersem, Peter and De Schutter, Bart and Favoreel, Wouter},
year = {1997},
note = {00308},
}
@book{Michie1994,
title = {Machine learning, neural and statistical classification},
author = {Michie, D. and Spiegelhalter, D. J. and Taylor, C. C.},
year = {1994},
note = {00003},
}
@book{Bishop2006,
address = {New York},
series = {Information {Science} and {Statistics}},
title = {Pattern {Recognition} and {Machine} {Learning}},
isbn = {978-0-387-31073-2},
url = {https://www.springer.com/gp/book/9780387310732},
abstract = {The dramatic growth in practical applications for machine learning over the last ten years has been accompanied by many important developments in the underlying algorithms and techniques. For example, Bayesian methods have grown from a specialist niche to become mainstream, while graphical models have emerged as a general framework for describing and applying probabilistic techniques. The practical applicability of Bayesian methods has been greatly enhanced by the development of a range of approximate inference algorithms such as variational Bayes and expectation propagation, while new models based on kernels have had a significant impact on both algorithms and applications. This completely new textbook reflects these recent developments while providing a comprehensive introduction to the fields of pattern recognition and machine learning. It is aimed at advanced undergraduates or first-year PhD students, as well as researchers and practitioners. No previous knowledge of pattern recognition or machine learning concepts is assumed. Familiarity with multivariate calculus and basic linear algebra is required, and some experience in the use of probabilities would be helpful though not essential as the book includes a self-contained introduction to basic probability theory. The book is suitable for courses on machine learning, statistics, computer science, signal processing, computer vision, data mining, and bioinformatics. Extensive support is provided for course instructors, including more than 400 exercises, graded according to difficulty. Example solutions for a subset of the exercises are available from the book web site, while solutions for the remainder can be obtained by instructors from the publisher. The book is supported by a great deal of additional material, and the reader is encouraged to visit the book web site for the latest information. Christopher M. 
Bishop is Deputy Director of Microsoft Research Cambridge, and holds a Chair in Computer Science at the University of Edinburgh. He is a Fellow of Darwin College Cambridge, a Fellow of the Royal Academy of Engineering, and a Fellow of the Royal Society of Edinburgh. His previous textbook "Neural Networks for Pattern Recognition" has been widely adopted. Coming soon: *For students, worked solutions to a subset of exercises available on a public web site (for exercises marked "www" in the text) *For instructors, worked solutions to remaining exercises from the Springer web site *Lecture slides to accompany each chapter *Data sets available for download},
language = {en},
urldate = {2020-11-04},
publisher = {Springer-Verlag},
author = {Bishop, Christopher},
year = {2006},
note = {46526},
}
@book{nilsson_learning_1965,
title = {Learning {Machines}},
abstract = {This book is about machines that learn to discover hidden relationships in data. A constant sfream of data bombards our senses and millions of sensory channels carry information into our brains. Brains are also learning machines that condition,},
author = {Nilsson, Nils J.},
year = {1965},
note = {02299},
}
@article{michie1994machine,
title = {Machine learning},
volume = {13},
number = {1994},
journal = {Neural and Statistical Classification},
author = {Michie, Donald and Spiegelhalter, David J and Taylor, CC and {others}},
year = {1994},
note = {04044
Publisher: Technometrics},
pages = {1--298},
}
@book{brunton_data-driven_2019,
title = {Data-driven science and engineering: machine learning, dynamical systems, and control},
publisher = {Cambridge University Press},
author = {Brunton, Steven L. and Kutz, J. Nathan},
year = {2019},
doi = {10.1017/9781108380690},
note = {00000
tex.ids: Brunton2019},
}
@article{liu_dual-dimer_2020,
title = {A {Dual}-{Dimer} {Method} for {Training} {Physics}-{Constrained} {Neural} {Networks} with {Minimax} {Architecture}},
url = {http://arxiv.org/abs/2005.00615},
urldate = {2020-05-18},
eprint = {2005.00615},
archiveprefix = {arXiv},
author = {Liu, Dehao and Wang, Yan},
month = may,
year = {2020},
note = {tex.ids: Liu2020a
arXiv: 2005.00615},
keywords = {Computer Science - Machine Learning, Statistics - Machine Learning},
}
@inproceedings{lynch_neural_2001,
address = {Washington, DC, USA},
title = {Neural network applications in physics},
volume = {3},
isbn = {978-0-7803-7044-9},
url = {http://ieeexplore.ieee.org/document/938482/},
doi = {10.1109/IJCNN.2001.938482},
urldate = {2020-11-03},
booktitle = {{IJCNN}'01. {International} {Joint} {Conference} on {Neural} {Networks}. {Proceedings} ({Cat}. {No}.{01CH37222})},
publisher = {IEEE},
author = {Lynch, M. and Patel, H. and Abrahamse, A. and Rupa Rajendran, A. and Medsker, L.},
year = {2001},
note = {00007},
pages = {2054--2058},
}
@article{Garage2019,
title = {Garage: {A} toolkit for reproducible reinforcement learning research},
url = {https://github.com/rlworkgroup/garage},
journal = {GitHub repository},
author = {{The garage contributors}},
year = {2019},
note = {00000
Publisher: GitHub
tex.commit: be070842071f736eb24f28e4b902a9f144f5c97b},
}
@article{lillicrap_continuous_2019,
title = {Continuous control with deep reinforcement learning},
url = {http://arxiv.org/abs/1509.02971},
abstract = {We adapt the ideas underlying the success of Deep Q-Learning to the continuous action domain. We present an actor-critic, model-free algorithm based on the deterministic policy gradient that can operate over continuous action spaces. Using the same learning algorithm, network architecture and hyper-parameters, our algorithm robustly solves more than 20 simulated physics tasks, including classic problems such as cartpole swing-up, dexterous manipulation, legged locomotion and car driving. Our algorithm is able to find policies whose performance is competitive with those found by a planning algorithm with full access to the dynamics of the domain and its derivatives. We further demonstrate that for many of the tasks the algorithm can learn policies end-to-end: directly from raw pixel inputs.},
urldate = {2020-11-02},
eprint = {1509.02971},
archiveprefix = {arXiv},
author = {Lillicrap, Timothy P. and Hunt, Jonathan J. and Pritzel, Alexander and Heess, Nicolas and Erez, Tom and Tassa, Yuval and Silver, David and Wierstra, Daan},
month = jul,
year = {2019},
note = {04389
arXiv: 1509.02971},
keywords = {Computer Science - Machine Learning, Statistics - Machine Learning},
}
@article{Duan2016,
title = {Benchmarking {Deep} {Reinforcement} {Learning} for {Continuous} {Control}},
url = {http://arxiv.org/abs/1604.06778},
abstract = {Recently, researchers have made significant progress combining the advances in deep learning for learning feature representations with reinforcement learning. Some notable examples include training agents to play Atari games based on raw pixel data and to acquire advanced manipulation skills using raw sensory inputs. However, it has been difficult to quantify progress in the domain of continuous control due to the lack of a commonly adopted benchmark. In this work, we present a benchmark suite of continuous control tasks, including classic tasks like cart-pole swing-up, tasks with very high state and action dimensionality such as 3D humanoid locomotion, tasks with partial observations, and tasks with hierarchical structure. We report novel findings based on the systematic evaluation of a range of implemented reinforcement learning algorithms. Both the benchmark and reference implementations are released at https://github.com/rllab/rllab in order to facilitate experimental reproducibility and to encourage adoption by other researchers.},
urldate = {2020-11-02},
eprint = {1604.06778},
archiveprefix = {arXiv},
primaryclass = {cs},
author = {Duan, Yan and Chen, Xi and Houthooft, Rein and Schulman, John and Abbeel, Pieter},
month = may,
year = {2016},
note = {00954
arXiv: 1604.06778},
keywords = {Computer Science - Artificial Intelligence, Computer Science - Machine Learning, Computer Science - Robotics},
}
@article{Kroll2014,
title = {Benchmark problems for nonlinear system identification and control using {Soft} {Computing} methods: {Need} and overview},
volume = {25},
issn = {1568-4946},
shorttitle = {Benchmark problems for nonlinear system identification and control using {Soft} {Computing} methods},
url = {http://www.sciencedirect.com/science/article/pii/S1568494614003998},
doi = {10.1016/j.asoc.2014.08.034},
abstract = {Using benchmark problems to demonstrate and compare novel methods to the work of others could be more widely adopted by the Soft Computing community. This article contains a collection of several benchmark problems in nonlinear control and system identification, which are presented in a standardized format. Each problem is augmented by examples where it has been adopted for comparison. The selected examples range from component to plant level problems and originate mainly from the areas of mechatronics/drives and process systems. The authors hope that this overview contributes to a better adoption of benchmarking in method development, test and demonstration.},
language = {en},
urldate = {2020-11-02},
journal = {Applied Soft Computing},
author = {Kroll, Andreas and Schulte, Horst},
month = dec,
year = {2014},
note = {00044},
keywords = {Benchmarking, Nonlinear control, Nonlinear system identification},
pages = {496--513},
}
@article{Brockman2016,
title = {{OpenAI} {Gym}},
url = {http://arxiv.org/abs/1606.01540},
abstract = {OpenAI Gym is a toolkit for reinforcement learning research. It includes a growing collection of benchmark problems that expose a common interface, and a website where people can share their results and compare the performance of algorithms. This whitepaper discusses the components of OpenAI Gym and the design decisions that went into the software.},
urldate = {2020-10-21},
eprint = {1606.01540},
archiveprefix = {arXiv},
primaryclass = {cs},
author = {Brockman, Greg and Cheung, Vicki and Pettersson, Ludwig and Schneider, Jonas and Schulman, John and Tang, Jie and Zaremba, Wojciech},
month = jun,
year = {2016},
note = {01869
arXiv: 1606.01540},
keywords = {Computer Science - Artificial Intelligence, Computer Science - Machine Learning},
}
@article{gomes_co-simulation_2018-1,
title = {Co-simulation: a {Survey}},
volume = {51},
doi = {10.1145/3179993},
number = {3},
journal = {ACM Computing Surveys},
author = {Gomes, Cláudio and Thule, Casper and Broman, David and Larsen, Peter Gorm and Vangheluwe, Hans},
year = {2018},
note = {00114},
pages = {49:1--49:33},
}